VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103622

Last change on this file since 103622 was 103622, checked in by vboxsync, 15 months ago

VMM/IEM: Obfuscate most variable indexes we pass around in strict builds so we easily catch register/variable index mixups. bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 103622 2024-03-01 00:42:36Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using RTHeapSimple is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
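/* Example: with the 128 byte unit above, a 300 byte request rounds up to
 * RT_ALIGN_32(300, 128) = 384 bytes, i.e. 384 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT
 * = 3 sub-allocation units, so a successful allocation sets three consecutive
 * bits in the chunk's allocation bitmap. */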
167
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
269/** Pointer to per-chunk unwind info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity, they are allocated as one contiguous
345 * block. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * requested memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits.
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
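/* Scan example (illustrative): with bitmap bits 0..7 = 1,0,0,1,0,0,0,0 and
 * cReqUnits = 3, ASMBitFirstClear finds bit 1; bit 2 is also clear but bit 3
 * is set, so only idxAddBit = 2 units fit there and the scan resumes with
 * ASMBitNextClear after bit 2, yielding bit 4.  Bits 4..6 are clear, so those
 * three bits get set and the 3 * 128 bytes they cover are handed out. */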
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means following the logic described
523 * in iemExecMemAllocatorGrow and attempting to allocate from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * For the alternative allocator we just align the size up to a whole number of units.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
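/* Usage sketch (illustrative only; pabCode and cbCode stand in for the caller's
 * generated instruction buffer and its size):
 *
 *      uint8_t *pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pbDst)
 *      {
 *          memcpy(pbDst, pabCode, cbCode);                        // pages are RW here on darwin
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbCode);  // flip to RX + flush icache
 *      }
 */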
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one
701 * and mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
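/* Example: iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78, while a
 * value of 100 emits 0xe4 0x00 (the second byte keeps the sign bit of the
 * top 7-bit group clear so the decoded value stays positive). */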
803
804
805/**
806 * Emits a ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
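/* Example: 0x7f still fits in a single byte (0x7f), whereas 300 becomes
 * 0xac 0x02: 300 & 0x7f = 0x2c with the continuation bit set, then 300 >> 7 = 2. */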
818
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
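/* Example (assuming the standard SysV AMD64 DWARF numbering, where RBP is
 * register 6): iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02,
 * i.e. DW_CFA_offset|6 followed by ULEB128 2, telling the unwinder that RBP
 * was saved at CFA + 2 * data_alignment_factor = CFA - 16 given the -8 data
 * alignment factor set in the CIE below. */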
848
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symbols */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
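 /* E.g. an intended 256 byte user area is requested as
  * RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so header (32) plus block (288)
  * ends exactly on the next 64 byte line and the following user area comes out
  * 64 byte aligned again (mirroring the cbReq adjustment in
  * iemExecMemAllocatorAlloc). */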
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
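    /*
     * Worked example (editor's illustration, not part of the original source):
     * for cbMax = 40 MB and cbChunk = 0 the code above picks cbMax / 4 = 10 MB,
     * rounds that up to the next power of two, and then rounds cbMax up to a
     * whole number of chunks:
     *
     *     cbChunk    = RT_BIT_32(ASMBitLastSetU32(10 * _1M));        // -> 16 MB
     *     cbMax      = (40 * _1M - 1 + cbChunk) / cbChunk * cbChunk; // -> 48 MB
     *     cMaxChunks = (uint32_t)(cbMax / cbChunk);                  // -> 3
     */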
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
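    /*
     * Sizing note (editor's illustration, not from the original source): the
     * allocation bitmap keeps one bit per sub-allocation unit and eight bits per
     * byte, hence the '+ 3' in the shift above.  Assuming, say, 256-byte units
     * (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT == 8) and 64 MB chunks:
     *
     *     cbBitmap = _64M >> (8 + 3);   // 32 KB of bitmap per chunk
     */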
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
1570
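/*
 * Usage sketch (editor's illustration, not part of the original source): an EMT
 * would typically set the allocator up once with the default chunk size, e.g.:
 *
 *     int rc = iemExecMemAllocatorInit(pVCpu, _512M, _64M, UINT32_MAX);
 *     AssertLogRelRCReturn(rc, rc);
 *
 * With cbMax >= 256 MB the code above picks 64 MB chunks, so this pre-allocates
 * one chunk and allows growth up to eight.  The _512M/_64M figures are made-up
 * values for illustration only.
 */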
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when detecting opcode changes.
1625 * @see iemThreadedFuncWorkerObsoleteTb
1626 */
1627IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1628{
1629 /* We set fSafeToFree to false because we're being called in the context
1630 of a TB callback function, which for native TBs means we cannot release
1631 the executable memory until we've returned all the way back to iemTbExec,
1632 as that return path goes via the native code generated for the TB. */
1633 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1634 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1635 return VINF_IEM_REEXEC_BREAK;
1636}
1637
1638
1639/**
1640 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1643{
1644 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1645 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1646 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1647 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1648 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1649 return VINF_IEM_REEXEC_BREAK;
1650}
1651
1652
1653/**
1654 * Used by TB code when we missed a PC check after a branch.
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1657{
1658 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1659 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1660 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1661 pVCpu->iem.s.pbInstrBuf));
1662 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667
1668/*********************************************************************************************************************************
1669* Helpers: Segmented memory fetches and stores. *
1670*********************************************************************************************************************************/
1671
1672/**
1673 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1674 */
1675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1678 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1679#else
1680 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1687 * to 16 bits.
1688 */
1689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1692 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1693#else
1694 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1695#endif
1696}
1697
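/*
 * Editor's note on the cast chains in the sign-extending fetch helpers
 * (illustrative, not from the original source): the inner signed casts perform
 * the sign extension and the outer unsigned casts truncate/zero-extend again so
 * that only the requested width carries the sign bits.  For a byte of 0x80:
 *
 *     (int8_t)0x80                   == -128
 *     (int16_t)(int8_t)0x80          == (int16_t)0xff80    (sign-extended)
 *     (uint64_t)(uint16_t)0xff80     == 0x000000000000ff80 (zero-extended)
 */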
1698
1699/**
1700 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1701 * to 32 bits.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712/**
1713 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1714 * to 64 bits.
1715 */
1716IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1717{
1718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1719 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1720#else
1721 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1722#endif
1723}
1724
1725
1726/**
1727 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1728 */
1729IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1730{
1731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1732 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1733#else
1734 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1735#endif
1736}
1737
1738
1739/**
1740 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1741 * to 32 bits.
1742 */
1743IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1744{
1745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1746 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1747#else
1748 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1749#endif
1750}
1751
1752
1753/**
1754 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1755 * to 64 bits.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1769 */
1770IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1771{
1772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1773 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1774#else
1775 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1776#endif
1777}
1778
1779
1780/**
1781 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1782 * to 64 bits.
1783 */
1784IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1785{
1786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1787 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1788#else
1789 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1790#endif
1791}
1792
1793
1794/**
1795 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1796 */
1797IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1798{
1799#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1800 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1801#else
1802 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1803#endif
1804}
1805
1806
1807/**
1808 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1809 */
1810IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1811{
1812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1813 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1814#else
1815 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1816#endif
1817}
1818
1819
1820/**
1821 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1826 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1827#else
1828 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1839 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1840#else
1841 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1848 */
1849IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1850{
1851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1852 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1853#else
1854 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1855#endif
1856}
1857
1858
1859
1860/**
1861 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1866 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1867#else
1868 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1879 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1880#else
1881 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store a 32-bit selector value onto a generic stack.
1888 *
1889 * Intel CPUs don't write a whole dword here, thus the special function.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1895#else
1896 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1897#endif
1898}
1899
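/*
 * Editor's note (illustrative, not from the original source): with a 32-bit
 * operand size, recent Intel CPUs push a segment selector using a 16-bit write,
 * leaving the upper half of the stack dword untouched, which is why the
 * selector pushes get their own helpers instead of reusing the plain 32-bit
 * store above.
 */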
1900
1901/**
1902 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1908#else
1909 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1920 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1921#else
1922 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1923#endif
1924}
1925
1926
1927/**
1928 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1933 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1934#else
1935 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1946 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1947#else
1948 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1949#endif
1950}
1951
1952
1953
1954/*********************************************************************************************************************************
1955* Helpers: Flat memory fetches and stores. *
1956*********************************************************************************************************************************/
1957
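/*
 * Editor's note (illustrative, not from the original source): the flat helpers
 * below all follow the same pattern - when the TB itself does the TLB lookup
 * (IEMNATIVE_WITH_TLB_LOOKUP_FETCH and friends) they fall back to the segmented
 * 'Safe' workers, passing iSegReg = UINT8_MAX to indicate a flat address;
 * otherwise they call the dedicated iemMemFlat*Jmp workers directly.
 */
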
1958/**
1959 * Used by TB code to load unsigned 8-bit data w/ flat address.
1960 * @note Zero extending the value to 64-bit to simplify assembly.
1961 */
1962IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1963{
1964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1965 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1966#else
1967 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1968#endif
1969}
1970
1971
1972/**
1973 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1974 * to 16 bits.
1975 * @note Zero extending the value to 64-bit to simplify assembly.
1976 */
1977IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1978{
1979#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1980 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1981#else
1982 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1983#endif
1984}
1985
1986
1987/**
1988 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1989 * to 32 bits.
1990 * @note Zero extending the value to 64-bit to simplify assembly.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1993{
1994#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1995 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1996#else
1997 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1998#endif
1999}
2000
2001
2002/**
2003 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2004 * to 64 bits.
2005 */
2006IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2007{
2008#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2009 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2010#else
2011 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2012#endif
2013}
2014
2015
2016/**
2017 * Used by TB code to load unsigned 16-bit data w/ flat address.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2032 * to 32 bits.
2033 * @note Zero extending the value to 64-bit to simplify assembly.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2038 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2039#else
2040 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2041#endif
2042}
2043
2044
2045/**
2046 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2047 * to 64 bits.
2048 * @note Zero extending the value to 64-bit to simplify assembly.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2053 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2054#else
2055 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to load unsigned 32-bit data w/ flat address.
2062 * @note Zero extending the value to 64-bit to simplify assembly.
2063 */
2064IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2065{
2066#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2067 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2068#else
2069 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2070#endif
2071}
2072
2073
2074/**
2075 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2076 * to 64 bits.
2077 * @note Zero extending the value to 64-bit to simplify assembly.
2078 */
2079IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2080{
2081#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2082 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2083#else
2084 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2085#endif
2086}
2087
2088
2089/**
2090 * Used by TB code to load unsigned 64-bit data w/ flat address.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to store unsigned 8-bit data w/ flat address.
2104 */
2105IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2106{
2107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2108 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2109#else
2110 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2111#endif
2112}
2113
2114
2115/**
2116 * Used by TB code to store unsigned 16-bit data w/ flat address.
2117 */
2118IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2119{
2120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2121 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2122#else
2123 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2124#endif
2125}
2126
2127
2128/**
2129 * Used by TB code to store unsigned 32-bit data w/ flat address.
2130 */
2131IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2132{
2133#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2134 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2135#else
2136 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2137#endif
2138}
2139
2140
2141/**
2142 * Used by TB code to store unsigned 64-bit data w/ flat address.
2143 */
2144IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2145{
2146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2147 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2148#else
2149 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2150#endif
2151}
2152
2153
2154
2155/**
2156 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2161 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2162#else
2163 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2174 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2175#else
2176 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store a segment selector value onto a flat stack.
2183 *
2184 * Intel CPUs don't write a whole dword here, thus the special function.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2190#else
2191 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2203#else
2204 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2211 */
2212IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2213{
2214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2215 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2216#else
2217 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2218#endif
2219}
2220
2221
2222/**
2223 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2224 */
2225IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2226{
2227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2228 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2229#else
2230 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2231#endif
2232}
2233
2234
2235/**
2236 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2237 */
2238IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2239{
2240#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2241 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2242#else
2243 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2244#endif
2245}
2246
2247
2248
2249/*********************************************************************************************************************************
2250* Helpers: Segmented memory mapping. *
2251*********************************************************************************************************************************/
2252
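/*
 * Editor's note (illustrative, not from the original source): each mapping
 * helper returns the host pointer for the guest range and fills in *pbUnmapInfo,
 * which the TB later passes back to one of the commit-and-unmap helpers at the
 * end of this group of sections.
 */
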
2253/**
2254 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2255 * segmentation.
2256 */
2257IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2258 RTGCPTR GCPtrMem, uint8_t iSegReg))
2259{
2260#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2261 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2262#else
2263 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2264#endif
2265}
2266
2267
2268/**
2269 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2270 */
2271IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2272 RTGCPTR GCPtrMem, uint8_t iSegReg))
2273{
2274#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2275 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2276#else
2277 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2278#endif
2279}
2280
2281
2282/**
2283 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2312 * segmentation.
2313 */
2314IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2315 RTGCPTR GCPtrMem, uint8_t iSegReg))
2316{
2317#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2318 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2319#else
2320 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2321#endif
2322}
2323
2324
2325/**
2326 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2327 */
2328IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2329 RTGCPTR GCPtrMem, uint8_t iSegReg))
2330{
2331#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2332 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2333#else
2334 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2335#endif
2336}
2337
2338
2339/**
2340 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2369 * segmentation.
2370 */
2371IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2372 RTGCPTR GCPtrMem, uint8_t iSegReg))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2375 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2376#else
2377 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2386 RTGCPTR GCPtrMem, uint8_t iSegReg))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2389 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2390#else
2391 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2392#endif
2393}
2394
2395
2396/**
2397 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2426 * segmentation.
2427 */
2428IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2429 RTGCPTR GCPtrMem, uint8_t iSegReg))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2432 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2433#else
2434 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2443 RTGCPTR GCPtrMem, uint8_t iSegReg))
2444{
2445#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2446 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2447#else
2448 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2449#endif
2450}
2451
2452
2453/**
2454 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2511 * segmentation.
2512 */
2513IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2514 RTGCPTR GCPtrMem, uint8_t iSegReg))
2515{
2516#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2517 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2518#else
2519 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2520#endif
2521}
2522
2523
2524/**
2525 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2526 */
2527IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2528 RTGCPTR GCPtrMem, uint8_t iSegReg))
2529{
2530#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2531 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#else
2533 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2534#endif
2535}
2536
2537
2538/**
2539 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/*********************************************************************************************************************************
2567* Helpers: Flat memory mapping. *
2568*********************************************************************************************************************************/
2569
2570/**
2571 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2572 * address.
2573 */
2574IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2578#else
2579 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2580#endif
2581}
2582
2583
2584/**
2585 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2586 */
2587IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2588{
2589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2590 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2591#else
2592 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2593#endif
2594}
2595
2596
2597/**
2598 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2599 */
2600IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2601{
2602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2603 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2604#else
2605 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2606#endif
2607}
2608
2609
2610/**
2611 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2612 */
2613IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2614{
2615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2616 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2617#else
2618 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2619#endif
2620}
2621
2622
2623/**
2624 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2625 * address.
2626 */
2627IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2628{
2629#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2630 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2631#else
2632 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2633#endif
2634}
2635
2636
2637/**
2638 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2678 * address.
2679 */
2680IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2684#else
2685 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2731 * address.
2732 */
2733IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2734{
2735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2736 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2737#else
2738 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2739#endif
2740}
2741
2742
2743/**
2744 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2797 */
2798IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2799{
2800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2801 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2802#else
2803 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2804#endif
2805}
2806
2807
2808/**
2809 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2810 * address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/*********************************************************************************************************************************
2862* Helpers: Commit, rollback & unmap *
2863*********************************************************************************************************************************/
2864
2865/**
2866 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2867 */
2868IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2869{
2870 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2871}
2872
2873
2874/**
2875 * Used by TB code to commit and unmap a read-write memory mapping.
2876 */
2877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2878{
2879 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2880}
2881
2882
2883/**
2884 * Used by TB code to commit and unmap a write-only memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-only memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Reinitializes the native recompiler state.
2903 *
2904 * Called before starting a new recompile job.
2905 */
2906static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2907{
2908 pReNative->cLabels = 0;
2909 pReNative->bmLabelTypes = 0;
2910 pReNative->cFixups = 0;
2911#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2912 pReNative->pDbgInfo->cEntries = 0;
2913#endif
2914 pReNative->pTbOrg = pTb;
2915 pReNative->cCondDepth = 0;
2916 pReNative->uCondSeqNo = 0;
2917 pReNative->uCheckIrqSeqNo = 0;
2918 pReNative->uTlbSeqNo = 0;
2919
2920 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2921#if IEMNATIVE_HST_GREG_COUNT < 32
2922 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2923#endif
2924 ;
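    /*
     * Editor's note (illustrative, not from the original source): the extra term
     * above permanently marks the bits of host registers that don't exist as
     * allocated.  E.g. with 16 general purpose registers:
     *
     *     ~(RT_BIT(16) - 1U)   ->   0xffff0000
     *
     * so indexes 16..31 can never be handed out by the register allocator.
     */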
2925 pReNative->Core.bmHstRegsWithGstShadow = 0;
2926 pReNative->Core.bmGstRegShadows = 0;
2927 pReNative->Core.bmVars = 0;
2928 pReNative->Core.bmStack = 0;
2929 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2930 pReNative->Core.u64ArgVars = UINT64_MAX;
2931
2932 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 11);
2933 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2934 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2935 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2936 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2937 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2938 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2939 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2940 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2941 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2942 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2943 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2944
2945 /* Full host register reinit: */
2946 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2947 {
2948 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2949 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2950 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2951 }
2952
2953 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2954 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2955#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2956 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2957#endif
2958#ifdef IEMNATIVE_REG_FIXED_TMP0
2959 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2960#endif
2961 );
2962 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2963 {
2964 fRegs &= ~RT_BIT_32(idxReg);
2965 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2966 }
2967
2968 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2969#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2970 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2971#endif
2972#ifdef IEMNATIVE_REG_FIXED_TMP0
2973 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2974#endif
2975 return pReNative;
2976}
2977
2978
2979/**
2980 * Allocates and initializes the native recompiler state.
2981 *
2982 * This is called the first time an EMT wants to recompile something.
2983 *
2984 * @returns Pointer to the new recompiler state.
2985 * @param pVCpu The cross context virtual CPU structure of the calling
2986 * thread.
2987 * @param pTb The TB that's about to be recompiled.
2988 * @thread EMT(pVCpu)
2989 */
2990static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2991{
2992 VMCPU_ASSERT_EMT(pVCpu);
2993
2994 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2995 AssertReturn(pReNative, NULL);
2996
2997 /*
2998 * Try allocate all the buffers and stuff we need.
2999 */
3000 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3001 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3002 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3003#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3004 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3005#endif
3006 if (RT_LIKELY( pReNative->pInstrBuf
3007 && pReNative->paLabels
3008 && pReNative->paFixups)
3009#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3010 && pReNative->pDbgInfo
3011#endif
3012 )
3013 {
3014 /*
3015 * Set the buffer & array sizes on success.
3016 */
3017 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3018 pReNative->cLabelsAlloc = _8K;
3019 pReNative->cFixupsAlloc = _16K;
3020#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3021 pReNative->cDbgInfoAlloc = _16K;
3022#endif
3023
3024 /* Other constant stuff: */
3025 pReNative->pVCpu = pVCpu;
3026
3027 /*
3028 * Done, just need to save it and reinit it.
3029 */
3030 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3031 return iemNativeReInit(pReNative, pTb);
3032 }
3033
3034 /*
3035 * Failed. Cleanup and return.
3036 */
3037 AssertFailed();
3038 RTMemFree(pReNative->pInstrBuf);
3039 RTMemFree(pReNative->paLabels);
3040 RTMemFree(pReNative->paFixups);
3041#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3042 RTMemFree(pReNative->pDbgInfo);
3043#endif
3044 RTMemFree(pReNative);
3045 return NULL;
3046}
3047
3048
3049/**
3050 * Creates a label.
3051 *
3052 * If the label does not yet have a defined position,
3053 * call iemNativeLabelDefine() later to set it.
3054 *
3055 * @returns Label ID. Throws VBox status code on failure, so no need to check
3056 * the return value.
3057 * @param pReNative The native recompile state.
3058 * @param enmType The label type.
3059 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3060 * label is not yet defined (default).
3061 * @param uData Data associated with the label. Only applicable to
3062 * certain type of labels. Default is zero.
3063 */
3064DECL_HIDDEN_THROW(uint32_t)
3065iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3066 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3067{
3068 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3069
3070 /*
3071 * Locate existing label definition.
3072 *
3073 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3074 * and uData is zero.
3075 */
3076 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3077 uint32_t const cLabels = pReNative->cLabels;
3078 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3079#ifndef VBOX_STRICT
3080 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3081 && offWhere == UINT32_MAX
3082 && uData == 0
3083#endif
3084 )
3085 {
3086#ifndef VBOX_STRICT
3087 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3088 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3089 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3090 if (idxLabel < pReNative->cLabels)
3091 return idxLabel;
3092#else
3093 for (uint32_t i = 0; i < cLabels; i++)
3094 if ( paLabels[i].enmType == enmType
3095 && paLabels[i].uData == uData)
3096 {
3097 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3098 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3099 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3100 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3101 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3102 return i;
3103 }
3104 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3105 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3106#endif
3107 }
3108
3109 /*
3110 * Make sure we've got room for another label.
3111 */
3112 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3113 { /* likely */ }
3114 else
3115 {
3116 uint32_t cNew = pReNative->cLabelsAlloc;
3117 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3118 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3119 cNew *= 2;
3120 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3121 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3122 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3123 pReNative->paLabels = paLabels;
3124 pReNative->cLabelsAlloc = cNew;
3125 }
3126
3127 /*
3128 * Define a new label.
3129 */
3130 paLabels[cLabels].off = offWhere;
3131 paLabels[cLabels].enmType = enmType;
3132 paLabels[cLabels].uData = uData;
3133 pReNative->cLabels = cLabels + 1;
3134
3135 Assert((unsigned)enmType < 64);
3136 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3137
3138 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3139 {
3140 Assert(uData == 0);
3141 pReNative->aidxUniqueLabels[enmType] = cLabels;
3142 }
3143
3144 if (offWhere != UINT32_MAX)
3145 {
3146#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3147 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3148 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3149#endif
3150 }
3151 return cLabels;
3152}
3153
3154
3155/**
3156 * Defines the location of an existing label.
3157 *
3158 * @param pReNative The native recompile state.
3159 * @param idxLabel The label to define.
3160 * @param offWhere The position.
3161 */
3162DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3163{
3164 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3165 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3166 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3167 pLabel->off = offWhere;
3168#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3169 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3170 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3171#endif
3172}
3173
3174
3175/**
3176 * Looks up a label.
3177 *
3178 * @returns Label ID if found, UINT32_MAX if not.
3179 */
3180static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3181 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3182{
3183 Assert((unsigned)enmType < 64);
3184 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3185 {
3186 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3187 return pReNative->aidxUniqueLabels[enmType];
3188
3189 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3190 uint32_t const cLabels = pReNative->cLabels;
3191 for (uint32_t i = 0; i < cLabels; i++)
3192 if ( paLabels[i].enmType == enmType
3193 && paLabels[i].uData == uData
3194 && ( paLabels[i].off == offWhere
3195 || offWhere == UINT32_MAX
3196 || paLabels[i].off == UINT32_MAX))
3197 return i;
3198 }
3199 return UINT32_MAX;
3200}
3201
3202
3203/**
3204 * Adds a fixup.
3205 *
3206 * @throws VBox status code (int) on failure.
3207 * @param pReNative The native recompile state.
3208 * @param offWhere The instruction offset of the fixup location.
3209 * @param idxLabel The target label ID for the fixup.
3210 * @param enmType The fixup type.
3211 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3212 */
3213DECL_HIDDEN_THROW(void)
3214iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3215 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3216{
3217 Assert(idxLabel <= UINT16_MAX);
3218 Assert((unsigned)enmType <= UINT8_MAX);
3219
3220 /*
3221 * Make sure we've got room.
3222 */
3223 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3224 uint32_t const cFixups = pReNative->cFixups;
3225 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3226 { /* likely */ }
3227 else
3228 {
3229 uint32_t cNew = pReNative->cFixupsAlloc;
3230 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3231 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3232 cNew *= 2;
3233 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3234 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3235 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3236 pReNative->paFixups = paFixups;
3237 pReNative->cFixupsAlloc = cNew;
3238 }
3239
3240 /*
3241 * Add the fixup.
3242 */
3243 paFixups[cFixups].off = offWhere;
3244 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3245 paFixups[cFixups].enmType = enmType;
3246 paFixups[cFixups].offAddend = offAddend;
3247 pReNative->cFixups = cFixups + 1;
3248}
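
/*
 * Editor's illustration (not part of the original sources): a minimal sketch of
 * how the label and fixup helpers above are typically combined for a forward
 * branch. The conditional-jump emitter named below is a placeholder for one of
 * the real emitters defined elsewhere in this file; treat the whole block as a
 * usage outline only, hence the #if 0 guard.
 */
#if 0 /* usage sketch only */
static uint32_t iemNativeSketchForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmType)
{
    /* 1. Create the label without a position (offWhere defaults to UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmType);

    /* 2. Emit the branch; the emitter records the patch location via
          iemNativeAddFixup() so the final pass can insert the displacement. */
    off = iemNativeEmitSketchJumpToLabel(pReNative, off, idxLabel); /* placeholder emitter name */

    /* ... emit the code that is skipped when the branch is taken ... */

    /* 3. Define the label once the target offset is known. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif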
3249
3250
3251/**
3252 * Slow code path for iemNativeInstrBufEnsure.
3253 */
3254DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3255{
3256 /* Double the buffer size till we meet the request. */
3257 uint32_t cNew = pReNative->cInstrBufAlloc;
3258 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3259 do
3260 cNew *= 2;
3261 while (cNew < off + cInstrReq);
3262
3263 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3264#ifdef RT_ARCH_ARM64
3265 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3266#else
3267 uint32_t const cbMaxInstrBuf = _2M;
3268#endif
3269 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3270
3271 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3272 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3273
3274#ifdef VBOX_STRICT
3275 pReNative->offInstrBufChecked = off + cInstrReq;
3276#endif
3277 pReNative->cInstrBufAlloc = cNew;
3278 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3279}
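
/*
 * Editor's illustration (not part of the original sources): a rough sketch of
 * the inline fast path expected to sit in front of the slow path above - check
 * the current allocation and only call iemNativeInstrBufEnsureSlow() when more
 * room is required. The real inline wrapper lives in the recompiler header and
 * additionally maintains offInstrBufChecked in strict builds; this is an
 * assumption-level outline only.
 */
#if 0 /* fast-path sketch only */
DECL_INLINE_THROW(PIEMNATIVEINSTR)
iemNativeInstrBufEnsureSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
{
    if (RT_LIKELY(off + cInstrReq <= pReNative->cInstrBufAlloc))
        return pReNative->pInstrBuf;
    return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
}
#endif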
3280
3281#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3282
3283/**
3284 * Grows the static debug info array used during recompilation.
3285 *
3286 * @returns Pointer to the new debug info block; throws VBox status code on
3287 * failure, so no need to check the return value.
3288 */
3289DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3290{
3291 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3292 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3293 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3294 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3295 pReNative->pDbgInfo = pDbgInfo;
3296 pReNative->cDbgInfoAlloc = cNew;
3297 return pDbgInfo;
3298}
3299
3300
3301/**
3302 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3303 */
3304DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3305{
3306 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3307 { /* likely */ }
3308 else
3309 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3310 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3311}
3312
3313
3314/**
3315 * Debug Info: Adds a native offset record, if necessary.
3316 */
3317static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3318{
3319 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3320
3321 /*
3322 * Search backwards to see if we've got a similar record already.
3323 */
3324 uint32_t idx = pDbgInfo->cEntries;
3325 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3326 while (idx-- > idxStop)
3327 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3328 {
3329 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3330 return;
3331 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3332 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3333 break;
3334 }
3335
3336 /*
3337 * Add it.
3338 */
3339 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3340 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3341 pEntry->NativeOffset.offNative = off;
3342}
3343
3344
3345/**
3346 * Debug Info: Record info about a label.
3347 */
3348static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3349{
3350 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3351 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3352 pEntry->Label.uUnused = 0;
3353 pEntry->Label.enmLabel = (uint8_t)enmType;
3354 pEntry->Label.uData = uData;
3355}
3356
3357
3358/**
3359 * Debug Info: Record info about a threaded call.
3360 */
3361static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3362{
3363 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3364 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3365 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3366 pEntry->ThreadedCall.uUnused = 0;
3367 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3368}
3369
3370
3371/**
3372 * Debug Info: Record info about a new guest instruction.
3373 */
3374static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3375{
3376 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3377 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3378 pEntry->GuestInstruction.uUnused = 0;
3379 pEntry->GuestInstruction.fExec = fExec;
3380}
3381
3382
3383/**
3384 * Debug Info: Record info about guest register shadowing.
3385 */
3386static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3387 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3388{
3389 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3390 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3391 pEntry->GuestRegShadowing.uUnused = 0;
3392 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3393 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3394 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3395}
3396
3397#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3398
3399
3400/*********************************************************************************************************************************
3401* Register Allocator *
3402*********************************************************************************************************************************/
3403
3404/**
3405 * Register parameter indexes (indexed by argument number).
3406 */
3407DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3408{
3409 IEMNATIVE_CALL_ARG0_GREG,
3410 IEMNATIVE_CALL_ARG1_GREG,
3411 IEMNATIVE_CALL_ARG2_GREG,
3412 IEMNATIVE_CALL_ARG3_GREG,
3413#if defined(IEMNATIVE_CALL_ARG4_GREG)
3414 IEMNATIVE_CALL_ARG4_GREG,
3415# if defined(IEMNATIVE_CALL_ARG5_GREG)
3416 IEMNATIVE_CALL_ARG5_GREG,
3417# if defined(IEMNATIVE_CALL_ARG6_GREG)
3418 IEMNATIVE_CALL_ARG6_GREG,
3419# if defined(IEMNATIVE_CALL_ARG7_GREG)
3420 IEMNATIVE_CALL_ARG7_GREG,
3421# endif
3422# endif
3423# endif
3424#endif
3425};
3426
3427/**
3428 * Call register masks indexed by argument count.
3429 */
3430DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3431{
3432 0,
3433 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3434 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3435 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3436 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3437 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3438#if defined(IEMNATIVE_CALL_ARG4_GREG)
3439 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3440 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3441# if defined(IEMNATIVE_CALL_ARG5_GREG)
3442 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3443 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3444# if defined(IEMNATIVE_CALL_ARG6_GREG)
3445 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3446 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3447 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3448# if defined(IEMNATIVE_CALL_ARG7_GREG)
3449 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3450 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3451 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3452# endif
3453# endif
3454# endif
3455#endif
3456};
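
/*
 * Editor's illustration (not part of the original sources): how the two tables
 * above are typically indexed when setting up a helper call. cArgs is assumed
 * to be the argument count of the call being emitted; the fragment is a sketch
 * only, hence the #if 0 guard.
 */
#if 0 /* usage sketch only */
    uint8_t  const idxArg0Reg = g_aidxIemNativeCallRegs[0];    /* host register for argument #0 */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[cArgs];  /* mask of all registers used for cArgs arguments */
    /* fArgRegs can then be used to flush or free any shadowed or variable-held
       registers that the call is about to clobber. */
#endif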
3457
3458#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3459/**
3460 * BP offset of the stack argument slots.
3461 *
3462 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3463 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3464 */
3465DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3466{
3467 IEMNATIVE_FP_OFF_STACK_ARG0,
3468# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3469 IEMNATIVE_FP_OFF_STACK_ARG1,
3470# endif
3471# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3472 IEMNATIVE_FP_OFF_STACK_ARG2,
3473# endif
3474# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3475 IEMNATIVE_FP_OFF_STACK_ARG3,
3476# endif
3477};
3478AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3479#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3480
3481/**
3482 * Info about shadowed guest register values.
3483 * @see IEMNATIVEGSTREG
3484 */
3485static struct
3486{
3487 /** Offset in VMCPU. */
3488 uint32_t off;
3489 /** The field size. */
3490 uint8_t cb;
3491 /** Name (for logging). */
3492 const char *pszName;
3493} const g_aGstShadowInfo[] =
3494{
3495#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3496 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3497 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3498 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3499 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3500 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3501 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3502 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3503 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3504 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3505 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3506 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3507 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3508 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3509 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3510 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3511 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3512 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3513 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3514 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3515 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3516 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3517 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3518 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3519 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3520 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3521 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3522 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3523 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3524 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3525 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3526 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3527 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3528 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3529 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3530 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3531 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3532 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3533 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3534 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3535 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3536 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3537 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3538 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3539 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3540 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3541 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3542#undef CPUMCTX_OFF_AND_SIZE
3543};
3544AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
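
/*
 * Editor's illustration (not part of the original sources): a sketch of a
 * table-driven load of a guest register into a host register using the off/cb
 * fields above. The iemNativeEmitLoadGprFromVCpuUxx emitter names are assumed
 * to match helpers defined elsewhere; verify against the emitter header before
 * relying on them.
 */
#if 0 /* sketch only */
static uint32_t iemNativeSketchLoadGstReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                          uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
{
    switch (g_aGstShadowInfo[enmGstReg].cb)
    {
        case sizeof(uint64_t):
            return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
        case sizeof(uint32_t):
            return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
        case sizeof(uint16_t):
            return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
        default:
            AssertFailed();
            return off;
    }
}
#endif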
3545
3546
3547/** Host CPU general purpose register names. */
3548DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3549{
3550#ifdef RT_ARCH_AMD64
3551 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3552#elif defined(RT_ARCH_ARM64)
3553 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3554 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3555#else
3556# error "port me"
3557#endif
3558};
3559
3560
3561DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3562 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3563{
3564 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3565
3566 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3567 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3568 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3569 return (uint8_t)idxReg;
3570}
3571
3572
3573#if 0 /* unused */
3574/**
3575 * Tries to locate a suitable register in the given register mask.
3576 *
3577 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3578 * failed.
3579 *
3580 * @returns Host register number on success, returns UINT8_MAX on failure.
3581 */
3582static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3583{
3584 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3585 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3586 if (fRegs)
3587 {
3588 /** @todo pick better here: */
3589 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3590
3591 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3592 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3593 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3594 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3595
3596 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3597 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3598 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3599 return idxReg;
3600 }
3601 return UINT8_MAX;
3602}
3603#endif /* unused */
3604
3605
3606/**
3607 * Locate a register, possibly freeing one up.
3608 *
3609 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3610 * failed.
3611 *
3612 * @returns Host register number on success. Returns UINT8_MAX if no registers
3613 * were found; the caller is expected to deal with this and raise an
3614 * allocation type specific status code (if desired).
3615 *
3616 * @throws VBox status code if we run into trouble spilling a variable or
3617 * recording debug info. Does NOT throw anything if we're out of
3618 * registers, though.
3619 */
3620static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3621 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3622{
3623 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3624 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3625 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3626
3627 /*
3628 * Try a freed register that's shadowing a guest register.
3629 */
3630 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3631 if (fRegs)
3632 {
3633 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3634
3635#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3636 /*
3637 * When we have liveness information, we use it to kick out all shadowed
3638 * guest registers that will not be needed any more in this TB. If we're
3639 * lucky, this may prevent us from ending up here again.
3640 *
3641 * Note! We must consider the previous entry here so we don't free
3642 * anything that the current threaded function requires (current
3643 * entry is produced by the next threaded function).
3644 */
3645 uint32_t const idxCurCall = pReNative->idxCurCall;
3646 if (idxCurCall > 0)
3647 {
3648 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3649
3650# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3651 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3652 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3653 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3654# else
3655 /* Construct a mask of the registers not in the read or write state.
3656 Note! We could skip writes, if they aren't from us, as this is just
3657 a hack to prevent trashing registers that have just been written
3658 or will be written when we retire the current instruction. */
3659 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3660 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3661 & IEMLIVENESSBIT_MASK;
3662# endif
3663 /* Merge EFLAGS. */
3664 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3665 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3666 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3667 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3668 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3669
3670 /* If it matches any shadowed registers. */
3671 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3672 {
3673 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3674 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3675 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3676
3677 /* See if we've got any unshadowed registers we can return now. */
3678 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3679 if (fUnshadowedRegs)
3680 {
3681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3682 return (fPreferVolatile
3683 ? ASMBitFirstSetU32(fUnshadowedRegs)
3684 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3685 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3686 - 1;
3687 }
3688 }
3689 }
3690#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3691
3692 unsigned const idxReg = (fPreferVolatile
3693 ? ASMBitFirstSetU32(fRegs)
3694 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3695 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3696 - 1;
3697
3698 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3699 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3700 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3701 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3702
3703 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3704 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3705 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3706 return idxReg;
3707 }
3708
3709 /*
3710 * Try free up a variable that's in a register.
3711 *
3712 * We do two rounds here, first evacuating variables that don't need to be
3713 * saved on the stack, then in the second round moving things to the stack.
3714 */
3715 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3716 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3717 {
3718 uint32_t fVars = pReNative->Core.bmVars;
3719 while (fVars)
3720 {
3721 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3722 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3723 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3724 && (RT_BIT_32(idxReg) & fRegMask)
3725 && ( iLoop == 0
3726 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3727 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3728 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3729 {
3730 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3731 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3732 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3733 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3734 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3735 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3736
3737 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3738 {
3739 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3740 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3741 }
3742
3743 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3744 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3745
3746 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3747 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3748 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3749 return idxReg;
3750 }
3751 fVars &= ~RT_BIT_32(idxVar);
3752 }
3753 }
3754
3755 return UINT8_MAX;
3756}
3757
3758
3759/**
3760 * Reassigns a variable to a different register specified by the caller.
3761 *
3762 * @returns The new code buffer position.
3763 * @param pReNative The native recompile state.
3764 * @param off The current code buffer position.
3765 * @param idxVar The variable index.
3766 * @param idxRegOld The old host register number.
3767 * @param idxRegNew The new host register number.
3768 * @param pszCaller The caller for logging.
3769 */
3770static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3771 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3772{
3773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3774 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3775 RT_NOREF(pszCaller);
3776
3777 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3778
3779 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3780 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3781 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3783
3784 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3785 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3786 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3787 if (fGstRegShadows)
3788 {
3789 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3790 | RT_BIT_32(idxRegNew);
3791 while (fGstRegShadows)
3792 {
3793 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3794 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3795
3796 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3797 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3798 }
3799 }
3800
3801 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3802 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3803 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3804 return off;
3805}
3806
3807
3808/**
3809 * Moves a variable to a different register or spills it onto the stack.
3810 *
3811 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3812 * kinds can easily be recreated if needed later.
3813 *
3814 * @returns The new code buffer position.
3815 * @param pReNative The native recompile state.
3816 * @param off The current code buffer position.
3817 * @param idxVar The variable index.
3818 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3819 * call-volatile registers.
3820 */
3821static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3822 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3823{
3824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3825 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3826 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3827 Assert(!pVar->fRegAcquired);
3828
3829 uint8_t const idxRegOld = pVar->idxReg;
3830 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3831 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3832 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3833 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3834 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3835 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3836 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3837 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3838
3839
3840 /** @todo Add statistics on this.*/
3841 /** @todo Implement basic variable liveness analysis (python) so variables
3842 * can be freed immediately once no longer used. Without this we risk
3843 * trashing registers and stack slots on dead variables.
3844 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3845
3846 /*
3847 * First try move it to a different register, as that's cheaper.
3848 */
3849 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3850 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3851 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3852 if (fRegs)
3853 {
3854 /* Avoid using shadow registers, if possible. */
3855 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3856 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3857 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3858 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3859 }
3860
3861 /*
3862 * Otherwise we must spill the register onto the stack.
3863 */
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3865 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3866 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3867 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3868
3869 pVar->idxReg = UINT8_MAX;
3870 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3871 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3874 return off;
3875}
3876
3877
3878/**
3879 * Allocates a temporary host general purpose register.
3880 *
3881 * This may emit code to save register content onto the stack in order to free
3882 * up a register.
3883 *
3884 * @returns The host register number; throws VBox status code on failure,
3885 * so no need to check the return value.
3886 * @param pReNative The native recompile state.
3887 * @param poff Pointer to the variable with the code buffer position.
3888 * This will be updated if we need to move a variable from
3889 * register to stack in order to satisfy the request.
3890 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3891 * registers (@c true, default) or the other way around
3892 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3893 */
3894DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3895{
3896 /*
3897 * Try find a completely unused register, preferably a call-volatile one.
3898 */
3899 uint8_t idxReg;
3900 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3901 & ~pReNative->Core.bmHstRegsWithGstShadow
3902 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3903 if (fRegs)
3904 {
3905 if (fPreferVolatile)
3906 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3907 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3908 else
3909 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3910 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3911 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3912 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3913 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3914 }
3915 else
3916 {
3917 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3918 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3919 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3920 }
3921 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3922}
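
/*
 * Editor's illustration (not part of the original sources): typical life cycle
 * of a temporary register. iemNativeRegFreeTmp is assumed to be the matching
 * release helper defined further down in this file; uSomeValue is a
 * placeholder. Sketch only.
 */
#if 0 /* usage sketch only */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeValue);
    /* ... use idxTmpReg in further emitted code ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif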
3923
3924
3925/**
3926 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3927 * registers.
3928 *
3929 * @returns The host register number; throws VBox status code on failure,
3930 * so no need to check the return value.
3931 * @param pReNative The native recompile state.
3932 * @param poff Pointer to the variable with the code buffer position.
3933 * This will be updated if we need to move a variable from
3934 * register to stack in order to satisfy the request.
3935 * @param fRegMask Mask of acceptable registers.
3936 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3937 * registers (@c true, default) or the other way around
3938 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3939 */
3940DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3941 bool fPreferVolatile /*= true*/)
3942{
3943 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3944 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3945
3946 /*
3947 * Try find a completely unused register, preferably a call-volatile one.
3948 */
3949 uint8_t idxReg;
3950 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3951 & ~pReNative->Core.bmHstRegsWithGstShadow
3952 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3953 & fRegMask;
3954 if (fRegs)
3955 {
3956 if (fPreferVolatile)
3957 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3958 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3959 else
3960 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3961 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3962 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3963 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3964 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3965 }
3966 else
3967 {
3968 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3969 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3970 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3971 }
3972 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3973}
3974
3975
3976/**
3977 * Allocates a temporary register for loading an immediate value into.
3978 *
3979 * This will emit code to load the immediate, unless there happens to be an
3980 * unused register with the value already loaded.
3981 *
3982 * The caller will not modify the returned register, it must be considered
3983 * read-only. Free using iemNativeRegFreeTmpImm.
3984 *
3985 * @returns The host register number; throws VBox status code on failure, so no
3986 * need to check the return value.
3987 * @param pReNative The native recompile state.
3988 * @param poff Pointer to the variable with the code buffer position.
3989 * @param uImm The immediate value that the register must hold upon
3990 * return.
3991 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3992 * registers (@c true, default) or the other way around
3993 * (@c false).
3994 *
3995 * @note Reusing immediate values has not been implemented yet.
3996 */
3997DECL_HIDDEN_THROW(uint8_t)
3998iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3999{
4000 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4001 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4002 return idxReg;
4003}
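
/*
 * Editor's illustration (not part of the original sources): the immediate
 * variant follows the same pattern, with iemNativeRegFreeTmpImm (named in the
 * doc comment above) as the release counterpart. Sketch only.
 */
#if 0 /* usage sketch only */
    uint8_t const idxConstReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit a compare or similar that reads (but never writes) idxConstReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxConstReg);
#endif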
4004
4005#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4006
4007# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4008/**
4009 * Helper for iemNativeLivenessGetStateByGstReg.
4010 *
4011 * @returns IEMLIVENESS_STATE_XXX
4012 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4013 * ORed together.
4014 */
4015DECL_FORCE_INLINE(uint32_t)
4016iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4017{
4018 /* INPUT trumps anything else. */
4019 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4020 return IEMLIVENESS_STATE_INPUT;
4021
4022 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4023 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4024 {
4025 /* If not all sub-fields are clobbered they must be considered INPUT. */
4026 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4027 return IEMLIVENESS_STATE_INPUT;
4028 return IEMLIVENESS_STATE_CLOBBERED;
4029 }
4030
4031 /* XCPT_OR_CALL trumps UNUSED. */
4032 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4033 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4034
4035 return IEMLIVENESS_STATE_UNUSED;
4036}
4037# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4038
4039
4040DECL_FORCE_INLINE(uint32_t)
4041iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4042{
4043# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4044 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4045 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4046# else
4047 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4048 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4049 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4050 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4051# endif
4052}
4053
4054
4055DECL_FORCE_INLINE(uint32_t)
4056iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4057{
4058 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4059 if (enmGstReg == kIemNativeGstReg_EFlags)
4060 {
4061 /* Merge the eflags states to one. */
4062# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4063 uRet = RT_BIT_32(uRet);
4064 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4065 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4066 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4067 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4068 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4069 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4070 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4071# else
4072 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4073 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4074 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4075 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4076 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4077 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4078 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4079# endif
4080 }
4081 return uRet;
4082}
4083
4084
4085# ifdef VBOX_STRICT
4086/** For assertions only: the caller must check that idxCurCall isn't zero. */
4087DECL_FORCE_INLINE(uint32_t)
4088iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4089{
4090 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4091}
4092# endif /* VBOX_STRICT */
4093
4094#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4095
4096/**
4097 * Marks host register @a idxHstReg as containing a shadow copy of guest
4098 * register @a enmGstReg.
4099 *
4100 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4101 * host register before calling.
4102 */
4103DECL_FORCE_INLINE(void)
4104iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4105{
4106 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4107 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4108 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4109
4110 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4111 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4112 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4113 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4114#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4115 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4116 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4117#else
4118 RT_NOREF(off);
4119#endif
4120}
4121
4122
4123/**
4124 * Clear any guest register shadow claims from @a idxHstReg.
4125 *
4126 * The register does not need to be shadowing any guest registers.
4127 */
4128DECL_FORCE_INLINE(void)
4129iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4130{
4131 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4132 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4133 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4134 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4135 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4136
4137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4138 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4139 if (fGstRegs)
4140 {
4141 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4142 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4143 while (fGstRegs)
4144 {
4145 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4146 fGstRegs &= ~RT_BIT_64(iGstReg);
4147 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4148 }
4149 }
4150#else
4151 RT_NOREF(off);
4152#endif
4153
4154 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4155 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4156 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4157}
4158
4159
4160/**
4161 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4162 * and global overview flags.
4163 */
4164DECL_FORCE_INLINE(void)
4165iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4166{
4167 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4168 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4169 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4170 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4171 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4172 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4173 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4174
4175#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4176 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4177 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4178#else
4179 RT_NOREF(off);
4180#endif
4181
4182 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4183 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4184 if (!fGstRegShadowsNew)
4185 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4186 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4187}
4188
4189
4190#if 0 /* unused */
4191/**
4192 * Clear any guest register shadow claim for @a enmGstReg.
4193 */
4194DECL_FORCE_INLINE(void)
4195iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4196{
4197 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4198 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4199 {
4200 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4201 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4202 }
4203}
4204#endif
4205
4206
4207/**
4208 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4209 * as the new shadow of it.
4210 *
4211 * Unlike the other guest reg shadow helpers, this does the logging for you.
4212 * However, the liveness state is not asserted here; the caller must do
4213 * that.
4214 */
4215DECL_FORCE_INLINE(void)
4216iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4217 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4218{
4219 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4220 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4221 {
4222 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4223 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4224 if (idxHstRegOld == idxHstRegNew)
4225 return;
4226 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4227 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4228 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4229 }
4230 else
4231 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4232 g_aGstShadowInfo[enmGstReg].pszName));
4233 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4234}
4235
4236
4237/**
4238 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4239 * to @a idxRegTo.
4240 */
4241DECL_FORCE_INLINE(void)
4242iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4243 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4244{
4245 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4246 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4247 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4248 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4249 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4250 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4251 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4252 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4253 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4254
4255 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4256 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4257 if (!fGstRegShadowsFrom)
4258 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4259 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4260 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4261 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4264 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4265#else
4266 RT_NOREF(off);
4267#endif
4268}
4269
4270
4271/**
4272 * Allocates a temporary host general purpose register for keeping a guest
4273 * register value.
4274 *
4275 * Since we may already have a register holding the guest register value,
4276 * code will be emitted to do the loading if that's not the case. Code may also
4277 * be emitted if we have to free up a register to satisfy the request.
4278 *
4279 * @returns The host register number; throws VBox status code on failure, so no
4280 * need to check the return value.
4281 * @param pReNative The native recompile state.
4282 * @param poff Pointer to the variable with the code buffer
4283 * position. This will be updated if we need to move a
4284 * variable from register to stack in order to satisfy
4285 * the request.
4286 * @param enmGstReg The guest register that is to be updated.
4287 * @param enmIntendedUse How the caller will be using the host register.
4288 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
4289 * register is okay (default). The ASSUMPTION here is
4290 * that the caller has already flushed all volatile
4291 * registers, so this is only applied if we allocate a
4292 * new register.
4293 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4294 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4295 */
4296DECL_HIDDEN_THROW(uint8_t)
4297iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4298 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4299 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4300{
4301 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4302#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4303 AssertMsg( fSkipLivenessAssert
4304 || pReNative->idxCurCall == 0
4305 || enmGstReg == kIemNativeGstReg_Pc
4306 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4307 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4308 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4309 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4310 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4311 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4312#endif
4313 RT_NOREF(fSkipLivenessAssert);
4314#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4315 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4316#endif
4317 uint32_t const fRegMask = !fNoVolatileRegs
4318 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4319 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4320
4321 /*
4322 * First check if the guest register value is already in a host register.
4323 */
4324 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4325 {
4326 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4327 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4328 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4329 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4330
4331 /* It's not supposed to be allocated... */
4332 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4333 {
4334 /*
4335 * If the register will trash the guest shadow copy, try find a
4336 * completely unused register we can use instead. If that fails,
4337 * we need to disassociate the host reg from the guest reg.
4338 */
4339 /** @todo would be nice to know if preserving the register is in any way helpful. */
4340 /* If the purpose is calculations, try duplicate the register value as
4341 we'll be clobbering the shadow. */
4342 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4343 && ( ~pReNative->Core.bmHstRegs
4344 & ~pReNative->Core.bmHstRegsWithGstShadow
4345 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4346 {
4347 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4348
4349 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4350
4351 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4352 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4353 g_apszIemNativeHstRegNames[idxRegNew]));
4354 idxReg = idxRegNew;
4355 }
4356 /* If the current register matches the restrictions, go ahead and allocate
4357 it for the caller. */
4358 else if (fRegMask & RT_BIT_32(idxReg))
4359 {
4360 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4361 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4362 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4363 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4364 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4365 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4366 else
4367 {
4368 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4369 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4370 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4371 }
4372 }
4373 /* Otherwise, allocate a register that satisfies the caller and transfer
4374 the shadowing if compatible with the intended use. (This basically
 4375               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4376 else
4377 {
4378 Assert(fNoVolatileRegs);
4379 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4380 !fNoVolatileRegs
4381 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4382 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4383 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4384 {
4385 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
 4386                Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4387 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4388 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4389 }
4390 else
4391 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4392 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4393 g_apszIemNativeHstRegNames[idxRegNew]));
4394 idxReg = idxRegNew;
4395 }
4396 }
4397 else
4398 {
4399 /*
4400 * Oops. Shadowed guest register already allocated!
4401 *
4402 * Allocate a new register, copy the value and, if updating, the
4403 * guest shadow copy assignment to the new register.
4404 */
4405 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4406 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4407 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4408 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4409
4410 /** @todo share register for readonly access. */
4411 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4412 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4413
4414 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4415 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4416
4417 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4418 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4419 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4420 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4421 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4422 else
4423 {
4424 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4425 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4426 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4427 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4428 }
4429 idxReg = idxRegNew;
4430 }
4431 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4432
4433#ifdef VBOX_STRICT
4434 /* Strict builds: Check that the value is correct. */
4435 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4436#endif
4437
4438 return idxReg;
4439 }
4440
4441 /*
 4442     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4443 */
4444 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4445
4446 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4447 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4448
4449 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4450 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4451 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4452 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4453
4454 return idxRegNew;
4455}
4456
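/* Illustrative usage sketch (editor's addition, not part of the original source;
   the guest register picked below is arbitrary):
        uint8_t const idxGprRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                                  (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xSP),
                                                                  kIemNativeGstRegUse_ForUpdate);
        // ... emit code updating the host copy of RSP via idxGprRsp ...
        iemNativeRegFreeTmp(pReNative, idxGprRsp);
*/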
4457
4458/**
4459 * Allocates a temporary host general purpose register that already holds the
4460 * given guest register value.
4461 *
4462 * The use case for this function is places where the shadowing state cannot be
 4463 * modified due to branching and such. This will fail if we don't have a
4464 * current shadow copy handy or if it's incompatible. The only code that will
4465 * be emitted here is value checking code in strict builds.
4466 *
4467 * The intended use can only be readonly!
4468 *
4469 * @returns The host register number, UINT8_MAX if not present.
4470 * @param pReNative The native recompile state.
4471 * @param poff Pointer to the instruction buffer offset.
4472 * Will be updated in strict builds if a register is
4473 * found.
 4474 * @param   enmGstReg       The guest register that is to be fetched.
4475 * @note In strict builds, this may throw instruction buffer growth failures.
4476 * Non-strict builds will not throw anything.
4477 * @sa iemNativeRegAllocTmpForGuestReg
4478 */
4479DECL_HIDDEN_THROW(uint8_t)
4480iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4481{
4482 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4483#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4484 AssertMsg( pReNative->idxCurCall == 0
4485 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4486 || enmGstReg == kIemNativeGstReg_Pc,
4487 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4488#endif
4489
4490 /*
4491 * First check if the guest register value is already in a host register.
4492 */
4493 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4494 {
4495 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4496 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4497 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4498 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4499
4500 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4501 {
4502 /*
4503 * We only do readonly use here, so easy compared to the other
4504 * variant of this code.
4505 */
4506 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4507 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4508 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4509 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4510 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4511
4512#ifdef VBOX_STRICT
4513 /* Strict builds: Check that the value is correct. */
4514 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4515#else
4516 RT_NOREF(poff);
4517#endif
4518 return idxReg;
4519 }
4520 }
4521
4522 return UINT8_MAX;
4523}
4524
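/* Illustrative caller pattern (editor's addition, not from the original source):
        uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
        if (idxPcReg != UINT8_MAX)
        {
            // ... read-only use of the shadowed PC value ...
            iemNativeRegFreeTmp(pReNative, idxPcReg);
        }
        // else: fall back to a path that doesn't need to touch the shadowing state.
*/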
4525
4526/**
4527 * Allocates argument registers for a function call.
4528 *
4529 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4530 * need to check the return value.
4531 * @param pReNative The native recompile state.
4532 * @param off The current code buffer offset.
4533 * @param cArgs The number of arguments the function call takes.
4534 */
4535DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4536{
4537 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4538 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4539 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4540 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4541
4542 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4543 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4544 else if (cArgs == 0)
 4545        return off;
4546
4547 /*
 4548     * Do we get lucky and all registers are free and not shadowing anything?
4549 */
4550 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4551 for (uint32_t i = 0; i < cArgs; i++)
4552 {
4553 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4554 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4555 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4556 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4557 }
4558 /*
 4559     * Okay, not lucky, so we have to free up the registers.
4560 */
4561 else
4562 for (uint32_t i = 0; i < cArgs; i++)
4563 {
4564 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4565 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4566 {
4567 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4568 {
4569 case kIemNativeWhat_Var:
4570 {
4571 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4572 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4573 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4574 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4575 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4576
4577 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4578 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4579 else
4580 {
4581 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4582 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4583 }
4584 break;
4585 }
4586
4587 case kIemNativeWhat_Tmp:
4588 case kIemNativeWhat_Arg:
4589 case kIemNativeWhat_rc:
4590 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4591 default:
4592 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4593 }
4594
4595 }
4596 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4597 {
4598 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4599 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4600 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4601 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4602 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4603 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4604 }
4605 else
4606 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4607 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4608 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4609 }
4610 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
 4611    return off;
4612}
4613
4614
4615DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4616
4617
4618#if 0
4619/**
4620 * Frees a register assignment of any type.
4621 *
4622 * @param pReNative The native recompile state.
4623 * @param idxHstReg The register to free.
4624 *
4625 * @note Does not update variables.
4626 */
4627DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4628{
4629 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4630 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4631 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4632 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4633 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4634 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4635 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4636 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4637 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4638 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4639 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4640 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4641 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4642 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4643
4644 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4645 /* no flushing, right:
4646 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4647 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4648 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4649 */
4650}
4651#endif
4652
4653
4654/**
4655 * Frees a temporary register.
4656 *
4657 * Any shadow copies of guest registers assigned to the host register will not
4658 * be flushed by this operation.
4659 */
4660DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4661{
4662 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4663 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4664 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4665 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4666 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4667}
4668
4669
4670/**
4671 * Frees a temporary immediate register.
4672 *
 4673 * It is assumed that the caller has not modified the register, so it still holds
4674 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4675 */
4676DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4677{
4678 iemNativeRegFreeTmp(pReNative, idxHstReg);
4679}
4680
4681
4682/**
4683 * Frees a register assigned to a variable.
4684 *
4685 * The register will be disassociated from the variable.
4686 */
4687DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4688{
4689 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4690 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4691 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
 4693    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4694
4695 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4696 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4697 if (!fFlushShadows)
4698 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4699 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4700 else
4701 {
4702 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4703 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4704 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4705 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4706 uint64_t fGstRegShadows = fGstRegShadowsOld;
4707 while (fGstRegShadows)
4708 {
4709 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4710 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4711
4712 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4713 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4714 }
4715 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4716 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4717 }
4718}
4719
4720
4721/**
4722 * Called right before emitting a call instruction to move anything important
4723 * out of call-volatile registers, free and flush the call-volatile registers,
4724 * optionally freeing argument variables.
4725 *
 4726 * @returns New code buffer offset; throws VBox status code on failure.
4727 * @param pReNative The native recompile state.
4728 * @param off The code buffer offset.
4729 * @param cArgs The number of arguments the function call takes.
 4730 *                      It is presumed that the host register part of these has
4731 * been allocated as such already and won't need moving,
4732 * just freeing.
4733 * @param fKeepVars Mask of variables that should keep their register
4734 * assignments. Caller must take care to handle these.
4735 */
4736DECL_HIDDEN_THROW(uint32_t)
4737iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4738{
4739 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4740
4741 /* fKeepVars will reduce this mask. */
4742 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4743
4744 /*
4745 * Move anything important out of volatile registers.
4746 */
4747 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4748 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4749 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4750#ifdef IEMNATIVE_REG_FIXED_TMP0
4751 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4752#endif
4753 & ~g_afIemNativeCallRegs[cArgs];
4754
4755 fRegsToMove &= pReNative->Core.bmHstRegs;
4756 if (!fRegsToMove)
4757 { /* likely */ }
4758 else
4759 {
4760 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4761 while (fRegsToMove != 0)
4762 {
4763 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4764 fRegsToMove &= ~RT_BIT_32(idxReg);
4765
4766 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4767 {
4768 case kIemNativeWhat_Var:
4769 {
4770 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4771 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4772 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4773 Assert(pVar->idxReg == idxReg);
 4774                    if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4775 {
4776 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4777 idxVar, pVar->enmKind, pVar->idxReg));
4778 if (pVar->enmKind != kIemNativeVarKind_Stack)
4779 pVar->idxReg = UINT8_MAX;
4780 else
4781 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4782 }
4783 else
4784 fRegsToFree &= ~RT_BIT_32(idxReg);
4785 continue;
4786 }
4787
4788 case kIemNativeWhat_Arg:
4789 AssertMsgFailed(("What?!?: %u\n", idxReg));
4790 continue;
4791
4792 case kIemNativeWhat_rc:
4793 case kIemNativeWhat_Tmp:
4794 AssertMsgFailed(("Missing free: %u\n", idxReg));
4795 continue;
4796
4797 case kIemNativeWhat_FixedTmp:
4798 case kIemNativeWhat_pVCpuFixed:
4799 case kIemNativeWhat_pCtxFixed:
4800 case kIemNativeWhat_FixedReserved:
4801 case kIemNativeWhat_Invalid:
4802 case kIemNativeWhat_End:
4803 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4804 }
4805 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4806 }
4807 }
4808
4809 /*
4810 * Do the actual freeing.
4811 */
4812 if (pReNative->Core.bmHstRegs & fRegsToFree)
4813 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4814 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4815 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4816
4817 /* If there are guest register shadows in any call-volatile register, we
 4818       have to clear the corresponding guest register masks for each register. */
4819 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4820 if (fHstRegsWithGstShadow)
4821 {
4822 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4823 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4824 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4825 do
4826 {
4827 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4828 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4829
4830 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4831 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4832 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4833 } while (fHstRegsWithGstShadow != 0);
4834 }
4835
4836 return off;
4837}
4838
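/* Typical call-site sequence (editor's sketch, mirroring iemNativeEmitCImplCall
   further down; pfnHelper, fGstShwFlush and idxInstr stand in for caller-specific values):
        iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
        // ... load IEMNATIVE_CALL_ARG0_GREG .. IEMNATIVE_CALL_ARG3_GREG ...
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
        off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
*/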
4839
4840/**
4841 * Flushes a set of guest register shadow copies.
4842 *
4843 * This is usually done after calling a threaded function or a C-implementation
4844 * of an instruction.
4845 *
4846 * @param pReNative The native recompile state.
4847 * @param fGstRegs Set of guest registers to flush.
4848 */
4849DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4850{
4851 /*
4852 * Reduce the mask by what's currently shadowed
4853 */
4854 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4855 fGstRegs &= bmGstRegShadowsOld;
4856 if (fGstRegs)
4857 {
4858 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4859 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4860 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4861 if (bmGstRegShadowsNew)
4862 {
4863 /*
4864 * Partial.
4865 */
4866 do
4867 {
4868 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4869 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4870 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4871 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4872 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4873
4874 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4875 fGstRegs &= ~fInThisHstReg;
4876 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4877 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4878 if (!fGstRegShadowsNew)
4879 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4880 } while (fGstRegs != 0);
4881 }
4882 else
4883 {
4884 /*
4885 * Clear all.
4886 */
4887 do
4888 {
4889 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4890 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4891 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4892 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4893 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4894
4895 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4896 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4897 } while (fGstRegs != 0);
4898 pReNative->Core.bmHstRegsWithGstShadow = 0;
4899 }
4900 }
4901}
4902
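/* Example (editor's addition): a caller that only invalidated the EFLAGS shadow can
   flush just that one bit of state:
        iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
   whereas iemNativeEmitThreadedCall() below simply passes UINT64_MAX to drop them all. */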
4903
4904/**
4905 * Flushes guest register shadow copies held by a set of host registers.
4906 *
4907 * This is used with the TLB lookup code for ensuring that we don't carry on
4908 * with any guest shadows in volatile registers, as these will get corrupted by
4909 * a TLB miss.
4910 *
4911 * @param pReNative The native recompile state.
4912 * @param fHstRegs Set of host registers to flush guest shadows for.
4913 */
4914DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4915{
4916 /*
4917 * Reduce the mask by what's currently shadowed.
4918 */
4919 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4920 fHstRegs &= bmHstRegsWithGstShadowOld;
4921 if (fHstRegs)
4922 {
4923 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4924 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4925 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4926 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4927 if (bmHstRegsWithGstShadowNew)
4928 {
4929 /*
4930 * Partial (likely).
4931 */
4932 uint64_t fGstShadows = 0;
4933 do
4934 {
4935 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4936 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4937 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4938 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4939
4940 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4941 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4942 fHstRegs &= ~RT_BIT_32(idxHstReg);
4943 } while (fHstRegs != 0);
4944 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4945 }
4946 else
4947 {
4948 /*
4949 * Clear all.
4950 */
4951 do
4952 {
4953 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4954 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4955 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4956 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4957
4958 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4959 fHstRegs &= ~RT_BIT_32(idxHstReg);
4960 } while (fHstRegs != 0);
4961 pReNative->Core.bmGstRegShadows = 0;
4962 }
4963 }
4964}
4965
4966
4967/**
4968 * Restores guest shadow copies in volatile registers.
4969 *
4970 * This is used after calling a helper function (think TLB miss) to restore the
4971 * register state of volatile registers.
4972 *
4973 * @param pReNative The native recompile state.
4974 * @param off The code buffer offset.
4975 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4976 * be active (allocated) w/o asserting. Hack.
4977 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4978 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4979 */
4980DECL_HIDDEN_THROW(uint32_t)
4981iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4982{
4983 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4984 if (fHstRegs)
4985 {
4986 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4987 do
4988 {
4989 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4990
 4991            /* It's not fatal if a register is active holding a variable that is
 4992               shadowing a guest register, ASSUMING all pending guest register
 4993               writes were flushed prior to the helper call. However, we'll be
 4994               emitting duplicate restores, so it wastes code space. */
4995 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4996 RT_NOREF(fHstRegsActiveShadows);
4997
4998 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4999 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5000 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5001 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5002
5003 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5004 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5005
5006 fHstRegs &= ~RT_BIT_32(idxHstReg);
5007 } while (fHstRegs != 0);
5008 }
5009 return off;
5010}
5011
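/* Sketch of the intended TLB-miss pattern (editor's addition, based on the function
   descriptions above; the zero fHstRegsActiveShadows argument is only an example):
        iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
        // ... emit the conditional helper call taken on a TLB miss ...
        off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
*/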
5012
5013/**
5014 * Flushes delayed write of a specific guest register.
5015 *
5016 * This must be called prior to calling CImpl functions and any helpers that use
5017 * the guest state (like raising exceptions) and such.
5018 *
5019 * This optimization has not yet been implemented. The first target would be
5020 * RIP updates, since these are the most common ones.
5021 */
5022DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5023 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5024{
5025 RT_NOREF(pReNative, enmClass, idxReg);
5026 return off;
5027}
5028
5029
5030/**
5031 * Flushes any delayed guest register writes.
5032 *
5033 * This must be called prior to calling CImpl functions and any helpers that use
5034 * the guest state (like raising exceptions) and such.
5035 *
5036 * This optimization has not yet been implemented. The first target would be
5037 * RIP updates, since these are the most common ones.
5038 */
5039DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5040{
5041 RT_NOREF(pReNative, off);
5042 return off;
5043}
5044
5045
5046#ifdef VBOX_STRICT
5047/**
5048 * Does internal register allocator sanity checks.
5049 */
5050static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5051{
5052 /*
5053 * Iterate host registers building a guest shadowing set.
5054 */
5055 uint64_t bmGstRegShadows = 0;
5056 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5057 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5058 while (bmHstRegsWithGstShadow)
5059 {
5060 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5061 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5062 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5063
5064 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5065 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5066 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5067 bmGstRegShadows |= fThisGstRegShadows;
5068 while (fThisGstRegShadows)
5069 {
5070 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5071 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5072 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5073 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5074 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5075 }
5076 }
5077 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5078 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5079 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5080
5081 /*
5082 * Now the other way around, checking the guest to host index array.
5083 */
5084 bmHstRegsWithGstShadow = 0;
5085 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5086 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5087 while (bmGstRegShadows)
5088 {
5089 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5090 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5091 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5092
5093 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5094 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5095 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5096 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5097 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5098 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5099 }
5100 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5101 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5102 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5103}
5104#endif
5105
5106
5107/*********************************************************************************************************************************
5108* Code Emitters (larger snippets) *
5109*********************************************************************************************************************************/
5110
5111/**
5112 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5113 * extending to 64-bit width.
5114 *
 5115 * @returns New code buffer offset; throws VBox status code on failure.
 5116 * @param   pReNative   The native recompile state.
5117 * @param off The current code buffer position.
5118 * @param idxHstReg The host register to load the guest register value into.
5119 * @param enmGstReg The guest register to load.
5120 *
5121 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5122 * that is something the caller needs to do if applicable.
5123 */
5124DECL_HIDDEN_THROW(uint32_t)
5125iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5126{
5127 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5128 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5129
5130 switch (g_aGstShadowInfo[enmGstReg].cb)
5131 {
5132 case sizeof(uint64_t):
5133 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5134 case sizeof(uint32_t):
5135 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5136 case sizeof(uint16_t):
5137 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5138#if 0 /* not present in the table. */
5139 case sizeof(uint8_t):
5140 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5141#endif
5142 default:
5143 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5144 }
5145}
5146
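/* Example (editor's note, follows directly from the switch above): a segment limit
   shadow has g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), so it resolves to
   iemNativeEmitLoadGprFromVCpuU32() and the upper 32 bits of the host register end up
   zero, while kIemNativeGstReg_Pc (8 bytes) goes via iemNativeEmitLoadGprFromVCpuU64(). */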
5147
5148#ifdef VBOX_STRICT
5149/**
 5150 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5151 *
5152 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5153 * Trashes EFLAGS on AMD64.
5154 */
5155static uint32_t
5156iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5157{
5158# ifdef RT_ARCH_AMD64
5159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5160
5161 /* rol reg64, 32 */
5162 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5163 pbCodeBuf[off++] = 0xc1;
5164 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5165 pbCodeBuf[off++] = 32;
5166
5167 /* test reg32, ffffffffh */
5168 if (idxReg >= 8)
5169 pbCodeBuf[off++] = X86_OP_REX_B;
5170 pbCodeBuf[off++] = 0xf7;
5171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5172 pbCodeBuf[off++] = 0xff;
5173 pbCodeBuf[off++] = 0xff;
5174 pbCodeBuf[off++] = 0xff;
5175 pbCodeBuf[off++] = 0xff;
5176
5177 /* je/jz +1 */
5178 pbCodeBuf[off++] = 0x74;
5179 pbCodeBuf[off++] = 0x01;
5180
5181 /* int3 */
5182 pbCodeBuf[off++] = 0xcc;
5183
5184 /* rol reg64, 32 */
5185 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5186 pbCodeBuf[off++] = 0xc1;
5187 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5188 pbCodeBuf[off++] = 32;
5189
5190# elif defined(RT_ARCH_ARM64)
5191 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5192 /* lsr tmp0, reg64, #32 */
5193 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5194 /* cbz tmp0, +1 */
5195 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5196 /* brk #0x1100 */
5197 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5198
5199# else
5200# error "Port me!"
5201# endif
5202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5203 return off;
5204}
5205#endif /* VBOX_STRICT */
5206
5207
5208#ifdef VBOX_STRICT
5209/**
 5210 * Emits code that checks that the content of register @a idxReg is the same
5211 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5212 * instruction if that's not the case.
5213 *
5214 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5215 * Trashes EFLAGS on AMD64.
5216 */
5217static uint32_t
5218iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5219{
5220# ifdef RT_ARCH_AMD64
5221 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5222
5223 /* cmp reg, [mem] */
5224 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5225 {
5226 if (idxReg >= 8)
5227 pbCodeBuf[off++] = X86_OP_REX_R;
5228 pbCodeBuf[off++] = 0x38;
5229 }
5230 else
5231 {
5232 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5233 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5234 else
5235 {
5236 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5237 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5238 else
5239 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5240 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5241 if (idxReg >= 8)
5242 pbCodeBuf[off++] = X86_OP_REX_R;
5243 }
5244 pbCodeBuf[off++] = 0x39;
5245 }
5246 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5247
5248 /* je/jz +1 */
5249 pbCodeBuf[off++] = 0x74;
5250 pbCodeBuf[off++] = 0x01;
5251
5252 /* int3 */
5253 pbCodeBuf[off++] = 0xcc;
5254
5255 /* For values smaller than the register size, we must check that the rest
5256 of the register is all zeros. */
5257 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5258 {
5259 /* test reg64, imm32 */
5260 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5261 pbCodeBuf[off++] = 0xf7;
5262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5263 pbCodeBuf[off++] = 0;
5264 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5265 pbCodeBuf[off++] = 0xff;
5266 pbCodeBuf[off++] = 0xff;
5267
5268 /* je/jz +1 */
5269 pbCodeBuf[off++] = 0x74;
5270 pbCodeBuf[off++] = 0x01;
5271
5272 /* int3 */
5273 pbCodeBuf[off++] = 0xcc;
5274 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5275 }
5276 else
5277 {
5278 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5279 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5280 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5281 }
5282
5283# elif defined(RT_ARCH_ARM64)
5284 /* mov TMP0, [gstreg] */
5285 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5286
5287 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5288 /* sub tmp0, tmp0, idxReg */
5289 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5290 /* cbz tmp0, +1 */
5291 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5292 /* brk #0x1000+enmGstReg */
5293 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5294 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5295
5296# else
5297# error "Port me!"
5298# endif
5299 return off;
5300}
5301#endif /* VBOX_STRICT */
5302
5303
5304#ifdef VBOX_STRICT
5305/**
 5306 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
5307 * important bits.
5308 *
5309 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5310 * Trashes EFLAGS on AMD64.
5311 */
5312static uint32_t
5313iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5314{
5315 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5316 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5317 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5318 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5319
5320# ifdef RT_ARCH_AMD64
5321 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5322
5323 /* je/jz +1 */
5324 pbCodeBuf[off++] = 0x74;
5325 pbCodeBuf[off++] = 0x01;
5326
5327 /* int3 */
5328 pbCodeBuf[off++] = 0xcc;
5329
5330# elif defined(RT_ARCH_ARM64)
5331 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5332
5333 /* b.eq +1 */
5334 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5335 /* brk #0x2000 */
5336 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5337
5338# else
5339# error "Port me!"
5340# endif
5341 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5342
5343 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5344 return off;
5345}
5346#endif /* VBOX_STRICT */
5347
5348
5349/**
 5350 * Emits code for checking the return code of a call and rcPassUp, returning
 5351 * from the code if either is non-zero.
5352 */
5353DECL_HIDDEN_THROW(uint32_t)
5354iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5355{
5356#ifdef RT_ARCH_AMD64
5357 /*
5358 * AMD64: eax = call status code.
5359 */
5360
5361 /* edx = rcPassUp */
5362 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5363# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5364 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5365# endif
5366
5367 /* edx = eax | rcPassUp */
5368 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5369 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5370 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5371 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5372
5373 /* Jump to non-zero status return path. */
5374 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5375
5376 /* done. */
5377
5378#elif RT_ARCH_ARM64
5379 /*
5380 * ARM64: w0 = call status code.
5381 */
5382# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5383 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5384# endif
5385 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5386
5387 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5388
5389 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5390
5391 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5392 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5393 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5394
5395#else
5396# error "port me"
5397#endif
5398 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5399 RT_NOREF_PV(idxInstr);
5400 return off;
5401}
5402
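/* Editor's note: the net effect of the code emitted above is equivalent to
        if ((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp)
            goto NonZeroRetOrPassUp;
   so only the case where the call returned VINF_SUCCESS and rcPassUp is zero falls
   straight through to the next recompiled instruction. */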
5403
5404/**
5405 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5406 * raising a \#GP(0) if it isn't.
5407 *
 5408 * @returns New code buffer offset; throws VBox status code on failure.
5409 * @param pReNative The native recompile state.
5410 * @param off The code buffer offset.
5411 * @param idxAddrReg The host register with the address to check.
5412 * @param idxInstr The current instruction.
5413 */
5414DECL_HIDDEN_THROW(uint32_t)
5415iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5416{
5417 /*
5418 * Make sure we don't have any outstanding guest register writes as we may
 5419     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5420 */
5421 off = iemNativeRegFlushPendingWrites(pReNative, off);
5422
5423#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5424 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5425#else
5426 RT_NOREF(idxInstr);
5427#endif
5428
5429#ifdef RT_ARCH_AMD64
5430 /*
5431 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5432 * return raisexcpt();
 5433     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
5434 */
5435 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5436
5437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5438 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5439 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5440 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5441 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5442
5443 iemNativeRegFreeTmp(pReNative, iTmpReg);
5444
5445#elif defined(RT_ARCH_ARM64)
5446 /*
5447 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5448 * return raisexcpt();
5449 * ----
5450 * mov x1, 0x800000000000
5451 * add x1, x0, x1
5452 * cmp xzr, x1, lsr 48
5453 * b.ne .Lraisexcpt
5454 */
5455 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5456
5457 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5458 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5459 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5460 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5461
5462 iemNativeRegFreeTmp(pReNative, iTmpReg);
5463
5464#else
5465# error "Port me"
5466#endif
5467 return off;
5468}
5469
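/* Worked example for the checks above (editor's note): a 64-bit address is canonical
   when bits 63:47 are a sign-extension of bit 47, which is exactly when
   ((uint32_t)(uAddr >> 32) + 0x8000) >> 16 == 0:
        0x00007fff'ffffffff -> 0x00007fff + 0x8000 = 0x0000ffff, >> 16 = 0      (canonical)
        0xffff8000'00000000 -> 0xffff8000 + 0x8000 = 0x00000000 (32-bit wrap)   (canonical)
        0x00008000'00000000 -> 0x00008000 + 0x8000 = 0x00010000, >> 16 = 1      (raises #GP(0))
*/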
5470
5471/**
 5472 * Emits code to check that the content of @a idxAddrReg is within the limit
5473 * of CS, raising a \#GP(0) if it isn't.
5474 *
5475 * @returns New code buffer offset; throws VBox status code on error.
5476 * @param pReNative The native recompile state.
5477 * @param off The code buffer offset.
5478 * @param idxAddrReg The host register (32-bit) with the address to
5479 * check.
5480 * @param idxInstr The current instruction.
5481 */
5482DECL_HIDDEN_THROW(uint32_t)
5483iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5484 uint8_t idxAddrReg, uint8_t idxInstr)
5485{
5486 /*
5487 * Make sure we don't have any outstanding guest register writes as we may
 5488     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5489 */
5490 off = iemNativeRegFlushPendingWrites(pReNative, off);
5491
5492#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5493 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5494#else
5495 RT_NOREF(idxInstr);
5496#endif
5497
5498 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5499 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5500 kIemNativeGstRegUse_ReadOnly);
5501
5502 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5503 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5504
5505 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5506 return off;
5507}
5508
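/* Editor's note: with idxRegCsLim shadowing CS.LIM, the emitted sequence boils down to
        if ((uint32_t)idxAddrRegValue > CS.LIM)   // unsigned compare + ja
            goto RaiseGp0;
   which matches the iemNativeEmitCmpGpr32WithGpr + iemNativeEmitJaToNewLabel pair above. */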
5509
5510/**
5511 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5512 *
5513 * @returns The flush mask.
5514 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5515 * @param fGstShwFlush The starting flush mask.
5516 */
5517DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5518{
5519 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5520 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5521 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5522 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5523 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5524 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5525 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5526 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5527 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5528 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5529 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5530 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5531 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5532 return fGstShwFlush;
5533}
5534
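/* Example (editor's addition, derived from the code above):
        iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_BRANCH_STACK_FAR, 0)
   returns a mask covering the CS and SS selector/base/limit shadows plus
   kIemNativeGstReg_GprFirst + X86_GREG_xSP, i.e. none of those shadow copies survive
   the CImpl call. */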
5535
5536/**
5537 * Emits a call to a CImpl function or something similar.
5538 */
5539DECL_HIDDEN_THROW(uint32_t)
5540iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5541 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5542{
5543 /*
 5544     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5545 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5546 */
5547 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5548 fGstShwFlush
5549 | RT_BIT_64(kIemNativeGstReg_Pc)
5550 | RT_BIT_64(kIemNativeGstReg_EFlags));
5551 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5552
5553 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5554
5555 /*
5556 * Load the parameters.
5557 */
5558#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
 5559    /* Special case: the hidden VBOXSTRICTRC pointer. */
5560 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5561 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5562 if (cAddParams > 0)
5563 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5564 if (cAddParams > 1)
5565 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5566 if (cAddParams > 2)
5567 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5568 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5569
5570#else
5571 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5572 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5573 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5574 if (cAddParams > 0)
5575 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5576 if (cAddParams > 1)
5577 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5578 if (cAddParams > 2)
5579# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5580 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5581# else
5582 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5583# endif
5584#endif
5585
5586 /*
5587 * Make the call.
5588 */
5589 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5590
5591#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5592 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5593#endif
5594
5595 /*
5596 * Check the status code.
5597 */
5598 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5599}
5600
5601
5602/**
5603 * Emits a call to a threaded worker function.
5604 */
5605DECL_HIDDEN_THROW(uint32_t)
5606iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5607{
5608 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5609 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5610
5611#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5612 /* The threaded function may throw / long jmp, so set current instruction
5613 number if we're counting. */
5614 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5615#endif
5616
5617 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5618
5619#ifdef RT_ARCH_AMD64
5620 /* Load the parameters and emit the call. */
5621# ifdef RT_OS_WINDOWS
5622# ifndef VBOXSTRICTRC_STRICT_ENABLED
5623 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5624 if (cParams > 0)
5625 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5626 if (cParams > 1)
5627 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5628 if (cParams > 2)
5629 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5630# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5631 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5632 if (cParams > 0)
5633 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5634 if (cParams > 1)
5635 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5636 if (cParams > 2)
5637 {
5638 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5639 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5640 }
5641 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5642# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5643# else
5644 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5645 if (cParams > 0)
5646 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5647 if (cParams > 1)
5648 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5649 if (cParams > 2)
5650 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5651# endif
5652
5653 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5654
5655# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5656 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5657# endif
5658
5659#elif RT_ARCH_ARM64
5660 /*
5661 * ARM64:
5662 */
5663 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5664 if (cParams > 0)
5665 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5666 if (cParams > 1)
5667 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5668 if (cParams > 2)
5669 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5670
5671 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5672
5673#else
5674# error "port me"
5675#endif
5676
5677 /*
5678 * Check the status code.
5679 */
5680 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5681
5682 return off;
5683}
5684
5685#ifdef VBOX_WITH_STATISTICS
5686/**
5687 * Emits code to update the thread call statistics.
5688 */
5689DECL_INLINE_THROW(uint32_t)
5690iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5691{
5692 /*
5693 * Update threaded function stats.
5694 */
5695 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5696 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
5697# if defined(RT_ARCH_ARM64)
5698 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5699 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5700 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5701 iemNativeRegFreeTmp(pReNative, idxTmp1);
5702 iemNativeRegFreeTmp(pReNative, idxTmp2);
5703# else
5704 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5705# endif
5706 return off;
5707}
5708#endif /* VBOX_WITH_STATISTICS */
5709
5710
5711/**
5712 * Emits the code at the CheckBranchMiss label.
5713 */
5714static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5715{
5716 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5717 if (idxLabel != UINT32_MAX)
5718 {
5719 iemNativeLabelDefine(pReNative, idxLabel, off);
5720
5721 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5722 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5723 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5724
5725 /* jump back to the return sequence. */
5726 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5727 }
5728 return off;
5729}
5730
5731
5732/**
5733 * Emits the code at the NeedCsLimChecking label.
5734 */
5735static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5736{
5737 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5738 if (idxLabel != UINT32_MAX)
5739 {
5740 iemNativeLabelDefine(pReNative, idxLabel, off);
5741
5742 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5743 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5744 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5745
5746 /* jump back to the return sequence. */
5747 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5748 }
5749 return off;
5750}
5751
5752
5753/**
5754 * Emits the code at the ObsoleteTb label.
5755 */
5756static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5757{
5758 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5759 if (idxLabel != UINT32_MAX)
5760 {
5761 iemNativeLabelDefine(pReNative, idxLabel, off);
5762
5763 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5764 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5765 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5766
5767 /* jump back to the return sequence. */
5768 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5769 }
5770 return off;
5771}
5772
5773
5774/**
5775 * Emits the code at the RaiseGP0 label.
5776 */
5777static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5778{
5779 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5780 if (idxLabel != UINT32_MAX)
5781 {
5782 iemNativeLabelDefine(pReNative, idxLabel, off);
5783
5784 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5785 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5786 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5787
5788 /* jump back to the return sequence. */
5789 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5790 }
5791 return off;
5792}
5793
5794
5795/**
5796 * Emits the code at the RaiseNm label.
5797 */
5798static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5799{
5800 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5801 if (idxLabel != UINT32_MAX)
5802 {
5803 iemNativeLabelDefine(pReNative, idxLabel, off);
5804
5805 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5806 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5807 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5808
5809 /* jump back to the return sequence. */
5810 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5811 }
5812 return off;
5813}
5814
5815
5816/**
5817 * Emits the code at the RaiseUd label.
5818 */
5819static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5820{
5821    uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5822 if (idxLabel != UINT32_MAX)
5823 {
5824 iemNativeLabelDefine(pReNative, idxLabel, off);
5825
5826 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5827 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5828 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5829
5830 /* jump back to the return sequence. */
5831 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5832 }
5833 return off;
5834}
5835
5836
5837/**
5838 * Emits the code at the ReturnWithFlags label (returns
5839 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
5840 */
5841static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5842{
5843 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
5844 if (idxLabel != UINT32_MAX)
5845 {
5846 iemNativeLabelDefine(pReNative, idxLabel, off);
5847
5848 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
5849
5850 /* jump back to the return sequence. */
5851 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5852 }
5853 return off;
5854}
5855
5856
5857/**
5858 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
5859 */
5860static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5861{
5862 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
5863 if (idxLabel != UINT32_MAX)
5864 {
5865 iemNativeLabelDefine(pReNative, idxLabel, off);
5866
5867 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
5868
5869 /* jump back to the return sequence. */
5870 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5871 }
5872 return off;
5873}
5874
5875
5876/**
5877 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
5878 */
5879static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5880{
5881 /*
5882 * Generate the rc + rcPassUp fiddling code if needed.
5883 */
5884 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5885 if (idxLabel != UINT32_MAX)
5886 {
5887 iemNativeLabelDefine(pReNative, idxLabel, off);
5888
5889 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
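        /* The failed call left its status code in the return register (eax / x0) and, on AMD64
           with instruction counting enabled, the instruction number in cl.  These are shuffled
           into the helper's argument registers here: rcx/rdx/r8 on Windows, rdi/rsi/rdx on SysV
           hosts, and the fixed IEMNATIVE_CALL_ARGn_GREG registers elsewhere (where ARG2 is
           expected to be set already, see below). */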
5890#ifdef RT_ARCH_AMD64
5891# ifdef RT_OS_WINDOWS
5892# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
5894# endif
5895 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
5897# else
5898 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5899 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
5900# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5901 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
5902# endif
5903# endif
5904# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5905 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
5906# endif
5907
5908#else
5909 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
5910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5911 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
5912#endif
5913
5914 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
5915 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5916 }
5917 return off;
5918}
5919
5920
5921/**
5922 * Emits a standard epilog.
5923 */
5924static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
5925{
5926 *pidxReturnLabel = UINT32_MAX;
5927
5928 /*
5929 * Successful return, so clear the return register (eax, w0).
5930 */
5931    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
5932
5933 /*
5934 * Define label for common return point.
5935 */
5936 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
5937 *pidxReturnLabel = idxReturn;
5938
5939 /*
5940 * Restore registers and return.
5941 */
5942#ifdef RT_ARCH_AMD64
5943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5944
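    /* Note: the '(gcc ? 5 : 7) * 8' in the lea comment below reflects that the Windows host
       pushes two more registers (rsi and rdi) in the prolog than the SysV one, which is what
       IEMNATIVE_FP_OFF_LAST_PUSH accounts for. */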
5945 /* Reposition esp at the r15 restore point. */
5946 pbCodeBuf[off++] = X86_OP_REX_W;
5947 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
5948 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
5949 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
5950
5951 /* Pop non-volatile registers and return */
5952 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
5953 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
5954 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
5955 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
5956 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
5957 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
5958 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
5959 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
5960# ifdef RT_OS_WINDOWS
5961 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
5962 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
5963# endif
5964 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
5965 pbCodeBuf[off++] = 0xc9; /* leave */
5966 pbCodeBuf[off++] = 0xc3; /* ret */
5967 pbCodeBuf[off++] = 0xcc; /* int3 poison */
5968
5969#elif RT_ARCH_ARM64
5970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5971
5972    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
5973 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
5974 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
5975 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
5976 IEMNATIVE_FRAME_VAR_SIZE / 8);
5977 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
5978 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5979 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
5980 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5981 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
5982 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5983 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
5984 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5985 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
5986 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
5987 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
5988 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
5989
5990 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
5991 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
5992 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
5993 IEMNATIVE_FRAME_SAVE_REG_SIZE);
5994
5995 /* retab / ret */
5996# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
5997 if (1)
5998 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
5999 else
6000# endif
6001 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6002
6003#else
6004# error "port me"
6005#endif
6006 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6007
6008 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6009}
6010
6011
6012/**
6013 * Emits a standard prolog.
6014 */
6015static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6016{
6017#ifdef RT_ARCH_AMD64
6018 /*
6019 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6020 * reserving 64 bytes for stack variables plus 4 non-register argument
6021     * slots.  Fixed register assignment: xBX = pVCpu;
6022 *
6023 * Since we always do the same register spilling, we can use the same
6024 * unwind description for all the code.
6025 */
6026 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6027 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6028 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6029 pbCodeBuf[off++] = 0x8b;
6030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6031 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6032 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6033# ifdef RT_OS_WINDOWS
6034 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6035 pbCodeBuf[off++] = 0x8b;
6036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6037 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6038 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6039# else
6040 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6041 pbCodeBuf[off++] = 0x8b;
6042 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6043# endif
6044 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6045 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6046 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6047 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6048 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6049 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6050 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6051 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6052
6053# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6054 /* Save the frame pointer. */
6055 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6056# endif
6057
6058 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6059 X86_GREG_xSP,
6060 IEMNATIVE_FRAME_ALIGN_SIZE
6061 + IEMNATIVE_FRAME_VAR_SIZE
6062 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6063 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6064 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6065 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6066 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6067
6068#elif RT_ARCH_ARM64
6069 /*
6070 * We set up a stack frame exactly like on x86, only we have to push the
6071     * return address ourselves here.  We save all non-volatile registers.
6072 */
6073 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6074
6075# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
6076                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6077                      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
6078                      * it's in any way conditional, so we just emit this instruction now and hope for the best... */
6079 /* pacibsp */
6080 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6081# endif
6082
6083 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6084 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6085 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6086 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6087 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6088 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6090 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6091 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6092 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6094 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6095 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6096 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6097 /* Save the BP and LR (ret address) registers at the top of the frame. */
6098 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6099 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6100 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6101 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6103 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6104
6105 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6106 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
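    /* BP now points at the saved BP/LR pair at the top of the register save area, and the just
       allocated variable area of IEMNATIVE_FRAME_VAR_SIZE bytes starts at the new SP, mirroring
       the x86 frame layout mentioned above. */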
6107
6108 /* mov r28, r0 */
6109 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6110 /* mov r27, r1 */
6111 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6112
6113# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6114 /* Save the frame pointer. */
6115 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6116 ARMV8_A64_REG_X2);
6117# endif
6118
6119#else
6120# error "port me"
6121#endif
6122 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6123 return off;
6124}
6125
6126
6127
6128
6129/*********************************************************************************************************************************
6130* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6131*********************************************************************************************************************************/
6132
6133#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6134 { \
6135 Assert(pReNative->Core.bmVars == 0); \
6136 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6137 Assert(pReNative->Core.bmStack == 0); \
6138 pReNative->fMc = (a_fMcFlags); \
6139 pReNative->fCImpl = (a_fCImplFlags); \
6140 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
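/* Note: pReNative->cArgs deliberately includes the hidden arguments reported by
   iemNativeArgGetHiddenArgCount() on top of the a_cArgs given in the MC block. */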
6141
6142/** We have to get to the end in recompilation mode, as otherwise we won't
6143 * generate code for all the IEM_MC_IF_XXX branches. */
6144#define IEM_MC_END() \
6145 iemNativeVarFreeAll(pReNative); \
6146 } return off
6147
6148
6149
6150/*********************************************************************************************************************************
6151* Native Emitter Support. *
6152*********************************************************************************************************************************/
6153
6154
6155#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
6156
6157#define IEM_MC_NATIVE_ELSE() } else {
6158
6159#define IEM_MC_NATIVE_ENDIF() } ((void)0)
6160
6161
6162#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
6163 off = a_fnEmitter(pReNative, off)
6164
6165#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
6166 off = a_fnEmitter(pReNative, off, (a0))
6167
6168#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
6169 off = a_fnEmitter(pReNative, off, (a0), (a1))
6170
6171#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
6172 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
6173
6174#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
6175 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
6176
6177#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
6178 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
6179
6180#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
6181 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
6182
6183#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
6184 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
6185
6186#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
6187 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
6188
6189
6190/**
6191 * This is an implementation of IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGICAL
6192 * and friends.
6193 *
6194 * It takes the liveness information into account.
6195 */
6196DECL_INLINE_THROW(uint32_t)
6197iemNativeEmitEFlagsForLogical(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEfl,
6198 uint8_t cOpBits, uint8_t idxRegResult)
6199{
6200#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6201 if (1) /** @todo check if all bits are clobbered. */
6202#endif
6203 {
6204#ifdef RT_ARCH_AMD64
6205 /*
6206 * Collect flags and merge them with eflags.
6207 */
6208        /** @todo we could alternatively use SAHF here when host rax is free,
6209         * since OF is cleared. */
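        /* The host logical instruction emitted by the caller has just set the host flags, so
           PF/ZF/SF can simply be captured with PUSHF, masked and OR'ed into the guest EFLAGS
           below; this is also why cOpBits and idxRegResult aren't needed on this host. */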
6210 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 /* pushf - do this before any reg allocations as they may emit instructions too. */
6212 pCodeBuf[off++] = 0x9c;
6213
6214 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
6215 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6216 pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + 6 + 6 + 3);
6217 /* pop tmp */
6218 if (idxTmpReg >= 8)
6219 pCodeBuf[off++] = X86_OP_REX_B;
6220 pCodeBuf[off++] = 0x58 + (idxTmpReg & 7);
6221 /* and tmp, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF */
6222 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF);
6223 /* Clear the status bits in EFLs. */
6224 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegEfl, ~X86_EFL_STATUS_BITS);
6225 /* OR in the flags we collected. */
6226 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxRegEfl, idxTmpReg);
6227 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
6228 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6229 RT_NOREF(cOpBits, idxRegResult);
6230
6231#elif defined(RT_ARCH_ARM64)
6232 /*
6233 * Calculate flags.
6234 */
6235 uint8_t const idxRegEfl = iemNativeVarRegisterAcquire(pReNative, idxVarEfl, &off, true /*fInitialized*/);
6236 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6237 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
6238
6239 /* Clear the status bits. ~0x8D5 (or ~0x8FD) can't be AND immediate, so use idxTmpReg for constant. */
6240 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, ~X86_EFL_STATUS_BITS);
6241 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxRegEfl, idxTmpReg);
6242
6243        /* Calculate zero: mov tmp, zf; cmp result,zero; csel.eq tmp,tmp,wzr */
6244 if (cOpBits > 32)
6245 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
6246 else
6247 off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, idxRegResult, ARMV8_A64_REG_XZR);
6248# if 0
6249 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_EFL_ZF);
6250 pCodeBuf[off++] = Armv8A64MkInstrCSel(idxTmpReg, idxTmpReg, ARMV8_A64_REG_XZR, kArmv8InstrCond_Eq, false /*f64Bit*/);
6251 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/);
6252# else
6253 pCodeBuf[off++] = Armv8A64MkInstrCSet(idxTmpReg, kArmv8InstrCond_Eq, false /*f64Bit*/);
6254 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_ZF_BIT);
6255# endif
6256
6257 /* Calculate signed: We could use the native SF flag, but it's just as simple to calculate it by shifting. */
6258 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(idxTmpReg, idxRegResult, cOpBits - 1, cOpBits > 32 /*f64Bit*/);
6259 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxRegEfl, idxRegEfl, idxTmpReg, false /*f64Bit*/, X86_EFL_SF_BIT);
6260
6261 /* Calculate 8-bit parity of the result. */
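        /* This folds the low 8 bits of the result onto bit 0 by XOR-ing the value with itself
           shifted right by 4, 2 and then 1, leaving bit 0 set for odd parity.  Since X86_EFL_PF
           is set for even parity, the bit is inverted (EOR with 1) before BFI inserts it at
           X86_EFL_PF_BIT. */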
6262 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxRegResult, idxRegResult, false /*f64Bit*/,
6263 4 /*offShift6*/, kArmv8A64InstrShift_Lsr);
6264 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
6265 2 /*offShift6*/, kArmv8A64InstrShift_Lsr);
6266 pCodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxTmpReg, idxTmpReg, false /*f64Bit*/,
6267 1 /*offShift6*/, kArmv8A64InstrShift_Lsr);
6268 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
6269 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
6270 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegEfl, idxTmpReg, X86_EFL_PF_BIT, 1, false /*f64Bit*/);
6271
6272 iemNativeVarRegisterRelease(pReNative, idxVarEfl);
6273 iemNativeRegFreeTmp(pReNative, idxTmpReg);
6274#else
6275# error "port me"
6276#endif
6277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6278 }
6279 return off;
6280}
6281
6282/** @todo move this somewhere else ... */
6283DECL_INLINE_THROW(uint32_t)
6284iemNativeEmit_xor_r_r_efl(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6285 uint8_t idxVarDst, uint8_t idxVarSrc, uint8_t idxVarEfl, uint8_t cOpBits)
6286{
6287 /*
6288     * The XOR instruction will clear OF, CF and AF (the latter is officially undefined),
6289 * so we don't need the initial destination value.
6290 */
6291 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off, true /*fInitialized*/);
6292 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
6293 //off = iemNativeEmitBrk(pReNative, off, 0x2222);
6294 if (cOpBits > 32)
6295 off = iemNativeEmitXorGprByGpr(pReNative, off, idxRegDst, idxRegSrc);
6296 else
6297 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxRegDst, idxRegSrc);
6298 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
6299
6300 off = iemNativeEmitEFlagsForLogical(pReNative, off, idxVarEfl, cOpBits, idxRegDst);
6301 iemNativeVarRegisterRelease(pReNative, idxVarDst);
6302 return off;
6303}
6304
6305
6306/*********************************************************************************************************************************
6307* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6308*********************************************************************************************************************************/
6309
6310#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6311 pReNative->fMc = 0; \
6312 pReNative->fCImpl = (a_fFlags); \
6313 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6314
6315
6316#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6317 pReNative->fMc = 0; \
6318 pReNative->fCImpl = (a_fFlags); \
6319 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6320
6321DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6322 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6323 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6324{
6325 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6326}
6327
6328
6329#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6330 pReNative->fMc = 0; \
6331 pReNative->fCImpl = (a_fFlags); \
6332 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6333 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6334
6335DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6336 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6337 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6338{
6339 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6340}
6341
6342
6343#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6344 pReNative->fMc = 0; \
6345 pReNative->fCImpl = (a_fFlags); \
6346 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6347 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6348
6349DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6350 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6351 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6352 uint64_t uArg2)
6353{
6354 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6355}
6356
6357
6358
6359/*********************************************************************************************************************************
6360* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6361*********************************************************************************************************************************/
6362
6363/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6364 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6365DECL_INLINE_THROW(uint32_t)
6366iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6367{
6368 /*
6369     * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6370     * return with a special status code and make the execution loop deal with
6371     * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6372     * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
6373     * could continue w/o interruption, it probably will drop into the
6374     * debugger, so it's not worth the effort of trying to service it here and
6375     * we just lump it in with the handling of the others.
6376     *
6377     * To simplify the code and the register state management even more (wrt
6378     * the immediate in the AND operation), we always update the flags and skip
6379     * the extra check and its associated conditional jump.
6380 */
6381 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6382 <= UINT32_MAX);
6383#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6384 AssertMsg( pReNative->idxCurCall == 0
6385 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6386 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6387#endif
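    /* The emitted sequence: fetch EFLAGS for update, jump to the ReturnWithFlags tail code if
       any of X86_EFL_TF / CPUMCTX_DBG_HIT_DRX_MASK / CPUMCTX_DBG_DBGF_MASK are set, otherwise
       clear X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW and store the result back into CPUMCTX. */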
6388
6389 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6390 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6391 true /*fSkipLivenessAssert*/);
6392 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6393 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6394 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6395 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6396 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6397
6398 /* Free but don't flush the EFLAGS register. */
6399 iemNativeRegFreeTmp(pReNative, idxEflReg);
6400
6401 return off;
6402}
6403
6404
6405/** Emits nothing when a_rcNormal is VINF_SUCCESS; for VINF_IEM_REEXEC_BREAK it stores the instruction number (when instruction counting is enabled) and jumps to the ReturnBreak label. */
6406template<int const a_rcNormal>
6407DECL_FORCE_INLINE(uint32_t)
6408iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6409{
6410 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6411 if (a_rcNormal != VINF_SUCCESS)
6412 {
6413#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6414 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6415#else
6416 RT_NOREF_PV(idxInstr);
6417#endif
6418 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6419 }
6420 return off;
6421}
6422
6423
6424#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6425 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6426 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6427
6428#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6429 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6430 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6431 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6432
6433/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6434DECL_INLINE_THROW(uint32_t)
6435iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6436{
6437 /* Allocate a temporary PC register. */
6438 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6439
6440 /* Perform the addition and store the result. */
6441 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6442 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6443
6444 /* Free but don't flush the PC register. */
6445 iemNativeRegFreeTmp(pReNative, idxPcReg);
6446
6447 return off;
6448}
6449
6450
6451#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6452 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6453 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6454
6455#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6456 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6457 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6458 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6459
6460/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6461DECL_INLINE_THROW(uint32_t)
6462iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6463{
6464 /* Allocate a temporary PC register. */
6465 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6466
6467 /* Perform the addition and store the result. */
6468 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6469 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6470
6471 /* Free but don't flush the PC register. */
6472 iemNativeRegFreeTmp(pReNative, idxPcReg);
6473
6474 return off;
6475}
6476
6477
6478#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6479 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6480 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6481
6482#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6483 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6485 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6486
6487/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6488DECL_INLINE_THROW(uint32_t)
6489iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6490{
6491 /* Allocate a temporary PC register. */
6492 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6493
6494 /* Perform the addition and store the result. */
6495 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6496 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6497 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6498
6499 /* Free but don't flush the PC register. */
6500 iemNativeRegFreeTmp(pReNative, idxPcReg);
6501
6502 return off;
6503}
6504
6505
6506
6507/*********************************************************************************************************************************
6508* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6509*********************************************************************************************************************************/
6510
6511#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6512 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6513 (a_enmEffOpSize), pCallEntry->idxInstr); \
6514 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6515
6516#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6517 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6518 (a_enmEffOpSize), pCallEntry->idxInstr); \
6519 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6521
6522#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6524 IEMMODE_16BIT, pCallEntry->idxInstr); \
6525 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6526
6527#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6528 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6529 IEMMODE_16BIT, pCallEntry->idxInstr); \
6530 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6532
6533#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6535 IEMMODE_64BIT, pCallEntry->idxInstr); \
6536 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6537
6538#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6539 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6540 IEMMODE_64BIT, pCallEntry->idxInstr); \
6541 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6543
6544/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6545 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6546 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6547DECL_INLINE_THROW(uint32_t)
6548iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6549 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6550{
6551 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6552
6553 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6554 off = iemNativeRegFlushPendingWrites(pReNative, off);
6555
6556 /* Allocate a temporary PC register. */
6557 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6558
6559 /* Perform the addition. */
6560 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6561
6562 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6563 {
6564 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6565 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6566 }
6567 else
6568 {
6569 /* Just truncate the result to 16-bit IP. */
6570 Assert(enmEffOpSize == IEMMODE_16BIT);
6571 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6572 }
6573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6574
6575 /* Free but don't flush the PC register. */
6576 iemNativeRegFreeTmp(pReNative, idxPcReg);
6577
6578 return off;
6579}
6580
6581
6582#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6583 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6584 (a_enmEffOpSize), pCallEntry->idxInstr); \
6585 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6586
6587#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6588 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6589 (a_enmEffOpSize), pCallEntry->idxInstr); \
6590 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6591 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6592
6593#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6594 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6595 IEMMODE_16BIT, pCallEntry->idxInstr); \
6596 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6597
6598#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6599 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6600 IEMMODE_16BIT, pCallEntry->idxInstr); \
6601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6603
6604#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6605 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6606 IEMMODE_32BIT, pCallEntry->idxInstr); \
6607 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6608
6609#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6610 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6611 IEMMODE_32BIT, pCallEntry->idxInstr); \
6612 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6613 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6614
6615/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6616 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6617 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6618DECL_INLINE_THROW(uint32_t)
6619iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6620 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6621{
6622 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6623
6624 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6625 off = iemNativeRegFlushPendingWrites(pReNative, off);
6626
6627 /* Allocate a temporary PC register. */
6628 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6629
6630 /* Perform the addition. */
6631 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6632
6633 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6634 if (enmEffOpSize == IEMMODE_16BIT)
6635 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6636
6637 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6638/** @todo we can skip this in 32-bit FLAT mode. */
6639 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6640
6641 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6642
6643 /* Free but don't flush the PC register. */
6644 iemNativeRegFreeTmp(pReNative, idxPcReg);
6645
6646 return off;
6647}
6648
6649
6650#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6651 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6652 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6653
6654#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6655 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6656 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6657 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6658
6659#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6660 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6661 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6662
6663#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6664 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6665 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6666 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6667
6668#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6671
6672#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6676
6677/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6678DECL_INLINE_THROW(uint32_t)
6679iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6680 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6681{
6682 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6683 off = iemNativeRegFlushPendingWrites(pReNative, off);
6684
6685 /* Allocate a temporary PC register. */
6686 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6687
6688 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6689 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6690 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6691 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6692 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6693
6694 /* Free but don't flush the PC register. */
6695 iemNativeRegFreeTmp(pReNative, idxPcReg);
6696
6697 return off;
6698}
6699
6700
6701
6702/*********************************************************************************************************************************
6703* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6704*********************************************************************************************************************************/
6705
6706/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6707#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6708 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6709
6710/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6711#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6712 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6713
6714/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6715#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6716 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6717
6718/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6719 * clears flags. */
6720#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6721 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6722 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6723
6724/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6725 * clears flags. */
6726#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6727 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6728 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6729
6730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6731 * clears flags. */
6732#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6733 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6734 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6735
6736#undef IEM_MC_SET_RIP_U16_AND_FINISH
6737
6738
6739/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6740#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6741 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6742
6743/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6744#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6745 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6746
6747/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6748 * clears flags. */
6749#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6750 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6751 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6752
6753/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6754 * and clears flags. */
6755#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6756 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6757 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6758
6759#undef IEM_MC_SET_RIP_U32_AND_FINISH
6760
6761
6762/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6763#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6764 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6765
6766/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6767 * and clears flags. */
6768#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6769 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6770 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6771
6772#undef IEM_MC_SET_RIP_U64_AND_FINISH
6773
6774
6775/** Same as iemRegRipJumpU16AndFinishNoFlags,
6776 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6777DECL_INLINE_THROW(uint32_t)
6778iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6779 uint8_t idxInstr, uint8_t cbVar)
6780{
6781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6782 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
6783
6784 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6785 off = iemNativeRegFlushPendingWrites(pReNative, off);
6786
6787 /* Get a register with the new PC loaded from idxVarPc.
6788       Note! This ASSUMES that the high bits of the GPR are zeroed. */
6789 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6790
6791 /* Check limit (may #GP(0) + exit TB). */
6792 if (!f64Bit)
6793/** @todo we can skip this test in FLAT 32-bit mode. */
6794 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6795 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6796 else if (cbVar > sizeof(uint32_t))
6797 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6798
6799 /* Store the result. */
6800 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6801
6802 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6803    /** @todo implicitly free the variable? */
6804
6805 return off;
6806}
6807
6808
6809
6810/*********************************************************************************************************************************
6811* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6812*********************************************************************************************************************************/
6813
6814#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6815 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6816
6817/**
6818 * Emits code to check if a \#NM exception should be raised.
6819 *
6820 * @returns New code buffer offset, UINT32_MAX on failure.
6821 * @param pReNative The native recompile state.
6822 * @param off The code buffer offset.
6823 * @param idxInstr The current instruction.
6824 */
6825DECL_INLINE_THROW(uint32_t)
6826iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6827{
6828 /*
6829 * Make sure we don't have any outstanding guest register writes as we may
6830     * raise an #NM and all guest registers must be up to date in CPUMCTX.
6831 *
6832 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6833 */
6834 off = iemNativeRegFlushPendingWrites(pReNative, off);
6835
6836#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6837 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6838#else
6839 RT_NOREF(idxInstr);
6840#endif
6841
6842 /* Allocate a temporary CR0 register. */
6843 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6844 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6845
6846 /*
6847     * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
6848 * return raisexcpt();
6849 */
6850 /* Test and jump. */
6851 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
6852
6853 /* Free but don't flush the CR0 register. */
6854 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6855
6856 return off;
6857}
6858
6859
6860#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
6861 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
6862
6863/**
6864 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
6865 *
6866 * @returns New code buffer offset, UINT32_MAX on failure.
6867 * @param pReNative The native recompile state.
6868 * @param off The code buffer offset.
6869 * @param idxInstr The current instruction.
6870 */
6871DECL_INLINE_THROW(uint32_t)
6872iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6873{
6874 /*
6875 * Make sure we don't have any outstanding guest register writes as we may
6876     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
6877 *
6878 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
6879 */
6880 off = iemNativeRegFlushPendingWrites(pReNative, off);
6881
6882#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6883 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6884#else
6885 RT_NOREF(idxInstr);
6886#endif
6887
6888 /* Allocate a temporary CR0 and CR4 register. */
6889 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
6890 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
6891 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
6892 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
6893
6894    /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
6895 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
6896 * actual performance benefit first). */
6897 /*
6898 * if (cr0 & X86_CR0_EM)
6899 * return raisexcpt();
6900 */
6901 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM, idxLabelRaiseUd);
6902 /*
6903 * if (!(cr4 & X86_CR4_OSFXSR))
6904 * return raisexcpt();
6905 */
6906 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR, idxLabelRaiseUd);
6907 /*
6908 * if (cr0 & X86_CR0_TS)
6909 * return raisexcpt();
6910 */
6911 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_TS, idxLabelRaiseNm);
6912
6913 /* Free but don't flush the CR0 and CR4 register. */
6914 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
6915 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
6916
6917 return off;
6918}
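/*
 * For illustration, the checks emitted above behave roughly like the following C
 * sketch (the jump targets are the shared RaiseUd/RaiseNm tail labels); a
 * hypothetical MC block would trigger it via IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT():
 *
 *      if (cr0 & X86_CR0_EM)        goto RaiseUd;
 *      if (!(cr4 & X86_CR4_OSFXSR)) goto RaiseUd;
 *      if (cr0 & X86_CR0_TS)        goto RaiseNm;
 *      // fall through into the recompiled SSE instruction
 */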
6919
6920
6921
6922/*********************************************************************************************************************************
6923* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
6924*********************************************************************************************************************************/
6925
6926/**
6927 * Pushes an IEM_MC_IF_XXX onto the condition stack.
6928 *
6929 * @returns Pointer to the condition stack entry on success, NULL on failure
6930 * (too many nestings)
6931 */
6932DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
6933{
6934 uint32_t const idxStack = pReNative->cCondDepth;
6935 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
6936
6937 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
6938 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
6939
6940 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
6941 pEntry->fInElse = false;
6942 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
6943 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
6944
6945 return pEntry;
6946}
6947
6948
6949/**
6950 * Start of the if-block, snapshotting the register and variable state.
6951 */
6952DECL_INLINE_THROW(void)
6953iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
6954{
6955 Assert(offIfBlock != UINT32_MAX);
6956 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6957 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6958 Assert(!pEntry->fInElse);
6959
6960 /* Define the start of the IF block if requested or for disassembly purposes. */
6961 if (idxLabelIf != UINT32_MAX)
6962 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
6963#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6964 else
6965 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
6966#else
6967 RT_NOREF(offIfBlock);
6968#endif
6969
6970 /* Copy the initial state so we can restore it in the 'else' block. */
6971 pEntry->InitialState = pReNative->Core;
6972}
6973
6974
6975#define IEM_MC_ELSE() } while (0); \
6976 off = iemNativeEmitElse(pReNative, off); \
6977 do {
6978
6979/** Emits code related to IEM_MC_ELSE. */
6980DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6981{
6982 /* Check sanity and get the conditional stack entry. */
6983 Assert(off != UINT32_MAX);
6984 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
6985 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
6986 Assert(!pEntry->fInElse);
6987
6988 /* Jump to the endif */
6989 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
6990
6991 /* Define the else label and enter the else part of the condition. */
6992 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
6993 pEntry->fInElse = true;
6994
6995 /* Snapshot the core state so we can do a merge at the endif and restore
6996 the snapshot we took at the start of the if-block. */
6997 pEntry->IfFinalState = pReNative->Core;
6998 pReNative->Core = pEntry->InitialState;
6999
7000 return off;
7001}
7002
7003
7004#define IEM_MC_ENDIF() } while (0); \
7005 off = iemNativeEmitEndIf(pReNative, off)
7006
7007/** Emits code related to IEM_MC_ENDIF. */
7008DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7009{
7010 /* Check sanity and get the conditional stack entry. */
7011 Assert(off != UINT32_MAX);
7012 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7013 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7014
7015 /*
7016 * Now we have to find common ground with the core state at the end of the
7017 * if-block (or at the start of the if when there's no else). Use the smallest
7018 * common denominator and just drop anything that isn't the same in both states.
7019 */
7020 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
7021 * which is why we're doing this at the end of the else-block.
7022 * But we'd need more info about the future for that to be worth the effort. */
7023 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
7024 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
7025 {
7026 /* shadow guest stuff first. */
7027 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
7028 if (fGstRegs)
7029 {
7030 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
7031 do
7032 {
7033 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
7034 fGstRegs &= ~RT_BIT_64(idxGstReg);
7035
7036 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
7037 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
7038 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
7039 {
7040 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
7041 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
7042 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
7043 }
7044 } while (fGstRegs);
7045 }
7046 else
7047 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
7048
7049 /* Check variables next. For now we must require them to be identical
7050 or stuff we can recreate. */
7051 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
7052 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
7053 if (fVars)
7054 {
7055 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
7056 do
7057 {
7058 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
7059 fVars &= ~RT_BIT_32(idxVar);
7060
7061 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
7062 {
7063 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
7064 continue;
7065 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7066 {
7067 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7068 if (idxHstReg != UINT8_MAX)
7069 {
7070 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7071 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7072 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
7073 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7074 }
7075 continue;
7076 }
7077 }
7078 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
7079 continue;
7080
7081 /* Irreconcilable, so drop it. */
7082 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7083 if (idxHstReg != UINT8_MAX)
7084 {
7085 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7086 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7087 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
7088 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7089 }
7090 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7091 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7092 } while (fVars);
7093 }
7094
7095 /* Finally, check that the host register allocations match. */
7096 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
7097 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
7098 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
7099 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
7100 }
7101
7102 /*
7103 * Define the endif label and maybe the else one if we're still in the 'if' part.
7104 */
7105 if (!pEntry->fInElse)
7106 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7107 else
7108 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
7109 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
7110
7111 /* Pop the conditional stack. */
7112 pReNative->cCondDepth -= 1;
7113
7114 return off;
7115}
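/*
 * Usage illustration (hypothetical MC block): the conditional macros expand to
 * do/while scopes bracketed by the emitters above, so a sequence like
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * pushes one condition stack entry, snapshots the core state at the start of
 * the if-block, restores that snapshot for the else-block, and reconciles and
 * pops it in iemNativeEmitEndIf above.
 */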
7116
7117
7118#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
7119 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
7120 do {
7121
7122/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
7123DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7124{
7125 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7126
7127 /* Get the eflags. */
7128 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7129 kIemNativeGstRegUse_ReadOnly);
7130
7131 /* Test and jump. */
7132 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7133
7134 /* Free but don't flush the EFlags register. */
7135 iemNativeRegFreeTmp(pReNative, idxEflReg);
7136
7137 /* Make a copy of the core state now as we start the if-block. */
7138 iemNativeCondStartIfBlock(pReNative, off);
7139
7140 return off;
7141}
7142
7143
7144#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
7145 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
7146 do {
7147
7148/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
7149DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7150{
7151 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7152
7153 /* Get the eflags. */
7154 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7155 kIemNativeGstRegUse_ReadOnly);
7156
7157 /* Test and jump. */
7158 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7159
7160 /* Free but don't flush the EFlags register. */
7161 iemNativeRegFreeTmp(pReNative, idxEflReg);
7162
7163 /* Make a copy of the core state now as we start the if-block. */
7164 iemNativeCondStartIfBlock(pReNative, off);
7165
7166 return off;
7167}
7168
7169
7170#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7171 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7172 do {
7173
7174/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7175DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7176{
7177 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7178
7179 /* Get the eflags. */
7180 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7181 kIemNativeGstRegUse_ReadOnly);
7182
7183 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7184 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7185
7186 /* Test and jump. */
7187 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7188
7189 /* Free but don't flush the EFlags register. */
7190 iemNativeRegFreeTmp(pReNative, idxEflReg);
7191
7192 /* Make a copy of the core state now as we start the if-block. */
7193 iemNativeCondStartIfBlock(pReNative, off);
7194
7195 return off;
7196}
7197
7198
7199#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7200 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7201 do {
7202
7203/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7204DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7205{
7206 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7207
7208 /* Get the eflags. */
7209 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7210 kIemNativeGstRegUse_ReadOnly);
7211
7212 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7213 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7214
7215 /* Test and jump. */
7216 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7217
7218 /* Free but don't flush the EFlags register. */
7219 iemNativeRegFreeTmp(pReNative, idxEflReg);
7220
7221 /* Make a copy of the core state now as we start the if-block. */
7222 iemNativeCondStartIfBlock(pReNative, off);
7223
7224 return off;
7225}
7226
7227
7228#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7229 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7230 do {
7231
7232#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7233 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7234 do {
7235
7236/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7237DECL_INLINE_THROW(uint32_t)
7238iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7239 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7240{
7241 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7242
7243 /* Get the eflags. */
7244 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7245 kIemNativeGstRegUse_ReadOnly);
7246
7247 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7248 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7249
7250 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7251 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7252 Assert(iBitNo1 != iBitNo2);
7253
7254#ifdef RT_ARCH_AMD64
7255 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7256
7257 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7258 if (iBitNo1 > iBitNo2)
7259 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7260 else
7261 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7262 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7263
7264#elif defined(RT_ARCH_ARM64)
7265 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7266 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7267
7268 /* and tmpreg, eflreg, #1<<iBitNo1 */
7269 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7270
7271 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7272 if (iBitNo1 > iBitNo2)
7273 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7274 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7275 else
7276 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7277 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7278
7279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7280
7281#else
7282# error "Port me"
7283#endif
7284
7285 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7286 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7287 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7288
7289 /* Free but don't flush the EFlags and tmp registers. */
7290 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7291 iemNativeRegFreeTmp(pReNative, idxEflReg);
7292
7293 /* Make a copy of the core state now as we start the if-block. */
7294 iemNativeCondStartIfBlock(pReNative, off);
7295
7296 return off;
7297}
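/*
 * Worked example of the extraction trick above, assuming the typical SF (bit 7)
 * vs OF (bit 11) pairing used by signed conditions:
 *      tmp  = efl & RT_BIT_32(7);      (isolate SF)
 *      tmp <<= 11 - 7;                 (align SF with OF)
 *      tmp ^= efl;                     (bit 11 now holds SF ^ OF)
 * Testing bit 11 of tmp therefore tells whether the two flags differ, which is
 * exactly what the final bit-test-and-jump checks.
 */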
7298
7299
7300#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7301 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7302 do {
7303
7304#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7305 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7306 do {
7307
7308/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7309 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7310DECL_INLINE_THROW(uint32_t)
7311iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7312 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7313{
7314 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7315
7316 /* We need an if-block label for the non-inverted variant. */
7317 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7318 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7319
7320 /* Get the eflags. */
7321 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7322 kIemNativeGstRegUse_ReadOnly);
7323
7324 /* Translate the flag masks to bit numbers. */
7325 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7326 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7327
7328 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7329 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7330 Assert(iBitNo1 != iBitNo);
7331
7332 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7333 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7334 Assert(iBitNo2 != iBitNo);
7335 Assert(iBitNo2 != iBitNo1);
7336
7337#ifdef RT_ARCH_AMD64
7338 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7339#elif defined(RT_ARCH_ARM64)
7340 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7341#endif
7342
7343 /* Check for the lone bit first. */
7344 if (!fInverted)
7345 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7346 else
7347 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7348
7349 /* Then extract and compare the other two bits. */
7350#ifdef RT_ARCH_AMD64
7351 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7352 if (iBitNo1 > iBitNo2)
7353 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7354 else
7355 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7356 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7357
7358#elif defined(RT_ARCH_ARM64)
7359 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7360
7361 /* and tmpreg, eflreg, #1<<iBitNo1 */
7362 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7363
7364 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7365 if (iBitNo1 > iBitNo2)
7366 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7367 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7368 else
7369 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7370 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7371
7372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7373
7374#else
7375# error "Port me"
7376#endif
7377
7378 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7379 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7380 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7381
7382 /* Free but don't flush the EFlags and tmp registers. */
7383 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7384 iemNativeRegFreeTmp(pReNative, idxEflReg);
7385
7386 /* Make a copy of the core state now as we start the if-block. */
7387 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7388
7389 return off;
7390}
7391
7392
7393#define IEM_MC_IF_CX_IS_NZ() \
7394 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7395 do {
7396
7397/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7398DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7399{
7400 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7401
7402 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7403 kIemNativeGstRegUse_ReadOnly);
7404 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7405 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7406
7407 iemNativeCondStartIfBlock(pReNative, off);
7408 return off;
7409}
7410
7411
7412#define IEM_MC_IF_ECX_IS_NZ() \
7413 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7414 do {
7415
7416#define IEM_MC_IF_RCX_IS_NZ() \
7417 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7418 do {
7419
7420/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7421DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7422{
7423 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7424
7425 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7426 kIemNativeGstRegUse_ReadOnly);
7427 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7428 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7429
7430 iemNativeCondStartIfBlock(pReNative, off);
7431 return off;
7432}
7433
7434
7435#define IEM_MC_IF_CX_IS_NOT_ONE() \
7436 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7437 do {
7438
7439/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7440DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7441{
7442 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7443
7444 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7445 kIemNativeGstRegUse_ReadOnly);
7446#ifdef RT_ARCH_AMD64
7447 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7448#else
7449 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7450 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7451 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7452#endif
7453 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7454
7455 iemNativeCondStartIfBlock(pReNative, off);
7456 return off;
7457}
7458
7459
7460#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7461 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7462 do {
7463
7464#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7465 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7466 do {
7467
7468/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7469DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7470{
7471 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7472
7473 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7474 kIemNativeGstRegUse_ReadOnly);
7475 if (f64Bit)
7476 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7477 else
7478 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7479 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7480
7481 iemNativeCondStartIfBlock(pReNative, off);
7482 return off;
7483}
7484
7485
7486#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7487 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7488 do {
7489
7490#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7491 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7492 do {
7493
7494/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7495 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7496DECL_INLINE_THROW(uint32_t)
7497iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7498{
7499 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7500
7501 /* We have to load both RCX and EFLAGS before we can start branching,
7502 otherwise we'll end up in the else-block with an inconsistent
7503 register allocator state.
7504 Doing EFLAGS first as it's more likely to be loaded, right? */
7505 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7506 kIemNativeGstRegUse_ReadOnly);
7507 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7508 kIemNativeGstRegUse_ReadOnly);
7509
7510 /** @todo we could reduce this to a single branch instruction by spending a
7511 * temporary register and some setnz stuff. Not sure if loops are
7512 * worth it. */
7513 /* Check CX. */
7514#ifdef RT_ARCH_AMD64
7515 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7516#else
7517 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7518 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7519 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7520#endif
7521
7522 /* Check the EFlags bit. */
7523 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7524 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7525 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7526 !fCheckIfSet /*fJmpIfSet*/);
7527
7528 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7529 iemNativeRegFreeTmp(pReNative, idxEflReg);
7530
7531 iemNativeCondStartIfBlock(pReNative, off);
7532 return off;
7533}
7534
7535
7536#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7537 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7538 do {
7539
7540#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7541 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7542 do {
7543
7544#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7545 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7546 do {
7547
7548#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7549 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7550 do {
7551
7552/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7553 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7554 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7555 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7556DECL_INLINE_THROW(uint32_t)
7557iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7558 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7559{
7560 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
7561
7562 /* We have to load both RCX and EFLAGS before we can start branching,
7563 otherwise we'll end up in the else-block with an inconsistent
7564 register allocator state.
7565 Doing EFLAGS first as it's more likely to be loaded, right? */
7566 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7567 kIemNativeGstRegUse_ReadOnly);
7568 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7569 kIemNativeGstRegUse_ReadOnly);
7570
7571 /** @todo we could reduce this to a single branch instruction by spending a
7572 * temporary register and some setnz stuff. Not sure if loops are
7573 * worth it. */
7574 /* Check RCX/ECX. */
7575 if (f64Bit)
7576 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7577 else
7578 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7579
7580 /* Check the EFlags bit. */
7581 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7582 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7583 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7584 !fCheckIfSet /*fJmpIfSet*/);
7585
7586 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7587 iemNativeRegFreeTmp(pReNative, idxEflReg);
7588
7589 iemNativeCondStartIfBlock(pReNative, off);
7590 return off;
7591}
7592
7593
7594
7595/*********************************************************************************************************************************
7596* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7597*********************************************************************************************************************************/
7598/** Number of hidden arguments for CIMPL calls.
7599 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7600#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7601# define IEM_CIMPL_HIDDEN_ARGS 3
7602#else
7603# define IEM_CIMPL_HIDDEN_ARGS 2
7604#endif
7605
7606#define IEM_MC_NOREF(a_Name) \
7607 RT_NOREF_PV(a_Name)
7608
7609#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7610 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7611
7612#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7613 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7614
7615#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7616 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7617
7618#define IEM_MC_LOCAL(a_Type, a_Name) \
7619 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7620
7621#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7622 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7623
7624
7625/**
7626 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7627 */
7628DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7629{
7630 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7631 return IEM_CIMPL_HIDDEN_ARGS;
7632 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7633 return 1;
7634 return 0;
7635}
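/*
 * Example (illustrative, 'u16Value' is not a name from this file): for a
 * CIMPL-calling MC block the hidden argument count is added to every
 * IEM_MC_ARG index, so IEM_MC_ARG(uint16_t, u16Value, 0) with two hidden
 * arguments really occupies call argument slot 2; the first slots stay
 * reserved for the hidden parameters passed to the C implementation.
 */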
7636
7637
7638/**
7639 * Internal work that allocates a variable with kind set to
7640 * kIemNativeVarKind_Invalid and no current stack allocation.
7641 *
7642 * The kind will either be set by the caller or later when the variable is first
7643 * assigned a value.
7644 *
7645 * @returns Unpacked index.
7646 * @internal
7647 */
7648static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7649{
7650 Assert(cbType > 0 && cbType <= 64);
7651 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7652 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7653 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7654 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7655 pReNative->Core.aVars[idxVar].cbVar = cbType;
7656 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7657 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7658 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7659 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7660 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7661 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7662 pReNative->Core.aVars[idxVar].u.uValue = 0;
7663 return idxVar;
7664}
7665
7666
7667/**
7668 * Internal work that allocates an argument variable w/o setting enmKind.
7669 *
7670 * @returns Unpacked index.
7671 * @internal
7672 */
7673static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7674{
7675 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7676 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7677 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7678
7679 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7680 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7681 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7682 return idxVar;
7683}
7684
7685
7686/**
7687 * Gets the stack slot for a stack variable, allocating one if necessary.
7688 *
7689 * Calling this function implies that the stack slot will contain a valid
7690 * variable value. The caller deals with any register currently assigned to the
7691 * variable, typically by spilling it into the stack slot.
7692 *
7693 * @returns The stack slot number.
7694 * @param pReNative The recompiler state.
7695 * @param idxVar The variable.
7696 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7697 */
7698DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7699{
7700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7701 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7702 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7703
7704 /* Already got a slot? */
7705 uint8_t const idxStackSlot = pVar->idxStackSlot;
7706 if (idxStackSlot != UINT8_MAX)
7707 {
7708 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7709 return idxStackSlot;
7710 }
7711
7712 /*
7713 * A single slot is easy to allocate.
7714 * Allocate them from the top end, closest to BP, to reduce the displacement.
7715 */
7716 if (pVar->cbVar <= sizeof(uint64_t))
7717 {
7718 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7719 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7720 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7721 pVar->idxStackSlot = (uint8_t)iSlot;
7722 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7723 return (uint8_t)iSlot;
7724 }
7725
7726 /*
7727 * We need more than one stack slot.
7728 *
7729 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7730 */
7731 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7732 Assert(pVar->cbVar <= 64);
7733 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7734 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7735 uint32_t bmStack = ~pReNative->Core.bmStack;
7736 while (bmStack != UINT32_MAX)
7737 {
7738/** @todo allocate from the top to reduce BP displacement. */
7739 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7740 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7741 if (!(iSlot & fBitAlignMask))
7742 {
7743 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7744 {
7745 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7746 pVar->idxStackSlot = (uint8_t)iSlot;
7747 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7748 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7749 return (uint8_t)iSlot;
7750 }
7751 }
7752 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7753 }
7754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7755}
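/*
 * Worked example for the multi-slot path above: a 16 byte variable yields
 * fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(16) - 4) - 1 = 1 and
 * fBitAllocMask = RT_BIT_32((16 + 7) >> 3) - 1 = 3, i.e. two consecutive
 * 8-byte slots allocated at an even slot index.
 */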
7756
7757
7758/**
7759 * Changes the variable to a stack variable.
7760 *
7761 * Currently this is only possible to do the first time the variable is used;
7762 * switching later can be implemented but hasn't been done.
7763 *
7764 * @param pReNative The recompiler state.
7765 * @param idxVar The variable.
7766 * @throws VERR_IEM_VAR_IPE_2
7767 */
7768static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7769{
7770 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7771 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7772 if (pVar->enmKind != kIemNativeVarKind_Stack)
7773 {
7774 /* We could in theory transition from immediate to stack as well, but it
7775 would involve the caller doing work storing the value on the stack. So,
7776 till that's required we only allow transition from invalid. */
7777 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7778 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7779 pVar->enmKind = kIemNativeVarKind_Stack;
7780
7781 /* Note! We don't allocate a stack slot here, that's only done when a
7782 slot is actually needed to hold a variable value. */
7783 }
7784}
7785
7786
7787/**
7788 * Sets the variable to a constant (immediate) value.
7789 *
7790 * This does not require stack storage as we know the value and can always
7791 * reload it, unless of course it's referenced.
7792 *
7793 * @param pReNative The recompiler state.
7794 * @param idxVar The variable.
7795 * @param uValue The immediate value.
7796 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7797 */
7798static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7799{
7800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7801 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7802 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7803 {
7804 /* Only simple transitions for now. */
7805 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7806 pVar->enmKind = kIemNativeVarKind_Immediate;
7807 }
7808 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7809
7810 pVar->u.uValue = uValue;
7811 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7812 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7813 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7814}
7815
7816
7817/**
7818 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7819 *
7820 * This does not require stack storage as we know the value and can always
7821 * reload it. Loading is postponed till needed.
7822 *
7823 * @param pReNative The recompiler state.
7824 * @param idxVar The variable. Unpacked.
7825 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7826 *
7827 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7828 * @internal
7829 */
7830static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7831{
7832 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7833 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7834
7835 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7836 {
7837 /* Only simple transitions for now. */
7838 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7839 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7840 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7841 }
7842 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7843
7844 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7845
7846 /* Update the other variable, ensure it's a stack variable. */
7847 /** @todo handle variables with const values... that'll go boom now. */
7848 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7849 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7850}
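/*
 * Illustration (typical MC block pattern, names not taken from this file):
 *      IEM_MC_LOCAL(uint32_t, EFlags);
 *      IEM_MC_ARG_LOCAL_REF(uint32_t *, pEFlags, EFlags, 2);
 * The second statement ends up here, forcing 'EFlags' to be a stack variable so
 * that 'pEFlags' can later be materialized as a pointer to its stack slot.
 */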
7851
7852
7853/**
7854 * Sets the variable to a reference (pointer) to a guest register reference.
7855 *
7856 * This does not require stack storage as we know the value and can always
7857 * reload it. Loading is postponed till needed.
7858 *
7859 * @param pReNative The recompiler state.
7860 * @param idxVar The variable.
7861 * @param enmRegClass The class guest registers to reference.
7862 * @param idxReg The register within @a enmRegClass to reference.
7863 *
7864 * @throws VERR_IEM_VAR_IPE_2
7865 */
7866static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7867 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7868{
7869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7870 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7871
7872 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7873 {
7874 /* Only simple transitions for now. */
7875 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7876 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7877 }
7878 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7879
7880 pVar->u.GstRegRef.enmClass = enmRegClass;
7881 pVar->u.GstRegRef.idx = idxReg;
7882}
7883
7884
7885DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7886{
7887 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7888}
7889
7890
7891DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7892{
7893 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7894
7895 /* Since we're using a generic uint64_t value type, we must truncate it if
7896 the variable is smaller, otherwise we may end up with too large a value when
7897 scaling up an imm8 w/ sign-extension.
7898
7899 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7900 in the bios, bx=1) when running on arm, because clang expects 16-bit
7901 register parameters to have bits 16 and up set to zero. Instead of
7902 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7903 CF value in the result. */
7904 switch (cbType)
7905 {
7906 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7907 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7908 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7909 }
7910 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7911 return idxVar;
7912}
7913
7914
7915DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7916{
7917 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7918 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7919 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7920 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7921 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7922 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7923
7924 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7925 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7926 return idxArgVar;
7927}
7928
7929
7930DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7931{
7932 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7933 /* Don't set to stack now, leave that to the first use as for instance
7934 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7935 return idxVar;
7936}
7937
7938
7939DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7940{
7941 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7942
7943 /* Since we're using a generic uint64_t value type, we must truncate it if
7944 the variable is smaller, otherwise we may end up with too large a value when
7945 scaling up an imm8 w/ sign-extension. */
7946 switch (cbType)
7947 {
7948 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7949 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7950 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7951 }
7952 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7953 return idxVar;
7954}
7955
7956
7957/**
7958 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7959 * fixed till we call iemNativeVarRegisterRelease.
7960 *
7961 * @returns The host register number.
7962 * @param pReNative The recompiler state.
7963 * @param idxVar The variable.
7964 * @param poff Pointer to the instruction buffer offset.
7965 * In case a register needs to be freed up or the value
7966 * loaded off the stack.
7967 * @param fInitialized Set if the variable must already have been initialized.
7968 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7969 * the case.
7970 * @param idxRegPref Preferred register number or UINT8_MAX.
7971 */
7972DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7973 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7974{
7975 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7976 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7977 Assert(pVar->cbVar <= 8);
7978 Assert(!pVar->fRegAcquired);
7979
7980 uint8_t idxReg = pVar->idxReg;
7981 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7982 {
7983 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7984 && pVar->enmKind < kIemNativeVarKind_End);
7985 pVar->fRegAcquired = true;
7986 return idxReg;
7987 }
7988
7989 /*
7990 * If the kind of variable has not yet been set, default to 'stack'.
7991 */
7992 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7993 && pVar->enmKind < kIemNativeVarKind_End);
7994 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7995 iemNativeVarSetKindToStack(pReNative, idxVar);
7996
7997 /*
7998 * We have to allocate a register for the variable, even if it's a stack one,
7999 * as we don't know if there are modifications being made to it before it's
8000 * finalized (todo: analyze and insert hints about that?).
8001 *
8002 * If we can, we try to get the correct register for argument variables. This
8003 * is assuming that most argument variables are fetched as close as possible
8004 * to the actual call, so that there aren't any interfering hidden calls
8005 * (memory accesses, etc.) in between.
8006 *
8007 * If we cannot, or it's not an argument variable, we make sure no argument
8008 * registers that will be used by this MC block are allocated here, and we
8009 * always prefer non-volatile registers to avoid needing to spill stuff for
8010 * internal calls.
8011 */
8012 /** @todo Detect too early argument value fetches and warn about hidden
8013 * calls causing less optimal code to be generated in the python script. */
8014
8015 uint8_t const uArgNo = pVar->uArgNo;
8016 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8017 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8018 {
8019 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8020 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8021 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8022 }
8023 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8024 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8025 {
8026 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8027 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8028 & ~pReNative->Core.bmHstRegsWithGstShadow
8029 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8030 & fNotArgsMask;
8031 if (fRegs)
8032 {
8033 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
8034 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8038 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8039 }
8040 else
8041 {
8042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8043 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8044 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8045 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8046 }
8047 }
8048 else
8049 {
8050 idxReg = idxRegPref;
8051 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8052 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8053 }
8054 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8055 pVar->idxReg = idxReg;
8056
8057 /*
8058 * Load it off the stack if we've got a stack slot.
8059 */
8060 uint8_t const idxStackSlot = pVar->idxStackSlot;
8061 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8062 {
8063 Assert(fInitialized);
8064 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8065 switch (pVar->cbVar)
8066 {
8067 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8068 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8069 case 3: AssertFailed(); RT_FALL_THRU();
8070 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8071 default: AssertFailed(); RT_FALL_THRU();
8072 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8073 }
8074 }
8075 else
8076 {
8077 Assert(idxStackSlot == UINT8_MAX);
8078 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8079 }
8080 pVar->fRegAcquired = true;
8081 return idxReg;
8082}
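/*
 * Typical usage sketch (illustrative only; 'uSomeValue' is a made-up name):
 * acquire the host register, emit whatever code operates on it, then release
 * it again so the allocator may reuse or spill it:
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxVarReg, uSomeValue);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */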
8083
8084
8085/**
8086 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8087 * guest register.
8088 *
8089 * This function makes sure there is a register for it and sets it to be the
8090 * current shadow copy of @a enmGstReg.
8091 *
8092 * @returns The host register number.
8093 * @param pReNative The recompiler state.
8094 * @param idxVar The variable.
8095 * @param enmGstReg The guest register this variable will be written to
8096 * after this call.
8097 * @param poff Pointer to the instruction buffer offset.
8098 * In case a register needs to be freed up or if the
8099 * variable content needs to be loaded off the stack.
8100 *
8101 * @note We DO NOT expect @a idxVar to be an argument variable,
8102 * because we can only be in the commit stage of an instruction when this
8103 * function is used.
8104 */
8105DECL_HIDDEN_THROW(uint8_t)
8106iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8107{
8108 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8109 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8110 Assert(!pVar->fRegAcquired);
8111 AssertMsgStmt( pVar->cbVar <= 8
8112 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8113 || pVar->enmKind == kIemNativeVarKind_Stack),
8114 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8115 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8116 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8117
8118 /*
8119 * This shouldn't ever be used for arguments, unless it's in a weird else
8120 * branch that doesn't do any calling and even then it's questionable.
8121 *
8122 * However, in case someone writes crazy wrong MC code and does register
8123 * updates before making calls, just use the regular register allocator to
8124 * ensure we get a register suitable for the intended argument number.
8125 */
8126 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8127
8128 /*
8129 * If there is already a register for the variable, we transfer/set the
8130 * guest shadow copy assignment to it.
8131 */
8132 uint8_t idxReg = pVar->idxReg;
8133 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8134 {
8135 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8136 {
8137 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8138 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8139 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8140 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8141 }
8142 else
8143 {
8144 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8145 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8146 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8147 }
8148 /** @todo figure this one out. We need some way of making sure the register isn't
8149 * modified after this point, just in case we start writing crappy MC code. */
8150 pVar->enmGstReg = enmGstReg;
8151 pVar->fRegAcquired = true;
8152 return idxReg;
8153 }
8154 Assert(pVar->uArgNo == UINT8_MAX);
8155
8156 /*
8157 * Because this is supposed to be the commit stage, we just tag along with the
8158 * temporary register allocator and upgrade it to a variable register.
8159 */
8160 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8161 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8162 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8163 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8164 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8165 pVar->idxReg = idxReg;
8166
8167 /*
8168 * Now we need to load the register value.
8169 */
8170 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8171 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8172 else
8173 {
8174 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8175 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8176 switch (pVar->cbVar)
8177 {
8178 case sizeof(uint64_t):
8179 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8180 break;
8181 case sizeof(uint32_t):
8182 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8183 break;
8184 case sizeof(uint16_t):
8185 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8186 break;
8187 case sizeof(uint8_t):
8188 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8189 break;
8190 default:
8191 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8192 }
8193 }
8194
8195 pVar->fRegAcquired = true;
8196 return idxReg;
8197}
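/*
 * Commit-stage sketch (illustrative): when a variable's value is about to be
 * written fully to, say, guest xAX, the store emitter would do roughly:
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar,
 *                                                                       IEMNATIVEGSTREG_GPR(X86_GREG_xAX), &off);
 * after which idxVarReg both holds the value and is the current shadow of xAX,
 * so no separate copy into a guest shadow register is needed.
 */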
8198
8199
8200/**
8201 * Sets the host register for @a idxVarRc to @a idxReg.
8202 *
8203 * The register must not be allocated. Any guest register shadowing will be
8204 * implicitly dropped by this call.
8205 *
8206 * The variable must not have any register associated with it (causes
8207 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8208 * implied.
8209 *
8210 * @returns idxReg
8211 * @param pReNative The recompiler state.
8212 * @param idxVar The variable.
8213 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8214 * @param off For recording in debug info.
8215 *
8216 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8217 */
8218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8219{
8220 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8221 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8222 Assert(!pVar->fRegAcquired);
8223 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8224 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8225 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8226
8227 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8228 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8229
8230 iemNativeVarSetKindToStack(pReNative, idxVar);
8231 pVar->idxReg = idxReg;
8232
8233 return idxReg;
8234}
8235
8236
8237/**
8238 * A convenient helper function.
8239 */
8240DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8241 uint8_t idxReg, uint32_t *poff)
8242{
8243 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8244 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
8245 return idxReg;
8246}
8247
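/*
 * Illustrative usage sketch (an assumption for illustration, not lifted from a
 * real call site; pfnSomeHelper and idxVarRc are placeholder names): capturing
 * a helper call's status code in a variable, with IEMNATIVE_CALL_RET_GREG known
 * to be unallocated right after the call:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 *      uint8_t const idxRegRc = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *      ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarRc);
 */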
8248
8249/**
8250 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8251 *
8252 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8253 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8254 * requirement of flushing anything in volatile host registers when making a
8255 * call.
8256 *
8257 * @returns New @a off value.
8258 * @param pReNative The recompiler state.
8259 * @param off The code buffer position.
8260 * @param fHstRegsNotToSave Set of registers not to save & restore.
8261 */
8262DECL_HIDDEN_THROW(uint32_t)
8263iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8264{
8265 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8266 if (fHstRegs)
8267 {
8268 do
8269 {
8270 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8271 fHstRegs &= ~RT_BIT_32(idxHstReg);
8272
8273 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8274 {
8275 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8276 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8277 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8278 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8279 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8280 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8281 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8282 {
8283 case kIemNativeVarKind_Stack:
8284 {
8285 /* Temporarily spill the variable register. */
8286 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8287 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8288 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8289 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8290 continue;
8291 }
8292
8293 case kIemNativeVarKind_Immediate:
8294 case kIemNativeVarKind_VarRef:
8295 case kIemNativeVarKind_GstRegRef:
8296 /* It is weird to have any of these loaded at this point. */
8297 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8298 continue;
8299
8300 case kIemNativeVarKind_End:
8301 case kIemNativeVarKind_Invalid:
8302 break;
8303 }
8304 AssertFailed();
8305 }
8306 else
8307 {
8308 /*
8309 * Allocate a temporary stack slot and spill the register to it.
8310 */
8311 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8312 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8313 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8314 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8315 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8316 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8317 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8318 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8319 }
8320 } while (fHstRegs);
8321 }
8322 return off;
8323}
8324
8325
8326/**
8327 * Emit code to restore volatile registers after a call to a helper (TLB miss).
8328 *
8329 * @returns New @a off value.
8330 * @param pReNative The recompiler state.
8331 * @param off The code buffer position.
8332 * @param fHstRegsNotToSave Set of registers not to save & restore.
8333 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8334 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8335 */
8336DECL_HIDDEN_THROW(uint32_t)
8337iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8338{
8339 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8340 if (fHstRegs)
8341 {
8342 do
8343 {
8344 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8345 fHstRegs &= ~RT_BIT_32(idxHstReg);
8346
8347 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8348 {
8349 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8350 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8351 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8352 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8353 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8354 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8355 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8356 {
8357 case kIemNativeVarKind_Stack:
8358 {
8359 /* Unspill the variable register. */
8360 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8361 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8362 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8363 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8364 continue;
8365 }
8366
8367 case kIemNativeVarKind_Immediate:
8368 case kIemNativeVarKind_VarRef:
8369 case kIemNativeVarKind_GstRegRef:
8370 /* It is weird to have any of these loaded at this point. */
8371 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8372 continue;
8373
8374 case kIemNativeVarKind_End:
8375 case kIemNativeVarKind_Invalid:
8376 break;
8377 }
8378 AssertFailed();
8379 }
8380 else
8381 {
8382 /*
8383 * Restore from temporary stack slot.
8384 */
8385 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8386 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8387 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8388 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8389
8390 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8391 }
8392 } while (fHstRegs);
8393 }
8394 return off;
8395}
8396
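/*
 * Illustrative sketch of the intended save/call/restore bracket (an assumption
 * for illustration; pfnTlbMissHelper and fHstRegsNotToSave are placeholders):
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */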
8397
8398/**
8399 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8400 *
8401 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8402 *
8403 * ASSUMES that @a idxVar is valid and unpacked.
8404 */
8405DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8406{
8407 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8408 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8409 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8410 {
8411 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8412 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8413 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8414 Assert(cSlots > 0);
8415 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8416 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8417 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8418 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8419 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8420 }
8421 else
8422 Assert(idxStackSlot == UINT8_MAX);
8423}
8424
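/*
 * Worked example of the slot mask arithmetic above (for illustration only):
 * for a 16 byte variable, cSlots = (16 + 8 - 1) / 8 = 2 and
 * fAllocMask = RT_BIT_32(2) - 1 = 0x3, so a variable starting at stack slot 4
 * clears bits 5:4 of Core.bmStack when freed.
 */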
8425
8426/**
8427 * Worker that frees a single variable.
8428 *
8429 * ASSUMES that @a idxVar is valid and unpacked.
8430 */
8431DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8432{
8433 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8434 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8435 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8436
8437 /* Free the host register first if any assigned. */
8438 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8439 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8440 {
8441 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8442 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8443 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8444 }
8445
8446 /* Free argument mapping. */
8447 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8448 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8449 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8450
8451 /* Free the stack slots. */
8452 iemNativeVarFreeStackSlots(pReNative, idxVar);
8453
8454 /* Free the actual variable. */
8455 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8456 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8457}
8458
8459
8460/**
8461 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8462 */
8463DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8464{
8465 while (bmVars != 0)
8466 {
8467 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8468 bmVars &= ~RT_BIT_32(idxVar);
8469
8470#if 1 /** @todo optimize by simplifying this later... */
8471 iemNativeVarFreeOneWorker(pReNative, idxVar);
8472#else
8473 /* Only need to free the host register, the rest is done as bulk updates below. */
8474 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8475 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8476 {
8477 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8478 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8479 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8480 }
8481#endif
8482 }
8483#if 0 /** @todo optimize by simplifying this later... */
8484 pReNative->Core.bmVars = 0;
8485 pReNative->Core.bmStack = 0;
8486 pReNative->Core.u64ArgVars = UINT64_MAX;
8487#endif
8488}
8489
8490
8491/**
8492 * This is called by IEM_MC_END() to clean up all variables.
8493 */
8494DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8495{
8496 uint32_t const bmVars = pReNative->Core.bmVars;
8497 if (bmVars != 0)
8498 iemNativeVarFreeAllSlow(pReNative, bmVars);
8499 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8500 Assert(pReNative->Core.bmStack == 0);
8501}
8502
8503
8504#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8505
8506/**
8507 * This is called by IEM_MC_FREE_LOCAL.
8508 */
8509DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8510{
8511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8512 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
8513 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8514}
8515
8516
8517#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8518
8519/**
8520 * This is called by IEM_MC_FREE_ARG.
8521 */
8522DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8523{
8524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8525 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8526 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8527}
8528
8529
8530#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8531
8532/**
8533 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8534 */
8535DECL_INLINE_THROW(uint32_t)
8536iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8537{
8538 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8539 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
8540 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8541 Assert( pVarDst->cbVar == sizeof(uint16_t)
8542 || pVarDst->cbVar == sizeof(uint32_t));
8543
8544 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8545 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
8546 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
8547 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
8548 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8549
8550 Assert(pVarDst->cbVar < pVarSrc->cbVar);
8551
8552 /*
8553 * Special case for immediates.
8554 */
8555 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
8556 {
8557 switch (pVarDst->cbVar)
8558 {
8559 case sizeof(uint16_t):
8560 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
8561 break;
8562 case sizeof(uint32_t):
8563 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
8564 break;
8565 default: AssertFailed(); break;
8566 }
8567 }
8568 else
8569 {
8570 /*
8571 * The generic solution for now.
8572 */
8573 /** @todo optimize this by having the python script make sure the source
8574 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8575 * statement. Then we could just transfer the register assignments. */
8576 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8577 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8578 switch (pVarDst->cbVar)
8579 {
8580 case sizeof(uint16_t):
8581 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8582 break;
8583 case sizeof(uint32_t):
8584 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8585 break;
8586 default: AssertFailed(); break;
8587 }
8588 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8589 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8590 }
8591 return off;
8592}
8593
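/*
 * Illustrative MC-level usage sketch (hypothetical variable names, assuming the
 * usual IEM_MC_LOCAL declarations; not taken from an actual instruction body):
 *
 *      IEM_MC_LOCAL(uint32_t, u32Src);
 *      IEM_MC_LOCAL(uint16_t, u16Dst);
 *      ...
 *      IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src);
 */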
8594
8595
8596/*********************************************************************************************************************************
8597* Emitters for IEM_MC_CALL_CIMPL_XXX *
8598*********************************************************************************************************************************/
8599
8600/**
8601 * Emits code to load a reference to the given guest register into @a idxGprDst.
8602 */
8603DECL_INLINE_THROW(uint32_t)
8604iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8605 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8606{
8607 /*
8608 * Get the offset relative to the CPUMCTX structure.
8609 */
8610 uint32_t offCpumCtx;
8611 switch (enmClass)
8612 {
8613 case kIemNativeGstRegRef_Gpr:
8614 Assert(idxRegInClass < 16);
8615 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8616 break;
8617
8618        case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8619 Assert(idxRegInClass < 4);
8620 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8621 break;
8622
8623 case kIemNativeGstRegRef_EFlags:
8624 Assert(idxRegInClass == 0);
8625 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8626 break;
8627
8628 case kIemNativeGstRegRef_MxCsr:
8629 Assert(idxRegInClass == 0);
8630 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8631 break;
8632
8633 case kIemNativeGstRegRef_FpuReg:
8634 Assert(idxRegInClass < 8);
8635 AssertFailed(); /** @todo what kind of indexing? */
8636 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8637 break;
8638
8639 case kIemNativeGstRegRef_MReg:
8640 Assert(idxRegInClass < 8);
8641 AssertFailed(); /** @todo what kind of indexing? */
8642 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8643 break;
8644
8645 case kIemNativeGstRegRef_XReg:
8646 Assert(idxRegInClass < 16);
8647 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8648 break;
8649
8650 default:
8651 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8652 }
8653
8654 /*
8655 * Load the value into the destination register.
8656 */
8657#ifdef RT_ARCH_AMD64
8658 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8659
8660#elif defined(RT_ARCH_ARM64)
8661 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8662 Assert(offCpumCtx < 4096);
8663 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8664
8665#else
8666# error "Port me!"
8667#endif
8668
8669 return off;
8670}
8671
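/*
 * Illustrative sketch (an assumption, not from a real caller): loading the
 * address of the guest RBX register into the first call argument register:
 *
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                           kIemNativeGstRegRef_Gpr, X86_GREG_xBX);
 */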
8672
8673/**
8674 * Common code for CIMPL and AIMPL calls.
8675 *
8676 * These are calls that use argument variables and such. They should not be
8677 * confused with internal calls required to implement an MC operation,
8678 * like a TLB load and similar.
8679 *
8680 * Upon return all that is left to do is to load any hidden arguments and
8681 * perform the call. All argument variables are freed.
8682 *
8683 * @returns New code buffer offset; throws VBox status code on error.
8684 * @param pReNative The native recompile state.
8685 * @param off The code buffer offset.
8686 * @param   cArgs           The total number of arguments (includes hidden
8687 * count).
8688 * @param cHiddenArgs The number of hidden arguments. The hidden
8689 * arguments must not have any variable declared for
8690 * them, whereas all the regular arguments must
8691 * (tstIEMCheckMc ensures this).
8692 */
8693DECL_HIDDEN_THROW(uint32_t)
8694iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8695{
8696#ifdef VBOX_STRICT
8697 /*
8698 * Assert sanity.
8699 */
8700 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8701 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8702 for (unsigned i = 0; i < cHiddenArgs; i++)
8703 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8704 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8705 {
8706 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8707 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8708 }
8709 iemNativeRegAssertSanity(pReNative);
8710#endif
8711
8712 /*
8713 * Before we do anything else, go over variables that are referenced and
8714 * make sure they are not in a register.
8715 */
8716 uint32_t bmVars = pReNative->Core.bmVars;
8717 if (bmVars)
8718 {
8719 do
8720 {
8721 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8722 bmVars &= ~RT_BIT_32(idxVar);
8723
8724 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8725 {
8726 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8727 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8728 {
8729 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8730 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8731 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8732 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8733 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8734
8735 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8736 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8737 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8738 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8739 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8740 }
8741 }
8742 } while (bmVars != 0);
8743#if 0 //def VBOX_STRICT
8744 iemNativeRegAssertSanity(pReNative);
8745#endif
8746 }
8747
8748 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8749
8750 /*
8751 * First, go over the host registers that will be used for arguments and make
8752 * sure they either hold the desired argument or are free.
8753 */
8754 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8755 {
8756 for (uint32_t i = 0; i < cRegArgs; i++)
8757 {
8758 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8759 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8760 {
8761 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8762 {
8763 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8765 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8766 Assert(pVar->idxReg == idxArgReg);
8767 uint8_t const uArgNo = pVar->uArgNo;
8768 if (uArgNo == i)
8769                    { /* perfect */ }
8770 /* The variable allocator logic should make sure this is impossible,
8771 except for when the return register is used as a parameter (ARM,
8772 but not x86). */
8773#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8774 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8775 {
8776# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8777# error "Implement this"
8778# endif
8779 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8780 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8781 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8782 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8783 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8784 }
8785#endif
8786 else
8787 {
8788 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8789
8790 if (pVar->enmKind == kIemNativeVarKind_Stack)
8791 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8792 else
8793 {
8794 /* just free it, can be reloaded if used again */
8795 pVar->idxReg = UINT8_MAX;
8796 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8797 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8798 }
8799 }
8800 }
8801 else
8802 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8803 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8804 }
8805 }
8806#if 0 //def VBOX_STRICT
8807 iemNativeRegAssertSanity(pReNative);
8808#endif
8809 }
8810
8811 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8812
8813#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8814 /*
8815 * If there are any stack arguments, make sure they are in their place as well.
8816 *
8817     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8818     * the caller) will be loading it later and it must be free (see first loop).
8819 */
8820 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8821 {
8822 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8823 {
8824 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8825 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8826 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8827 {
8828 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8829 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8830 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8831 pVar->idxReg = UINT8_MAX;
8832 }
8833 else
8834 {
8835 /* Use ARG0 as temp for stuff we need registers for. */
8836 switch (pVar->enmKind)
8837 {
8838 case kIemNativeVarKind_Stack:
8839 {
8840 uint8_t const idxStackSlot = pVar->idxStackSlot;
8841 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8842 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8843 iemNativeStackCalcBpDisp(idxStackSlot));
8844 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8845 continue;
8846 }
8847
8848 case kIemNativeVarKind_Immediate:
8849 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8850 continue;
8851
8852 case kIemNativeVarKind_VarRef:
8853 {
8854 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8855 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8856 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8857 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8858 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8859 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8860 {
8861 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8862 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8863 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8864 }
8865 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8866 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8867 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8868 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8869 continue;
8870 }
8871
8872 case kIemNativeVarKind_GstRegRef:
8873 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8874 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8875 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8876 continue;
8877
8878 case kIemNativeVarKind_Invalid:
8879 case kIemNativeVarKind_End:
8880 break;
8881 }
8882 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8883 }
8884 }
8885# if 0 //def VBOX_STRICT
8886 iemNativeRegAssertSanity(pReNative);
8887# endif
8888 }
8889#else
8890 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8891#endif
8892
8893 /*
8894 * Make sure the argument variables are loaded into their respective registers.
8895 *
8896 * We can optimize this by ASSUMING that any register allocations are for
8897     * registers that have already been loaded and are ready. The previous step
8898 * saw to that.
8899 */
8900 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8901 {
8902 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8903 {
8904 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8905 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8906 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8907 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8908 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8909 else
8910 {
8911 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8912 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8913 {
8914 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8916 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8917 | RT_BIT_32(idxArgReg);
8918 pVar->idxReg = idxArgReg;
8919 }
8920 else
8921 {
8922 /* Use ARG0 as temp for stuff we need registers for. */
8923 switch (pVar->enmKind)
8924 {
8925 case kIemNativeVarKind_Stack:
8926 {
8927 uint8_t const idxStackSlot = pVar->idxStackSlot;
8928 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8929 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8930 continue;
8931 }
8932
8933 case kIemNativeVarKind_Immediate:
8934 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8935 continue;
8936
8937 case kIemNativeVarKind_VarRef:
8938 {
8939 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8940 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8941 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8942 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8943 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8944 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8945 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8946 {
8947 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8948 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8949 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8950 }
8951 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8952 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8953 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8954 continue;
8955 }
8956
8957 case kIemNativeVarKind_GstRegRef:
8958 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8959 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8960 continue;
8961
8962 case kIemNativeVarKind_Invalid:
8963 case kIemNativeVarKind_End:
8964 break;
8965 }
8966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8967 }
8968 }
8969 }
8970#if 0 //def VBOX_STRICT
8971 iemNativeRegAssertSanity(pReNative);
8972#endif
8973 }
8974#ifdef VBOX_STRICT
8975 else
8976 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8977 {
8978 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8979 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8980 }
8981#endif
8982
8983 /*
8984 * Free all argument variables (simplified).
8985 * Their lifetime always expires with the call they are for.
8986 */
8987 /** @todo Make the python script check that arguments aren't used after
8988 * IEM_MC_CALL_XXXX. */
8989    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8990     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8991     *        an argument value. There is also some FPU stuff. */
8992 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8993 {
8994 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8995 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8996
8997 /* no need to free registers: */
8998 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8999 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9000 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9001 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9002 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9003 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9004
9005 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9006 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9007 iemNativeVarFreeStackSlots(pReNative, idxVar);
9008 }
9009 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9010
9011 /*
9012 * Flush volatile registers as we make the call.
9013 */
9014 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9015
9016 return off;
9017}
9018
9019
9020/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
9021DECL_HIDDEN_THROW(uint32_t)
9022iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
9023 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
9024
9025{
9026 /*
9027 * Do all the call setup and cleanup.
9028 */
9029 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
9030
9031 /*
9032 * Load the two or three hidden arguments.
9033 */
9034#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9035 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
9036 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9037 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
9038#else
9039 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9040 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
9041#endif
9042
9043 /*
9044 * Make the call and check the return code.
9045 *
9046     * Shadow PC copies are always flushed here; other stuff depends on flags.
9047     * Segment and general purpose registers are explicitly flushed via the
9048 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
9049 * macros.
9050 */
9051 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
9052#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9053 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
9054#endif
9055 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
9056 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
9057 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
9058 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
9059
9060 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
9061}
9062
9063
9064#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
9065 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
9066
9067/** Emits code for IEM_MC_CALL_CIMPL_1. */
9068DECL_INLINE_THROW(uint32_t)
9069iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9070 uintptr_t pfnCImpl, uint8_t idxArg0)
9071{
9072 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9073 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
9074}
9075
9076
9077#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
9078 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
9079
9080/** Emits code for IEM_MC_CALL_CIMPL_2. */
9081DECL_INLINE_THROW(uint32_t)
9082iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9083 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
9084{
9085 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9086 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9087 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
9088}
9089
9090
9091#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
9092 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9093 (uintptr_t)a_pfnCImpl, a0, a1, a2)
9094
9095/** Emits code for IEM_MC_CALL_CIMPL_3. */
9096DECL_INLINE_THROW(uint32_t)
9097iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9098 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9099{
9100 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9101 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9102 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9103 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
9104}
9105
9106
9107#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
9108 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9109 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
9110
9111/** Emits code for IEM_MC_CALL_CIMPL_4. */
9112DECL_INLINE_THROW(uint32_t)
9113iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9114 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9115{
9116 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9117 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9118 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9119 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9120 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
9121}
9122
9123
9124#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
9125 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9126 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
9127
9128/** Emits code for IEM_MC_CALL_CIMPL_5. */
9129DECL_INLINE_THROW(uint32_t)
9130iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9131 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
9132{
9133 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9134 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9135 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9136 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9137 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
9138 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
9139}
9140
9141
9142/** Recompiler debugging: Flush guest register shadow copies. */
9143#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
9144
9145
9146
9147/*********************************************************************************************************************************
9148* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
9149*********************************************************************************************************************************/
9150
9151/**
9152 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
9153 */
9154DECL_INLINE_THROW(uint32_t)
9155iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9156 uintptr_t pfnAImpl, uint8_t cArgs)
9157{
9158 if (idxVarRc != UINT8_MAX)
9159 {
9160 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
9161 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
9162 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
9163 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
9164 }
9165
9166 /*
9167 * Do all the call setup and cleanup.
9168 */
9169 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
9170
9171 /*
9172 * Make the call and update the return code variable if we've got one.
9173 */
9174 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9175 if (idxVarRc != UINT8_MAX)
9176 {
9177off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
9178 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9179 }
9180
9181 return off;
9182}
9183
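/*
 * Illustrative MC-level usage sketch (hypothetical argument names and an
 * elided AIMPL worker; not lifted from an actual instruction body):
 *
 *      IEM_MC_ARG(uint64_t *, pu64Dst, 0);
 *      IEM_MC_ARG(uint32_t *, pEFlags, 1);
 *      ... set up the references ...
 *      IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_SomeWorker_u64, pu64Dst, pEFlags);
 */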
9184
9185
9186#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9187 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9188
9189#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9190 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9191
9192/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9193DECL_INLINE_THROW(uint32_t)
9194iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9195{
9196 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9197}
9198
9199
9200#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9201 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9202
9203#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9204 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9205
9206/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9207DECL_INLINE_THROW(uint32_t)
9208iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9209{
9210 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9211 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9212}
9213
9214
9215#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9216 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9217
9218#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9219 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9220
9221/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9222DECL_INLINE_THROW(uint32_t)
9223iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9224 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9225{
9226 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9227 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9228 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9229}
9230
9231
9232#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9233 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9234
9235#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9236 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9237
9238/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9239DECL_INLINE_THROW(uint32_t)
9240iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9241 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9242{
9243 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9246 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9247}
9248
9249
9250#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9251 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9252
9253#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9254 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9255
9256/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9257DECL_INLINE_THROW(uint32_t)
9258iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9259 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9260{
9261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9262 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9263 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9264 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9265 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9266}
9267
9268
9269
9270/*********************************************************************************************************************************
9271* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9272*********************************************************************************************************************************/
9273
9274#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9275 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9276
9277#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9278 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9279
9280#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9281 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9282
9283#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9284 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9285
9286
9287/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9288 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9289DECL_INLINE_THROW(uint32_t)
9290iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9291{
9292 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9293 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9294 Assert(iGRegEx < 20);
9295
9296 /* Same discussion as in iemNativeEmitFetchGregU16 */
9297 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9298 kIemNativeGstRegUse_ReadOnly);
9299
9300 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9301 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9302
9303 /* The value is zero-extended to the full 64-bit host register width. */
9304 if (iGRegEx < 16)
9305 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9306 else
9307 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9308
9309 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9310 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9311 return off;
9312}
9313
9314
9315#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9316 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9317
9318#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9319 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9320
9321#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9322 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9323
9324/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9325DECL_INLINE_THROW(uint32_t)
9326iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9327{
9328 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9329 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9330 Assert(iGRegEx < 20);
9331
9332 /* Same discussion as in iemNativeEmitFetchGregU16 */
9333 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9334 kIemNativeGstRegUse_ReadOnly);
9335
9336 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9337 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9338
9339 if (iGRegEx < 16)
9340 {
9341 switch (cbSignExtended)
9342 {
9343 case sizeof(uint16_t):
9344 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9345 break;
9346 case sizeof(uint32_t):
9347 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9348 break;
9349 case sizeof(uint64_t):
9350 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9351 break;
9352 default: AssertFailed(); break;
9353 }
9354 }
9355 else
9356 {
9357 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9358 switch (cbSignExtended)
9359 {
9360 case sizeof(uint16_t):
9361 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9362 break;
9363 case sizeof(uint32_t):
9364 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9365 break;
9366 case sizeof(uint64_t):
9367 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9368 break;
9369 default: AssertFailed(); break;
9370 }
9371 }
9372
9373 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9374 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9375 return off;
9376}
9377
9378
9379
9380#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9381 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9382
9383#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9384 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9385
9386#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9387 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9388
9389/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9390DECL_INLINE_THROW(uint32_t)
9391iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9392{
9393 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9394 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9395 Assert(iGReg < 16);
9396
9397 /*
9398 * We can either just load the low 16-bit of the GPR into a host register
9399 * for the variable, or we can do so via a shadow copy host register. The
9400 * latter will avoid having to reload it if it's being stored later, but
9401 * will waste a host register if it isn't touched again. Since we don't
9402     * know what's going to happen, we choose the latter for now.
9403 */
9404 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9405 kIemNativeGstRegUse_ReadOnly);
9406
9407 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9408 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9409 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9410 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9411
9412 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9413 return off;
9414}
9415
9416
9417#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9418 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9419
9420#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9421 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9422
9423/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9424DECL_INLINE_THROW(uint32_t)
9425iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9426{
9427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9428 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9429 Assert(iGReg < 16);
9430
9431 /*
9432 * We can either just load the low 16-bit of the GPR into a host register
9433 * for the variable, or we can do so via a shadow copy host register. The
9434 * latter will avoid having to reload it if it's being stored later, but
9435 * will waste a host register if it isn't touched again. Since we don't
9436     * know what's going to happen, we choose the latter for now.
9437 */
9438 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9439 kIemNativeGstRegUse_ReadOnly);
9440
9441 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9442 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9443 if (cbSignExtended == sizeof(uint32_t))
9444 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9445 else
9446 {
9447 Assert(cbSignExtended == sizeof(uint64_t));
9448 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9449 }
9450 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9451
9452 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9453 return off;
9454}
9455
9456
9457#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9458 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9459
9460#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9461 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9462
9463/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9464DECL_INLINE_THROW(uint32_t)
9465iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9466{
9467 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9468 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9469 Assert(iGReg < 16);
9470
9471 /*
9472     * We can either just load the low 32-bit of the GPR into a host register
9473 * for the variable, or we can do so via a shadow copy host register. The
9474 * latter will avoid having to reload it if it's being stored later, but
9475 * will waste a host register if it isn't touched again. Since we don't
9476     * know what's going to happen, we choose the latter for now.
9477 */
9478 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9479 kIemNativeGstRegUse_ReadOnly);
9480
9481 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9482 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9483 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9484 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9485
9486 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9487 return off;
9488}
9489
9490
9491#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9492 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9493
9494/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9495DECL_INLINE_THROW(uint32_t)
9496iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9497{
9498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9499 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9500 Assert(iGReg < 16);
9501
9502 /*
9503 * We can either just load the low 32-bit of the GPR into a host register
9504 * for the variable, or we can do so via a shadow copy host register. The
9505 * latter will avoid having to reload it if it's being stored later, but
9506 * will waste a host register if it isn't touched again. Since we don't
9507     * know what's going to happen, we choose the latter for now.
9508 */
9509 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9510 kIemNativeGstRegUse_ReadOnly);
9511
9512 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9513 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9514 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9515 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9516
9517 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9518 return off;
9519}
9520
9521
9522#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9523 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9524
9525#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9526 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9527
9528/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9529 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9530DECL_INLINE_THROW(uint32_t)
9531iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9532{
9533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9534 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9535 Assert(iGReg < 16);
9536
9537 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9538 kIemNativeGstRegUse_ReadOnly);
9539
9540 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9541 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9542 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9543 /** @todo name the register a shadow one already? */
9544 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9545
9546 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9547 return off;
9548}
9549
9550
9551
9552/*********************************************************************************************************************************
9553* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9554*********************************************************************************************************************************/
9555
9556#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9557 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9558
9559/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9560DECL_INLINE_THROW(uint32_t)
9561iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9562{
9563 Assert(iGRegEx < 20);
9564 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9565 kIemNativeGstRegUse_ForUpdate);
9566#ifdef RT_ARCH_AMD64
9567 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9568
9569 /* To the lowest byte of the register: mov r8, imm8 */
9570 if (iGRegEx < 16)
9571 {
9572 if (idxGstTmpReg >= 8)
9573 pbCodeBuf[off++] = X86_OP_REX_B;
9574 else if (idxGstTmpReg >= 4)
9575 pbCodeBuf[off++] = X86_OP_REX;
9576 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9577 pbCodeBuf[off++] = u8Value;
9578 }
9579 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
9580 else if (idxGstTmpReg < 4)
9581 {
9582 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9583 pbCodeBuf[off++] = u8Value;
9584 }
9585 else
9586 {
9587 /* ror reg64, 8 */
9588 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9589 pbCodeBuf[off++] = 0xc1;
9590 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9591 pbCodeBuf[off++] = 8;
9592
9593 /* mov reg8, imm8 */
9594 if (idxGstTmpReg >= 8)
9595 pbCodeBuf[off++] = X86_OP_REX_B;
9596 else if (idxGstTmpReg >= 4)
9597 pbCodeBuf[off++] = X86_OP_REX;
9598 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9599 pbCodeBuf[off++] = u8Value;
9600
9601 /* rol reg64, 8 */
9602 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9603 pbCodeBuf[off++] = 0xc1;
9604 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9605 pbCodeBuf[off++] = 8;
9606 }
9607
9608#elif defined(RT_ARCH_ARM64)
9609 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9611 if (iGRegEx < 16)
9612 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9613 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9614 else
9615 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9616 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9617 iemNativeRegFreeTmp(pReNative, idxImmReg);
9618
9619#else
9620# error "Port me!"
9621#endif
9622
9623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9624
9625 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9626
9627 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9628 return off;
9629}
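
/* Illustrative sketch (hypothetical helper, not part of the recompiler): the guest-register
   effect of the byte store above. Only the addressed byte changes, which is what the AMD64
   ror/mov/rol sequence and the ARM64 bfi instruction both implement. */
#if 0
# include <stdint.h>

static uint64_t iemExampleStoreGregU8Const(uint64_t uGstReg, uint8_t iGRegEx, uint8_t u8Value)
{
    unsigned const cShift = iGRegEx < 16 ? 0 : 8; /* iGRegEx >= 16 addresses AH/CH/DH/BH. */
    return (uGstReg & ~((uint64_t)0xff << cShift)) | ((uint64_t)u8Value << cShift);
}
#endif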
9630
9631
9632#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9633 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9634
9635/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9636DECL_INLINE_THROW(uint32_t)
9637iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9638{
9639 Assert(iGRegEx < 20);
9640 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9641
9642 /*
9643 * If it's a constant value (unlikely), we treat this as an
9644 * IEM_MC_STORE_GREG_U8_CONST statement.
9645 */
9646 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9647 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9648 { /* likely */ }
9649 else
9650 {
9651 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9652 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9653 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
9654 }
9655
9656 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9657 kIemNativeGstRegUse_ForUpdate);
9658 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9659
9660#ifdef RT_ARCH_AMD64
9661 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
9662 if (iGRegEx < 16)
9663 {
9664 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
9665 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9666 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9667 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9668 pbCodeBuf[off++] = X86_OP_REX;
9669 pbCodeBuf[off++] = 0x8a;
9670 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9671 }
9672 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
9673 else if (idxGstTmpReg < 4 && idxVarReg < 4)
9674 {
9675 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
9676 pbCodeBuf[off++] = 0x8a;
9677 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
9678 }
9679 else
9680 {
9681 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
9682
9683 /* ror reg64, 8 */
9684 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9685 pbCodeBuf[off++] = 0xc1;
9686 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9687 pbCodeBuf[off++] = 8;
9688
9689 /* mov reg8, reg8(r/m) */
9690 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
9691 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
9692 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
9693 pbCodeBuf[off++] = X86_OP_REX;
9694 pbCodeBuf[off++] = 0x8a;
9695 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
9696
9697 /* rol reg64, 8 */
9698 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9699 pbCodeBuf[off++] = 0xc1;
9700 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9701 pbCodeBuf[off++] = 8;
9702 }
9703
9704#elif defined(RT_ARCH_ARM64)
9705 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
9706 or
9707 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
9708 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9709 if (iGRegEx < 16)
9710 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
9711 else
9712 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
9713
9714#else
9715# error "Port me!"
9716#endif
9717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9718
9719 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9720
9721 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9722 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9723 return off;
9724}
9725
9726
9727
9728#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
9729 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
9730
9731/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
9732DECL_INLINE_THROW(uint32_t)
9733iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
9734{
9735 Assert(iGReg < 16);
9736 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9737 kIemNativeGstRegUse_ForUpdate);
9738#ifdef RT_ARCH_AMD64
9739 /* mov reg16, imm16 */
9740 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9741 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9742 if (idxGstTmpReg >= 8)
9743 pbCodeBuf[off++] = X86_OP_REX_B;
9744 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
9745 pbCodeBuf[off++] = RT_BYTE1(uValue);
9746 pbCodeBuf[off++] = RT_BYTE2(uValue);
9747
9748#elif defined(RT_ARCH_ARM64)
9749 /* movk xdst, #uValue, lsl #0 */
9750 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9751 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
9752
9753#else
9754# error "Port me!"
9755#endif
9756
9757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9758
9759 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9760 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9761 return off;
9762}
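
/* Illustrative sketch (hypothetical helper, not part of the recompiler): a 16-bit store only
   replaces bits 15:0 of the guest register, which is why ARM64 can use a single movk here. */
#if 0
# include <stdint.h>

static uint64_t iemExampleStoreGregU16Const(uint64_t uGstReg, uint16_t uValue)
{
    return (uGstReg & ~(uint64_t)UINT16_MAX) | uValue;
}
#endif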
9763
9764
9765#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
9766 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
9767
9768/** Emits code for IEM_MC_STORE_GREG_U16. */
9769DECL_INLINE_THROW(uint32_t)
9770iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9771{
9772 Assert(iGReg < 16);
9773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9774
9775 /*
9776 * If it's a constant value (unlikely), we treat this as an
9777 * IEM_MC_STORE_GREG_U16_CONST statement.
9778 */
9779 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9780 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9781 { /* likely */ }
9782 else
9783 {
9784 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9785 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9786 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
9787 }
9788
9789 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9790 kIemNativeGstRegUse_ForUpdate);
9791
9792#ifdef RT_ARCH_AMD64
9793 /* mov reg16, reg16 or [mem16] */
9794 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9795 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9796 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9797 {
9798 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
9799 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
9800 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
9801 pbCodeBuf[off++] = 0x8b;
9802 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
9803 }
9804 else
9805 {
9806 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
9807 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9808 if (idxGstTmpReg >= 8)
9809 pbCodeBuf[off++] = X86_OP_REX_R;
9810 pbCodeBuf[off++] = 0x8b;
9811 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9812 }
9813
9814#elif defined(RT_ARCH_ARM64)
9815 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
9816 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
9817 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
9818 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
9819 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9820
9821#else
9822# error "Port me!"
9823#endif
9824
9825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9826
9827 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9828 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9829 return off;
9830}
9831
9832
9833#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
9834 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
9835
9836/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
9837DECL_INLINE_THROW(uint32_t)
9838iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
9839{
9840 Assert(iGReg < 16);
9841 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9842 kIemNativeGstRegUse_ForFullWrite);
9843 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9844 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9845 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9846 return off;
9847}
9848
9849
9850#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
9851 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
9852
9853/** Emits code for IEM_MC_STORE_GREG_U32. */
9854DECL_INLINE_THROW(uint32_t)
9855iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9856{
9857 Assert(iGReg < 16);
9858 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9859
9860 /*
9861 * If it's a constant value (unlikely), we treat this as an
9862 * IEM_MC_STORE_GREG_U32_CONST statement.
9863 */
9864 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9865 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9866 { /* likely */ }
9867 else
9868 {
9869 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9870 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9871 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
9872 }
9873
9874 /*
9875 * For the rest we allocate a guest register for the variable and write
9876 * it to the CPUMCTX structure.
9877 */
9878 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9879 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9880#ifdef VBOX_STRICT
9881 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
9882#endif
9883 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9884 return off;
9885}
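
/* Illustrative sketch (hypothetical helper, not part of the recompiler): in 64-bit mode a
   32-bit GPR write zero-extends into bits 63:32, which is what the strict-build check above
   verifies on the host register before it is written back. */
#if 0
# include <stdint.h>

static uint64_t iemExampleStoreGregU32(uint32_t uValue)
{
    return uValue; /* bits 63:32 are implicitly cleared */
}
#endif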
9886
9887
9888#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
9889 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
9890
9891/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
9892DECL_INLINE_THROW(uint32_t)
9893iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
9894{
9895 Assert(iGReg < 16);
9896 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9897 kIemNativeGstRegUse_ForFullWrite);
9898 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
9899 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9900 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9901 return off;
9902}
9903
9904
9905#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
9906 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
9907
9908/** Emits code for IEM_MC_STORE_GREG_U64. */
9909DECL_INLINE_THROW(uint32_t)
9910iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
9911{
9912 Assert(iGReg < 16);
9913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9914
9915 /*
9916 * If it's a constant value (unlikely), we treat this as an
9917 * IEM_MC_STORE_GREG_U64_CONST statement.
9918 */
9919 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9920 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9921 { /* likely */ }
9922 else
9923 {
9924 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
9925 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9926 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
9927 }
9928
9929 /*
9930 * For the rest we allocate a guest register for the variable and write
9931 * it to the CPUMCTX structure.
9932 */
9933 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
9934 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9935 iemNativeVarRegisterRelease(pReNative, idxValueVar);
9936 return off;
9937}
9938
9939
9940#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
9941 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
9942
9943/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
9944DECL_INLINE_THROW(uint32_t)
9945iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
9946{
9947 Assert(iGReg < 16);
9948 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9949 kIemNativeGstRegUse_ForUpdate);
9950 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
9951 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
9952 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9953 return off;
9954}
9955
9956
9957/*********************************************************************************************************************************
9958* General purpose register manipulation (add, sub). *
9959*********************************************************************************************************************************/
9960
9961#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
9962 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
9963
9964/** Emits code for IEM_MC_ADD_GREG_U16. */
9965DECL_INLINE_THROW(uint32_t)
9966iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
9967{
9968 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9969 kIemNativeGstRegUse_ForUpdate);
9970
9971#ifdef RT_ARCH_AMD64
9972 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
9973 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9974 if (idxGstTmpReg >= 8)
9975 pbCodeBuf[off++] = X86_OP_REX_B;
9976 if (uAddend == 1)
9977 {
9978 pbCodeBuf[off++] = 0xff; /* inc */
9979 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9980 }
9981 else
9982 {
9983 pbCodeBuf[off++] = 0x81;
9984 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9985 pbCodeBuf[off++] = uAddend;
9986 pbCodeBuf[off++] = 0;
9987 }
9988
9989#else
9990 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
9991 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9992
9993 /* add tmp, gstgrp, uAddend */
9994 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
9995
9996 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
9997 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
9998
9999 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10000#endif
10001
10002 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10003
10004 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10005
10006 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10007 return off;
10008}
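
/* Illustrative sketch (hypothetical helper, not part of the recompiler): IEM_MC_ADD_GREG_U16
   adds within the low word and leaves bits 63:16 untouched, which the ARM64 path implements
   as an add into a temporary followed by a bfi back into the guest register. */
#if 0
# include <stdint.h>

static uint64_t iemExampleAddGregU16(uint64_t uGstReg, uint8_t uAddend)
{
    uint16_t const uResult = (uint16_t)((uint16_t)uGstReg + uAddend); /* wraps at 16 bits */
    return (uGstReg & ~(uint64_t)UINT16_MAX) | uResult;
}
#endif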
10009
10010
10011#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
10012 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10013
10014#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
10015 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10016
10017/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
10018DECL_INLINE_THROW(uint32_t)
10019iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
10020{
10021 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10022 kIemNativeGstRegUse_ForUpdate);
10023
10024#ifdef RT_ARCH_AMD64
10025 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10026 if (f64Bit)
10027 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10028 else if (idxGstTmpReg >= 8)
10029 pbCodeBuf[off++] = X86_OP_REX_B;
10030 if (uAddend == 1)
10031 {
10032 pbCodeBuf[off++] = 0xff; /* inc */
10033 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10034 }
10035 else if (uAddend < 128)
10036 {
10037 pbCodeBuf[off++] = 0x83; /* add */
10038 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10039 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10040 }
10041 else
10042 {
10043 pbCodeBuf[off++] = 0x81; /* add */
10044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10045 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10046 pbCodeBuf[off++] = 0;
10047 pbCodeBuf[off++] = 0;
10048 pbCodeBuf[off++] = 0;
10049 }
10050
10051#else
10052 /* add gstgrp, gstgrp, uAddend */
10053 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10054 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
10055
10056#endif
10057
10058 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10059
10060 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10061
10062 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10063 return off;
10064}
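
/* Illustrative sketch (hypothetical helper, not part of the recompiler): the 32-bit variant
   zero-extends its result into bits 63:32, while the 64-bit variant adds across the full
   register. */
#if 0
# include <stdbool.h>
# include <stdint.h>

static uint64_t iemExampleAddGregU32U64(uint64_t uGstReg, uint8_t uAddend, bool f64Bit)
{
    if (f64Bit)
        return uGstReg + uAddend;
    return (uint32_t)((uint32_t)uGstReg + uAddend);
}
#endif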
10065
10066
10067
10068#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10069 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10070
10071/** Emits code for IEM_MC_SUB_GREG_U16. */
10072DECL_INLINE_THROW(uint32_t)
10073iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
10074{
10075 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10076 kIemNativeGstRegUse_ForUpdate);
10077
10078#ifdef RT_ARCH_AMD64
10079 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10080 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10081 if (idxGstTmpReg >= 8)
10082 pbCodeBuf[off++] = X86_OP_REX_B;
10083 if (uSubtrahend == 1)
10084 {
10085 pbCodeBuf[off++] = 0xff; /* dec */
10086 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10087 }
10088 else
10089 {
10090 pbCodeBuf[off++] = 0x81;
10091 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10092 pbCodeBuf[off++] = uSubtrahend;
10093 pbCodeBuf[off++] = 0;
10094 }
10095
10096#else
10097 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10098 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10099
10100 /* sub tmp, gstgrp, uSubtrahend */
10101 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
10102
10103 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
10104 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10105
10106 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10107#endif
10108
10109 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10110
10111 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10112
10113 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10114 return off;
10115}
10116
10117
10118#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
10119 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10120
10121#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
10122 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10123
10124/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
10125DECL_INLINE_THROW(uint32_t)
10126iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
10127{
10128 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10129 kIemNativeGstRegUse_ForUpdate);
10130
10131#ifdef RT_ARCH_AMD64
10132 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10133 if (f64Bit)
10134 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10135 else if (idxGstTmpReg >= 8)
10136 pbCodeBuf[off++] = X86_OP_REX_B;
10137 if (uSubtrahend == 1)
10138 {
10139 pbCodeBuf[off++] = 0xff; /* dec */
10140 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10141 }
10142 else if (uSubtrahend < 128)
10143 {
10144 pbCodeBuf[off++] = 0x83; /* sub */
10145 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10146 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10147 }
10148 else
10149 {
10150 pbCodeBuf[off++] = 0x81; /* sub */
10151 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10152 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10153 pbCodeBuf[off++] = 0;
10154 pbCodeBuf[off++] = 0;
10155 pbCodeBuf[off++] = 0;
10156 }
10157
10158#else
10159 /* sub tmp, gstgrp, uSubtrahend */
10160 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10161 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
10162
10163#endif
10164
10165 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10166
10167 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10168
10169 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10170 return off;
10171}
10172
10173
10174/*********************************************************************************************************************************
10175* Local variable manipulation (add, sub, and, or). *
10176*********************************************************************************************************************************/
10177
10178#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
10179 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10180
10181#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10182 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10183
10184#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10185 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10186
10187#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10188 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10189
10190/** Emits code for AND'ing a local and a constant value. */
10191DECL_INLINE_THROW(uint32_t)
10192iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10193{
10194#ifdef VBOX_STRICT
10195 switch (cbMask)
10196 {
10197 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10198 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10199 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10200 case sizeof(uint64_t): break;
10201 default: AssertFailedBreak();
10202 }
10203#endif
10204
10205 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10206 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10207
10208 if (cbMask <= sizeof(uint32_t))
10209 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10210 else
10211 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10212
10213 iemNativeVarRegisterRelease(pReNative, idxVar);
10214 return off;
10215}
10216
10217
10218#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10219 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10220
10221#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10222 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10223
10224#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10225 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10226
10227#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10228 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10229
10230/** Emits code for OR'ing a local and a constant value. */
10231DECL_INLINE_THROW(uint32_t)
10232iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10233{
10234#ifdef VBOX_STRICT
10235 switch (cbMask)
10236 {
10237 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10238 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10239 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10240 case sizeof(uint64_t): break;
10241 default: AssertFailedBreak();
10242 }
10243#endif
10244
10245 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10246 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10247
10248 if (cbMask <= sizeof(uint32_t))
10249 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10250 else
10251 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10252
10253 iemNativeVarRegisterRelease(pReNative, idxVar);
10254 return off;
10255}
10256
10257
10258#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10259 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10260
10261#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10262 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10263
10264#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10265 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10266
10267/** Emits code for reversing the byte order in a local value. */
10268DECL_INLINE_THROW(uint32_t)
10269iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10270{
10271 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10272 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
10273
10274 switch (cbLocal)
10275 {
10276 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10277 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10278 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10279 default: AssertFailedBreak();
10280 }
10281
10282 iemNativeVarRegisterRelease(pReNative, idxVar);
10283 return off;
10284}
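
/* Illustrative sketch (hypothetical helper, not part of the recompiler): the 16-bit case of
   the byte-order reversal emitted above. */
#if 0
# include <stdint.h>

static uint16_t iemExampleBswapU16(uint16_t uValue)
{
    return (uint16_t)((uValue << 8) | (uValue >> 8));
}
#endif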
10285
10286
10287
10288/*********************************************************************************************************************************
10289* EFLAGS *
10290*********************************************************************************************************************************/
10291
10292#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10293# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10294#else
10295# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10296 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10297
10298DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10299{
10300 if (fEflOutput)
10301 {
10302 PVMCPUCC const pVCpu = pReNative->pVCpu;
10303# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10304 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10305 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10306 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10307# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10308 if (fEflOutput & (a_fEfl)) \
10309 { \
10310 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10311 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10312 else \
10313 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10314 } else do { } while (0)
10315# else
10316 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10317 IEMLIVENESSBIT const LivenessClobbered =
10318 {
10319 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10320 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10321 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10322 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10323 };
10324 IEMLIVENESSBIT const LivenessDelayable =
10325 {
10326 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10327 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10328 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10329 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10330 };
10331# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10332 if (fEflOutput & (a_fEfl)) \
10333 { \
10334 if (LivenessClobbered.a_fLivenessMember) \
10335 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10336 else if (LivenessDelayable.a_fLivenessMember) \
10337 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10338 else \
10339 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10340 } else do { } while (0)
10341# endif
10342 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10343 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10344 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10345 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10346 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10347 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10348 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10349# undef CHECK_FLAG_AND_UPDATE_STATS
10350 }
10351 RT_NOREF(fEflInput);
10352}
10353#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
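
/* Illustrative sketch (not part of the recompiler; the enum and helper are hypothetical): the
   per-flag classification the extended-layout statistics code above performs, expressed on a
   single flag's liveness bits. */
#if 0
# include <stdbool.h>

typedef enum EXAMPLEEFLCLASS { kExampleEflRequired, kExampleEflDelayable, kExampleEflSkippable } EXAMPLEEFLCLASS;

static EXAMPLEEFLCLASS iemExampleClassifyEflOutput(bool fRead, bool fWrite, bool fPotXcptOrCall, bool fOther)
{
    /* Clobbered: written but never read and not needed across a potential exception/call. */
    if (fWrite && !fRead && !fPotXcptOrCall && !fOther)
        return kExampleEflSkippable;
    /* Delayable: only needed in case of a potential exception or call. */
    if (fWrite && fPotXcptOrCall && !fRead && !fOther)
        return kExampleEflDelayable;
    /* Otherwise the flag calculation cannot be skipped. */
    return kExampleEflRequired;
}
#endif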
10354
10355#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10356#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10357 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10358
10359/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10360DECL_INLINE_THROW(uint32_t)
10361iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10362 uint32_t fEflInput, uint32_t fEflOutput)
10363{
10364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10365 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10366 RT_NOREF(fEflInput, fEflOutput);
10367
10368#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10369# ifdef VBOX_STRICT
10370 if ( pReNative->idxCurCall != 0
10371 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10372 {
10373 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10374 uint32_t const fBoth = fEflInput | fEflOutput;
10375# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
10376 AssertMsg( !(fBoth & (a_fElfConst)) \
10377 || (!(fEflInput & (a_fElfConst)) \
10378 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10379 : !(fEflOutput & (a_fElfConst)) \
10380 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10381 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10382 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10383 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10384 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10385 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10386 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10387 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10388 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10389 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10390# undef ASSERT_ONE_EFL
10391 }
10392# endif
10393#endif
10394
10395 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10396 * the existing shadow copy. */
10397 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10398 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10399 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10400 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10401 return off;
10402}
10403
10404
10405
10406/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10407 * start using it with custom native code emission (inlining assembly
10408 * instruction helpers). */
10409#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10410#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10411 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10412 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10413
10414/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10415DECL_INLINE_THROW(uint32_t)
10416iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10417{
10418 RT_NOREF(fEflOutput);
10419 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10420 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10421
10422#ifdef VBOX_STRICT
10423 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10424 uint32_t offFixup = off;
10425 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10426 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10427 iemNativeFixupFixedJump(pReNative, offFixup, off);
10428
10429 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10430 offFixup = off;
10431 off = iemNativeEmitJzToFixed(pReNative, off, off);
10432 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10433 iemNativeFixupFixedJump(pReNative, offFixup, off);
10434
10435 /** @todo validate that only bits in the fEflOutput mask changed. */
10436#endif
10437
10438 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10439 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10440 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10441 return off;
10442}
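
/* Illustrative sketch (hypothetical helper, not part of the recompiler): the plausibility
   check the strict-build code above emits before committing EFLAGS - the reserved always-one
   bit must be set and the reserved always-zero hardware bits must be clear. */
#if 0
# include <stdbool.h>
# include <stdint.h>

static bool iemExampleIsPlausibleEFlags(uint32_t fEfl, uint32_t fRa1Mask, uint32_t fRazMask)
{
    return (fEfl & fRa1Mask) != 0   /* brk 0x2001 if clear */
        && (fEfl & fRazMask) == 0;  /* brk 0x2002 if any set */
}
#endif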
10443
10444
10445
10446/*********************************************************************************************************************************
10447* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10448*********************************************************************************************************************************/
10449
10450#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10451 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10452
10453#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10454 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10455
10456#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10457 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10458
10459
10460/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10461 * IEM_MC_FETCH_SREG_ZX_U64. */
10462DECL_INLINE_THROW(uint32_t)
10463iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10464{
10465 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10466 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
10467 Assert(iSReg < X86_SREG_COUNT);
10468
10469 /*
10470 * For now, we will not create a shadow copy of a selector. The rationale
10471 * is that since we do not recompile the popping and loading of segment
10472 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
10473 * pushing and moving to registers, there is only a small chance that the
10474 * shadow copy will be accessed again before the register is reloaded. One
10475 * scenario would be nested calls in 16-bit code, but I doubt it's worth
10476 * the extra register pressure atm.
10477 *
10478 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10479 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
10480 * store scenario covered at present (r160730).
10481 */
10482 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10483 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10484 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10485 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10486 return off;
10487}
10488
10489
10490
10491/*********************************************************************************************************************************
10492* Register references. *
10493*********************************************************************************************************************************/
10494
10495#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10496 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10497
10498#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10499 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10500
10501/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10502DECL_INLINE_THROW(uint32_t)
10503iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10504{
10505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10506 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10507 Assert(iGRegEx < 20);
10508
10509 if (iGRegEx < 16)
10510 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10511 else
10512 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10513
10514 /* If we've delayed writing back the register value, flush it now. */
10515 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10516
10517 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10518 if (!fConst)
10519 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
10520
10521 return off;
10522}
10523
10524#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10525 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10526
10527#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10528 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10529
10530#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10531 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10532
10533#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10534 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10535
10536#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10537 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10538
10539#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10540 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10541
10542#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10543 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10544
10545#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10546 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10547
10548#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10549 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10550
10551#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10552 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10553
10554/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10555DECL_INLINE_THROW(uint32_t)
10556iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10557{
10558 Assert(iGReg < 16);
10559 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10560 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10561
10562 /* If we've delayed writing back the register value, flush it now. */
10563 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10564
10565 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10566 if (!fConst)
10567 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10568
10569 return off;
10570}
10571
10572
10573#undef IEM_MC_REF_EFLAGS /* should not be used. */
10574#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10575 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10576 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10577
10578/** Handles IEM_MC_REF_EFLAGS. */
10579DECL_INLINE_THROW(uint32_t)
10580iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10581{
10582 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10583 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10584
10585 /* If we've delayed writing back the register value, flush it now. */
10586 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10587
10588 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10589 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10590
10591 return off;
10592}
10593
10594
10595/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10596 * different code from the threaded recompiler, maybe it would be helpful. For now
10597 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10598#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10599
10600
10601
10602/*********************************************************************************************************************************
10603* Effective Address Calculation *
10604*********************************************************************************************************************************/
10605#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
10606 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
10607
10608/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
10609 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
10610DECL_INLINE_THROW(uint32_t)
10611iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10612 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
10613{
10614 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10615
10616 /*
10617 * Handle the disp16 form with no registers first.
10618 *
10619 * Convert to an immediate value, as that'll delay the register allocation
10620 * and assignment till the memory access / call / whatever and we can use
10621 * a more appropriate register (or none at all).
10622 */
10623 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
10624 {
10625 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
10626 return off;
10627 }
10628
10629 /* Determine the displacement. */
10630 uint16_t u16EffAddr;
10631 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10632 {
10633 case 0: u16EffAddr = 0; break;
10634 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
10635 case 2: u16EffAddr = u16Disp; break;
10636 default: AssertFailedStmt(u16EffAddr = 0);
10637 }
10638
10639 /* Determine the registers involved. */
10640 uint8_t idxGstRegBase;
10641 uint8_t idxGstRegIndex;
10642 switch (bRm & X86_MODRM_RM_MASK)
10643 {
10644 case 0:
10645 idxGstRegBase = X86_GREG_xBX;
10646 idxGstRegIndex = X86_GREG_xSI;
10647 break;
10648 case 1:
10649 idxGstRegBase = X86_GREG_xBX;
10650 idxGstRegIndex = X86_GREG_xDI;
10651 break;
10652 case 2:
10653 idxGstRegBase = X86_GREG_xBP;
10654 idxGstRegIndex = X86_GREG_xSI;
10655 break;
10656 case 3:
10657 idxGstRegBase = X86_GREG_xBP;
10658 idxGstRegIndex = X86_GREG_xDI;
10659 break;
10660 case 4:
10661 idxGstRegBase = X86_GREG_xSI;
10662 idxGstRegIndex = UINT8_MAX;
10663 break;
10664 case 5:
10665 idxGstRegBase = X86_GREG_xDI;
10666 idxGstRegIndex = UINT8_MAX;
10667 break;
10668 case 6:
10669 idxGstRegBase = X86_GREG_xBP;
10670 idxGstRegIndex = UINT8_MAX;
10671 break;
10672#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
10673 default:
10674#endif
10675 case 7:
10676 idxGstRegBase = X86_GREG_xBX;
10677 idxGstRegIndex = UINT8_MAX;
10678 break;
10679 }
10680
10681 /*
10682 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
10683 */
10684 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10685 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10686 kIemNativeGstRegUse_ReadOnly);
10687 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
10688 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10689 kIemNativeGstRegUse_ReadOnly)
10690 : UINT8_MAX;
10691#ifdef RT_ARCH_AMD64
10692 if (idxRegIndex == UINT8_MAX)
10693 {
10694 if (u16EffAddr == 0)
10695 {
10696 /* movzx ret, base */
10697 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
10698 }
10699 else
10700 {
10701 /* lea ret32, [base64 + disp32] */
10702 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10703 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10704 if (idxRegRet >= 8 || idxRegBase >= 8)
10705 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10706 pbCodeBuf[off++] = 0x8d;
10707 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10708 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
10709 else
10710 {
10711 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
10712 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10713 }
10714 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10715 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10716 pbCodeBuf[off++] = 0;
10717 pbCodeBuf[off++] = 0;
10718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10719
10720 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10721 }
10722 }
10723 else
10724 {
10725 /* lea ret32, [index64 + base64 (+ disp32)] */
10726 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10727 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10728 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10729 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10730 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10731 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10732 pbCodeBuf[off++] = 0x8d;
10733 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
10734 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10735 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
10736 if (bMod == X86_MOD_MEM4)
10737 {
10738 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
10739 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
10740 pbCodeBuf[off++] = 0;
10741 pbCodeBuf[off++] = 0;
10742 }
10743 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10744 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
10745 }
10746
10747#elif defined(RT_ARCH_ARM64)
10748 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10749 if (u16EffAddr == 0)
10750 {
10751 if (idxRegIndex == UINT8_MAX)
10752 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
10753 else
10754 {
10755 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
10756 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10757 }
10758 }
10759 else
10760 {
10761 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
10762 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
10763 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
10764 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
10765 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
10766 else
10767 {
10768 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
10769 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
10770 }
10771 if (idxRegIndex != UINT8_MAX)
10772 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
10773 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
10774 }
10775
10776#else
10777# error "port me"
10778#endif
10779
10780 if (idxRegIndex != UINT8_MAX)
10781 iemNativeRegFreeTmp(pReNative, idxRegIndex);
10782 iemNativeRegFreeTmp(pReNative, idxRegBase);
10783 iemNativeVarRegisterRelease(pReNative, idxVarRet);
10784 return off;
10785}
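
/* Illustrative sketch (hypothetical helper, not part of the recompiler): the 16-bit effective
   address the code above computes, with absent base/index registers passed as zero. Only the
   low 16 bits of the involved registers matter and the sum wraps to 16 bits. */
#if 0
# include <stdint.h>

static uint16_t iemExampleCalcEffAddr16(uint16_t u16Disp, uint16_t uBase, uint16_t uIndex)
{
    return (uint16_t)(u16Disp + uBase + uIndex);
}
#endif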
10786
10787
10788#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
10789 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
10790
10791/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
10792 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
10793DECL_INLINE_THROW(uint32_t)
10794iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
10795 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
10796{
10797 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
10798
10799 /*
10800 * Handle the disp32 form with no registers first.
10801 *
10802 * Convert to an immediate value, as that'll delay the register allocation
10803 * and assignment till the memory access / call / whatever and we can use
10804 * a more appropriate register (or none at all).
10805 */
10806 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
10807 {
10808 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
10809 return off;
10810 }
10811
10812 /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
10813 uint32_t u32EffAddr = 0;
10814 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
10815 {
10816 case 0: break;
10817 case 1: u32EffAddr = (int8_t)u32Disp; break;
10818 case 2: u32EffAddr = u32Disp; break;
10819 default: AssertFailed();
10820 }
10821
10822 /* Get the register (or SIB) value. */
10823 uint8_t idxGstRegBase = UINT8_MAX;
10824 uint8_t idxGstRegIndex = UINT8_MAX;
10825 uint8_t cShiftIndex = 0;
10826 switch (bRm & X86_MODRM_RM_MASK)
10827 {
10828 case 0: idxGstRegBase = X86_GREG_xAX; break;
10829 case 1: idxGstRegBase = X86_GREG_xCX; break;
10830 case 2: idxGstRegBase = X86_GREG_xDX; break;
10831 case 3: idxGstRegBase = X86_GREG_xBX; break;
10832 case 4: /* SIB */
10833 {
10834 /* index w/ scaling. */
10835 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
10836 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
10837 {
10838 case 0: idxGstRegIndex = X86_GREG_xAX; break;
10839 case 1: idxGstRegIndex = X86_GREG_xCX; break;
10840 case 2: idxGstRegIndex = X86_GREG_xDX; break;
10841 case 3: idxGstRegIndex = X86_GREG_xBX; break;
10842 case 4: cShiftIndex = 0; /*no index*/ break;
10843 case 5: idxGstRegIndex = X86_GREG_xBP; break;
10844 case 6: idxGstRegIndex = X86_GREG_xSI; break;
10845 case 7: idxGstRegIndex = X86_GREG_xDI; break;
10846 }
10847
10848 /* base */
10849 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
10850 {
10851 case 0: idxGstRegBase = X86_GREG_xAX; break;
10852 case 1: idxGstRegBase = X86_GREG_xCX; break;
10853 case 2: idxGstRegBase = X86_GREG_xDX; break;
10854 case 3: idxGstRegBase = X86_GREG_xBX; break;
10855 case 4:
10856 idxGstRegBase = X86_GREG_xSP;
10857 u32EffAddr += uSibAndRspOffset >> 8;
10858 break;
10859 case 5:
10860 if ((bRm & X86_MODRM_MOD_MASK) != 0)
10861 idxGstRegBase = X86_GREG_xBP;
10862 else
10863 {
10864 Assert(u32EffAddr == 0);
10865 u32EffAddr = u32Disp;
10866 }
10867 break;
10868 case 6: idxGstRegBase = X86_GREG_xSI; break;
10869 case 7: idxGstRegBase = X86_GREG_xDI; break;
10870 }
10871 break;
10872 }
10873 case 5: idxGstRegBase = X86_GREG_xBP; break;
10874 case 6: idxGstRegBase = X86_GREG_xSI; break;
10875 case 7: idxGstRegBase = X86_GREG_xDI; break;
10876 }
10877
10878 /*
10879 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
10880 * the start of the function.
10881 */
10882 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
10883 {
10884 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
10885 return off;
10886 }
10887
10888 /*
10889 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
10890 */
10891 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
10892 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
10893 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
10894 kIemNativeGstRegUse_ReadOnly);
10895 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
10896 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
10897 kIemNativeGstRegUse_ReadOnly);
10898
10899 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
10900 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
10901 {
10902 idxRegBase = idxRegIndex;
10903 idxRegIndex = UINT8_MAX;
10904 }
10905
10906#ifdef RT_ARCH_AMD64
10907 if (idxRegIndex == UINT8_MAX)
10908 {
10909 if (u32EffAddr == 0)
10910 {
10911 /* mov ret, base */
10912 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10913 }
10914 else
10915 {
10916 /* lea ret32, [base64 + disp32] */
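                /* For illustration: base=rbx, disp=0x10 comes out as 8D 43 10, i.e.
                   'lea eax, [rbx+0x10]'; the REX prefix is only needed for r8-r15 and
                   the extra SIB byte only for the r12-as-base special case below. */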
10917 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
10918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10919 if (idxRegRet >= 8 || idxRegBase >= 8)
10920 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
10921 pbCodeBuf[off++] = 0x8d;
10922 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10923 if (idxRegBase != X86_GREG_x12 /*SIB*/)
10924 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
10925 else
10926 {
10927 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10928 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
10929 }
10930 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10931 if (bMod == X86_MOD_MEM4)
10932 {
10933 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10934 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10935 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10936 }
10937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10938 }
10939 }
10940 else
10941 {
10942 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
10943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
10944 if (idxRegBase == UINT8_MAX)
10945 {
10946 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
10947 if (idxRegRet >= 8 || idxRegIndex >= 8)
10948 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10949 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10950 pbCodeBuf[off++] = 0x8d;
10951 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
10952 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
10953 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10954 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10955 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10956 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10957 }
10958 else
10959 {
10960 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
10961 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
10962 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
10963 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
10964 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
10965 pbCodeBuf[off++] = 0x8d;
10966 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
10967 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
10968 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
10969 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
10970 if (bMod != X86_MOD_MEM0)
10971 {
10972 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
10973 if (bMod == X86_MOD_MEM4)
10974 {
10975 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
10976 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
10977 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
10978 }
10979 }
10980 }
10981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10982 }
10983
10984#elif defined(RT_ARCH_ARM64)
10985 if (u32EffAddr == 0)
10986 {
10987 if (idxRegIndex == UINT8_MAX)
10988 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
10989 else if (idxRegBase == UINT8_MAX)
10990 {
10991 if (cShiftIndex == 0)
10992 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
10993 else
10994 {
10995 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10996 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
10997 }
10998 }
10999 else
11000 {
11001 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11002 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11003 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11004 }
11005 }
11006 else
11007 {
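        /* A 32-bit ADD/SUB with a 12-bit unsigned immediate, as used below, covers
           displacements in the range -4095..+4095; anything outside that is loaded
           into the result register first and the base register added on top. */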
11008 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
11009 {
11010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11011 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
11012 }
11013 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
11014 {
11015 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11016 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11017 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
11018 }
11019 else
11020 {
11021 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
11022 if (idxRegBase != UINT8_MAX)
11023 {
11024 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11025 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11026 }
11027 }
11028 if (idxRegIndex != UINT8_MAX)
11029 {
11030 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11031 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11032 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11033 }
11034 }
11035
11036#else
11037# error "port me"
11038#endif
11039
11040 if (idxRegIndex != UINT8_MAX)
11041 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11042 if (idxRegBase != UINT8_MAX)
11043 iemNativeRegFreeTmp(pReNative, idxRegBase);
11044 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11045 return off;
11046}
11047
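/* Worked example (informal): for 'mov eax, [eax + ebx*2 + 0x10]' the decoder hands us
   bRm=0x44 (mod=1, rm=4/SIB), SIB=0x58 (scale bits=01 i.e. index*2, index=ebx, base=eax)
   in the low byte of uSibAndRspOffset, and u32Disp=0x10; the function above then emits
   the equivalent of idxRegRet = (uint32_t)(eax + (ebx << 1) + 16). */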
11048
11049#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11050 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11051 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11052
11053#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11054 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11055 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11056
11057#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11058 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11059 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
11060
11061/**
11062 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
11063 *
11064 * @returns New off.
11065 * @param   pReNative           The native recompile state.
11066 * @param   off                 The code buffer offset.
11067 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
11068 * bit 4 to REX.X. The two bits are part of the
11069 * REG sub-field, which isn't needed in this
11070 * function.
11071 * @param uSibAndRspOffset Two parts:
11072 * - The first 8 bits make up the SIB byte.
11073 * - The next 8 bits are the fixed RSP/ESP offset
11074 * in case of a pop [xSP].
11075 * @param u32Disp The displacement byte/word/dword, if any.
11076 * @param cbInstr The size of the fully decoded instruction. Used
11077 * for RIP relative addressing.
11078 * @param idxVarRet The result variable number.
11079 * @param f64Bit Whether to use a 64-bit or 32-bit address size
11080 * when calculating the address.
11081 *
11082 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
11083 */
11084DECL_INLINE_THROW(uint32_t)
11085iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
11086 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
11087{
11088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11089
11090 /*
11091 * Special case the rip + disp32 form first.
11092 */
11093 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11094 {
11095 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11096 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11097 kIemNativeGstRegUse_ReadOnly);
11098#ifdef RT_ARCH_AMD64
11099 if (f64Bit)
11100 {
11101 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
11102 if ((int32_t)offFinalDisp == offFinalDisp)
11103 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
11104 else
11105 {
11106 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
11107 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
11108 }
11109 }
11110 else
11111 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
11112
11113#elif defined(RT_ARCH_ARM64)
11114 if (f64Bit)
11115 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11116 (int64_t)(int32_t)u32Disp + cbInstr);
11117 else
11118 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11119 (int32_t)u32Disp + cbInstr);
11120
11121#else
11122# error "Port me!"
11123#endif
11124 iemNativeRegFreeTmp(pReNative, idxRegPc);
11125 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11126 return off;
11127 }
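    /* Example (informal): for the 7 byte 'mov rax, [rip+0x100]' the effective address is
       the RIP of the *next* instruction plus 0x100, i.e. idxRegPc + cbInstr + 0x100,
       which is exactly the addend folded together above. */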
11128
11129    /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 cases). */
11130 int64_t i64EffAddr = 0;
11131 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11132 {
11133 case 0: break;
11134 case 1: i64EffAddr = (int8_t)u32Disp; break;
11135 case 2: i64EffAddr = (int32_t)u32Disp; break;
11136 default: AssertFailed();
11137 }
11138
11139 /* Get the register (or SIB) value. */
11140 uint8_t idxGstRegBase = UINT8_MAX;
11141 uint8_t idxGstRegIndex = UINT8_MAX;
11142 uint8_t cShiftIndex = 0;
11143 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
11144 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
11145 else /* SIB: */
11146 {
11147        /* index /w scaling. */
11148 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11149 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11150 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
11151 if (idxGstRegIndex == 4)
11152 {
11153 /* no index */
11154 cShiftIndex = 0;
11155 idxGstRegIndex = UINT8_MAX;
11156 }
11157
11158 /* base */
11159 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
11160 if (idxGstRegBase == 4)
11161 {
11162 /* pop [rsp] hack */
11163 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
11164 }
11165 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
11166 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
11167 {
11168 /* mod=0 and base=5 -> disp32, no base reg. */
11169 Assert(i64EffAddr == 0);
11170 i64EffAddr = (int32_t)u32Disp;
11171 idxGstRegBase = UINT8_MAX;
11172 }
11173 }
11174
11175 /*
11176 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11177 * the start of the function.
11178 */
11179 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11180 {
11181 if (f64Bit)
11182 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11183 else
11184 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11185 return off;
11186 }
11187
11188 /*
11189 * Now emit code that calculates:
11190 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11191 * or if !f64Bit:
11192 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11193 */
11194 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11195 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11196 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11197 kIemNativeGstRegUse_ReadOnly);
11198 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11199 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11200 kIemNativeGstRegUse_ReadOnly);
11201
11202 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11203 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11204 {
11205 idxRegBase = idxRegIndex;
11206 idxRegIndex = UINT8_MAX;
11207 }
11208
11209#ifdef RT_ARCH_AMD64
11210 uint8_t bFinalAdj;
11211 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11212 bFinalAdj = 0; /* likely */
11213 else
11214 {
11215 /* pop [rsp] with a problematic disp32 value. Split out the
11216 RSP offset and add it separately afterwards (bFinalAdj). */
11217 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11218 Assert(idxGstRegBase == X86_GREG_xSP);
11219 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11220 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11221 Assert(bFinalAdj != 0);
11222 i64EffAddr -= bFinalAdj;
11223 Assert((int32_t)i64EffAddr == i64EffAddr);
11224 }
11225 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11226//pReNative->pInstrBuf[off++] = 0xcc;
11227
11228 if (idxRegIndex == UINT8_MAX)
11229 {
11230 if (u32EffAddr == 0)
11231 {
11232 /* mov ret, base */
11233 if (f64Bit)
11234 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11235 else
11236 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11237 }
11238 else
11239 {
11240 /* lea ret, [base + disp32] */
11241 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11242 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11243 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11244 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11245 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11246 | (f64Bit ? X86_OP_REX_W : 0);
11247 pbCodeBuf[off++] = 0x8d;
11248 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11249 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11250 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11251 else
11252 {
11253 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11254 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11255 }
11256 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11257 if (bMod == X86_MOD_MEM4)
11258 {
11259 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11260 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11261 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11262 }
11263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11264 }
11265 }
11266 else
11267 {
11268 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11269 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11270 if (idxRegBase == UINT8_MAX)
11271 {
11272 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11273 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11274 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11275 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11276 | (f64Bit ? X86_OP_REX_W : 0);
11277 pbCodeBuf[off++] = 0x8d;
11278 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11279 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11280 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11281 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11282 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11283 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11284 }
11285 else
11286 {
11287 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11288 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11289 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11290 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11291 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11292 | (f64Bit ? X86_OP_REX_W : 0);
11293 pbCodeBuf[off++] = 0x8d;
11294 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11295 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11296 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11297 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11298 if (bMod != X86_MOD_MEM0)
11299 {
11300 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11301 if (bMod == X86_MOD_MEM4)
11302 {
11303 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11304 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11305 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11306 }
11307 }
11308 }
11309 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11310 }
11311
11312 if (!bFinalAdj)
11313 { /* likely */ }
11314 else
11315 {
11316 Assert(f64Bit);
11317 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11318 }
11319
11320#elif defined(RT_ARCH_ARM64)
11321 if (i64EffAddr == 0)
11322 {
11323 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11324 if (idxRegIndex == UINT8_MAX)
11325 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11326 else if (idxRegBase != UINT8_MAX)
11327 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11328 f64Bit, false /*fSetFlags*/, cShiftIndex);
11329 else
11330 {
11331 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11332 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11333 }
11334 }
11335 else
11336 {
11337 if (f64Bit)
11338 { /* likely */ }
11339 else
11340 i64EffAddr = (int32_t)i64EffAddr;
11341
11342 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11343 {
11344 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11345 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11346 }
11347 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11348 {
11349 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11350 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11351 }
11352 else
11353 {
11354 if (f64Bit)
11355 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11356 else
11357 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11358 if (idxRegBase != UINT8_MAX)
11359 {
11360 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11361 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11362 }
11363 }
11364 if (idxRegIndex != UINT8_MAX)
11365 {
11366 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11367 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11368 f64Bit, false /*fSetFlags*/, cShiftIndex);
11369 }
11370 }
11371
11372#else
11373# error "port me"
11374#endif
11375
11376 if (idxRegIndex != UINT8_MAX)
11377 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11378 if (idxRegBase != UINT8_MAX)
11379 iemNativeRegFreeTmp(pReNative, idxRegBase);
11380 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11381 return off;
11382}
11383
11384
11385/*********************************************************************************************************************************
11386* TLB Lookup. *
11387*********************************************************************************************************************************/
11388
11389/**
11390 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11391 */
11392DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11393{
11394 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11395 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11396 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11397 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11398
11399 /* Do the lookup manually. */
11400 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11401 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11402 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11403 if (RT_LIKELY(pTlbe->uTag == uTag))
11404 {
11405 /*
11406 * Check TLB page table level access flags.
11407 */
11408 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
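        /* I.e. CPL 3 gives (3 + 1) & 4 = IEMTLBE_F_PT_NO_USER while ring 0..2 gives 0,
           so user-mode accesses will fail the comparison below on supervisor-only pages. */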
11409 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
11410 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11411 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11412 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11413 | IEMTLBE_F_PG_UNASSIGNED
11414 | IEMTLBE_F_PT_NO_ACCESSED
11415 | fNoWriteNoDirty | fNoUser);
11416 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11417 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11418 {
11419 /*
11420 * Return the address.
11421 */
11422 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11423 if ((uintptr_t)pbAddr == uResult)
11424 return;
11425 RT_NOREF(cbMem);
11426 AssertFailed();
11427 }
11428 else
11429 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11430 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11431 }
11432 else
11433 AssertFailed();
11434 RT_BREAKPOINT();
11435}
11436
11437/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11438
11439
11440/*********************************************************************************************************************************
11441* Memory fetches and stores common *
11442*********************************************************************************************************************************/
11443
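/** Memory operation variants for iemNativeEmitMemFetchStoreDataCommon: a plain store, a
 * plain fetch, and fetch variants that zero-extend (Zx) or sign-extend (Sx) the loaded
 * value up to the indicated destination width. */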
11444typedef enum IEMNATIVEMITMEMOP
11445{
11446 kIemNativeEmitMemOp_Store = 0,
11447 kIemNativeEmitMemOp_Fetch,
11448 kIemNativeEmitMemOp_Fetch_Zx_U16,
11449 kIemNativeEmitMemOp_Fetch_Zx_U32,
11450 kIemNativeEmitMemOp_Fetch_Zx_U64,
11451 kIemNativeEmitMemOp_Fetch_Sx_U16,
11452 kIemNativeEmitMemOp_Fetch_Sx_U32,
11453 kIemNativeEmitMemOp_Fetch_Sx_U64
11454} IEMNATIVEMITMEMOP;
11455
11456/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11457 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11458 * (with iSegReg = UINT8_MAX). */
11459DECL_INLINE_THROW(uint32_t)
11460iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11461 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11462 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11463{
11464 /*
11465 * Assert sanity.
11466 */
11467 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11468 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
11469 Assert( enmOp != kIemNativeEmitMemOp_Store
11470 || pVarValue->enmKind == kIemNativeVarKind_Immediate
11471 || pVarValue->enmKind == kIemNativeVarKind_Stack);
11472 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11473 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
11474 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
11475 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
11476 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11477 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11478 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11479 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11480#ifdef VBOX_STRICT
11481 if (iSegReg == UINT8_MAX)
11482 {
11483 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11484 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11485 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11486 switch (cbMem)
11487 {
11488 case 1:
11489 Assert( pfnFunction
11490 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11491 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11492 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11493 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11494 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11495 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11496 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11497 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11498 : UINT64_C(0xc000b000a0009000) ));
11499 break;
11500 case 2:
11501 Assert( pfnFunction
11502 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11503 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11504 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11505 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11506 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11507 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11508 : UINT64_C(0xc000b000a0009000) ));
11509 break;
11510 case 4:
11511 Assert( pfnFunction
11512 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11513 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11514 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11515 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11516 : UINT64_C(0xc000b000a0009000) ));
11517 break;
11518 case 8:
11519 Assert( pfnFunction
11520 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11521 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11522 : UINT64_C(0xc000b000a0009000) ));
11523 break;
11524 }
11525 }
11526 else
11527 {
11528 Assert(iSegReg < 6);
11529 switch (cbMem)
11530 {
11531 case 1:
11532 Assert( pfnFunction
11533 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11534 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11535 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11536 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11537 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11538 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11539 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11540 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11541 : UINT64_C(0xc000b000a0009000) ));
11542 break;
11543 case 2:
11544 Assert( pfnFunction
11545 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11546 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11547 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11548 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11549 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11550 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11551 : UINT64_C(0xc000b000a0009000) ));
11552 break;
11553 case 4:
11554 Assert( pfnFunction
11555 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11556 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11557 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11558 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11559 : UINT64_C(0xc000b000a0009000) ));
11560 break;
11561 case 8:
11562 Assert( pfnFunction
11563 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11564 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11565 : UINT64_C(0xc000b000a0009000) ));
11566 break;
11567 }
11568 }
11569#endif
11570
11571#ifdef VBOX_STRICT
11572 /*
11573 * Check that the fExec flags we've got make sense.
11574 */
11575 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11576#endif
11577
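    /*
     * Rough shape of the code emitted below (when the TLB lookup isn't skipped): jump
     * over the TlbMiss block to the TlbLookup code, which does the access inline on a
     * hit and branches back to TlbMiss (a call to pfnFunction) otherwise; both paths
     * meet again at TlbDone. With TlbState.fSkip only the helper call is emitted.
     */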
11578 /*
11579 * To keep things simple we have to commit any pending writes first as we
11580 * may end up making calls.
11581 */
11582 /** @todo we could postpone this till we make the call and reload the
11583 * registers after returning from the call. Not sure if that's sensible or
11584 * not, though. */
11585 off = iemNativeRegFlushPendingWrites(pReNative, off);
11586
11587#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11588 /*
11589 * Move/spill/flush stuff out of call-volatile registers.
11590 * This is the easy way out. We could contain this to the tlb-miss branch
11591 * by saving and restoring active stuff here.
11592 */
11593 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
11594#endif
11595
11596 /*
11597 * Define labels and allocate the result register (trying for the return
11598 * register if we can).
11599 */
11600 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
11601 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
11602 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
11603 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
11604 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
11605 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
11606 uint8_t const idxRegValueStore = !TlbState.fSkip
11607 && enmOp == kIemNativeEmitMemOp_Store
11608 && pVarValue->enmKind != kIemNativeVarKind_Immediate
11609 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
11610 : UINT8_MAX;
11611 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
11612 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
11613 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
11614 : UINT32_MAX;
11615
11616 /*
11617 * Jump to the TLB lookup code.
11618 */
11619 if (!TlbState.fSkip)
11620 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
11621
11622 /*
11623 * TlbMiss:
11624 *
11625 * Call helper to do the fetching.
11626 * We flush all guest register shadow copies here.
11627 */
11628 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
11629
11630#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
11631 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
11632#else
11633 RT_NOREF(idxInstr);
11634#endif
11635
11636#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11637 /* Save variables in volatile registers. */
11638 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
11639 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
11640 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
11641 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
11642#endif
11643
11644 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
11645 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
11646 if (enmOp == kIemNativeEmitMemOp_Store)
11647 {
11648 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
11649 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
11650#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11651 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11652#else
11653 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
11654 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
11655#endif
11656 }
11657
11658 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
11659 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
11660#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11661 fVolGregMask);
11662#else
11663 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
11664#endif
11665
11666 if (iSegReg != UINT8_MAX)
11667 {
11668 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
11669 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
11670 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
11671 }
11672
11673 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
11674 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
11675
11676 /* Done setting up parameters, make the call. */
11677 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
11678
11679 /*
11680 * Put the result in the right register if this is a fetch.
11681 */
11682 if (enmOp != kIemNativeEmitMemOp_Store)
11683 {
11684 Assert(idxRegValueFetch == pVarValue->idxReg);
11685 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
11686 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
11687 }
11688
11689#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11690 /* Restore variables and guest shadow registers to volatile registers. */
11691 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
11692 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
11693#endif
11694
11695#ifdef IEMNATIVE_WITH_TLB_LOOKUP
11696 if (!TlbState.fSkip)
11697 {
11698 /* end of TlbMiss - Jump to the done label. */
11699 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
11700 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
11701
11702 /*
11703 * TlbLookup:
11704 */
11705 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
11706 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
11707 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
11708
11709 /*
11710 * Emit code to do the actual storing / fetching.
11711 */
11712 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
11713# ifdef VBOX_WITH_STATISTICS
11714 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
11715 enmOp == kIemNativeEmitMemOp_Store
11716                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
11717                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
11718# endif
11719 switch (enmOp)
11720 {
11721 case kIemNativeEmitMemOp_Store:
11722 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
11723 {
11724 switch (cbMem)
11725 {
11726 case 1:
11727 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11728 break;
11729 case 2:
11730 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11731 break;
11732 case 4:
11733 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11734 break;
11735 case 8:
11736 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
11737 break;
11738 default:
11739 AssertFailed();
11740 }
11741 }
11742 else
11743 {
11744 switch (cbMem)
11745 {
11746 case 1:
11747 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
11748 idxRegMemResult, TlbState.idxReg1);
11749 break;
11750 case 2:
11751 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
11752 idxRegMemResult, TlbState.idxReg1);
11753 break;
11754 case 4:
11755 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
11756 idxRegMemResult, TlbState.idxReg1);
11757 break;
11758 case 8:
11759 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
11760 idxRegMemResult, TlbState.idxReg1);
11761 break;
11762 default:
11763 AssertFailed();
11764 }
11765 }
11766 break;
11767
11768 case kIemNativeEmitMemOp_Fetch:
11769 case kIemNativeEmitMemOp_Fetch_Zx_U16:
11770 case kIemNativeEmitMemOp_Fetch_Zx_U32:
11771 case kIemNativeEmitMemOp_Fetch_Zx_U64:
11772 switch (cbMem)
11773 {
11774 case 1:
11775 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11776 break;
11777 case 2:
11778 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11779 break;
11780 case 4:
11781 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11782 break;
11783 case 8:
11784 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11785 break;
11786 default:
11787 AssertFailed();
11788 }
11789 break;
11790
11791 case kIemNativeEmitMemOp_Fetch_Sx_U16:
11792 Assert(cbMem == 1);
11793 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11794 break;
11795
11796 case kIemNativeEmitMemOp_Fetch_Sx_U32:
11797 Assert(cbMem == 1 || cbMem == 2);
11798 if (cbMem == 1)
11799 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11800 else
11801 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11802 break;
11803
11804 case kIemNativeEmitMemOp_Fetch_Sx_U64:
11805 switch (cbMem)
11806 {
11807 case 1:
11808 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11809 break;
11810 case 2:
11811 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11812 break;
11813 case 4:
11814 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
11815 break;
11816 default:
11817 AssertFailed();
11818 }
11819 break;
11820
11821 default:
11822 AssertFailed();
11823 }
11824
11825 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
11826
11827 /*
11828 * TlbDone:
11829 */
11830 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
11831
11832 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
11833
11834# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
11835 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
11836 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
11837# endif
11838 }
11839#else
11840 RT_NOREF(fAlignMask, idxLabelTlbMiss);
11841#endif
11842
11843 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
11844 iemNativeVarRegisterRelease(pReNative, idxVarValue);
11845 return off;
11846}
11847
11848
11849
11850/*********************************************************************************************************************************
11851* Memory fetches (IEM_MEM_FETCH_XXX). *
11852*********************************************************************************************************************************/
11853
11854/* 8-bit segmented: */
11855#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
11856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
11857 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11858 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11859
11860#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11862 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11863 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11864
11865#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11867 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11868 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11869
11870#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11871 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11872 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11873 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
11874
11875#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11876 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11877 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11878 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11879
11880#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11881 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11882 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11883 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11884
11885#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11886 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11887 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11888 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11889
11890/* 16-bit segmented: */
11891#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
11892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11893 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11894 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11895
11896#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
11898 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11899 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11900
11901#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11903 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11904 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11905
11906#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11908 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11909 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
11910
11911#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11913 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11914 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
11915
11916#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11918 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11919 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
11920
11921
11922/* 32-bit segmented: */
11923#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
11924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11925 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11926 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11927
11928#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
11929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
11930 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
11931 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
11932
11933#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11935 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11936 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
11937
11938#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11940 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11941 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
11942
11943
11944/* 64-bit segmented: */
11945#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
11946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
11947 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
11948 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
11949
11950
11951
11952/* 8-bit flat: */
11953#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
11954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
11955 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
11956 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11957
11958#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
11959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11960 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
11961 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11962
11963#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
11964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11965 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
11966 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11967
11968#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
11969 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11970 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
11971 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
11972
11973#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
11974 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11975 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
11976 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
11977
11978#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
11979 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
11980 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
11981 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
11982
11983#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
11984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
11985 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
11986 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
11987
11988
11989/* 16-bit flat: */
11990#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
11991 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11992 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11993 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
11994
11995#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
11996 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
11997 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
11998 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
11999
12000#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
12001 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12002 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12003 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12004
12005#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
12006 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12007 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12008 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12009
12010#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
12011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12012 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12013 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12014
12015#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
12016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12017 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12018 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12019
12020/* 32-bit flat: */
12021#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
12022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12023 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12024 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12025
12026#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
12027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12028 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12029 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12030
12031#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
12032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12033 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12034 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12035
12036#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
12037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12038 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12039 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12040
12041/* 64-bit flat: */
12042#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
12043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12044 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12045 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
12046
12047
12048
12049/*********************************************************************************************************************************
12050* Memory stores (IEM_MEM_STORE_XXX). *
12051*********************************************************************************************************************************/
12052
12053#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
12054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
12055 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12056 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12057
12058#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
12059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
12060 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12061 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12062
12063#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
12064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
12065 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12066 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12067
12068#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
12069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
12070 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12071 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12072
12073
12074#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
12075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
12076 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12077 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12078
12079#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
12080 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
12081 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12082 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12083
12084#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
12085 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
12086 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12087 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12088
12089#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
12090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
12091 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12092 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12093
12094
12095#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
12096 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12097 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12098
12099#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
12100 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12101 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12102
12103#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
12104 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12105 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12106
12107#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
12108 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12109 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12110
12111
12112#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
12113 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12114 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12115
12116#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
12117 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12118 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12119
12120#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
12121 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12122 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12123
12124#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
12125 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12126 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12127
12128/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
12129 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
12130DECL_INLINE_THROW(uint32_t)
12131iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
12132 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
12133{
12134 /*
12135 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
12136 * to do the grunt work.
12137 */
12138 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
12139 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
12140 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
12141 pfnFunction, idxInstr);
12142 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
12143 return off;
12144}
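/*
 * A minimal sketch of the natural-alignment-mask convention (cbMem - 1) that the store emitters
 * above pass to iemNativeEmitMemFetchStoreDataCommon.  The helper name is hypothetical and the
 * block is illustration only, not code used by the recompiler:
 */
#if 0 /* illustrative sketch only */
static bool iemExampleIsNaturallyAligned(uint64_t GCPtrMem, uint8_t cbMem)
{
    uint8_t const fAlignMask = cbMem - 1;   /* 1 -> 0, 2 -> 1, 4 -> 3, 8 -> 7 */
    return (GCPtrMem & fAlignMask) == 0;    /* aligned when all masked bits are clear */
}
#endif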
12145
12146
12147
12148/*********************************************************************************************************************************
12149* Stack Accesses. *
12150*********************************************************************************************************************************/
12151/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
12152#define IEM_MC_PUSH_U16(a_u16Value) \
12153 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12154 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
12155#define IEM_MC_PUSH_U32(a_u32Value) \
12156 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12157 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
12158#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
12159 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
12160 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
12161#define IEM_MC_PUSH_U64(a_u64Value) \
12162 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12163 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
12164
12165#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
12166 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12167 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12168#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
12169 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12170 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
12171#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
12172 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
12173 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
12174
12175#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12176 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12177 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12178#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12179 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12180 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12181
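/*
 * A minimal sketch of how the cBitsVarAndFlat parameter used by the push macros above packs the
 * value width, flat stack width and segment-register flag via RT_MAKE_U32_FROM_U8, and how the
 * emitter unpacks it again.  The function name is hypothetical; illustration only, not code used
 * by the recompiler:
 */
#if 0 /* illustrative sketch only */
static void iemExampleDecodeBitsVarAndFlat(void)
{
    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(32, 32, 1, 0); /* IEM_MC_FLAT32_PUSH_U32_SREG */
    uint8_t const  cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;     /* operand size in bytes: 4 */
    uint8_t const  cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);         /* flat stack width: 32 (0 = segmented) */
    bool const     fIsSegReg       = RT_BYTE3(cBitsVarAndFlat) != 0;    /* segment register push: true */
    RT_NOREF(cbMem, cBitsFlat, fIsSegReg);
}
#endif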
12182
12183DECL_FORCE_INLINE_THROW(uint32_t)
12184iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12185{
12186 /* Use16BitSp: */
12187#ifdef RT_ARCH_AMD64
12188 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12189 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12190#else
12191 /* sub regeff, regrsp, #cbMem */
12192 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12193 /* and regeff, regeff, #0xffff */
12194 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12195 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12196    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the rest. */
12197 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12198#endif
12199 return off;
12200}
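/*
 * Plain-C rendering of the Use16BitSp sequence above: SP (bits 15:0 of RSP) is decremented and
 * wraps modulo 64 KiB while bits 63:16 stay untouched, and the effective stack address is the
 * new 16-bit SP.  The helper name is hypothetical; illustration only, not code used by the
 * recompiler:
 */
#if 0 /* illustrative sketch only */
static uint64_t iemExamplePushAdjustSp16(uint64_t uRsp, uint8_t cbMem, uint16_t *puEffSp)
{
    uint16_t const uNewSp = (uint16_t)(uRsp - cbMem);         /* wraps modulo 64 KiB */
    *puEffSp = uNewSp;                                        /* effective address within SS */
    return (uRsp & UINT64_C(0xffffffffffff0000)) | uNewSp;    /* preserve RSP bits 63:16 */
}
#endif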
12201
12202
12203DECL_FORCE_INLINE(uint32_t)
12204iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12205{
12206 /* Use32BitSp: */
12207 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12208 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12209 return off;
12210}
12211
12212
12213/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12214DECL_INLINE_THROW(uint32_t)
12215iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12216 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12217{
12218 /*
12219 * Assert sanity.
12220 */
12221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12222 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12223#ifdef VBOX_STRICT
12224 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12225 {
12226 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12227 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12228 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12229 Assert( pfnFunction
12230 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12231 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12232 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12233 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12234 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12235 : UINT64_C(0xc000b000a0009000) ));
12236 }
12237 else
12238 Assert( pfnFunction
12239 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12240 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12241 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12242 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12243 : UINT64_C(0xc000b000a0009000) ));
12244#endif
12245
12246#ifdef VBOX_STRICT
12247 /*
12248 * Check that the fExec flags we've got make sense.
12249 */
12250 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12251#endif
12252
12253 /*
12254 * To keep things simple we have to commit any pending writes first as we
12255 * may end up making calls.
12256 */
12257 /** @todo we could postpone this till we make the call and reload the
12258 * registers after returning from the call. Not sure if that's sensible or
12259 * not, though. */
12260 off = iemNativeRegFlushPendingWrites(pReNative, off);
12261
12262 /*
12263 * First we calculate the new RSP and the effective stack pointer value.
12264 * For 64-bit mode and flat 32-bit these two are the same.
12265 * (Code structure is very similar to that of PUSH)
12266 */
12267 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12268 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12269 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12270 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12271 ? cbMem : sizeof(uint16_t);
12272 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12273 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12274 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12275 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12276 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12277 if (cBitsFlat != 0)
12278 {
12279 Assert(idxRegEffSp == idxRegRsp);
12280 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12281 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12282 if (cBitsFlat == 64)
12283 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12284 else
12285 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12286 }
12287 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12288 {
12289 Assert(idxRegEffSp != idxRegRsp);
12290 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12291 kIemNativeGstRegUse_ReadOnly);
12292#ifdef RT_ARCH_AMD64
12293 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12294#else
12295 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12296#endif
12297 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12298 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12299 offFixupJumpToUseOtherBitSp = off;
12300 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12301 {
12302 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12303 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12304 }
12305 else
12306 {
12307 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12308 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12309 }
12310 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12311 }
12312 /* SpUpdateEnd: */
12313 uint32_t const offLabelSpUpdateEnd = off;
12314
12315 /*
12316     * Okay, now prepare for the TLB lookup and jump to its code (or to TlbMiss
12317     * if we're skipping the lookup).
12318 */
12319 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12320 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12321 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12322 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12323 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12324 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12325 : UINT32_MAX;
12326 uint8_t const idxRegValue = !TlbState.fSkip
12327 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12328 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12329 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12330 : UINT8_MAX;
12331 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12332
12333
12334 if (!TlbState.fSkip)
12335 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12336 else
12337 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12338
12339 /*
12340 * Use16BitSp:
12341 */
12342 if (cBitsFlat == 0)
12343 {
12344#ifdef RT_ARCH_AMD64
12345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12346#else
12347 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12348#endif
12349 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12350 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12351 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12352 else
12353 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12354 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12355 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12356 }
12357
12358 /*
12359 * TlbMiss:
12360 *
12361 * Call helper to do the pushing.
12362 */
12363 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12364
12365#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12366 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12367#else
12368 RT_NOREF(idxInstr);
12369#endif
12370
12371 /* Save variables in volatile registers. */
12372 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12373 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12374 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12375 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12376 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12377
12378 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12379 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12380 {
12381 /* Swap them using ARG0 as temp register: */
12382 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12383 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12384 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12385 }
12386 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12387 {
12388 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12389 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12390 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12391
12392 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12393 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12394 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12395 }
12396 else
12397 {
12398 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12399 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12400
12401 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12402 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12403 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12404 }
12405
12406 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12407 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12408
12409 /* Done setting up parameters, make the call. */
12410 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12411
12412 /* Restore variables and guest shadow registers to volatile registers. */
12413 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12414 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12415
12416#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12417 if (!TlbState.fSkip)
12418 {
12419 /* end of TlbMiss - Jump to the done label. */
12420 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12421 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12422
12423 /*
12424 * TlbLookup:
12425 */
12426 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12427 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12428
12429 /*
12430 * Emit code to do the actual storing / fetching.
12431 */
12432 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12433# ifdef VBOX_WITH_STATISTICS
12434 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12435 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12436# endif
12437 if (idxRegValue != UINT8_MAX)
12438 {
12439 switch (cbMemAccess)
12440 {
12441 case 2:
12442 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12443 break;
12444 case 4:
12445 if (!fIsIntelSeg)
12446 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12447 else
12448 {
12449                         /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
12450                            PUSH FS in real mode, so we have to try to emulate that here.
12451 We borrow the now unused idxReg1 from the TLB lookup code here. */
12452 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12453 kIemNativeGstReg_EFlags);
12454 if (idxRegEfl != UINT8_MAX)
12455 {
12456#ifdef RT_ARCH_AMD64
12457 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12458 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12459 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12460#else
12461 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12462 off, TlbState.idxReg1, idxRegEfl,
12463 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12464#endif
12465 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12466 }
12467 else
12468 {
12469 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12470 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12471 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12472 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12473 }
12474 /* ASSUMES the upper half of idxRegValue is ZERO. */
12475 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12476 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12477 }
12478 break;
12479 case 8:
12480 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12481 break;
12482 default:
12483 AssertFailed();
12484 }
12485 }
12486 else
12487 {
12488 switch (cbMemAccess)
12489 {
12490 case 2:
12491 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12492 idxRegMemResult, TlbState.idxReg1);
12493 break;
12494 case 4:
12495 Assert(!fIsSegReg);
12496 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12497 idxRegMemResult, TlbState.idxReg1);
12498 break;
12499 case 8:
12500 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
12501 break;
12502 default:
12503 AssertFailed();
12504 }
12505 }
12506
12507 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12508 TlbState.freeRegsAndReleaseVars(pReNative);
12509
12510 /*
12511 * TlbDone:
12512 *
12513 * Commit the new RSP value.
12514 */
12515 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12516 }
12517#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12518
12519 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12520 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12521 if (idxRegEffSp != idxRegRsp)
12522 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12523
12524    /* The value variable is implicitly flushed. */
12525 if (idxRegValue != UINT8_MAX)
12526 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12527 iemNativeVarFreeLocal(pReNative, idxVarValue);
12528
12529 return off;
12530}
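/*
 * Plain-C rendering of the Intel real-mode 32-bit segment push quirk handled in the TLB-hit path
 * above: the low word of the stored dword receives the selector while the high word receives the
 * upper half of EFLAGS with the reserved/RAZ bits masked off.  The helper name is hypothetical;
 * illustration only, not code used by the recompiler:
 */
#if 0 /* illustrative sketch only */
static uint32_t iemExampleIntelRealModeSregPushValue(uint16_t uSel, uint32_t fEfl)
{
    uint32_t const fHighEfl = fEfl & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK;
    return fHighEfl | uSel;
}
#endif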
12531
12532
12533
12534/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12535#define IEM_MC_POP_GREG_U16(a_iGReg) \
12536 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12537 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12538#define IEM_MC_POP_GREG_U32(a_iGReg) \
12539 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12540 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
12541#define IEM_MC_POP_GREG_U64(a_iGReg) \
12542 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12543 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
12544
12545#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
12546 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12547 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12548#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
12549 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12550 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
12551
12552#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
12553 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12554 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
12555#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
12556 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12557 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
12558
12559
12560DECL_FORCE_INLINE_THROW(uint32_t)
12561iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
12562 uint8_t idxRegTmp)
12563{
12564 /* Use16BitSp: */
12565#ifdef RT_ARCH_AMD64
12566 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12567 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12568 RT_NOREF(idxRegTmp);
12569#else
12570 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
12571 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
12572 /* add tmp, regrsp, #cbMem */
12573 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
12574 /* and tmp, tmp, #0xffff */
12575 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12576 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
12577    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
12578 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
12579#endif
12580 return off;
12581}
12582
12583
12584DECL_FORCE_INLINE(uint32_t)
12585iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12586{
12587 /* Use32BitSp: */
12588 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12589 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12590 return off;
12591}
12592
12593
12594/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
12595DECL_INLINE_THROW(uint32_t)
12596iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
12597 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12598{
12599 /*
12600 * Assert sanity.
12601 */
12602 Assert(idxGReg < 16);
12603#ifdef VBOX_STRICT
12604 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12605 {
12606 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12607 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12608 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12609 Assert( pfnFunction
12610 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12611 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
12612 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
12613 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
12614 : UINT64_C(0xc000b000a0009000) ));
12615 }
12616 else
12617 Assert( pfnFunction
12618 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
12619 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
12620 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
12621 : UINT64_C(0xc000b000a0009000) ));
12622#endif
12623
12624#ifdef VBOX_STRICT
12625 /*
12626 * Check that the fExec flags we've got make sense.
12627 */
12628 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12629#endif
12630
12631 /*
12632 * To keep things simple we have to commit any pending writes first as we
12633 * may end up making calls.
12634 */
12635 off = iemNativeRegFlushPendingWrites(pReNative, off);
12636
12637 /*
12638     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
12639 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
12640 * directly as the effective stack pointer.
12641 * (Code structure is very similar to that of PUSH)
12642 */
12643 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12644 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12645 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12646 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12647 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12648 /** @todo can do a better job picking the register here. For cbMem >= 4 this
12649 * will be the resulting register value. */
12650 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
12651
12652 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12653 if (cBitsFlat != 0)
12654 {
12655 Assert(idxRegEffSp == idxRegRsp);
12656 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12657 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12658 }
12659 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12660 {
12661 Assert(idxRegEffSp != idxRegRsp);
12662 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12663 kIemNativeGstRegUse_ReadOnly);
12664#ifdef RT_ARCH_AMD64
12665 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12666#else
12667 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12668#endif
12669 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12670 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12671 offFixupJumpToUseOtherBitSp = off;
12672 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12673 {
12674/** @todo can skip idxRegRsp updating when popping ESP. */
12675 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12676 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12677 }
12678 else
12679 {
12680 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12681 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12682 }
12683 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12684 }
12685 /* SpUpdateEnd: */
12686 uint32_t const offLabelSpUpdateEnd = off;
12687
12688 /*
12689     * Okay, now prepare for the TLB lookup and jump to its code (or to TlbMiss
12690     * if we're skipping the lookup).
12691 */
12692 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12693 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
12694 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12695 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12696 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12697 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12698 : UINT32_MAX;
12699
12700 if (!TlbState.fSkip)
12701 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12702 else
12703 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12704
12705 /*
12706 * Use16BitSp:
12707 */
12708 if (cBitsFlat == 0)
12709 {
12710#ifdef RT_ARCH_AMD64
12711 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12712#else
12713 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12714#endif
12715 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12716 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12717 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
12718 else
12719 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12720 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12722 }
12723
12724 /*
12725 * TlbMiss:
12726 *
12727     * Call helper to do the popping.
12728 */
12729 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12730
12731#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12732 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12733#else
12734 RT_NOREF(idxInstr);
12735#endif
12736
12737 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12738 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12739 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
12740 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12741
12742
12743 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
12744 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12746
12747 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12749
12750 /* Done setting up parameters, make the call. */
12751 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12752
12753 /* Move the return register content to idxRegMemResult. */
12754 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
12755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
12756
12757 /* Restore variables and guest shadow registers to volatile registers. */
12758 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12759 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12760
12761#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12762 if (!TlbState.fSkip)
12763 {
12764 /* end of TlbMiss - Jump to the done label. */
12765 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12766 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12767
12768 /*
12769 * TlbLookup:
12770 */
12771 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
12772 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12773
12774 /*
12775          * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
12776 */
12777 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12778# ifdef VBOX_WITH_STATISTICS
12779 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12780 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12781# endif
12782 switch (cbMem)
12783 {
12784 case 2:
12785 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12786 break;
12787 case 4:
12788 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12789 break;
12790 case 8:
12791 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
12792 break;
12793 default:
12794 AssertFailed();
12795 }
12796
12797 TlbState.freeRegsAndReleaseVars(pReNative);
12798
12799 /*
12800 * TlbDone:
12801 *
12802     * Set the new RSP value (FLAT accesses need to calculate it first) and
12803 * commit the popped register value.
12804 */
12805 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12806 }
12807#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12808
12809 if (idxGReg != X86_GREG_xSP)
12810 {
12811 /* Set the register. */
12812 if (cbMem >= sizeof(uint32_t))
12813 {
12814#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
12815 AssertMsg( pReNative->idxCurCall == 0
12816 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
12817 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
12818#endif
12819 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
12820 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
12821 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12822 }
12823 else
12824 {
12825 Assert(cbMem == sizeof(uint16_t));
12826 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
12827 kIemNativeGstRegUse_ForUpdate);
12828 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
12829 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
12830 iemNativeRegFreeTmp(pReNative, idxRegDst);
12831 }
12832
12833 /* Complete RSP calculation for FLAT mode. */
12834 if (idxRegEffSp == idxRegRsp)
12835 {
12836 if (cBitsFlat == 64)
12837 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12838 else
12839 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12840 }
12841 }
12842 else
12843 {
12844        /* We're popping RSP, ESP or SP. Only the 16-bit SP case requires a bit of extra work, of course. */
12845 if (cbMem == sizeof(uint64_t))
12846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
12847 else if (cbMem == sizeof(uint32_t))
12848 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
12849 else
12850 {
12851 if (idxRegEffSp == idxRegRsp)
12852 {
12853 if (cBitsFlat == 64)
12854 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
12855 else
12856 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
12857 }
12858 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
12859 }
12860 }
12861 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
12862
12863 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12864 if (idxRegEffSp != idxRegRsp)
12865 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12866 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12867
12868 return off;
12869}
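/*
 * Plain-C rendering of the POP-into-xSP special case handled above: the popped value replaces the
 * stack pointer instead of the usual post-increment result, and a 16-bit POP SP only replaces the
 * low word.  The helper name is hypothetical; illustration only, not code used by the recompiler:
 */
#if 0 /* illustrative sketch only */
static uint64_t iemExamplePopIntoRsp(uint64_t uRspAfterInc, uint64_t uPoppedValue, uint8_t cbPop)
{
    if (cbPop == sizeof(uint64_t))
        return uPoppedValue;                                    /* POP RSP */
    if (cbPop == sizeof(uint32_t))
        return uPoppedValue & UINT32_MAX;                       /* POP ESP zero-extends into RSP */
    return (uRspAfterInc & UINT64_C(0xffffffffffff0000))        /* POP SP merges only bits 15:0 */
         | (uPoppedValue  & UINT16_C(0xffff));
}
#endif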
12870
12871
12872
12873/*********************************************************************************************************************************
12874* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
12875*********************************************************************************************************************************/
12876
12877#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12878 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12879 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
12880 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
12881
12882#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12883 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12884 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
12885 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
12886
12887#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12888 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12889 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
12890 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
12891
12892#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12893 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12894 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
12895 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
12896
12897
12898#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12899 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12900 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12901 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
12902
12903#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12904 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12905 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12906 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
12907
12908#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12909 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12910 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12911 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12912
12913#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12914 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12915 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12916 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
12917
12918#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12919 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
12920 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
12921 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
12922
12923
12924#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12925 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12926 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12927 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
12928
12929#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12930 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12931 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12932 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
12933
12934#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12935 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12936 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12937 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12938
12939#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12940 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12941 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12942 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
12943
12944#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12945 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
12946 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
12947 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
12948
12949
12950#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12951 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12952 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12953 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
12954
12955#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12956 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12957 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12958 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
12959#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12960 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12961 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12962 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12963
12964#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12965 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12966 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12967 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
12968
12969#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12970 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
12971 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12972 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
12973
12974
12975#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12976 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12977 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
12978 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
12979
12980#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12981 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
12982 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
12983 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
12984
12985
12986#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12987 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12988 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12989 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
12990
12991#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12992 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12993 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12994 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
12995
12996#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
12997 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
12998 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
12999 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
13000
13001#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13002 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13003 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13004 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
13005
13006
13007
13008#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13009 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13010 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13011 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
13012
13013#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13014 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13015 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13016 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
13017
13018#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13019 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13020 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13021 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
13022
13023#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13024 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13025 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13026 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
13027
13028
13029#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13030 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13031 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13032 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
13033
13034#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13035 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13036 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13037 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
13038
13039#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13040 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13041 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13042 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13043
13044#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13045 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13046 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13047 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
13048
13049#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
13050 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
13051 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13052 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13053
13054
13055#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13056 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13057 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13058 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
13059
13060#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13061 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13062 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13063 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
13064
13065#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13066 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13067 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13068 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13069
13070#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13071 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13072 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13073 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
13074
13075#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
13076 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
13077 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13078 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13079
13080
13081#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13083 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13084 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
13085
13086#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13088 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13089 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
13090
13091#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13092 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13093 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13094 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13095
13096#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13097 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13098 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13099 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
13100
13101#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
13102 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
13103 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13104 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13105
13106
13107#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
13108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13109 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13110 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
13111
13112#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
13113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13114 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13115 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
13116
13117
13118#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13119 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13120 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13121 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
13122
13123#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13124 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13125 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13126 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
13127
13128#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13129 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13130 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13131 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
13132
13133#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13134 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13135 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13136 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
13137
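/*
 * The map macros above select a helper whose suffix matches the requested access type, mirroring
 * the IEM_MAP_HLP_FN check in the strict-build asserts further down.  A minimal decoding sketch
 * (the function name is hypothetical; illustration only, not code used by the recompiler):
 */
#if 0 /* illustrative sketch only */
static const char *iemExampleMapHlpSuffix(uint32_t fAccess)
{
    uint32_t const fType = fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC);
    if (fType == (IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC))
        return "Atomic";
    if (fType == (IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE))
        return "Rw";
    if (fType == IEM_ACCESS_TYPE_READ)
        return "Ro";
    return "Wo";    /* write-only */
}
#endif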
13138
13139DECL_INLINE_THROW(uint32_t)
13140iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
13141 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
13142 uintptr_t pfnFunction, uint8_t idxInstr)
13143{
13144 /*
13145 * Assert sanity.
13146 */
13147 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
13148 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
13149 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
13150 && pVarMem->cbVar == sizeof(void *),
13151 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13152
13153 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13155 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
13156 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
13157 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13158
13159 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
13160 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
13161 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
13162 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
13163 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13164
13165 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
13166
13167 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13168
13169#ifdef VBOX_STRICT
13170# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
13171 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
13172 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
13173 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
13174 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
13175# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
13176 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13177 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13178 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13179
13180 if (iSegReg == UINT8_MAX)
13181 {
13182 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13183 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13184 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13185 switch (cbMem)
13186 {
13187 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13188 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13189 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13190 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13191 case 10:
13192 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13193 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13194 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13195 break;
13196 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13197# if 0
13198 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13199 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13200# endif
13201 default: AssertFailed(); break;
13202 }
13203 }
13204 else
13205 {
13206 Assert(iSegReg < 6);
13207 switch (cbMem)
13208 {
13209 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13210 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13211 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13212 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13213 case 10:
13214 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13215 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13216 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13217 break;
13218 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13219# if 0
13220 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13221 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13222# endif
13223 default: AssertFailed(); break;
13224 }
13225 }
13226# undef IEM_MAP_HLP_FN
13227# undef IEM_MAP_HLP_FN_NO_AT
13228#endif
13229
13230#ifdef VBOX_STRICT
13231 /*
13232 * Check that the fExec flags we've got make sense.
13233 */
13234 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13235#endif
13236
13237 /*
13238 * To keep things simple we have to commit any pending writes first as we
13239 * may end up making calls.
13240 */
13241 off = iemNativeRegFlushPendingWrites(pReNative, off);
13242
13243#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13244 /*
13245 * Move/spill/flush stuff out of call-volatile registers.
13246 * This is the easy way out. We could contain this to the tlb-miss branch
13247 * by saving and restoring active stuff here.
13248 */
13249 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13250 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13251#endif
13252
13253    /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13254       while the tlb-miss code path will temporarily put it on the stack.
13255       Set the type to stack here so we don't need to do it twice below. */
13256 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13257 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13258 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13259 * lookup is done. */
13260
13261 /*
13262 * Define labels and allocate the result register (trying for the return
13263 * register if we can).
13264 */
13265 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13266 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13267 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13268 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
13269 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13270 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13271 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13272 : UINT32_MAX;
13273//off=iemNativeEmitBrk(pReNative, off, 0);
13274 /*
13275 * Jump to the TLB lookup code.
13276 */
13277 if (!TlbState.fSkip)
13278 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13279
13280 /*
13281 * TlbMiss:
13282 *
13283 * Call helper to do the fetching.
13284 * We flush all guest register shadow copies here.
13285 */
13286 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13287
13288#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13289 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13290#else
13291 RT_NOREF(idxInstr);
13292#endif
13293
13294#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13295 /* Save variables in volatile registers. */
13296 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13297 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13298#endif
13299
13300 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13301 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13302#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13303 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13304#else
13305 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13306#endif
13307
13308 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13309 if (iSegReg != UINT8_MAX)
13310 {
13311 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13312 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13313 }
13314
13315 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
13316 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13317 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13318
13319 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13320 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13321
13322 /* Done setting up parameters, make the call. */
13323 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13324
13325 /*
13326 * Put the output in the right registers.
13327 */
13328 Assert(idxRegMemResult == pVarMem->idxReg);
13329 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13330 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13331
13332#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13333 /* Restore variables and guest shadow registers to volatile registers. */
13334 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13335 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13336#endif
13337
13338 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
13339 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13340
13341#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13342 if (!TlbState.fSkip)
13343 {
13344        /* end of tlb-miss - Jump to the done label. */
13345 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13346 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13347
13348 /*
13349 * TlbLookup:
13350 */
13351 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13352 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13353# ifdef VBOX_WITH_STATISTICS
13354 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13355 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13356# endif
13357
13358 /* [idxVarUnmapInfo] = 0; */
13359 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13360
13361 /*
13362 * TlbDone:
13363 */
13364 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13365
13366 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13367
13368# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13369 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13370 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13371# endif
13372 }
13373#else
13374 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13375#endif
13376
13377 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13378 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13379
13380 return off;
13381}
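
/*
 * Illustrative sketch (not verbatim emitter output) of the code shape the
 * mapping emitter above produces on AMD64; the exact instructions depend on
 * the host architecture, the TlbState and the register allocation:
 *
 * @code
 *          jmp     TlbLookup_N                 ; omitted when TlbState.fSkip
 *      TlbMiss_N:
 *          mov     byte [pVCpu + idxTbCurInstr], idxInstr
 *          mov     arg2, GCPtrMem              ; from immediate or stack variable
 *          mov     arg3b, iSegReg              ; only for the non-flat variants
 *          lea     arg1, [rbp - bUnmapInfo]    ; stack slot of the unmap info byte
 *          mov     arg0, pVCpu
 *          call    pfnFunction                 ; iemNativeHlpMem[Flat]MapDataUxxYy
 *          mov     idxRegMemResult, rax        ; unless it already is the return register
 *          movzx   idxRegUnmapInfo, byte [rbp - bUnmapInfo]
 *          jmp     TlbDone_N
 *      TlbLookup_N:
 *          ; inline TLB lookup, jumps to TlbMiss_N on miss, address in idxRegMemResult
 *          mov     idxRegUnmapInfo, 0          ; TLB hit: nothing to unmap later
 *      TlbDone_N:
 * @endcode
 */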
13382
13383
13384#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13385 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13386 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13387
13388#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13389 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13390 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13391
13392#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13393 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13394 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13395
13396#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13397 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13398 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13399
13400DECL_INLINE_THROW(uint32_t)
13401iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13402 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13403{
13404 /*
13405 * Assert sanity.
13406 */
13407 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13408 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13409 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
13410 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13411 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13412#ifdef VBOX_STRICT
13413 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13414 {
13415 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13416 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13417 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13418 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13419 case IEM_ACCESS_TYPE_WRITE:
13420 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13421 case IEM_ACCESS_TYPE_READ:
13422 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13423 default: AssertFailed();
13424 }
13425#else
13426 RT_NOREF(fAccess);
13427#endif
13428
13429 /*
13430 * To keep things simple we have to commit any pending writes first as we
13431 * may end up making calls (there shouldn't be any at this point, so this
13432 * is just for consistency).
13433 */
13434 /** @todo we could postpone this till we make the call and reload the
13435 * registers after returning from the call. Not sure if that's sensible or
13436 * not, though. */
13437 off = iemNativeRegFlushPendingWrites(pReNative, off);
13438
13439 /*
13440 * Move/spill/flush stuff out of call-volatile registers.
13441 *
13442 * We exclude any register holding the bUnmapInfo variable, as we'll be
13443 * checking it after returning from the call and will free it afterwards.
13444 */
13445 /** @todo save+restore active registers and maybe guest shadows in miss
13446 * scenario. */
13447 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13448
13449 /*
13450     * If the bUnmapInfo value in idxVarUnmapInfo is zero, we can skip all this.
13451     * Otherwise we'll have to call the unmap helper function.
13452 *
13453     * The likelihood of it being zero is higher than for the TLB hit when doing
13454     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
13455 * access should also end up with a mapping that won't need special unmapping.
13456 */
13457 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13458 * should speed up things for the pure interpreter as well when TLBs
13459 * are enabled. */
13460#ifdef RT_ARCH_AMD64
13461 if (pVarUnmapInfo->idxReg == UINT8_MAX)
13462 {
13463 /* test byte [rbp - xxx], 0ffh */
13464 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13465 pbCodeBuf[off++] = 0xf6;
13466 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
13467 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13468 pbCodeBuf[off++] = 0xff;
13469 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13470 }
13471 else
13472#endif
13473 {
13474 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13475 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13476 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13477 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13478 }
13479 uint32_t const offJmpFixup = off;
13480 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
13481
13482 /*
13483 * Call the unmap helper function.
13484 */
13485#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13486 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13487#else
13488 RT_NOREF(idxInstr);
13489#endif
13490
13491 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13492 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13493 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13494
13495 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13496 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13497
13498 /* Done setting up parameters, make the call. */
13499 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13500
13501     /* The bUnmapInfo variable is implicitly freed by these MCs. */
13502 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13503
13504 /*
13505 * Done, just fixup the jump for the non-call case.
13506 */
13507 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13508
13509 return off;
13510}
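
/*
 * Rough sketch of the commit-and-unmap code emitted above (AMD64 stack-slot
 * flavour shown; when bUnmapInfo already sits in a register, a TEST on that
 * register is emitted instead of the memory form):
 *
 * @code
 *          test    byte [rbp - bUnmapInfo], 0ffh
 *          jz      .nothing_to_unmap           ; fixed up once the end is known
 *          mov     byte [pVCpu + idxTbCurInstr], idxInstr
 *          movzx   arg1, byte [rbp - bUnmapInfo]
 *          mov     arg0, pVCpu
 *          call    pfnFunction                 ; iemNativeHlpMemCommitAndUnmapXx
 *      .nothing_to_unmap:
 * @endcode
 */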
13511
13512
13513
13514/*********************************************************************************************************************************
13515* State and Exceptions *
13516*********************************************************************************************************************************/
13517
13518#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13519#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13520
13521#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13522#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13523#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13524
13525#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13526#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13527#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13528
13529
13530DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13531{
13532 /** @todo this needs a lot more work later. */
13533 RT_NOREF(pReNative, fForChange);
13534 return off;
13535}
13536
13537
13538
13539/*********************************************************************************************************************************
13540* Emitters for FPU related operations. *
13541*********************************************************************************************************************************/
13542
13543#define IEM_MC_FETCH_FCW(a_u16Fcw) \
13544 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
13545
13546/** Emits code for IEM_MC_FETCH_FCW. */
13547DECL_INLINE_THROW(uint32_t)
13548iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13549{
13550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13551 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13552
13553 /* Allocate a temporary FCW register. */
13554 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13555 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw, kIemNativeGstRegUse_ReadOnly);
13556
13557 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
13558
13559 /* Free but don't flush the FCW register. */
13560 iemNativeRegFreeTmp(pReNative, idxFcwReg);
13561
13562 return off;
13563}
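
/*
 * For reference, a hypothetical MC block fragment that ends up routed through
 * the emitter above once recompiled (sketch only, not taken from an actual
 * instruction implementation):
 *
 * @code
 *      IEM_MC_LOCAL(uint16_t, u16Fcw);
 *      IEM_MC_FETCH_FCW(u16Fcw);   // -> off = iemNativeEmitFetchFpuFcw(pReNative, off, u16Fcw)
 * @endcode
 */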
13564
13565
13566#define IEM_MC_FETCH_FSW(a_u16Fsw) \
13567 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
13568
13569/** Emits code for IEM_MC_FETCH_FSW. */
13570DECL_INLINE_THROW(uint32_t)
13571iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
13572{
13573 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
13574 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
13575
13576 /* Allocate a temporary FSW register. */
13577 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
13578 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
13579
13580 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
13581
13582 /* Free but don't flush the FSW register. */
13583 iemNativeRegFreeTmp(pReNative, idxFswReg);
13584
13585 return off;
13586}
13587
13588
13589
13590/*********************************************************************************************************************************
13591* The native code generator functions for each MC block. *
13592*********************************************************************************************************************************/
13593
13594
13595/*
13596 * Include g_apfnIemNativeRecompileFunctions and associated functions.
13597 *
13598 * This should probably live in its own file later, but let's see what the
13599 * compile times turn out to be first.
13600 */
13601#include "IEMNativeFunctions.cpp.h"
13602
13603
13604
13605/*********************************************************************************************************************************
13606* Recompiler Core. *
13607*********************************************************************************************************************************/
13608
13609
13610/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
13611static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
13612{
13613 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
13614 pDis->cbCachedInstr += cbMaxRead;
13615 RT_NOREF(cbMinRead);
13616 return VERR_NO_DATA;
13617}
13618
13619
13620DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
13621{
13622 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
13623 {
13624#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
13625 ENTRY(fLocalForcedActions),
13626 ENTRY(iem.s.rcPassUp),
13627 ENTRY(iem.s.fExec),
13628 ENTRY(iem.s.pbInstrBuf),
13629 ENTRY(iem.s.uInstrBufPc),
13630 ENTRY(iem.s.GCPhysInstrBuf),
13631 ENTRY(iem.s.cbInstrBufTotal),
13632 ENTRY(iem.s.idxTbCurInstr),
13633#ifdef VBOX_WITH_STATISTICS
13634 ENTRY(iem.s.StatNativeTlbHitsForFetch),
13635 ENTRY(iem.s.StatNativeTlbHitsForStore),
13636 ENTRY(iem.s.StatNativeTlbHitsForStack),
13637 ENTRY(iem.s.StatNativeTlbHitsForMapped),
13638 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
13639 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
13640 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
13641 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
13642#endif
13643 ENTRY(iem.s.DataTlb.aEntries),
13644 ENTRY(iem.s.DataTlb.uTlbRevision),
13645 ENTRY(iem.s.DataTlb.uTlbPhysRev),
13646 ENTRY(iem.s.DataTlb.cTlbHits),
13647 ENTRY(iem.s.CodeTlb.aEntries),
13648 ENTRY(iem.s.CodeTlb.uTlbRevision),
13649 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
13650 ENTRY(iem.s.CodeTlb.cTlbHits),
13651 ENTRY(pVMR3),
13652 ENTRY(cpum.GstCtx.rax),
13653 ENTRY(cpum.GstCtx.ah),
13654 ENTRY(cpum.GstCtx.rcx),
13655 ENTRY(cpum.GstCtx.ch),
13656 ENTRY(cpum.GstCtx.rdx),
13657 ENTRY(cpum.GstCtx.dh),
13658 ENTRY(cpum.GstCtx.rbx),
13659 ENTRY(cpum.GstCtx.bh),
13660 ENTRY(cpum.GstCtx.rsp),
13661 ENTRY(cpum.GstCtx.rbp),
13662 ENTRY(cpum.GstCtx.rsi),
13663 ENTRY(cpum.GstCtx.rdi),
13664 ENTRY(cpum.GstCtx.r8),
13665 ENTRY(cpum.GstCtx.r9),
13666 ENTRY(cpum.GstCtx.r10),
13667 ENTRY(cpum.GstCtx.r11),
13668 ENTRY(cpum.GstCtx.r12),
13669 ENTRY(cpum.GstCtx.r13),
13670 ENTRY(cpum.GstCtx.r14),
13671 ENTRY(cpum.GstCtx.r15),
13672 ENTRY(cpum.GstCtx.es.Sel),
13673 ENTRY(cpum.GstCtx.es.u64Base),
13674 ENTRY(cpum.GstCtx.es.u32Limit),
13675 ENTRY(cpum.GstCtx.es.Attr),
13676 ENTRY(cpum.GstCtx.cs.Sel),
13677 ENTRY(cpum.GstCtx.cs.u64Base),
13678 ENTRY(cpum.GstCtx.cs.u32Limit),
13679 ENTRY(cpum.GstCtx.cs.Attr),
13680 ENTRY(cpum.GstCtx.ss.Sel),
13681 ENTRY(cpum.GstCtx.ss.u64Base),
13682 ENTRY(cpum.GstCtx.ss.u32Limit),
13683 ENTRY(cpum.GstCtx.ss.Attr),
13684 ENTRY(cpum.GstCtx.ds.Sel),
13685 ENTRY(cpum.GstCtx.ds.u64Base),
13686 ENTRY(cpum.GstCtx.ds.u32Limit),
13687 ENTRY(cpum.GstCtx.ds.Attr),
13688 ENTRY(cpum.GstCtx.fs.Sel),
13689 ENTRY(cpum.GstCtx.fs.u64Base),
13690 ENTRY(cpum.GstCtx.fs.u32Limit),
13691 ENTRY(cpum.GstCtx.fs.Attr),
13692 ENTRY(cpum.GstCtx.gs.Sel),
13693 ENTRY(cpum.GstCtx.gs.u64Base),
13694 ENTRY(cpum.GstCtx.gs.u32Limit),
13695 ENTRY(cpum.GstCtx.gs.Attr),
13696 ENTRY(cpum.GstCtx.rip),
13697 ENTRY(cpum.GstCtx.eflags),
13698 ENTRY(cpum.GstCtx.uRipInhibitInt),
13699#undef ENTRY
13700 };
13701#ifdef VBOX_STRICT
13702 static bool s_fOrderChecked = false;
13703 if (!s_fOrderChecked)
13704 {
13705 s_fOrderChecked = true;
13706 uint32_t offPrev = s_aMembers[0].off;
13707 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
13708 {
13709 Assert(s_aMembers[i].off > offPrev);
13710 offPrev = s_aMembers[i].off;
13711 }
13712 }
13713#endif
13714
13715 /*
13716 * Binary lookup.
13717 */
13718 unsigned iStart = 0;
13719 unsigned iEnd = RT_ELEMENTS(s_aMembers);
13720 for (;;)
13721 {
13722 unsigned const iCur = iStart + (iEnd - iStart) / 2;
13723 uint32_t const offCur = s_aMembers[iCur].off;
13724 if (off < offCur)
13725 {
13726 if (iCur != iStart)
13727 iEnd = iCur;
13728 else
13729 break;
13730 }
13731 else if (off > offCur)
13732 {
13733 if (iCur + 1 < iEnd)
13734 iStart = iCur + 1;
13735 else
13736 break;
13737 }
13738 else
13739 return s_aMembers[iCur].pszName;
13740 }
13741#ifdef VBOX_WITH_STATISTICS
13742 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
13743 return "iem.s.acThreadedFuncStats[iFn]";
13744#endif
13745 return NULL;
13746}
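
/*
 * Minimal usage sketch for the lookup above, along the lines of what the
 * disassembler annotation code further down does:
 *
 * @code
 *      const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
 *      if (pszName)  // -> "cpum.GstCtx.rip"; NULL is returned for offsets not in the table.
 *          pHlp->pfnPrintf(pHlp, " ; %s\n", pszName);
 * @endcode
 */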
13747
13748
13749/**
13750 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
13751 * @returns pszBuf.
13752 * @param fFlags The flags.
13753 * @param pszBuf The output buffer.
13754 * @param cbBuf The output buffer size. At least 32 bytes.
13755 */
13756DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
13757{
13758 Assert(cbBuf >= 32);
13759 static RTSTRTUPLE const s_aModes[] =
13760 {
13761 /* [00] = */ { RT_STR_TUPLE("16BIT") },
13762 /* [01] = */ { RT_STR_TUPLE("32BIT") },
13763 /* [02] = */ { RT_STR_TUPLE("!2!") },
13764 /* [03] = */ { RT_STR_TUPLE("!3!") },
13765 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
13766 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
13767 /* [06] = */ { RT_STR_TUPLE("!6!") },
13768 /* [07] = */ { RT_STR_TUPLE("!7!") },
13769 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
13770 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
13771 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
13772 /* [0b] = */ { RT_STR_TUPLE("!b!") },
13773 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
13774 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
13775 /* [0e] = */ { RT_STR_TUPLE("!e!") },
13776 /* [0f] = */ { RT_STR_TUPLE("!f!") },
13777 /* [10] = */ { RT_STR_TUPLE("!10!") },
13778 /* [11] = */ { RT_STR_TUPLE("!11!") },
13779 /* [12] = */ { RT_STR_TUPLE("!12!") },
13780 /* [13] = */ { RT_STR_TUPLE("!13!") },
13781 /* [14] = */ { RT_STR_TUPLE("!14!") },
13782 /* [15] = */ { RT_STR_TUPLE("!15!") },
13783 /* [16] = */ { RT_STR_TUPLE("!16!") },
13784 /* [17] = */ { RT_STR_TUPLE("!17!") },
13785 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
13786 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
13787 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
13788 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
13789 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
13790 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
13791 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
13792 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
13793 };
13794 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
13795 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
13796 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
13797
13798 pszBuf[off++] = ' ';
13799 pszBuf[off++] = 'C';
13800 pszBuf[off++] = 'P';
13801 pszBuf[off++] = 'L';
13802 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
13803 Assert(off < 32);
13804
13805 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
13806
13807 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
13808 {
13809 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
13810 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
13811 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
13812 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
13813 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
13814 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
13815 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
13816 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
13817 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
13818 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
13819 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
13820 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
13821 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
13822 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
13823 };
13824 if (fFlags)
13825 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
13826 if (s_aFlags[i].fFlag & fFlags)
13827 {
13828 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
13829 pszBuf[off++] = ' ';
13830 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
13831 off += s_aFlags[i].cchName;
13832 fFlags &= ~s_aFlags[i].fFlag;
13833 if (!fFlags)
13834 break;
13835 }
13836 pszBuf[off] = '\0';
13837
13838 return pszBuf;
13839}
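
/*
 * Minimal usage sketch; the resulting string looks along the lines of
 * "64BIT CPL0 TYPE_NATIVE", depending on which mode and flag bits are set:
 *
 * @code
 *      char szTmp[128];
 *      Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));
 * @endcode
 */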
13840
13841
13842DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
13843{
13844 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
13845#if defined(RT_ARCH_AMD64)
13846 static const char * const a_apszMarkers[] =
13847 {
13848 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
13849 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
13850 };
13851#endif
13852
13853 char szDisBuf[512];
13854 DISSTATE Dis;
13855 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
13856 uint32_t const cNative = pTb->Native.cInstructions;
13857 uint32_t offNative = 0;
13858#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13859 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
13860#endif
13861 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13862 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13863 : DISCPUMODE_64BIT;
13864#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13865 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
13866#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13867 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
13868#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
13869# error "Port me"
13870#else
13871 csh hDisasm = ~(size_t)0;
13872# if defined(RT_ARCH_AMD64)
13873 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
13874# elif defined(RT_ARCH_ARM64)
13875 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
13876# else
13877# error "Port me"
13878# endif
13879 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
13880#endif
13881
13882 /*
13883 * Print TB info.
13884 */
13885 pHlp->pfnPrintf(pHlp,
13886 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
13887 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
13888 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
13889 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
13890#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
13891 if (pDbgInfo && pDbgInfo->cEntries > 1)
13892 {
13893 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
13894
13895 /*
13896         * This disassembly is driven by the debug info which follows the native
13897         * code and indicates where the next guest instruction starts, where the
13898         * labels are, and other such things.
13899 */
13900 uint32_t idxThreadedCall = 0;
13901 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
13902 uint8_t idxRange = UINT8_MAX;
13903 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
13904 uint32_t offRange = 0;
13905 uint32_t offOpcodes = 0;
13906 uint32_t const cbOpcodes = pTb->cbOpcodes;
13907 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
13908 uint32_t const cDbgEntries = pDbgInfo->cEntries;
13909 uint32_t iDbgEntry = 1;
13910 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
13911
13912 while (offNative < cNative)
13913 {
13914 /* If we're at or have passed the point where the next chunk of debug
13915 info starts, process it. */
13916 if (offDbgNativeNext <= offNative)
13917 {
13918 offDbgNativeNext = UINT32_MAX;
13919 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
13920 {
13921 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
13922 {
13923 case kIemTbDbgEntryType_GuestInstruction:
13924 {
13925 /* Did the exec flag change? */
13926 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
13927 {
13928 pHlp->pfnPrintf(pHlp,
13929 " fExec change %#08x -> %#08x %s\n",
13930 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13931 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
13932 szDisBuf, sizeof(szDisBuf)));
13933 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
13934 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
13935 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
13936 : DISCPUMODE_64BIT;
13937 }
13938
13939                         /* New opcode range? We need to fend off a spurious debug info entry here for cases
13940 where the compilation was aborted before the opcode was recorded and the actual
13941 instruction was translated to a threaded call. This may happen when we run out
13942 of ranges, or when some complicated interrupts/FFs are found to be pending or
13943 similar. So, we just deal with it here rather than in the compiler code as it
13944 is a lot simpler to do here. */
13945 if ( idxRange == UINT8_MAX
13946 || idxRange >= cRanges
13947 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
13948 {
13949 idxRange += 1;
13950 if (idxRange < cRanges)
13951 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
13952 else
13953 continue;
13954 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
13955 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
13956 + (pTb->aRanges[idxRange].idxPhysPage == 0
13957 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
13958 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
13959 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
13960 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
13961 pTb->aRanges[idxRange].idxPhysPage);
13962 GCPhysPc += offRange;
13963 }
13964
13965 /* Disassemble the instruction. */
13966 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
13967 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
13968 uint32_t cbInstr = 1;
13969 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
13970 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
13971 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
13972 if (RT_SUCCESS(rc))
13973 {
13974 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
13975 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
13976 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
13977 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
13978
13979 static unsigned const s_offMarker = 55;
13980 static char const s_szMarker[] = " ; <--- guest";
13981 if (cch < s_offMarker)
13982 {
13983 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
13984 cch = s_offMarker;
13985 }
13986 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
13987 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
13988
13989 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
13990 }
13991 else
13992 {
13993 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
13994 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
13995 cbInstr = 1;
13996 }
13997 GCPhysPc += cbInstr;
13998 offOpcodes += cbInstr;
13999 offRange += cbInstr;
14000 continue;
14001 }
14002
14003 case kIemTbDbgEntryType_ThreadedCall:
14004 pHlp->pfnPrintf(pHlp,
14005 " Call #%u to %s (%u args) - %s\n",
14006 idxThreadedCall,
14007 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14008 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14009 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
14010 idxThreadedCall++;
14011 continue;
14012
14013 case kIemTbDbgEntryType_GuestRegShadowing:
14014 {
14015 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
14016 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
14017 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
14018 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
14019 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14020 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
14021 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
14022 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
14023 else
14024 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
14025 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
14026 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14027 continue;
14028 }
14029
14030 case kIemTbDbgEntryType_Label:
14031 {
14032 const char *pszName = "what_the_fudge";
14033 const char *pszComment = "";
14034 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
14035 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
14036 {
14037 case kIemNativeLabelType_Return:
14038 pszName = "Return";
14039 break;
14040 case kIemNativeLabelType_ReturnBreak:
14041 pszName = "ReturnBreak";
14042 break;
14043 case kIemNativeLabelType_ReturnWithFlags:
14044 pszName = "ReturnWithFlags";
14045 break;
14046 case kIemNativeLabelType_NonZeroRetOrPassUp:
14047 pszName = "NonZeroRetOrPassUp";
14048 break;
14049 case kIemNativeLabelType_RaiseGp0:
14050 pszName = "RaiseGp0";
14051 break;
14052 case kIemNativeLabelType_RaiseNm:
14053 pszName = "RaiseNm";
14054 break;
14055 case kIemNativeLabelType_RaiseUd:
14056 pszName = "RaiseUd";
14057 break;
14058 case kIemNativeLabelType_ObsoleteTb:
14059 pszName = "ObsoleteTb";
14060 break;
14061 case kIemNativeLabelType_NeedCsLimChecking:
14062 pszName = "NeedCsLimChecking";
14063 break;
14064 case kIemNativeLabelType_CheckBranchMiss:
14065 pszName = "CheckBranchMiss";
14066 break;
14067 case kIemNativeLabelType_If:
14068 pszName = "If";
14069 fNumbered = true;
14070 break;
14071 case kIemNativeLabelType_Else:
14072 pszName = "Else";
14073 fNumbered = true;
14074 pszComment = " ; regs state restored pre-if-block";
14075 break;
14076 case kIemNativeLabelType_Endif:
14077 pszName = "Endif";
14078 fNumbered = true;
14079 break;
14080 case kIemNativeLabelType_CheckIrq:
14081 pszName = "CheckIrq_CheckVM";
14082 fNumbered = true;
14083 break;
14084 case kIemNativeLabelType_TlbLookup:
14085 pszName = "TlbLookup";
14086 fNumbered = true;
14087 break;
14088 case kIemNativeLabelType_TlbMiss:
14089 pszName = "TlbMiss";
14090 fNumbered = true;
14091 break;
14092 case kIemNativeLabelType_TlbDone:
14093 pszName = "TlbDone";
14094 fNumbered = true;
14095 break;
14096 case kIemNativeLabelType_Invalid:
14097 case kIemNativeLabelType_End:
14098 break;
14099 }
14100 if (fNumbered)
14101 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
14102 else
14103 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
14104 continue;
14105 }
14106
14107 case kIemTbDbgEntryType_NativeOffset:
14108 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
14109 Assert(offDbgNativeNext > offNative);
14110 break;
14111
14112 default:
14113 AssertFailed();
14114 }
14115 iDbgEntry++;
14116 break;
14117 }
14118 }
14119
14120 /*
14121 * Disassemble the next native instruction.
14122 */
14123 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14124# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14125 uint32_t cbInstr = sizeof(paNative[0]);
14126 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14127 if (RT_SUCCESS(rc))
14128 {
14129# if defined(RT_ARCH_AMD64)
14130 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14131 {
14132 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14133 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14134 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14135 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14136 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14137 uInfo & 0x8000 ? "recompiled" : "todo");
14138 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14139 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14140 else
14141 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14142 }
14143 else
14144# endif
14145 {
14146 const char *pszAnnotation = NULL;
14147# ifdef RT_ARCH_AMD64
14148 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14149 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14150 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14151 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14152 PCDISOPPARAM pMemOp;
14153 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
14154 pMemOp = &Dis.Param1;
14155 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
14156 pMemOp = &Dis.Param2;
14157 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
14158 pMemOp = &Dis.Param3;
14159 else
14160 pMemOp = NULL;
14161 if ( pMemOp
14162 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
14163 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
14164 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
14165 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
14166
14167#elif defined(RT_ARCH_ARM64)
14168 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14169 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14170 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14171# else
14172# error "Port me"
14173# endif
14174 if (pszAnnotation)
14175 {
14176 static unsigned const s_offAnnotation = 55;
14177 size_t const cchAnnotation = strlen(pszAnnotation);
14178 size_t cchDis = strlen(szDisBuf);
14179 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14180 {
14181 if (cchDis < s_offAnnotation)
14182 {
14183 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14184 cchDis = s_offAnnotation;
14185 }
14186 szDisBuf[cchDis++] = ' ';
14187 szDisBuf[cchDis++] = ';';
14188 szDisBuf[cchDis++] = ' ';
14189 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14190 }
14191 }
14192 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14193 }
14194 }
14195 else
14196 {
14197# if defined(RT_ARCH_AMD64)
14198 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14199 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14200# elif defined(RT_ARCH_ARM64)
14201 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14202# else
14203# error "Port me"
14204# endif
14205 cbInstr = sizeof(paNative[0]);
14206 }
14207 offNative += cbInstr / sizeof(paNative[0]);
14208
14209# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14210 cs_insn *pInstr;
14211 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14212 (uintptr_t)pNativeCur, 1, &pInstr);
14213 if (cInstrs > 0)
14214 {
14215 Assert(cInstrs == 1);
14216# if defined(RT_ARCH_AMD64)
14217 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14218 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14219# else
14220 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14221 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14222# endif
14223 offNative += pInstr->size / sizeof(*pNativeCur);
14224 cs_free(pInstr, cInstrs);
14225 }
14226 else
14227 {
14228# if defined(RT_ARCH_AMD64)
14229 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14230                             pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14231# else
14232 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14233# endif
14234 offNative++;
14235 }
14236# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14237 }
14238 }
14239 else
14240#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14241 {
14242 /*
14243 * No debug info, just disassemble the x86 code and then the native code.
14244 *
14245 * First the guest code:
14246 */
14247 for (unsigned i = 0; i < pTb->cRanges; i++)
14248 {
14249 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14250 + (pTb->aRanges[i].idxPhysPage == 0
14251 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14252 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14253 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14254 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14255 unsigned off = pTb->aRanges[i].offOpcodes;
14256 /** @todo this ain't working when crossing pages! */
14257 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14258 while (off < cbOpcodes)
14259 {
14260 uint32_t cbInstr = 1;
14261 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14262 &pTb->pabOpcodes[off], cbOpcodes - off,
14263 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14264 if (RT_SUCCESS(rc))
14265 {
14266 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14267 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14268 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14269 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14270 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14271 GCPhysPc += cbInstr;
14272 off += cbInstr;
14273 }
14274 else
14275 {
14276 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14277 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14278 break;
14279 }
14280 }
14281 }
14282
14283 /*
14284 * Then the native code:
14285 */
14286 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14287 while (offNative < cNative)
14288 {
14289 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14290# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14291 uint32_t cbInstr = sizeof(paNative[0]);
14292 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14293 if (RT_SUCCESS(rc))
14294 {
14295# if defined(RT_ARCH_AMD64)
14296 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14297 {
14298 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14299 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14300 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14301 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14302 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14303 uInfo & 0x8000 ? "recompiled" : "todo");
14304 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14305 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14306 else
14307 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14308 }
14309 else
14310# endif
14311 {
14312# ifdef RT_ARCH_AMD64
14313 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14314 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14315 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14316 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14317# elif defined(RT_ARCH_ARM64)
14318 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14319 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14320 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14321# else
14322# error "Port me"
14323# endif
14324 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14325 }
14326 }
14327 else
14328 {
14329# if defined(RT_ARCH_AMD64)
14330 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14331 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14332# else
14333 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14334# endif
14335 cbInstr = sizeof(paNative[0]);
14336 }
14337 offNative += cbInstr / sizeof(paNative[0]);
14338
14339# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14340 cs_insn *pInstr;
14341 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14342 (uintptr_t)pNativeCur, 1, &pInstr);
14343 if (cInstrs > 0)
14344 {
14345 Assert(cInstrs == 1);
14346# if defined(RT_ARCH_AMD64)
14347 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14348 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14349# else
14350 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14351 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14352# endif
14353 offNative += pInstr->size / sizeof(*pNativeCur);
14354 cs_free(pInstr, cInstrs);
14355 }
14356 else
14357 {
14358# if defined(RT_ARCH_AMD64)
14359 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14360                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14361# else
14362 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14363# endif
14364 offNative++;
14365 }
14366# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14367 }
14368 }
14369
14370#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14371 /* Cleanup. */
14372 cs_close(&hDisasm);
14373#endif
14374}
14375
14376
14377/**
14378 * Recompiles the given threaded TB into a native one.
14379 *
14380 * In case of failure the translation block will be returned as-is.
14381 *
14382 * @returns pTb.
14383 * @param pVCpu The cross context virtual CPU structure of the calling
14384 * thread.
14385  * @param   pTb     The threaded translation block to recompile to native.
14386 */
14387DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14388{
14389 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14390
14391 /*
14392     * The first time thru, we allocate the recompiler state; the other times
14393     * we just need to reset it before using it again.
14394 */
14395 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14396 if (RT_LIKELY(pReNative))
14397 iemNativeReInit(pReNative, pTb);
14398 else
14399 {
14400 pReNative = iemNativeInit(pVCpu, pTb);
14401 AssertReturn(pReNative, pTb);
14402 }
14403
14404#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14405 /*
14406 * First do liveness analysis. This is done backwards.
14407 */
14408 {
14409 uint32_t idxCall = pTb->Thrd.cCalls;
14410 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14411 { /* likely */ }
14412 else
14413 {
14414 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14415 while (idxCall > cAlloc)
14416 cAlloc *= 2;
14417 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14418 AssertReturn(pvNew, pTb);
14419 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14420 pReNative->cLivenessEntriesAlloc = cAlloc;
14421 }
14422 AssertReturn(idxCall > 0, pTb);
14423 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14424
14425 /* The initial (final) entry. */
14426 idxCall--;
14427 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14428
14429 /* Loop backwards thru the calls and fill in the other entries. */
14430 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14431 while (idxCall > 0)
14432 {
14433 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14434 if (pfnLiveness)
14435 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14436 else
14437 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14438 pCallEntry--;
14439 idxCall--;
14440 }
14441
14442# ifdef VBOX_WITH_STATISTICS
14443         /* Check if there are any EFLAGS optimizations to be had here.  This requires
14444            someone setting them to 'clobbered' rather than 'input'. */
14445 /** @todo */
14446# endif
14447 }
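
    /*
     * Illustrative recurrence for the backwards pass above, assuming (as a
     * sketch only) that each liveness callback folds what call i itself reads
     * and clobbers into the requirements of everything following it:
     *
     * @code
     *      // paLivenessEntries[cCalls - 1] = all-unused  (nothing runs after the TB)
     *      // paLivenessEntries[i - 1]      = pfnLiveness(paCalls[i], paLivenessEntries[i])
     * @endcode
     */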
14448#endif
14449
14450 /*
14451 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14452 * for aborting if an error happens.
14453 */
14454 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14455#ifdef LOG_ENABLED
14456 uint32_t const cCallsOrg = cCallsLeft;
14457#endif
14458 uint32_t off = 0;
14459 int rc = VINF_SUCCESS;
14460 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14461 {
14462 /*
14463 * Emit prolog code (fixed).
14464 */
14465 off = iemNativeEmitProlog(pReNative, off);
14466
14467 /*
14468 * Convert the calls to native code.
14469 */
14470#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14471 int32_t iGstInstr = -1;
14472#endif
14473#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
14474 uint32_t cThreadedCalls = 0;
14475 uint32_t cRecompiledCalls = 0;
14476#endif
14477#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14478 uint32_t idxCurCall = 0;
14479#endif
14480 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
14481 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
14482 while (cCallsLeft-- > 0)
14483 {
14484 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
14485#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14486 pReNative->idxCurCall = idxCurCall;
14487#endif
14488
14489 /*
14490 * Debug info, assembly markup and statistics.
14491 */
14492#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
14493 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
14494 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
14495#endif
14496#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14497 iemNativeDbgInfoAddNativeOffset(pReNative, off);
14498 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
14499 {
14500 if (iGstInstr < (int32_t)pTb->cInstructions)
14501 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
14502 else
14503 Assert(iGstInstr == pTb->cInstructions);
14504 iGstInstr = pCallEntry->idxInstr;
14505 }
14506 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
14507#endif
14508#if defined(VBOX_STRICT)
14509 off = iemNativeEmitMarker(pReNative, off,
14510 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
14511#endif
14512#if defined(VBOX_STRICT)
14513 iemNativeRegAssertSanity(pReNative);
14514#endif
14515#ifdef VBOX_WITH_STATISTICS
14516 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
14517#endif
14518
14519 /*
14520 * Actual work.
14521 */
14522 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
14523 pfnRecom ? "(recompiled)" : "(todo)"));
14524 if (pfnRecom) /** @todo stats on this. */
14525 {
14526 off = pfnRecom(pReNative, off, pCallEntry);
14527 STAM_REL_STATS({cRecompiledCalls++;});
14528 }
14529 else
14530 {
14531 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
14532 STAM_REL_STATS({cThreadedCalls++;});
14533 }
14534 Assert(off <= pReNative->cInstrBufAlloc);
14535 Assert(pReNative->cCondDepth == 0);
14536
14537#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
14538 if (LogIs2Enabled())
14539 {
14540 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
14541# ifndef IEMLIVENESS_EXTENDED_LAYOUT
14542 static const char s_achState[] = "CUXI";
14543# else
14544 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
14545# endif
14546
14547 char szGpr[17];
14548 for (unsigned i = 0; i < 16; i++)
14549 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
14550 szGpr[16] = '\0';
14551
14552 char szSegBase[X86_SREG_COUNT + 1];
14553 char szSegLimit[X86_SREG_COUNT + 1];
14554 char szSegAttrib[X86_SREG_COUNT + 1];
14555 char szSegSel[X86_SREG_COUNT + 1];
14556 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
14557 {
14558 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
14559 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
14560 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
14561 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
14562 }
14563 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
14564 = szSegSel[X86_SREG_COUNT] = '\0';
14565
14566 char szEFlags[8];
14567 for (unsigned i = 0; i < 7; i++)
14568 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
14569 szEFlags[7] = '\0';
14570
14571 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
14572 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
14573 }
14574#endif
14575
14576 /*
14577 * Advance.
14578 */
14579 pCallEntry++;
14580#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
14581 idxCurCall++;
14582#endif
14583 }
14584
14585 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
14586 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
14587 if (!cThreadedCalls)
14588 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
14589
14590 /*
14591 * Emit the epilog code.
14592 */
14593 uint32_t idxReturnLabel;
14594 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
14595
14596 /*
14597 * Generate special jump labels.
14598 */
14599 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
14600 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
14601 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
14602 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
14603 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
14604 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
14605 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
14606 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
14607 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
14608 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
14609 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
14610 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
14611 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
14612 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
14613 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
14614 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
14615 }
14616 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
14617 {
14618 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
14619 return pTb;
14620 }
14621 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
14622 Assert(off <= pReNative->cInstrBufAlloc);
14623
14624 /*
14625     * Make sure all labels have been defined.
14626 */
14627 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
14628#ifdef VBOX_STRICT
14629 uint32_t const cLabels = pReNative->cLabels;
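    /* Labels that were created but never given a position presumably still have their
       offset at UINT32_MAX, so the range check below catches those as well as any label
       placed beyond the end of the generated code. */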
14630 for (uint32_t i = 0; i < cLabels; i++)
14631 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
14632#endif
14633
14634 /*
14635 * Allocate executable memory, copy over the code we've generated.
14636 */
14637 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
14638 if (pTbAllocator->pDelayedFreeHead)
14639 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
14640
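    /* Note: IEMNATIVEINSTR is one code unit, so off * sizeof(IEMNATIVEINSTR) is the code
       size in bytes (presumably one byte per unit on AMD64 and one 32-bit word per unit
       on ARM64). */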
14641 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
14642 AssertReturn(paFinalInstrBuf, pTb);
14643 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
14644
14645 /*
14646 * Apply fixups.
14647 */
14648 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
14649 uint32_t const cFixups = pReNative->cFixups;
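    /* Each fixup records the position of the field to patch, the target label and an
       addend; since the encoded displacements are label-relative they can be applied
       directly to the final buffer without knowing its load address. */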
14650 for (uint32_t i = 0; i < cFixups; i++)
14651 {
14652 Assert(paFixups[i].off < off);
14653 Assert(paFixups[i].idxLabel < cLabels);
14654 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
14655 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
14656 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
14657 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
14658 switch (paFixups[i].enmType)
14659 {
14660#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
14661 case kIemNativeFixupType_Rel32:
14662 Assert(paFixups[i].off + 4 <= off);
14663 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14664 continue;
14665
14666#elif defined(RT_ARCH_ARM64)
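            /* The three ARM64 forms below match the standard branch immediate encodings:
               a 26-bit displacement at bit 0 (as used by B/BL), a 19-bit displacement at
               bit 5 (as used by B.cond and CBZ/CBNZ) and a 14-bit displacement at bit 5
               (as used by TBZ/TBNZ), all counted in 32-bit instruction units.  The assert
               range for the 26-bit case is tighter than the architectural reach, which is
               presumably harmless for buffers of this size. */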
14667 case kIemNativeFixupType_RelImm26At0:
14668 {
14669 Assert(paFixups[i].off < off);
14670 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14671 Assert(offDisp >= -262144 && offDisp < 262144);
14672 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
14673 continue;
14674 }
14675
14676 case kIemNativeFixupType_RelImm19At5:
14677 {
14678 Assert(paFixups[i].off < off);
14679 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14680 Assert(offDisp >= -262144 && offDisp < 262144);
14681 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
14682 continue;
14683 }
14684
14685 case kIemNativeFixupType_RelImm14At5:
14686 {
14687 Assert(paFixups[i].off < off);
14688 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
14689 Assert(offDisp >= -8192 && offDisp < 8192);
14690 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
14691 continue;
14692 }
14693
14694#endif
14695 case kIemNativeFixupType_Invalid:
14696 case kIemNativeFixupType_End:
14697 break;
14698 }
14699 AssertFailed();
14700 }
14701
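    /* Presumably this does whatever the host requires before the buffer can be executed,
       e.g. instruction cache maintenance and/or flipping the pages from writable to
       executable. */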
14702 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
14703 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
14704
14705 /*
14706 * Convert the translation block.
14707 */
14708 RTMemFree(pTb->Thrd.paCalls);
14709 pTb->Native.paInstructions = paFinalInstrBuf;
14710 pTb->Native.cInstructions = off;
14711 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
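    /* From here on the TB is a native one; the threaded call table was freed above since
       it is no longer needed. */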
14712#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14713    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
14714 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
14715#endif
14716
14717 Assert(pTbAllocator->cThreadedTbs > 0);
14718 pTbAllocator->cThreadedTbs -= 1;
14719 pTbAllocator->cNativeTbs += 1;
14720 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
14721
14722#ifdef LOG_ENABLED
14723 /*
14724 * Disassemble to the log if enabled.
14725 */
14726 if (LogIs3Enabled())
14727 {
14728 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
14729 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
14730# ifdef DEBUG_bird
14731 RTLogFlush(NULL);
14732# endif
14733 }
14734#endif
14735 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
14736
14737 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
14738 return pTb;
14739}
14740