VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103678

Last change on this file since 103678 was 103671, checked in by vboxsync on 2024-03-04

VMM/IEM: Native translation of IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() body, bugref:10371

1/* $Id: IEMAllN8veRecompiler.cpp 103671 2024-03-04 15:48:34Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
136#endif
137#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
138static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
139static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
140#endif
141DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
142DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
143 IEMNATIVEGSTREG enmGstReg, uint32_t off);
144DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
145
146
147/*********************************************************************************************************************************
148* Executable Memory Allocator *
149*********************************************************************************************************************************/
150/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151 * Use an alternative chunk sub-allocator that does not store internal data
152 * in the chunk.
153 *
154 * Using the RTHeapSimple API is not practical on newer darwin systems where
155 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
156 * memory. We would have to change the protection of the whole chunk for
157 * every call to RTHeapSimple, which would be rather expensive.
158 *
159 * This alternative implementation lets us restrict page protection modifications
160 * to the pages backing the executable memory we just allocated.
161 */
162#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
163/** The chunk sub-allocation unit size in bytes. */
164#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
165/** The chunk sub-allocation unit size as a shift factor. */
166#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
167
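/*
 * Illustrative sketch only (kept out of the build): how a request size maps to
 * sub-allocation units with the constants above.  A 1 byte request takes one
 * 128 byte unit, a 200 byte request two units, and so on.  This mirrors the
 * cReqUnits calculation in iemExecMemAllocatorAllocInChunk below; the helper
 * name itself is made up.
 */
#if 0
DECLINLINE(uint32_t) iemExecMemExampleReqSizeToUnits(uint32_t cbReq)
{
    /* Round up to whole 128 byte units and convert the byte count to a unit count. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif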
168#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
169# ifdef IEMNATIVE_USE_GDB_JIT
170# define IEMNATIVE_USE_GDB_JIT_ET_DYN
171
172/** GDB JIT: Code entry. */
173typedef struct GDBJITCODEENTRY
174{
175 struct GDBJITCODEENTRY *pNext;
176 struct GDBJITCODEENTRY *pPrev;
177 uint8_t *pbSymFile;
178 uint64_t cbSymFile;
179} GDBJITCODEENTRY;
180
181/** GDB JIT: Actions. */
182typedef enum GDBJITACTIONS : uint32_t
183{
184 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
185} GDBJITACTIONS;
186
187/** GDB JIT: Descriptor. */
188typedef struct GDBJITDESCRIPTOR
189{
190 uint32_t uVersion;
191 GDBJITACTIONS enmAction;
192 GDBJITCODEENTRY *pRelevant;
193 GDBJITCODEENTRY *pHead;
194 /** Our addition: */
195 GDBJITCODEENTRY *pTail;
196} GDBJITDESCRIPTOR;
197
198/** GDB JIT: Our simple symbol file data. */
199typedef struct GDBJITSYMFILE
200{
201 Elf64_Ehdr EHdr;
202# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
203 Elf64_Shdr aShdrs[5];
204# else
205 Elf64_Shdr aShdrs[7];
206 Elf64_Phdr aPhdrs[2];
207# endif
208 /** The dwarf ehframe data for the chunk. */
209 uint8_t abEhFrame[512];
210 char szzStrTab[128];
211 Elf64_Sym aSymbols[3];
212# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
213 Elf64_Sym aDynSyms[2];
214 Elf64_Dyn aDyn[6];
215# endif
216} GDBJITSYMFILE;
217
218extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
219extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
220
221/** Init once for g_IemNativeGdbJitLock. */
222static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
223/** Init once for the critical section. */
224static RTCRITSECT g_IemNativeGdbJitLock;
225
226/** GDB reads the info here. */
227GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
228
229/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
230DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
231{
232 ASMNopPause();
233}
234
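/*
 * Registration flow sketch (illustrative only; the real code further down in
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk does this under
 * g_IemNativeGdbJitLock): link the new code entry onto the descriptor list and
 * poke GDB via the well-known hook function.  The helper name is made up.
 */
# if 0
static void iemNativeGdbJitExampleRegister(GDBJITCODEENTRY *pEntry)
{
    pEntry->pNext = NULL;
    pEntry->pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = pEntry;
    else
        __jit_debug_descriptor.pHead = pEntry;
    __jit_debug_descriptor.pTail     = pEntry;
    __jit_debug_descriptor.pRelevant = pEntry;

    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();                    /* GDB has a breakpoint on this. */
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
}
# endif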
235/** @callback_method_impl{FNRTONCE} */
236static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
237{
238 RT_NOREF(pvUser);
239 return RTCritSectInit(&g_IemNativeGdbJitLock);
240}
241
242
243# endif /* IEMNATIVE_USE_GDB_JIT */
244
245/**
246 * Per-chunk unwind info for non-windows hosts.
247 */
248typedef struct IEMEXECMEMCHUNKEHFRAME
249{
250# ifdef IEMNATIVE_USE_LIBUNWIND
251 /** The offset of the FDA into abEhFrame. */
252 uintptr_t offFda;
253# else
254 /** 'struct object' storage area. */
255 uint8_t abObject[1024];
256# endif
257# ifdef IEMNATIVE_USE_GDB_JIT
258# if 0
259 /** The GDB JIT 'symbol file' data. */
260 GDBJITSYMFILE GdbJitSymFile;
261# endif
262 /** The GDB JIT list entry. */
263 GDBJITCODEENTRY GdbJitEntry;
264# endif
265 /** The dwarf ehframe data for the chunk. */
266 uint8_t abEhFrame[512];
267} IEMEXECMEMCHUNKEHFRAME;
268/** Pointer to per-chunk info for non-windows hosts. */
269typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
270#endif
271
272
273/**
274 * A chunk of executable memory.
275 */
276typedef struct IEMEXECMEMCHUNK
277{
278#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
279 /** Number of free items in this chunk. */
280 uint32_t cFreeUnits;
281 /** Hint where to start searching for free space in the allocation bitmap. */
282 uint32_t idxFreeHint;
283#else
284 /** The heap handle. */
285 RTHEAPSIMPLE hHeap;
286#endif
287 /** Pointer to the chunk. */
288 void *pvChunk;
289#ifdef IN_RING3
290 /**
291 * Pointer to the unwind information.
292 *
293 * This is used during C++ throw and longjmp (windows and probably most other
294 * platforms). Some debuggers (windbg) make use of it as well.
295 *
296 * Windows: This is allocated from hHeap on windows because (at least for
297 * AMD64) the UNWIND_INFO structure address in the
298 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
299 *
300 * Others: Allocated from the regular heap to avoid unnecessary executable data
301 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
302 void *pvUnwindInfo;
303#elif defined(IN_RING0)
304 /** Allocation handle. */
305 RTR0MEMOBJ hMemObj;
306#endif
307} IEMEXECMEMCHUNK;
308/** Pointer to a memory chunk. */
309typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
310
311
312/**
313 * Executable memory allocator for the native recompiler.
314 */
315typedef struct IEMEXECMEMALLOCATOR
316{
317 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
318 uint32_t uMagic;
319
320 /** The chunk size. */
321 uint32_t cbChunk;
322 /** The maximum number of chunks. */
323 uint32_t cMaxChunks;
324 /** The current number of chunks. */
325 uint32_t cChunks;
326 /** Hint where to start looking for available memory. */
327 uint32_t idxChunkHint;
328 /** Statistics: Current number of allocations. */
329 uint32_t cAllocations;
330
331 /** The total amount of memory available. */
332 uint64_t cbTotal;
333 /** Total amount of free memory. */
334 uint64_t cbFree;
335 /** Total amount of memory allocated. */
336 uint64_t cbAllocated;
337
338#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
339 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
340 *
341 * Since the chunk size is a power of two and the minimum chunk size is a lot
342 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
343 * require a whole number of uint64_t elements in the allocation bitmap. So,
344 * for the sake of simplicity/laziness, they are allocated as one continuous
345 * chunk. */
346 uint64_t *pbmAlloc;
347 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
348 uint32_t cUnitsPerChunk;
349 /** Number of bitmap elements per chunk (for quickly locating the bitmap
350 * portion corresponding to a chunk). */
351 uint32_t cBitmapElementsPerChunk;
352#else
353 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
354 * @{ */
355 /** The size of the heap internal block header. This is used to adjust the
356 * request memory size to make sure there is exactly enough room for a header at
357 * the end of the blocks we allocate before the next 64 byte alignment line. */
358 uint32_t cbHeapBlockHdr;
359 /** The size of the initial heap allocation required to make sure the first
360 * allocation is correctly aligned. */
361 uint32_t cbHeapAlignTweak;
362 /** The alignment tweak allocation address. */
363 void *pvAlignTweak;
364 /** @} */
365#endif
366
367#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
368 /** Pointer to the array of unwind info running parallel to aChunks (same
369 * allocation as this structure, located after the bitmaps).
370 * (For Windows, the structures must reside in 32-bit RVA distance to the
371 * actual chunk, so they are allocated off the chunk.) */
372 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
373#endif
374
375 /** The allocation chunks. */
376 RT_FLEXIBLE_ARRAY_EXTENSION
377 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
378} IEMEXECMEMALLOCATOR;
379/** Pointer to an executable memory allocator. */
380typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
381
382/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
383#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
384
385
386static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
387
388
389/**
390 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
391 * the heap statistics.
392 */
393static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
394 uint32_t cbReq, uint32_t idxChunk)
395{
396 pExecMemAllocator->cAllocations += 1;
397 pExecMemAllocator->cbAllocated += cbReq;
398#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
399 pExecMemAllocator->cbFree -= cbReq;
400#else
401 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
402#endif
403 pExecMemAllocator->idxChunkHint = idxChunk;
404
405#ifdef RT_OS_DARWIN
406 /*
407 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
408 * on darwin. So, we mark the pages returned as read+write after alloc and
409 * expect the caller to call iemExecMemAllocatorReadyForUse when done
410 * writing to the allocation.
411 *
412 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
413 * for details.
414 */
415 /** @todo detect if this is necessary... it wasn't required on 10.15 or
416 * whatever older version it was. */
417 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
418 AssertRC(rc);
419#endif
420
421 return pvRet;
422}
423
424
425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
426static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
427 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
428{
429 /*
430 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
431 */
432 Assert(!(cToScan & 63));
433 Assert(!(idxFirst & 63));
434 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
435 pbmAlloc += idxFirst / 64;
436
437 /*
438 * Scan the bitmap for cReqUnits consecutive clear bits
439 */
440 /** @todo This can probably be done more efficiently for non-x86 systems. */
441 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
442 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
443 {
444 uint32_t idxAddBit = 1;
445 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
446 idxAddBit++;
447 if (idxAddBit >= cReqUnits)
448 {
449 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
450
451 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
452 pChunk->cFreeUnits -= cReqUnits;
453 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
454
455 void * const pvRet = (uint8_t *)pChunk->pvChunk
456 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
457
458 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
459 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
460 }
461
462 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
463 }
464 return NULL;
465}
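/*
 * Worked example (illustrative numbers): with cReqUnits = 4 and the first clear
 * bit found at iBit = 10, bits 10 thru 13 must all be clear; they are then set,
 * cFreeUnits drops by 4, idxFreeHint becomes 14, and the returned address is
 * pvChunk + (idxFirst + 10) * 128 bytes.
 */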
466#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
467
468
469static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
470{
471#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
472 /*
473 * Figure out how much to allocate.
474 */
475 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
476 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
477 {
478 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
479 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
480 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
481 {
482 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
483 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
484 if (pvRet)
485 return pvRet;
486 }
487 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
488 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
489 cReqUnits, idxChunk);
490 }
491#else
492 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
493 if (pvRet)
494 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
495#endif
496 return NULL;
497
498}
499
500
501/**
502 * Allocates @a cbReq bytes of executable memory.
503 *
504 * @returns Pointer to the memory, NULL if out of memory or other problem
505 * encountered.
506 * @param pVCpu The cross context virtual CPU structure of the calling
507 * thread.
508 * @param cbReq How many bytes are required.
509 */
510static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
511{
512 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
513 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
514 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
515
516
517 for (unsigned iIteration = 0;; iIteration++)
518 {
519 /*
520 * Adjust the request size so it'll fit the allocator alignment/whatnot.
521 *
522 * For the RTHeapSimple allocator this means to follow the logic described
523 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
524 * existing chunks if we think we've got sufficient free memory around.
525 *
526 * While for the alternative one we just align it up to a whole unit size.
527 */
528#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
529 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
530#else
531 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
532#endif
533 if (cbReq <= pExecMemAllocator->cbFree)
534 {
535 uint32_t const cChunks = pExecMemAllocator->cChunks;
536 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
537 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
544 {
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 }
549 }
550
551 /*
552 * Can we grow it with another chunk?
553 */
554 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
555 {
556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
557 AssertLogRelRCReturn(rc, NULL);
558
559 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
560 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
561 if (pvRet)
562 return pvRet;
563 AssertFailed();
564 }
565
566 /*
567 * Try prune native TBs once.
568 */
569 if (iIteration == 0)
570 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
571 else
572 {
573 /** @todo stats... */
574 return NULL;
575 }
576 }
577
578}
579
580
581/** This is a hook that we may need later for changing memory protection back
582 * to readonly+exec */
583static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
584{
585#ifdef RT_OS_DARWIN
586 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
587 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
588 AssertRC(rc); RT_NOREF(pVCpu);
589
590 /*
591 * Flush the instruction cache:
592 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
593 */
594 /* sys_dcache_flush(pv, cb); - not necessary */
595 sys_icache_invalidate(pv, cb);
596#else
597 RT_NOREF(pVCpu, pv, cb);
598#endif
599}
600
601
602/**
603 * Frees executable memory.
604 */
605void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
606{
607 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
608 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
609 Assert(pv);
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612#else
613 Assert(!((uintptr_t)pv & 63));
614#endif
615
616 /* Align the size as we did when allocating the block. */
617#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
618 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#else
620 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
621#endif
622
623 /* Free it / assert sanity. */
624#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
627 bool fFound = false;
628 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
629 {
630 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
631 fFound = offChunk < cbChunk;
632 if (fFound)
633 {
634#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
635 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
636 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
637
638 /* Check that it's valid and free it. */
639 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
641 for (uint32_t i = 1; i < cReqUnits; i++)
642 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
643 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
644
645 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
646 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
647
648 /* Update the stats. */
649 pExecMemAllocator->cbAllocated -= cb;
650 pExecMemAllocator->cbFree += cb;
651 pExecMemAllocator->cAllocations -= 1;
652 return;
653#else
654 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
655 break;
656#endif
657 }
658 }
659# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
660 AssertFailed();
661# else
662 Assert(fFound);
663# endif
664#endif
665
666#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
667 /* Update stats while cb is freshly calculated. */
668 pExecMemAllocator->cbAllocated -= cb;
669 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
670 pExecMemAllocator->cAllocations -= 1;
671
672 /* Free it. */
673 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
674#endif
675}
676
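/*
 * Typical lifecycle sketch (illustrative only, error handling and real code
 * emission omitted; the helper name and sizes are made up): allocate a block,
 * write native code into it, make it executable, and free it again later.
 */
#if 0
static void iemExecMemExampleLifecycle(PVMCPUCC pVCpu)
{
    uint32_t const cbCode = 256;
    uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbCode)
    {
        memset(pbCode, 0xcc, cbCode);                           /* emit code here; still writable on darwin */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode);  /* flips to read+exec and flushes the icache where needed */
        /* ... execute the code ... */
        iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);
    }
}
#endif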
677
678
679#ifdef IN_RING3
680# ifdef RT_OS_WINDOWS
681
682/**
683 * Initializes the unwind info structures for windows hosts.
684 */
685static int
686iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
687 void *pvChunk, uint32_t idxChunk)
688{
689 RT_NOREF(pVCpu);
690
691 /*
692 * The AMD64 unwind opcodes.
693 *
694 * This is a program that starts with RSP after a RET instruction that
695 * ends up in recompiled code, and the operations we describe here will
696 * restore all non-volatile registers and bring RSP back to where our
697 * RET address is. This means it's reverse order from what happens in
698 * the prologue.
699 *
700 * Note! Using a frame register approach here both because we have one,
701 * but mainly because the UWOP_ALLOC_LARGE argument values
702 * would be a pain to write initializers for. On the positive
703 * side, we're impervious to changes in the stack variable
704 * area and can deal with dynamic stack allocations if necessary.
705 */
706 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
707 {
708 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
709 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
710 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
711 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
712 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
713 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
714 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
715 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
716 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
717 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
718 };
719 union
720 {
721 IMAGE_UNWIND_INFO Info;
722 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
723 } s_UnwindInfo =
724 {
725 {
726 /* .Version = */ 1,
727 /* .Flags = */ 0,
728 /* .SizeOfProlog = */ 16, /* whatever */
729 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
730 /* .FrameRegister = */ X86_GREG_xBP,
731 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
732 }
733 };
734 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
735 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
736
737 /*
738 * Calc how much space we need and allocate it off the exec heap.
739 */
740 unsigned const cFunctionEntries = 1;
741 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
742 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
743# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
744 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
745 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
746 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
747# else
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
749 - pExecMemAllocator->cbHeapBlockHdr;
750 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
751 32 /*cbAlignment*/);
752# endif
753 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
754 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
755
756 /*
757 * Initialize the structures.
758 */
759 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
760
761 paFunctions[0].BeginAddress = 0;
762 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
763 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
764
765 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
766 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
767
768 /*
769 * Register it.
770 */
771 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
772 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
773
774 return VINF_SUCCESS;
775}
776
777
778# else /* !RT_OS_WINDOWS */
779
780/**
781 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
784{
785 if (iValue >= 64)
786 {
787 Assert(iValue < 0x2000);
788 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
789 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
790 }
791 else if (iValue >= 0)
792 *Ptr.pb++ = (uint8_t)iValue;
793 else if (iValue > -64)
794 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
795 else
796 {
797 Assert(iValue > -0x2000);
798 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
799 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
800 }
801 return Ptr;
802}
803
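/* Example encodings produced by the routine above: 16 -> 0x10, 200 -> 0xc8 0x01,
   -8 -> 0x78, -300 -> 0xd4 0x7d. */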
804
805/**
806 * Emits an ULEB128 encoded value (up to 64-bit wide).
807 */
808DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
809{
810 while (uValue >= 0x80)
811 {
812 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
813 uValue >>= 7;
814 }
815 *Ptr.pb++ = (uint8_t)uValue;
816 return Ptr;
817}
818
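/* Example encodings produced by the routine above: 0x10 -> 0x10, 300 -> 0xac 0x02,
   0x4000 -> 0x80 0x80 0x01. */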
819
820/**
821 * Emits a CFA rule as register @a uReg + offset @a off.
822 */
823DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
824{
825 *Ptr.pb++ = DW_CFA_def_cfa;
826 Ptr = iemDwarfPutUleb128(Ptr, uReg);
827 Ptr = iemDwarfPutUleb128(Ptr, off);
828 return Ptr;
829}
830
831
832/**
833 * Emits a register (@a uReg) save location:
834 * CFA + @a off * data_alignment_factor
835 */
836DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
837{
838 if (uReg < 0x40)
839 *Ptr.pb++ = DW_CFA_offset | uReg;
840 else
841 {
842 *Ptr.pb++ = DW_CFA_offset_extended;
843 Ptr = iemDwarfPutUleb128(Ptr, uReg);
844 }
845 Ptr = iemDwarfPutUleb128(Ptr, off);
846 return Ptr;
847}
848
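/* Worked example: with the -8 data alignment factor emitted in the CIE below,
   iemDwarfPutCfaOffset(Ptr, 6, 2) produces 0x86 0x02, i.e. "register 6 is saved
   at CFA + 2 * -8 = CFA - 16". */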
849
850# if 0 /* unused */
851/**
852 * Emits a register (@a uReg) save location, using signed offset:
853 * CFA + @a offSigned * data_alignment_factor
854 */
855DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
856{
857 *Ptr.pb++ = DW_CFA_offset_extended_sf;
858 Ptr = iemDwarfPutUleb128(Ptr, uReg);
859 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
860 return Ptr;
861}
862# endif
863
864
865/**
866 * Initializes the unwind info section for non-windows hosts.
867 */
868static int
869iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
870 void *pvChunk, uint32_t idxChunk)
871{
872 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
873 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
874
875 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
876
877 /*
878 * Generate the CIE first.
879 */
880# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
881 uint8_t const iDwarfVer = 3;
882# else
883 uint8_t const iDwarfVer = 4;
884# endif
885 RTPTRUNION const PtrCie = Ptr;
886 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
887 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
888 *Ptr.pb++ = iDwarfVer; /* DWARF version */
889 *Ptr.pb++ = 0; /* Augmentation. */
890 if (iDwarfVer >= 4)
891 {
892 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
893 *Ptr.pb++ = 0; /* Segment selector size. */
894 }
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
897# else
898 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
899# endif
900 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
901# ifdef RT_ARCH_AMD64
902 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
903# elif defined(RT_ARCH_ARM64)
904 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
905# else
906# error "port me"
907# endif
908 /* Initial instructions: */
909# ifdef RT_ARCH_AMD64
910 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
912 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
913 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
914 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
918# elif defined(RT_ARCH_ARM64)
919# if 1
920 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
921# else
922 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
923# endif
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
936 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
937 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
938# else
939# error "port me"
940# endif
941 while ((Ptr.u - PtrCie.u) & 3)
942 *Ptr.pb++ = DW_CFA_nop;
943 /* Finalize the CIE size. */
944 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
945
946 /*
947 * Generate an FDE for the whole chunk area.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
951# endif
952 RTPTRUNION const PtrFde = Ptr;
953 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
954 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
955 Ptr.pu32++;
956 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
957 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
958# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
959 *Ptr.pb++ = DW_CFA_nop;
960# endif
961 while ((Ptr.u - PtrFde.u) & 3)
962 *Ptr.pb++ = DW_CFA_nop;
963 /* Finalize the FDE size. */
964 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
965
966 /* Terminator entry. */
967 *Ptr.pu32++ = 0;
968 *Ptr.pu32++ = 0; /* just to be sure... */
969 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
970
971 /*
972 * Register it.
973 */
974# ifdef IEMNATIVE_USE_LIBUNWIND
975 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
976# else
977 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
978 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
979# endif
980
981# ifdef IEMNATIVE_USE_GDB_JIT
982 /*
983 * Now for telling GDB about this (experimental).
984 *
985 * This seems to work best with ET_DYN.
986 */
987 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
988# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
989 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
990 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
991# else
992 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
993 - pExecMemAllocator->cbHeapBlockHdr;
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
995# endif
996 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
997 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
998
999 RT_ZERO(*pSymFile);
1000
1001 /*
1002 * The ELF header:
1003 */
1004 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1005 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1006 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1007 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1008 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1009 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1010 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1011 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1012# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1013 pSymFile->EHdr.e_type = ET_DYN;
1014# else
1015 pSymFile->EHdr.e_type = ET_REL;
1016# endif
1017# ifdef RT_ARCH_AMD64
1018 pSymFile->EHdr.e_machine = EM_AMD64;
1019# elif defined(RT_ARCH_ARM64)
1020 pSymFile->EHdr.e_machine = EM_AARCH64;
1021# else
1022# error "port me"
1023# endif
1024 pSymFile->EHdr.e_version = 1; /*?*/
1025 pSymFile->EHdr.e_entry = 0;
1026# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1027 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1028# else
1029 pSymFile->EHdr.e_phoff = 0;
1030# endif
1031 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1032 pSymFile->EHdr.e_flags = 0;
1033 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1035 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1036 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1037# else
1038 pSymFile->EHdr.e_phentsize = 0;
1039 pSymFile->EHdr.e_phnum = 0;
1040# endif
1041 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1042 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1043 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1044
1045 uint32_t offStrTab = 0;
1046#define APPEND_STR(a_szStr) do { \
1047 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1048 offStrTab += sizeof(a_szStr); \
1049 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1050 } while (0)
1051#define APPEND_STR_FMT(a_szStr, ...) do { \
1052 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1053 offStrTab++; \
1054 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1055 } while (0)
1056
1057 /*
1058 * Section headers.
1059 */
1060 /* Section header #0: NULL */
1061 unsigned i = 0;
1062 APPEND_STR("");
1063 RT_ZERO(pSymFile->aShdrs[i]);
1064 i++;
1065
1066 /* Section header: .eh_frame */
1067 pSymFile->aShdrs[i].sh_name = offStrTab;
1068 APPEND_STR(".eh_frame");
1069 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1070 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1071# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1072 pSymFile->aShdrs[i].sh_offset
1073 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1074# else
1075 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1076 pSymFile->aShdrs[i].sh_offset = 0;
1077# endif
1078
1079 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1080 pSymFile->aShdrs[i].sh_link = 0;
1081 pSymFile->aShdrs[i].sh_info = 0;
1082 pSymFile->aShdrs[i].sh_addralign = 1;
1083 pSymFile->aShdrs[i].sh_entsize = 0;
1084 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1085 i++;
1086
1087 /* Section header: .shstrtab */
1088 unsigned const iShStrTab = i;
1089 pSymFile->EHdr.e_shstrndx = iShStrTab;
1090 pSymFile->aShdrs[i].sh_name = offStrTab;
1091 APPEND_STR(".shstrtab");
1092 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1093 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1095 pSymFile->aShdrs[i].sh_offset
1096 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1097# else
1098 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1099 pSymFile->aShdrs[i].sh_offset = 0;
1100# endif
1101 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1102 pSymFile->aShdrs[i].sh_link = 0;
1103 pSymFile->aShdrs[i].sh_info = 0;
1104 pSymFile->aShdrs[i].sh_addralign = 1;
1105 pSymFile->aShdrs[i].sh_entsize = 0;
1106 i++;
1107
1108 /* Section header: .symbols */
1109 pSymFile->aShdrs[i].sh_name = offStrTab;
1110 APPEND_STR(".symtab");
1111 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1112 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1113 pSymFile->aShdrs[i].sh_offset
1114 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1115 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1116 pSymFile->aShdrs[i].sh_link = iShStrTab;
1117 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1118 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1119 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1120 i++;
1121
1122# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1123 /* Section header: .dynsym */
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".dynsym");
1126 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1128 pSymFile->aShdrs[i].sh_offset
1129 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1130 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1131 pSymFile->aShdrs[i].sh_link = iShStrTab;
1132 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1133 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1134 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1135 i++;
1136# endif
1137
1138# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1139 /* Section header: .dynamic */
1140 pSymFile->aShdrs[i].sh_name = offStrTab;
1141 APPEND_STR(".dynamic");
1142 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1143 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1144 pSymFile->aShdrs[i].sh_offset
1145 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1146 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1147 pSymFile->aShdrs[i].sh_link = iShStrTab;
1148 pSymFile->aShdrs[i].sh_info = 0;
1149 pSymFile->aShdrs[i].sh_addralign = 1;
1150 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1151 i++;
1152# endif
1153
1154 /* Section header: .text */
1155 unsigned const iShText = i;
1156 pSymFile->aShdrs[i].sh_name = offStrTab;
1157 APPEND_STR(".text");
1158 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1159 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1160# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1161 pSymFile->aShdrs[i].sh_offset
1162 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1163# else
1164 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1165 pSymFile->aShdrs[i].sh_offset = 0;
1166# endif
1167 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1168 pSymFile->aShdrs[i].sh_link = 0;
1169 pSymFile->aShdrs[i].sh_info = 0;
1170 pSymFile->aShdrs[i].sh_addralign = 1;
1171 pSymFile->aShdrs[i].sh_entsize = 0;
1172 i++;
1173
1174 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1175
1176# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1177 /*
1178 * The program headers:
1179 */
1180 /* Everything in a single LOAD segment: */
1181 i = 0;
1182 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1183 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1184 pSymFile->aPhdrs[i].p_offset
1185 = pSymFile->aPhdrs[i].p_vaddr
1186 = pSymFile->aPhdrs[i].p_paddr = 0;
1187 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1188 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1189 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1190 i++;
1191 /* The .dynamic segment. */
1192 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1193 pSymFile->aPhdrs[i].p_flags = PF_R;
1194 pSymFile->aPhdrs[i].p_offset
1195 = pSymFile->aPhdrs[i].p_vaddr
1196 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1197 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1198 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1199 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1200 i++;
1201
1202 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1203
1204 /*
1205 * The dynamic section:
1206 */
1207 i = 0;
1208 pSymFile->aDyn[i].d_tag = DT_SONAME;
1209 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1210 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1219 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1220 i++;
1221 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1222 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1223 i++;
1224 pSymFile->aDyn[i].d_tag = DT_NULL;
1225 i++;
1226 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1227# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1228
1229 /*
1230 * Symbol tables:
1231 */
1232 /** @todo gdb doesn't seem to really like this ... */
1233 i = 0;
1234 pSymFile->aSymbols[i].st_name = 0;
1235 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1236 pSymFile->aSymbols[i].st_value = 0;
1237 pSymFile->aSymbols[i].st_size = 0;
1238 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1239 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1240# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1241 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1242# endif
1243 i++;
1244
1245 pSymFile->aSymbols[i].st_name = 0;
1246 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1247 pSymFile->aSymbols[i].st_value = 0;
1248 pSymFile->aSymbols[i].st_size = 0;
1249 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1250 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1251 i++;
1252
1253 pSymFile->aSymbols[i].st_name = offStrTab;
1254 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1255# if 0
1256 pSymFile->aSymbols[i].st_shndx = iShText;
1257 pSymFile->aSymbols[i].st_value = 0;
1258# else
1259 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1260 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1261# endif
1262 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1263 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1264 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1265# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1266 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1267 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1268# endif
1269 i++;
1270
1271 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1272 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1273
1274 /*
1275 * The GDB JIT entry and informing GDB.
1276 */
1277 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1278# if 1
1279 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1280# else
1281 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1282# endif
1283
1284 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1285 RTCritSectEnter(&g_IemNativeGdbJitLock);
1286 pEhFrame->GdbJitEntry.pNext = NULL;
1287 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1288 if (__jit_debug_descriptor.pTail)
1289 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1290 else
1291 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1292 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1293 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1294
1295 /* Notify GDB: */
1296 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1297 __jit_debug_register_code();
1298 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1299 RTCritSectLeave(&g_IemNativeGdbJitLock);
1300
1301# else /* !IEMNATIVE_USE_GDB_JIT */
1302 RT_NOREF(pVCpu);
1303# endif /* !IEMNATIVE_USE_GDB_JIT */
1304
1305 return VINF_SUCCESS;
1306}
1307
1308# endif /* !RT_OS_WINDOWS */
1309#endif /* IN_RING3 */
1310
1311
1312/**
1313 * Adds another chunk to the executable memory allocator.
1314 *
1315 * This is used by the init code for the initial allocation and later by the
1316 * regular allocator function when it's out of memory.
1317 */
1318static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1319{
1320 /* Check that we've room for growth. */
1321 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1322 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1323
1324 /* Allocate a chunk. */
1325#ifdef RT_OS_DARWIN
1326 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1327#else
1328 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1329#endif
1330 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 int rc = VINF_SUCCESS;
1334#else
1335 /* Initialize the heap for the chunk. */
1336 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1337 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1338 AssertRC(rc);
1339 if (RT_SUCCESS(rc))
1340 {
1341 /*
1342 * We want the memory to be aligned on 64 byte, so the first time thru
1343 * here we do some exploratory allocations to see how we can achieve this.
1344 * On subsequent runs we only make an initial adjustment allocation, if
1345 * necessary.
1346 *
1347 * Since we own the heap implementation, we know that the internal block
1348 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1349 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1350 * to the size, align up by 64 bytes, and subtract 32 bytes.
1351 *
1352 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1353 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1354 * allocation to force subsequent allocations to return 64 byte aligned
1355 * user areas.
1356 */
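        /* Worked example: a 200 byte request becomes RT_ALIGN_32(200 + 32, 64) - 32
           = 224 bytes, so header (32) plus user area (224) spans exactly 256 bytes
           and the next 64 byte aligned user area follows immediately after it. */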
1357 if (!pExecMemAllocator->cbHeapBlockHdr)
1358 {
1359 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1360 pExecMemAllocator->cbHeapAlignTweak = 64;
1361 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1362 32 /*cbAlignment*/);
1363 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1364
1365 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1372 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1373 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1374 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1375 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1376
1377 RTHeapSimpleFree(hHeap, pvTest2);
1378 RTHeapSimpleFree(hHeap, pvTest1);
1379 }
1380 else
1381 {
1382 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1383 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1384 }
1385 if (RT_SUCCESS(rc))
1386#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1387 {
1388 /*
1389 * Add the chunk.
1390 *
1391 * This must be done before the unwind init so windows can allocate
1392 * memory from the chunk when using the alternative sub-allocator.
1393 */
1394 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1395#ifdef IN_RING3
1396 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1397#endif
1398#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1399 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1400#else
1401 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1402 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1403 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1404 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1405#endif
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1411 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1412 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1413#else
1414 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1415 pExecMemAllocator->cbTotal += cbFree;
1416 pExecMemAllocator->cbFree += cbFree;
1417#endif
1418
1419#ifdef IN_RING3
1420 /*
1421 * Initialize the unwind information (this cannot really fail atm).
1422 * (This sets pvUnwindInfo.)
1423 */
1424 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1425 if (RT_SUCCESS(rc))
1426#endif
1427 {
1428 return VINF_SUCCESS;
1429 }
1430
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 /* Just in case the impossible happens, undo the above: */
1433 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1434 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1435 pExecMemAllocator->cChunks = idxChunk;
1436 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1437 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1438 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1439 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1440#endif
1441 }
1442#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 }
1444#endif
1445 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1446 RT_NOREF(pVCpu);
1447 return rc;
1448}
1449
1450
1451/**
1452 * Initializes the executable memory allocator for native recompilation on the
1453 * calling EMT.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu The cross context virtual CPU structure of the calling
1457 * thread.
1458 * @param cbMax The max size of the allocator.
1459 * @param cbInitial The initial allocator size.
1460 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1461 * dependent).
1462 */
1463int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1464{
1465 /*
1466 * Validate input.
1467 */
1468 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1469 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1470 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1471 || cbChunk == 0
1472 || ( RT_IS_POWER_OF_TWO(cbChunk)
1473 && cbChunk >= _1M
1474 && cbChunk <= _256M
1475 && cbChunk <= cbMax),
1476 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1477 VERR_OUT_OF_RANGE);
1478
1479 /*
1480 * Adjust/figure out the chunk size.
1481 */
1482 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1483 {
1484 if (cbMax >= _256M)
1485 cbChunk = _64M;
1486 else
1487 {
1488 if (cbMax < _16M)
1489 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1490 else
1491 cbChunk = (uint32_t)cbMax / 4;
1492 if (!RT_IS_POWER_OF_TWO(cbChunk))
1493 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1494 }
1495 }
1496
1497 if (cbChunk > cbMax)
1498 cbMax = cbChunk;
1499 else
1500 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1501 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1502 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
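    /* Worked example (pure illustration): cbMax = 512 MiB selects the default 64 MiB
       chunk size, cbMax stays 512 MiB after rounding, and cMaxChunks becomes 8; a
       small cbMax of 8 MiB yields cbChunk = 4 MiB and cMaxChunks = 2. */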
1503
1504 /*
1505 * Allocate and initialize the allocator instance.
1506 */
1507 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1508#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1509 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1510 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
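    /* One allocation bit per 2^IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT byte unit and
       eight bits per bitmap byte, hence the '+ 3' in the shift above. */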
1511 cbNeeded += cbBitmap * cMaxChunks;
1512 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1513 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1514#endif
1515#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1516 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1517 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1518#endif
1519 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1520 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1521 VERR_NO_MEMORY);
1522 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1523 pExecMemAllocator->cbChunk = cbChunk;
1524 pExecMemAllocator->cMaxChunks = cMaxChunks;
1525 pExecMemAllocator->cChunks = 0;
1526 pExecMemAllocator->idxChunkHint = 0;
1527 pExecMemAllocator->cAllocations = 0;
1528 pExecMemAllocator->cbTotal = 0;
1529 pExecMemAllocator->cbFree = 0;
1530 pExecMemAllocator->cbAllocated = 0;
1531#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1532 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1533 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1534 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1535 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1536#endif
1537#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1538 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1539#endif
1540 for (uint32_t i = 0; i < cMaxChunks; i++)
1541 {
1542#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1543 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1544 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1545#else
1546 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1547#endif
1548 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1549#ifdef IN_RING0
1550 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1551#else
1552 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1553#endif
1554 }
1555 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1556
1557 /*
1558 * Do the initial allocations.
1559 */
1560 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1561 {
1562 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1563 AssertLogRelRCReturn(rc, rc);
1564 }
1565
1566 pExecMemAllocator->idxChunkHint = 0;
1567
1568 return VINF_SUCCESS;
1569}
1570
1571
1572/*********************************************************************************************************************************
1573* Native Recompilation *
1574*********************************************************************************************************************************/
1575
1576
1577/**
1578 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1581{
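    /* idxInstr is the number of instructions the TB had executed when the failing
       call was made, so the instruction counter stays accurate.  VINF_IEM_REEXEC_BREAK
       just requests leaving the TB and is therefore treated as success before the
       usual status code fiddling. */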
1582 pVCpu->iem.s.cInstructions += idxInstr;
1583 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1584}
1585
1586
1587/**
1588 * Used by TB code when it wants to raise a \#GP(0).
1589 */
1590IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1591{
1592 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1593#ifndef _MSC_VER
1594 return VINF_IEM_RAISED_XCPT; /* not reached */
1595#endif
1596}
1597
1598
1599/**
1600 * Used by TB code when it wants to raise a \#NM.
1601 */
1602IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1603{
1604 iemRaiseDeviceNotAvailableJmp(pVCpu);
1605#ifndef _MSC_VER
1606 return VINF_IEM_RAISED_XCPT; /* not reached */
1607#endif
1608}
1609
1610
1611/**
1612 * Used by TB code when it wants to raise a \#UD.
1613 */
1614IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1615{
1616 iemRaiseUndefinedOpcodeJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise a \#MF.
1625 */
1626IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1627{
1628 iemRaiseMathFaultJmp(pVCpu);
1629#ifndef _MSC_VER
1630 return VINF_IEM_RAISED_XCPT; /* not reached */
1631#endif
1632}
1633
1634
1635/**
1636 * Used by TB code when it wants to raise a \#XF.
1637 */
1638IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1639{
1640 iemRaiseSimdFpExceptionJmp(pVCpu);
1641#ifndef _MSC_VER
1642 return VINF_IEM_RAISED_XCPT; /* not reached */
1643#endif
1644}
1645
1646
1647/**
1648 * Used by TB code when detecting opcode changes.
1649 * @see iemThreadeFuncWorkerObsoleteTb
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1652{
1653 /* We set fSafeToFree to false because we're being called in the context
1654 of a TB callback function, which for native TBs means we cannot release
1655 the executable memory until we've returned all the way back to iemTbExec, as
1656 that return path goes via the native code generated for the TB. */
1657 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1658 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1659 return VINF_IEM_REEXEC_BREAK;
1660}
1661
1662
1663/**
1664 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1665 */
1666IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1667{
1668 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1669 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1670 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1671 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1672 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1673 return VINF_IEM_REEXEC_BREAK;
1674}
1675
1676
1677/**
1678 * Used by TB code when we missed a PC check after a branch.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1681{
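    /* The logged guest physical address is derived by turning RIP into a flat
       address (+ CS.BASE), converting that into an offset within the current
       instruction buffer (- uInstrBufPc) and adding the buffer's guest physical
       base (GCPhysInstrBuf). */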
1682 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1683 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1684 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1685 pVCpu->iem.s.pbInstrBuf));
1686 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1687 return VINF_IEM_REEXEC_BREAK;
1688}
1689
1690
1691
1692/*********************************************************************************************************************************
1693* Helpers: Segmented memory fetches and stores. *
1694*********************************************************************************************************************************/
1695
1696/**
1697 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1698 */
1699IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1700{
1701#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1702 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1703#else
1704 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1705#endif
1706}
1707
1708
1709/**
1710 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1711 * to 16 bits.
1712 */
1713IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1714{
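    /* Cast chain: (int8_t) reinterprets the fetched byte as signed, (int16_t)
       sign-extends it to 16 bits, and the outer unsigned casts zero-extend the
       result to 64 bits so the caller always gets a clean register value.  The
       other _Sx_ helpers below follow the same pattern for their widths. */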
1715#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1716 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1717#else
1718 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1719#endif
1720}
1721
1722
1723/**
1724 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1725 * to 32 bits.
1726 */
1727IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1728{
1729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1730 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1731#else
1732 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1733#endif
1734}
1735
1736/**
1737 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1738 * to 64 bits.
1739 */
1740IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1741{
1742#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1743 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1744#else
1745 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1746#endif
1747}
1748
1749
1750/**
1751 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1752 */
1753IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1754{
1755#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1756 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1757#else
1758 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1759#endif
1760}
1761
1762
1763/**
1764 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1765 * to 32 bits.
1766 */
1767IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1768{
1769#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1770 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1771#else
1772 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1773#endif
1774}
1775
1776
1777/**
1778 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1779 * to 64 bits.
1780 */
1781IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1782{
1783#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1784 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1785#else
1786 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1787#endif
1788}
1789
1790
1791/**
1792 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1793 */
1794IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1795{
1796#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1797 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1798#else
1799 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1800#endif
1801}
1802
1803
1804/**
1805 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1806 * to 64 bits.
1807 */
1808IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1809{
1810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1811 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1812#else
1813 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1814#endif
1815}
1816
1817
1818/**
1819 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1820 */
1821IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1822{
1823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1824 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1825#else
1826 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1827#endif
1828}
1829
1830
1831/**
1832 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1833 */
1834IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1835{
1836#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1837 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1838#else
1839 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1840#endif
1841}
1842
1843
1844/**
1845 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1846 */
1847IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1848{
1849#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1850 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1851#else
1852 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1853#endif
1854}
1855
1856
1857/**
1858 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1859 */
1860IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1861{
1862#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1863 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1864#else
1865 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1866#endif
1867}
1868
1869
1870/**
1871 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1872 */
1873IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1874{
1875#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1876 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1877#else
1878 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1879#endif
1880}
1881
1882
1883
1884/**
1885 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1886 */
1887IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1888{
1889#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1890 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1891#else
1892 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1893#endif
1894}
1895
1896
1897/**
1898 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1899 */
1900IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1901{
1902#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1903 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1904#else
1905 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1906#endif
1907}
1908
1909
1910/**
1911 * Used by TB code to store a 32-bit selector value onto a generic stack.
1912 *
1913 * Intel CPUs don't write a whole dword, thus the special function.
1914 */
1915IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1916{
1917#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1918 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1919#else
1920 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1921#endif
1922}
1923
1924
1925/**
1926 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1927 */
1928IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1929{
1930#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1931 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1932#else
1933 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1934#endif
1935}
1936
1937
1938/**
1939 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1940 */
1941IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1942{
1943#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1944 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1945#else
1946 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1947#endif
1948}
1949
1950
1951/**
1952 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1953 */
1954IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1955{
1956#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1957 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1958#else
1959 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1960#endif
1961}
1962
1963
1964/**
1965 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1966 */
1967IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1968{
1969#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1970 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1971#else
1972 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1973#endif
1974}
1975
1976
1977
1978/*********************************************************************************************************************************
1979* Helpers: Flat memory fetches and stores. *
1980*********************************************************************************************************************************/
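/* Note: the *SafeJmp calls below reuse the segmented helpers and pass UINT8_MAX as
   the segment register index, which IEM treats as flat (no segment) addressing. */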
1981
1982/**
1983 * Used by TB code to load unsigned 8-bit data w/ flat address.
1984 * @note Zero extending the value to 64-bit to simplify assembly.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1989 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1990#else
1991 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1998 * to 16 bits.
1999 * @note Zero extending the value to 64-bit to simplify assembly.
2000 */
2001IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2002{
2003#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2004 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2005#else
2006 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2007#endif
2008}
2009
2010
2011/**
2012 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2013 * to 32 bits.
2014 * @note Zero extending the value to 64-bit to simplify assembly.
2015 */
2016IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2017{
2018#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2019 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2020#else
2021 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2022#endif
2023}
2024
2025
2026/**
2027 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2028 * to 64 bits.
2029 */
2030IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2031{
2032#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2033 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2034#else
2035 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2036#endif
2037}
2038
2039
2040/**
2041 * Used by TB code to load unsigned 16-bit data w/ flat address.
2042 * @note Zero extending the value to 64-bit to simplify assembly.
2043 */
2044IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2045{
2046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2047 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2048#else
2049 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2050#endif
2051}
2052
2053
2054/**
2055 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2056 * to 32 bits.
2057 * @note Zero extending the value to 64-bit to simplify assembly.
2058 */
2059IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2060{
2061#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2062 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2063#else
2064 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2065#endif
2066}
2067
2068
2069/**
2070 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2071 * to 64 bits.
2072 * @note Zero extending the value to 64-bit to simplify assembly.
2073 */
2074IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2075{
2076#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2077 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2078#else
2079 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2080#endif
2081}
2082
2083
2084/**
2085 * Used by TB code to load unsigned 32-bit data w/ flat address.
2086 * @note Zero extending the value to 64-bit to simplify assembly.
2087 */
2088IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2089{
2090#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2091 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2092#else
2093 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2094#endif
2095}
2096
2097
2098/**
2099 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2100 * to 64 bits.
2101 * @note Zero extending the value to 64-bit to simplify assembly.
2102 */
2103IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2104{
2105#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2106 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2107#else
2108 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2109#endif
2110}
2111
2112
2113/**
2114 * Used by TB code to load unsigned 64-bit data w/ flat address.
2115 */
2116IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2117{
2118#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2119 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2120#else
2121 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2122#endif
2123}
2124
2125
2126/**
2127 * Used by TB code to store unsigned 8-bit data w/ flat address.
2128 */
2129IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2130{
2131#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2132 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2133#else
2134 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2135#endif
2136}
2137
2138
2139/**
2140 * Used by TB code to store unsigned 16-bit data w/ flat address.
2141 */
2142IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2143{
2144#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2145 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2146#else
2147 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2148#endif
2149}
2150
2151
2152/**
2153 * Used by TB code to store unsigned 32-bit data w/ flat address.
2154 */
2155IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2156{
2157#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2158 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2159#else
2160 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2161#endif
2162}
2163
2164
2165/**
2166 * Used by TB code to store unsigned 64-bit data w/ flat address.
2167 */
2168IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2169{
2170#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2171 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2172#else
2173 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2174#endif
2175}
2176
2177
2178
2179/**
2180 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2181 */
2182IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2183{
2184#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2185 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2186#else
2187 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2188#endif
2189}
2190
2191
2192/**
2193 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2194 */
2195IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2196{
2197#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2198 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2199#else
2200 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2201#endif
2202}
2203
2204
2205/**
2206 * Used by TB code to store a segment selector value onto a flat stack.
2207 *
2208 * Intel CPUs don't write a whole dword, thus the special function.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2213 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2214#else
2215 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2222 */
2223IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2224{
2225#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2226 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2227#else
2228 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2229#endif
2230}
2231
2232
2233/**
2234 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2235 */
2236IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2237{
2238#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2239 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2240#else
2241 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2242#endif
2243}
2244
2245
2246/**
2247 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2248 */
2249IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2250{
2251#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2252 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2253#else
2254 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2255#endif
2256}
2257
2258
2259/**
2260 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2261 */
2262IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2263{
2264#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2265 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2266#else
2267 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2268#endif
2269}
2270
2271
2272
2273/*********************************************************************************************************************************
2274* Helpers: Segmented memory mapping. *
2275*********************************************************************************************************************************/
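/* Note: each mapping helper returns the host pointer and fills in *pbUnmapInfo with
   the token the matching commit-and-unmap helper (see further down) needs to release
   the mapping again. */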
2276
2277/**
2278 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2279 * segmentation.
2280 */
2281IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2282 RTGCPTR GCPtrMem, uint8_t iSegReg))
2283{
2284#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2285 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2286#else
2287 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2288#endif
2289}
2290
2291
2292/**
2293 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2294 */
2295IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2296 RTGCPTR GCPtrMem, uint8_t iSegReg))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2299 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2300#else
2301 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2302#endif
2303}
2304
2305
2306/**
2307 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2308 */
2309IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2310 RTGCPTR GCPtrMem, uint8_t iSegReg))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2313 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2314#else
2315 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2324 RTGCPTR GCPtrMem, uint8_t iSegReg))
2325{
2326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2327 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2328#else
2329 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2330#endif
2331}
2332
2333
2334/**
2335 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2336 * segmentation.
2337 */
2338IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2339 RTGCPTR GCPtrMem, uint8_t iSegReg))
2340{
2341#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2342 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2343#else
2344 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2345#endif
2346}
2347
2348
2349/**
2350 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2351 */
2352IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2353 RTGCPTR GCPtrMem, uint8_t iSegReg))
2354{
2355#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2356 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2357#else
2358 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2359#endif
2360}
2361
2362
2363/**
2364 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2365 */
2366IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2367 RTGCPTR GCPtrMem, uint8_t iSegReg))
2368{
2369#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2370 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2371#else
2372 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2373#endif
2374}
2375
2376
2377/**
2378 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2379 */
2380IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2381 RTGCPTR GCPtrMem, uint8_t iSegReg))
2382{
2383#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2384 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2385#else
2386 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2387#endif
2388}
2389
2390
2391/**
2392 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2393 * segmentation.
2394 */
2395IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2396 RTGCPTR GCPtrMem, uint8_t iSegReg))
2397{
2398#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2399 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2400#else
2401 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2402#endif
2403}
2404
2405
2406/**
2407 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2408 */
2409IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2410 RTGCPTR GCPtrMem, uint8_t iSegReg))
2411{
2412#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2413 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2414#else
2415 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2416#endif
2417}
2418
2419
2420/**
2421 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2422 */
2423IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2424 RTGCPTR GCPtrMem, uint8_t iSegReg))
2425{
2426#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2427 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2428#else
2429 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2430#endif
2431}
2432
2433
2434/**
2435 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2436 */
2437IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2438 RTGCPTR GCPtrMem, uint8_t iSegReg))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2441 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2442#else
2443 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2450 * segmentation.
2451 */
2452IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2453 RTGCPTR GCPtrMem, uint8_t iSegReg))
2454{
2455#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2456 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2457#else
2458 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2459#endif
2460}
2461
2462
2463/**
2464 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2465 */
2466IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2467 RTGCPTR GCPtrMem, uint8_t iSegReg))
2468{
2469#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2470 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2471#else
2472 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2473#endif
2474}
2475
2476
2477/**
2478 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2479 */
2480IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2481 RTGCPTR GCPtrMem, uint8_t iSegReg))
2482{
2483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2484 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2485#else
2486 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2487#endif
2488}
2489
2490
2491/**
2492 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2495 RTGCPTR GCPtrMem, uint8_t iSegReg))
2496{
2497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2498 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2499#else
2500 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#endif
2502}
2503
2504
2505/**
2506 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2507 */
2508IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2509 RTGCPTR GCPtrMem, uint8_t iSegReg))
2510{
2511#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2512 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2513#else
2514 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#endif
2516}
2517
2518
2519/**
2520 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2521 */
2522IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2523 RTGCPTR GCPtrMem, uint8_t iSegReg))
2524{
2525#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2526 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2527#else
2528 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2529#endif
2530}
2531
2532
2533/**
2534 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2535 * segmentation.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2538 RTGCPTR GCPtrMem, uint8_t iSegReg))
2539{
2540#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2541 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2542#else
2543 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2544#endif
2545}
2546
2547
2548/**
2549 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2550 */
2551IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2552 RTGCPTR GCPtrMem, uint8_t iSegReg))
2553{
2554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2555 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2556#else
2557 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2558#endif
2559}
2560
2561
2562/**
2563 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2564 */
2565IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2566 RTGCPTR GCPtrMem, uint8_t iSegReg))
2567{
2568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2569 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2570#else
2571 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#endif
2573}
2574
2575
2576/**
2577 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/*********************************************************************************************************************************
2591* Helpers: Flat memory mapping. *
2592*********************************************************************************************************************************/
2593
2594/**
2595 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2596 * address.
2597 */
2598IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2599{
2600#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2601 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2602#else
2603 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2604#endif
2605}
2606
2607
2608/**
2609 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2610 */
2611IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2615#else
2616 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2625{
2626#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2627 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2628#else
2629 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2630#endif
2631}
2632
2633
2634/**
2635 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2636 */
2637IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2638{
2639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2640 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2641#else
2642 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2643#endif
2644}
2645
2646
2647/**
2648 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2649 * address.
2650 */
2651IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2652{
2653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2654 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2655#else
2656 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2657#endif
2658}
2659
2660
2661/**
2662 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2663 */
2664IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2665{
2666#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2667 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2668#else
2669 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2670#endif
2671}
2672
2673
2674/**
2675 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2676 */
2677IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2678{
2679#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2680 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2681#else
2682 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2683#endif
2684}
2685
2686
2687/**
2688 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2689 */
2690IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2691{
2692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2693 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2694#else
2695 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2696#endif
2697}
2698
2699
2700/**
2701 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2702 * address.
2703 */
2704IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2705{
2706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2707 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2708#else
2709 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2710#endif
2711}
2712
2713
2714/**
2715 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2716 */
2717IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2718{
2719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2720 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2721#else
2722 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2723#endif
2724}
2725
2726
2727/**
2728 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2729 */
2730IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2731{
2732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2733 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2734#else
2735 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2736#endif
2737}
2738
2739
2740/**
2741 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2742 */
2743IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2744{
2745#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2746 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2747#else
2748 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2749#endif
2750}
2751
2752
2753/**
2754 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2755 * address.
2756 */
2757IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2758{
2759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2760 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2761#else
2762 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2763#endif
2764}
2765
2766
2767/**
2768 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2769 */
2770IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2771{
2772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2773 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2774#else
2775 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2776#endif
2777}
2778
2779
2780/**
2781 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2782 */
2783IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2784{
2785#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2786 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2787#else
2788 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2789#endif
2790}
2791
2792
2793/**
2794 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2795 */
2796IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2800#else
2801 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2810{
2811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2812 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2813#else
2814 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2815#endif
2816}
2817
2818
2819/**
2820 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2821 */
2822IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2823{
2824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2825 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2826#else
2827 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2828#endif
2829}
2830
2831
2832/**
2833 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2834 * address.
2835 */
2836IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2837{
2838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2839 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2840#else
2841 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2842#endif
2843}
2844
2845
2846/**
2847 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2848 */
2849IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2850{
2851#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2852 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2853#else
2854 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2855#endif
2856}
2857
2858
2859/**
2860 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2861 */
2862IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2863{
2864#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2865 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2866#else
2867 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2868#endif
2869}
2870
2871
2872/**
2873 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2874 */
2875IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2876{
2877#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2878 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2879#else
2880 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2881#endif
2882}
2883
2884
2885/*********************************************************************************************************************************
2886* Helpers: Commit, rollback & unmap *
2887*********************************************************************************************************************************/
2888
2889/**
2890 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2891 */
2892IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2893{
2894 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2895}
2896
2897
2898/**
2899 * Used by TB code to commit and unmap a read-write memory mapping.
2900 */
2901IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2902{
2903 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2904}
2905
2906
2907/**
2908 * Used by TB code to commit and unmap a write-only memory mapping.
2909 */
2910IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2911{
2912 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2913}
2914
2915
2916/**
2917 * Used by TB code to commit and unmap a read-only memory mapping.
2918 */
2919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2920{
2921 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2922}
2923
2924
2925/**
2926 * Reinitializes the native recompiler state.
2927 *
2928 * Called before starting a new recompile job.
2929 */
2930static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2931{
2932 pReNative->cLabels = 0;
2933 pReNative->bmLabelTypes = 0;
2934 pReNative->cFixups = 0;
2935#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2936 pReNative->pDbgInfo->cEntries = 0;
2937#endif
2938 pReNative->pTbOrg = pTb;
2939 pReNative->cCondDepth = 0;
2940 pReNative->uCondSeqNo = 0;
2941 pReNative->uCheckIrqSeqNo = 0;
2942 pReNative->uTlbSeqNo = 0;
2943
2944#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2945 pReNative->Core.offPc = 0;
2946 pReNative->Core.cInstrPcUpdateSkipped = 0;
2947#endif
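    /* Registers in IEMNATIVE_REG_FIXED_MASK - and, when the host has fewer than 32
       GPRs, the bits for the non-existent ones - start out marked as allocated so
       the register allocator never hands them out. */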
2948 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2949#if IEMNATIVE_HST_GREG_COUNT < 32
2950 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2951#endif
2952 ;
2953 pReNative->Core.bmHstRegsWithGstShadow = 0;
2954 pReNative->Core.bmGstRegShadows = 0;
2955 pReNative->Core.bmVars = 0;
2956 pReNative->Core.bmStack = 0;
2957 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2958 pReNative->Core.u64ArgVars = UINT64_MAX;
2959
2960 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2961 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2962 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2963 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2964 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2965 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2974
2975 /* Full host register reinit: */
2976 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2977 {
2978 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2979 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2980 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2981 }
2982
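    /* First tag all the remaining fixed registers as generically reserved, then
       give the well-known ones their specific roles below. */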
2983 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2984 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2985#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2986 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_TMP0
2989 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2990#endif
2991#ifdef IEMNATIVE_REG_FIXED_TMP1
2992 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2993#endif
2994#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2995 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2996#endif
2997 );
2998 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2999 {
3000 fRegs &= ~RT_BIT_32(idxReg);
3001 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3002 }
3003
3004 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3005#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3006 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3007#endif
3008#ifdef IEMNATIVE_REG_FIXED_TMP0
3009 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3010#endif
3011#ifdef IEMNATIVE_REG_FIXED_TMP1
3012 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3013#endif
3014#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3015 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3016#endif
3017 return pReNative;
3018}
3019
3020
3021/**
3022 * Allocates and initializes the native recompiler state.
3023 *
3024 * This is called the first time an EMT wants to recompile something.
3025 *
3026 * @returns Pointer to the new recompiler state.
3027 * @param pVCpu The cross context virtual CPU structure of the calling
3028 * thread.
3029 * @param pTb The TB that's about to be recompiled.
3030 * @thread EMT(pVCpu)
3031 */
3032static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3033{
3034 VMCPU_ASSERT_EMT(pVCpu);
3035
3036 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3037 AssertReturn(pReNative, NULL);
3038
3039 /*
3040 * Try allocate all the buffers and stuff we need.
3041 */
3042 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3043 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3044 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3045#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3046 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3047#endif
3048 if (RT_LIKELY( pReNative->pInstrBuf
3049 && pReNative->paLabels
3050 && pReNative->paFixups)
3051#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3052 && pReNative->pDbgInfo
3053#endif
3054 )
3055 {
3056 /*
3057 * Set the buffer & array sizes on success.
3058 */
3059 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3060 pReNative->cLabelsAlloc = _8K;
3061 pReNative->cFixupsAlloc = _16K;
3062#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3063 pReNative->cDbgInfoAlloc = _16K;
3064#endif
3065
3066 /* Other constant stuff: */
3067 pReNative->pVCpu = pVCpu;
3068
3069 /*
3070 * Done, just need to save it and reinit it.
3071 */
3072 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3073 return iemNativeReInit(pReNative, pTb);
3074 }
3075
3076 /*
3077 * Failed. Cleanup and return.
3078 */
3079 AssertFailed();
3080 RTMemFree(pReNative->pInstrBuf);
3081 RTMemFree(pReNative->paLabels);
3082 RTMemFree(pReNative->paFixups);
3083#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3084 RTMemFree(pReNative->pDbgInfo);
3085#endif
3086 RTMemFree(pReNative);
3087 return NULL;
3088}
3089
3090
3091/**
3092 * Creates a label.
3093 *
3094 * If the label does not yet have a defined position,
3095 * call iemNativeLabelDefine() later to set it.
3096 *
3097 * @returns Label ID. Throws VBox status code on failure, so no need to check
3098 * the return value.
3099 * @param pReNative The native recompile state.
3100 * @param enmType The label type.
3101 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3102 * label is not yet defined (default).
3103 * @param uData Data associated with the label. Only applicable to
3104 * certain types of labels. Default is zero.
3105 */
3106DECL_HIDDEN_THROW(uint32_t)
3107iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3108 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3109{
3110 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3111
3112 /*
3113 * Locate existing label definition.
3114 *
3115 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3116 * and uData is zero.
3117 */
3118 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3119 uint32_t const cLabels = pReNative->cLabels;
3120 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3121#ifndef VBOX_STRICT
3122 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3123 && offWhere == UINT32_MAX
3124 && uData == 0
3125#endif
3126 )
3127 {
3128#ifndef VBOX_STRICT
3129 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3131 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3132 if (idxLabel < pReNative->cLabels)
3133 return idxLabel;
3134#else
3135 for (uint32_t i = 0; i < cLabels; i++)
3136 if ( paLabels[i].enmType == enmType
3137 && paLabels[i].uData == uData)
3138 {
3139 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3140 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3141 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3142 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3143 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3144 return i;
3145 }
3146 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3147 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3148#endif
3149 }
3150
3151 /*
3152 * Make sure we've got room for another label.
3153 */
3154 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3155 { /* likely */ }
3156 else
3157 {
3158 uint32_t cNew = pReNative->cLabelsAlloc;
3159 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3160 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3161 cNew *= 2;
3162 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3163 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3164 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3165 pReNative->paLabels = paLabels;
3166 pReNative->cLabelsAlloc = cNew;
3167 }
3168
3169 /*
3170 * Define a new label.
3171 */
3172 paLabels[cLabels].off = offWhere;
3173 paLabels[cLabels].enmType = enmType;
3174 paLabels[cLabels].uData = uData;
3175 pReNative->cLabels = cLabels + 1;
3176
3177 Assert((unsigned)enmType < 64);
3178 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3179
3180 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3181 {
3182 Assert(uData == 0);
3183 pReNative->aidxUniqueLabels[enmType] = cLabels;
3184 }
3185
3186 if (offWhere != UINT32_MAX)
3187 {
3188#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3189 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3190 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3191#endif
3192 }
3193 return cLabels;
3194}
3195
3196
3197/**
3198 * Defines the location of an existing label.
3199 *
3200 * @param pReNative The native recompile state.
3201 * @param idxLabel The label to define.
3202 * @param offWhere The position.
3203 */
3204DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3205{
3206 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3207 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3208 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3209 pLabel->off = offWhere;
3210#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3211 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3212 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3213#endif
3214}
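
/*
 * Illustrative sketch (not compiled): the typical forward-label flow using
 * iemNativeLabelCreate() and iemNativeLabelDefine() above.  pReNative and off
 * are assumed to come from the surrounding recompiler context, and the label
 * type used here is a hypothetical placeholder for one of the unique
 * (non-multiple-instance) IEMNATIVELABELTYPE values.
 */
#if 0 /* illustrative sketch only */
    /* Request a label without a position yet (offWhere defaults to UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_SomethingUnique /* hypothetical */);
    /* ... emit code that references the label via iemNativeAddFixup() ... */
    /* Once the target code position is known, pin the label to it. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
#endif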
3215
3216
3217/**
3218 * Looks up a label.
3219 *
3220 * @returns Label ID if found, UINT32_MAX if not.
3221 */
3222static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3223 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3224{
3225 Assert((unsigned)enmType < 64);
3226 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3227 {
3228 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3229 return pReNative->aidxUniqueLabels[enmType];
3230
3231 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3232 uint32_t const cLabels = pReNative->cLabels;
3233 for (uint32_t i = 0; i < cLabels; i++)
3234 if ( paLabels[i].enmType == enmType
3235 && paLabels[i].uData == uData
3236 && ( paLabels[i].off == offWhere
3237 || offWhere == UINT32_MAX
3238 || paLabels[i].off == UINT32_MAX))
3239 return i;
3240 }
3241 return UINT32_MAX;
3242}
3243
3244
3245/**
3246 * Adds a fixup.
3247 *
3248 * @throws VBox status code (int) on failure.
3249 * @param pReNative The native recompile state.
3250 * @param offWhere The instruction offset of the fixup location.
3251 * @param idxLabel The target label ID for the fixup.
3252 * @param enmType The fixup type.
3253 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3254 */
3255DECL_HIDDEN_THROW(void)
3256iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3257 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3258{
3259 Assert(idxLabel <= UINT16_MAX);
3260 Assert((unsigned)enmType <= UINT8_MAX);
3261
3262 /*
3263 * Make sure we've got room.
3264 */
3265 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3266 uint32_t const cFixups = pReNative->cFixups;
3267 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3268 { /* likely */ }
3269 else
3270 {
3271 uint32_t cNew = pReNative->cFixupsAlloc;
3272 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3273 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3274 cNew *= 2;
3275 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3276 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3277 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3278 pReNative->paFixups = paFixups;
3279 pReNative->cFixupsAlloc = cNew;
3280 }
3281
3282 /*
3283 * Add the fixup.
3284 */
3285 paFixups[cFixups].off = offWhere;
3286 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3287 paFixups[cFixups].enmType = enmType;
3288 paFixups[cFixups].offAddend = offAddend;
3289 pReNative->cFixups = cFixups + 1;
3290}
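
/*
 * Illustrative sketch (not compiled): recording a fixup for a forward branch.
 * The emitter call and the IEMNATIVEFIXUPTYPE value below are hypothetical
 * placeholders; the real emitters and fixup types live elsewhere in this file
 * and in the recompiler headers.  The final assembly pass patches the
 * instruction at offFixup once the label position is known.
 */
#if 0 /* illustrative sketch only */
    uint32_t const offFixup = off;                        /* instruction offset to patch later */
    off = iemNativeEmitBranchPlaceholder(pReNative, off); /* hypothetical emitter */
    iemNativeAddFixup(pReNative, offFixup, idxLabel, kIemNativeFixupType_SomeRelative /* hypothetical */);
#endif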
3291
3292
3293/**
3294 * Slow code path for iemNativeInstrBufEnsure.
3295 */
3296DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3297{
3298 /* Double the buffer size till we meet the request. */
3299 uint32_t cNew = pReNative->cInstrBufAlloc;
3300 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3301 do
3302 cNew *= 2;
3303 while (cNew < off + cInstrReq);
3304
3305 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3306#ifdef RT_ARCH_ARM64
3307 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3308#else
3309 uint32_t const cbMaxInstrBuf = _2M;
3310#endif
3311 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3312
3313 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3314 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3315
3316#ifdef VBOX_STRICT
3317 pReNative->offInstrBufChecked = off + cInstrReq;
3318#endif
3319 pReNative->cInstrBufAlloc = cNew;
3320 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3321}
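
/*
 * Worked example for the doubling above (numbers are illustrative only): with
 * cInstrBufAlloc = 16384 entries and off + cInstrReq = 40000, the loop doubles
 * 16384 -> 32768 -> 65536 and reallocates to 65536 * sizeof(IEMNATIVEINSTR)
 * bytes, subject to the 1 MiB (ARM64) / 2 MiB ceiling asserted above.
 */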
3322
3323#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3324
3325/**
3326 * Grows the static debug info array used during recompilation.
3327 *
3328 * @returns Pointer to the new debug info block; throws VBox status code on
3329 * failure, so no need to check the return value.
3330 */
3331DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3332{
3333 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3334 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3335 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3336 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3337 pReNative->pDbgInfo = pDbgInfo;
3338 pReNative->cDbgInfoAlloc = cNew;
3339 return pDbgInfo;
3340}
3341
3342
3343/**
3344 * Adds a new uninitialized debug info entry, returning the pointer to it.
3345 */
3346DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3347{
3348 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3349 { /* likely */ }
3350 else
3351 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3352 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3353}
3354
3355
3356/**
3357 * Debug Info: Adds a native offset record, if necessary.
3358 */
3359static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3360{
3361 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3362
3363 /*
3364 * Search backwards to see if we've got a similar record already.
3365 */
3366 uint32_t idx = pDbgInfo->cEntries;
3367 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3368 while (idx-- > idxStop)
3369 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3370 {
3371 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3372 return;
3373 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3374 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3375 break;
3376 }
3377
3378 /*
3379 * Add it.
3380 */
3381 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3382 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3383 pEntry->NativeOffset.offNative = off;
3384}
3385
3386
3387/**
3388 * Debug Info: Record info about a label.
3389 */
3390static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3391{
3392 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3393 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3394 pEntry->Label.uUnused = 0;
3395 pEntry->Label.enmLabel = (uint8_t)enmType;
3396 pEntry->Label.uData = uData;
3397}
3398
3399
3400/**
3401 * Debug Info: Record info about a threaded call.
3402 */
3403static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3404{
3405 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3406 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3407 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3408 pEntry->ThreadedCall.uUnused = 0;
3409 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3410}
3411
3412
3413/**
3414 * Debug Info: Record info about a new guest instruction.
3415 */
3416static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3417{
3418 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3419 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3420 pEntry->GuestInstruction.uUnused = 0;
3421 pEntry->GuestInstruction.fExec = fExec;
3422}
3423
3424
3425/**
3426 * Debug Info: Record info about guest register shadowing.
3427 */
3428static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3429 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3430{
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3432 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3433 pEntry->GuestRegShadowing.uUnused = 0;
3434 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3435 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3436 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3437}
3438
3439
3440# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3441/**
3442 * Debug Info: Record info about delayed RIP updates.
3443 */
3444static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3445{
3446 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3447 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3448 pEntry->DelayedPcUpdate.offPc = offPc;
3449 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3450}
3451# endif
3452
3453#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3454
3455
3456/*********************************************************************************************************************************
3457* Register Allocator *
3458*********************************************************************************************************************************/
3459
3460/**
3461 * Register parameter indexes (indexed by argument number).
3462 */
3463DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3464{
3465 IEMNATIVE_CALL_ARG0_GREG,
3466 IEMNATIVE_CALL_ARG1_GREG,
3467 IEMNATIVE_CALL_ARG2_GREG,
3468 IEMNATIVE_CALL_ARG3_GREG,
3469#if defined(IEMNATIVE_CALL_ARG4_GREG)
3470 IEMNATIVE_CALL_ARG4_GREG,
3471# if defined(IEMNATIVE_CALL_ARG5_GREG)
3472 IEMNATIVE_CALL_ARG5_GREG,
3473# if defined(IEMNATIVE_CALL_ARG6_GREG)
3474 IEMNATIVE_CALL_ARG6_GREG,
3475# if defined(IEMNATIVE_CALL_ARG7_GREG)
3476 IEMNATIVE_CALL_ARG7_GREG,
3477# endif
3478# endif
3479# endif
3480#endif
3481};
3482
3483/**
3484 * Call register masks indexed by argument count.
3485 */
3486DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3487{
3488 0,
3489 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3490 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3491 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3492 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3493 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3494#if defined(IEMNATIVE_CALL_ARG4_GREG)
3495 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3496 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3497# if defined(IEMNATIVE_CALL_ARG5_GREG)
3498 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3499 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3500# if defined(IEMNATIVE_CALL_ARG6_GREG)
3501 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3502 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3503 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3504# if defined(IEMNATIVE_CALL_ARG7_GREG)
3505 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3506 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3507 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3508# endif
3509# endif
3510# endif
3511#endif
3512};
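
/*
 * Illustrative sketch (not compiled): how the two tables above are meant to be
 * used when setting up a helper call, here with three arguments.  The variable
 * names are placeholders.
 */
#if 0 /* illustrative sketch only */
    uint8_t  const idxArg2Reg = g_aidxIemNativeCallRegs[2]; /* host GREG carrying argument #2 */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];   /* mask of the ARG0..ARG2 registers */
#endif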
3513
3514#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3515/**
3516 * BP offset of the stack argument slots.
3517 *
3518 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3519 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3520 */
3521DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3522{
3523 IEMNATIVE_FP_OFF_STACK_ARG0,
3524# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3525 IEMNATIVE_FP_OFF_STACK_ARG1,
3526# endif
3527# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3528 IEMNATIVE_FP_OFF_STACK_ARG2,
3529# endif
3530# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3531 IEMNATIVE_FP_OFF_STACK_ARG3,
3532# endif
3533};
3534AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3535#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3536
3537/**
3538 * Info about shadowed guest register values.
3539 * @see IEMNATIVEGSTREG
3540 */
3541static struct
3542{
3543 /** Offset in VMCPU. */
3544 uint32_t off;
3545 /** The field size. */
3546 uint8_t cb;
3547 /** Name (for logging). */
3548 const char *pszName;
3549} const g_aGstShadowInfo[] =
3550{
3551#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3552 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3553 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3554 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3555 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3556 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3557 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3558 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3559 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3560 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3561 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3562 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3563 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3564 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3565 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3566 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3567 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3568 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3569 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3570 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3571 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3572 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3573 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3574 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3575 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3576 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3577 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3578 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3579 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3580 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3581 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3582 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3583 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3584 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3585 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3586 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3587 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3588 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3589 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3590 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3591 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3592 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3593 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3594 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3595 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3596 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3597 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3598 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3599 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3600#undef CPUMCTX_OFF_AND_SIZE
3601};
3602AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
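
/*
 * Illustrative sketch (not compiled): the table above describes each shadowed
 * guest register as a byte offset into VMCPU plus a field size, e.g. for
 * reading the guest RIP directly from the context.  pVCpu is assumed to come
 * from the surrounding code.
 */
#if 0 /* illustrative sketch only */
    uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off; /* byte offset of cpum.GstCtx.rip in VMCPU */
    uint64_t const uGstRip = *(uint64_t const *)((uint8_t const *)pVCpu + offVCpu);
    Log12(("guest %s=%#RX64 (%u bytes)\n", g_aGstShadowInfo[kIemNativeGstReg_Pc].pszName, uGstRip,
           g_aGstShadowInfo[kIemNativeGstReg_Pc].cb));
#endif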
3603
3604
3605/** Host CPU general purpose register names. */
3606DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3607{
3608#ifdef RT_ARCH_AMD64
3609 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3610#elif defined(RT_ARCH_ARM64)
3611 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3612 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3613#else
3614# error "port me"
3615#endif
3616};
3617
3618
3619DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3620 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3621{
3622 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3623
3624 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3625 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3626 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3627 return (uint8_t)idxReg;
3628}
3629
3630
3631#if 0 /* unused */
3632/**
3633 * Tries to locate a suitable register in the given register mask.
3634 *
3635 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3636 * failed.
3637 *
3638 * @returns Host register number on success, returns UINT8_MAX on failure.
3639 */
3640static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3641{
3642 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3643 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3644 if (fRegs)
3645 {
3646 /** @todo pick better here: */
3647 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3648
3649 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3650 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3651 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3652 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3653
3654 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3655 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3656 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3657 return idxReg;
3658 }
3659 return UINT8_MAX;
3660}
3661#endif /* unused */
3662
3663
3664/**
3665 * Locate a register, possibly freeing one up.
3666 *
3667 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3668 * failed.
3669 *
3670 * @returns Host register number on success. Returns UINT8_MAX if no registers
3671 * found, the caller is supposed to deal with this and raise an
3672 * allocation-type-specific status code (if desired).
3673 *
3674 * @throws VBox status code if we run into trouble spilling a variable or
3675 * recording debug info. Does NOT throw anything if we're out of
3676 * registers, though.
3677 */
3678static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3679 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3680{
3681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3682 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3683 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3684
3685 /*
3686 * Try a freed register that's shadowing a guest register.
3687 */
3688 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3689 if (fRegs)
3690 {
3691 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3692
3693#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3694 /*
3695 * When we have liveness information, we use it to kick out all shadowed
3696 * guest registers that will not be needed any more in this TB. If we're
3697 * lucky, this may prevent us from ending up here again.
3698 *
3699 * Note! We must consider the previous entry here so we don't free
3700 * anything that the current threaded function requires (current
3701 * entry is produced by the next threaded function).
3702 */
3703 uint32_t const idxCurCall = pReNative->idxCurCall;
3704 if (idxCurCall > 0)
3705 {
3706 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3707
3708# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3709 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3710 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3711 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
3712# else
3713 /* Construct a mask of the registers not in the read or write state.
3714 Note! We could skip writes, if they aren't from us, as this is just
3715 a hack to prevent trashing registers that have just been written
3716 or will be written when we retire the current instruction. */
3717 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3718 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3719 & IEMLIVENESSBIT_MASK;
3720# endif
3721 /* Merge EFLAGS. */
3722 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3723 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3724 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3725 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3726 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
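 /* Net effect of the folding above: the kIemNativeGstReg_EFlags bit survives
 in fToFreeMask only if all seven EFLAGS liveness slots (Other, CF, PF,
 AF, ZF, SF, OF) were freeable; the individual flag bits above it have
 been cleared. */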
3727
3728 /* If it matches any shadowed registers. */
3729 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3730 {
3731 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3732 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3733 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3734
3735 /* See if we've got any unshadowed registers we can return now. */
3736 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3737 if (fUnshadowedRegs)
3738 {
3739 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3740 return (fPreferVolatile
3741 ? ASMBitFirstSetU32(fUnshadowedRegs)
3742 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3743 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3744 - 1;
3745 }
3746 }
3747 }
3748#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3749
3750 unsigned const idxReg = (fPreferVolatile
3751 ? ASMBitFirstSetU32(fRegs)
3752 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3753 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3754 - 1;
3755
3756 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3757 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3758 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3759 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3760
3761 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3762 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3763 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3764 return idxReg;
3765 }
3766
3767 /*
3768 * Try free up a variable that's in a register.
3769 *
3770 * We do two rounds here, first evacuating variables we don't need to be
3771 * saved on the stack, then in the second round move things to the stack.
3772 */
3773 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3774 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3775 {
3776 uint32_t fVars = pReNative->Core.bmVars;
3777 while (fVars)
3778 {
3779 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3780 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3781 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3782 && (RT_BIT_32(idxReg) & fRegMask)
3783 && ( iLoop == 0
3784 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3785 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3786 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3787 {
3788 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3789 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3790 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3791 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3792 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3793 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3794
3795 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3796 {
3797 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3798 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3799 }
3800
3801 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3802 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3803
3804 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3805 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3806 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3807 return idxReg;
3808 }
3809 fVars &= ~RT_BIT_32(idxVar);
3810 }
3811 }
3812
3813 return UINT8_MAX;
3814}
3815
3816
3817/**
3818 * Reassigns a variable to a different register specified by the caller.
3819 *
3820 * @returns The new code buffer position.
3821 * @param pReNative The native recompile state.
3822 * @param off The current code buffer position.
3823 * @param idxVar The variable index.
3824 * @param idxRegOld The old host register number.
3825 * @param idxRegNew The new host register number.
3826 * @param pszCaller The caller for logging.
3827 */
3828static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3829 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3830{
3831 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3832 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3833 RT_NOREF(pszCaller);
3834
3835 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3836
3837 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3838 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3839 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3840 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3841
3842 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3843 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3844 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3845 if (fGstRegShadows)
3846 {
3847 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3848 | RT_BIT_32(idxRegNew);
3849 while (fGstRegShadows)
3850 {
3851 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3852 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3853
3854 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3855 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3856 }
3857 }
3858
3859 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3860 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3861 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3862 return off;
3863}
3864
3865
3866/**
3867 * Moves a variable to a different register or spills it onto the stack.
3868 *
3869 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3870 * kinds can easily be recreated if needed later.
3871 *
3872 * @returns The new code buffer position.
3873 * @param pReNative The native recompile state.
3874 * @param off The current code buffer position.
3875 * @param idxVar The variable index.
3876 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3877 * call-volatile registers.
3878 */
3879static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3880 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3881{
3882 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3883 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3884 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3885 Assert(!pVar->fRegAcquired);
3886
3887 uint8_t const idxRegOld = pVar->idxReg;
3888 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3889 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3890 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3891 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3892 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3893 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3894 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3895 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3896
3897
3898 /** @todo Add statistics on this.*/
3899 /** @todo Implement basic variable liveness analysis (python) so variables
3900 * can be freed immediately once no longer used. Otherwise we risk trashing
3901 * registers and stack space on dead variables.
3902 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3903
3904 /*
3905 * First try move it to a different register, as that's cheaper.
3906 */
3907 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3908 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3909 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3910 if (fRegs)
3911 {
3912 /* Avoid using shadow registers, if possible. */
3913 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3914 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3915 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3916 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3917 }
3918
3919 /*
3920 * Otherwise we must spill the register onto the stack.
3921 */
3922 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3923 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3924 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3925 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3926
3927 pVar->idxReg = UINT8_MAX;
3928 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3929 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3930 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3931 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3932 return off;
3933}
3934
3935
3936/**
3937 * Allocates a temporary host general purpose register.
3938 *
3939 * This may emit code to save register content onto the stack in order to free
3940 * up a register.
3941 *
3942 * @returns The host register number; throws VBox status code on failure,
3943 * so no need to check the return value.
3944 * @param pReNative The native recompile state.
3945 * @param poff Pointer to the variable with the code buffer position.
3946 * This will be update if we need to move a variable from
3947 * register to stack in order to satisfy the request.
3948 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3949 * registers (@c true, default) or the other way around
3950 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3951 */
3952DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3953{
3954 /*
3955 * Try find a completely unused register, preferably a call-volatile one.
3956 */
3957 uint8_t idxReg;
3958 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3959 & ~pReNative->Core.bmHstRegsWithGstShadow
3960 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3961 if (fRegs)
3962 {
3963 if (fPreferVolatile)
3964 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3965 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3966 else
3967 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3968 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3969 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3970 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3971 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3972 }
3973 else
3974 {
3975 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3976 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3977 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3978 }
3979 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3980}
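
/*
 * Illustrative sketch (not compiled): typical temporary register usage with
 * the allocator above.  pReNative and off come from the surrounding recompiler
 * context; the load emitter is just one example, and iemNativeRegFreeTmp() is
 * assumed to be the matching release helper defined elsewhere in this file.
 */
#if 0 /* illustrative sketch only */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xfeedface)); /* scratch value */
    /* ... use idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg); /* assumed release helper */
#endif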
3981
3982
3983/**
3984 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3985 * registers.
3986 *
3987 * @returns The host register number; throws VBox status code on failure,
3988 * so no need to check the return value.
3989 * @param pReNative The native recompile state.
3990 * @param poff Pointer to the variable with the code buffer position.
3991 * This will be updated if we need to move a variable from
3992 * register to stack in order to satisfy the request.
3993 * @param fRegMask Mask of acceptable registers.
3994 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3995 * registers (@c true, default) or the other way around
3996 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3997 */
3998DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3999 bool fPreferVolatile /*= true*/)
4000{
4001 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4002 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4003
4004 /*
4005 * Try find a completely unused register, preferably a call-volatile one.
4006 */
4007 uint8_t idxReg;
4008 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4009 & ~pReNative->Core.bmHstRegsWithGstShadow
4010 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4011 & fRegMask;
4012 if (fRegs)
4013 {
4014 if (fPreferVolatile)
4015 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4016 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4017 else
4018 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4019 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4020 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4021 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4022 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4023 }
4024 else
4025 {
4026 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4027 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4028 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4029 }
4030 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4031}
4032
4033
4034/**
4035 * Allocates a temporary register for loading an immediate value into.
4036 *
4037 * This will emit code to load the immediate, unless there happens to be an
4038 * unused register with the value already loaded.
4039 *
4040 * The caller will not modify the returned register, it must be considered
4041 * read-only. Free using iemNativeRegFreeTmpImm.
4042 *
4043 * @returns The host register number; throws VBox status code on failure, so no
4044 * need to check the return value.
4045 * @param pReNative The native recompile state.
4046 * @param poff Pointer to the variable with the code buffer position.
4047 * @param uImm The immediate value that the register must hold upon
4048 * return.
4049 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4050 * registers (@c true, default) or the other way around
4051 * (@c false).
4052 *
4053 * @note Reusing immediate values has not been implemented yet.
4054 */
4055DECL_HIDDEN_THROW(uint8_t)
4056iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4057{
4058 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4059 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4060 return idxReg;
4061}
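
/*
 * Illustrative sketch (not compiled): allocating a read-only immediate
 * register and releasing it with iemNativeRegFreeTmpImm() as the doc comment
 * above prescribes.  pReNative and off come from the surrounding context.
 */
#if 0 /* illustrative sketch only */
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x80000000));
    /* ... use idxImmReg as a read-only source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
#endif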
4062
4063#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4064
4065# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4066/**
4067 * Helper for iemNativeLivenessGetStateByGstReg.
4068 *
4069 * @returns IEMLIVENESS_STATE_XXX
4070 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4071 * ORed together.
4072 */
4073DECL_FORCE_INLINE(uint32_t)
4074iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4075{
4076 /* INPUT trumps anything else. */
4077 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4078 return IEMLIVENESS_STATE_INPUT;
4079
4080 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4081 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4082 {
4083 /* If not all sub-fields are clobbered they must be considered INPUT. */
4084 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4085 return IEMLIVENESS_STATE_INPUT;
4086 return IEMLIVENESS_STATE_CLOBBERED;
4087 }
4088
4089 /* XCPT_OR_CALL trumps UNUSED. */
4090 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4091 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4092
4093 return IEMLIVENESS_STATE_UNUSED;
4094}
4095# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4096
4097
4098DECL_FORCE_INLINE(uint32_t)
4099iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4100{
4101# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4102 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4103 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4104# else
4105 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4106 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4107 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4108 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4109# endif
4110}
4111
4112
4113DECL_FORCE_INLINE(uint32_t)
4114iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4115{
4116 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4117 if (enmGstReg == kIemNativeGstReg_EFlags)
4118 {
4119 /* Merge the eflags states to one. */
4120# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4121 uRet = RT_BIT_32(uRet);
4122 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4123 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4124 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4125 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4126 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4127 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4128 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4129# else
4130 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4131 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4132 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4133 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4134 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4135 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4136 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4137# endif
4138 }
4139 return uRet;
4140}
4141
4142
4143# ifdef VBOX_STRICT
4144/** For assertions only; the caller must check that idxCurCall isn't zero. */
4145DECL_FORCE_INLINE(uint32_t)
4146iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4147{
4148 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4149}
4150# endif /* VBOX_STRICT */
4151
4152#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4153
4154/**
4155 * Marks host register @a idxHstReg as containing a shadow copy of guest
4156 * register @a enmGstReg.
4157 *
4158 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
4159 * host register before calling.
4160 */
4161DECL_FORCE_INLINE(void)
4162iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4163{
4164 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4165 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4166 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4167
4168 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4169 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4170 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4171 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4172#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4173 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4174 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4175#else
4176 RT_NOREF(off);
4177#endif
4178}
4179
4180
4181/**
4182 * Clear any guest register shadow claims from @a idxHstReg.
4183 *
4184 * The register does not need to be shadowing any guest registers.
4185 */
4186DECL_FORCE_INLINE(void)
4187iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4188{
4189 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4190 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4191 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4192 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4193 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4194
4195#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4196 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4197 if (fGstRegs)
4198 {
4199 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4200 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4201 while (fGstRegs)
4202 {
4203 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4204 fGstRegs &= ~RT_BIT_64(iGstReg);
4205 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4206 }
4207 }
4208#else
4209 RT_NOREF(off);
4210#endif
4211
4212 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4213 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4214 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4215}
4216
4217
4218/**
4219 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4220 * and global overview flags.
4221 */
4222DECL_FORCE_INLINE(void)
4223iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4224{
4225 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4226 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4227 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4228 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4229 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4230 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4231 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4232
4233#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4234 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4235 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4236#else
4237 RT_NOREF(off);
4238#endif
4239
4240 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4241 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4242 if (!fGstRegShadowsNew)
4243 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4244 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4245}
4246
4247
4248#if 0 /* unused */
4249/**
4250 * Clear any guest register shadow claim for @a enmGstReg.
4251 */
4252DECL_FORCE_INLINE(void)
4253iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4254{
4255 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4256 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4257 {
4258 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4259 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4260 }
4261}
4262#endif
4263
4264
4265/**
4266 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4267 * as the new shadow of it.
4268 *
4269 * Unlike the other guest reg shadow helpers, this does the logging for you.
4270 * However, it is the liveness state is not asserted here, the caller must do
4271 * that.
4272 */
4273DECL_FORCE_INLINE(void)
4274iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4275 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4276{
4277 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4278 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4279 {
4280 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4281 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4282 if (idxHstRegOld == idxHstRegNew)
4283 return;
4284 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4285 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4286 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4287 }
4288 else
4289 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4290 g_aGstShadowInfo[enmGstReg].pszName));
4291 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4292}
4293
4294
4295/**
4296 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4297 * to @a idxRegTo.
4298 */
4299DECL_FORCE_INLINE(void)
4300iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4301 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4302{
4303 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4304 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4305 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4306 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4307 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4308 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4309 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4310 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4311 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4312
4313 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4314 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4315 if (!fGstRegShadowsFrom)
4316 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4317 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4318 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4319 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4320#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4321 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4322 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4323#else
4324 RT_NOREF(off);
4325#endif
4326}
4327
4328
4329/**
4330 * Allocates a temporary host general purpose register for keeping a guest
4331 * register value.
4332 *
4333 * We may already have a host register holding the guest register value; if
4334 * that is not the case, code will be emitted to do the loading. Code may also
4335 * be emitted if we have to free up a register to satisfy the request.
4336 *
4337 * @returns The host register number; throws VBox status code on failure, so no
4338 * need to check the return value.
4339 * @param pReNative The native recompile state.
4340 * @param poff Pointer to the variable with the code buffer
4341 * position. This will be updated if we need to move a
4342 * variable from register to stack in order to satisfy
4343 * the request.
4344 * @param enmGstReg The guest register that is to be updated.
4345 * @param enmIntendedUse How the caller will be using the host register.
4346 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4347 * register is okay (default). The ASSUMPTION here is
4348 * that the caller has already flushed all volatile
4349 * registers, so this is only applied if we allocate a
4350 * new register.
4351 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4352 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4353 */
4354DECL_HIDDEN_THROW(uint8_t)
4355iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4356 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4357 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4358{
4359 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4360#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4361 AssertMsg( fSkipLivenessAssert
4362 || pReNative->idxCurCall == 0
4363 || enmGstReg == kIemNativeGstReg_Pc
4364 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4365 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4366 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4367 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4368 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4369 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4370#endif
4371 RT_NOREF(fSkipLivenessAssert);
4372#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4373 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4374#endif
4375 uint32_t const fRegMask = !fNoVolatileRegs
4376 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4377 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4378
4379 /*
4380 * First check if the guest register value is already in a host register.
4381 */
4382 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4383 {
4384 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4385 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4386 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4387 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4388
4389 /* It's not supposed to be allocated... */
4390 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4391 {
4392 /*
4393 * If the register will trash the guest shadow copy, try find a
4394 * completely unused register we can use instead. If that fails,
4395 * we need to disassociate the host reg from the guest reg.
4396 */
4397 /** @todo would be nice to know if preserving the register is in any way helpful. */
4398 /* If the purpose is calculations, try duplicate the register value as
4399 we'll be clobbering the shadow. */
4400 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4401 && ( ~pReNative->Core.bmHstRegs
4402 & ~pReNative->Core.bmHstRegsWithGstShadow
4403 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4404 {
4405 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4406
4407 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4408
4409 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4410 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4411 g_apszIemNativeHstRegNames[idxRegNew]));
4412 idxReg = idxRegNew;
4413 }
4414 /* If the current register matches the restrictions, go ahead and allocate
4415 it for the caller. */
4416 else if (fRegMask & RT_BIT_32(idxReg))
4417 {
4418 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4419 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4420 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4421 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4422 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4423 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4424 else
4425 {
4426 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4427 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4428 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4429 }
4430 }
4431 /* Otherwise, allocate a register that satisfies the caller and transfer
4432 the shadowing if compatible with the intended use. (This basically
4433               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4434 else
4435 {
4436 Assert(fNoVolatileRegs);
4437 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4438 !fNoVolatileRegs
4439 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4440 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4441 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4442 {
4443 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4444                Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4445 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4446 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4447 }
4448 else
4449 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4450 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4451 g_apszIemNativeHstRegNames[idxRegNew]));
4452 idxReg = idxRegNew;
4453 }
4454 }
4455 else
4456 {
4457 /*
4458 * Oops. Shadowed guest register already allocated!
4459 *
4460 * Allocate a new register, copy the value and, if updating, the
4461 * guest shadow copy assignment to the new register.
4462 */
4463 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4464 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4465 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4466 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4467
4468 /** @todo share register for readonly access. */
4469 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4470 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4471
4472 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4473 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4474
4475 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4476 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4477 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4478 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4479 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4480 else
4481 {
4482 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4483 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4484 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4485 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4486 }
4487 idxReg = idxRegNew;
4488 }
4489 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4490
4491#ifdef VBOX_STRICT
4492 /* Strict builds: Check that the value is correct. */
4493 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4494#endif
4495
4496 return idxReg;
4497 }
4498
4499 /*
4500     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4501 */
4502 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4503
4504 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4505 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4506
4507 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4508 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4509 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4510 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4511
4512 return idxRegNew;
4513}
4514
4515
4516/**
4517 * Allocates a temporary host general purpose register that already holds the
4518 * given guest register value.
4519 *
4520 * The use case for this function is places where the shadowing state cannot be
4521 * modified due to branching and such. This will fail if we don't have a
4522 * current shadow copy handy or if it's incompatible. The only code that will
4523 * be emitted here is value checking code in strict builds.
4524 *
4525 * The intended use can only be readonly!
4526 *
4527 * @returns The host register number, UINT8_MAX if not present.
4528 * @param pReNative The native recompile state.
4529 * @param poff Pointer to the instruction buffer offset.
4530 * Will be updated in strict builds if a register is
4531 * found.
4532 * @param   enmGstReg   The guest register to look up.
4533 * @note In strict builds, this may throw instruction buffer growth failures.
4534 * Non-strict builds will not throw anything.
4535 * @sa iemNativeRegAllocTmpForGuestReg
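 *
 * Illustrative caller pattern (a sketch; the choice of EFLAGS is an
 * assumption), showing the mandatory UINT8_MAX check:
 * @code
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          //... read-only use of the shadow copy ...
 *          iemNativeRegFreeTmp(pReNative, idxReg);
 *      }
 * @endcode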
4536 */
4537DECL_HIDDEN_THROW(uint8_t)
4538iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4539{
4540 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4541#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4542 AssertMsg( pReNative->idxCurCall == 0
4543 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4544 || enmGstReg == kIemNativeGstReg_Pc,
4545 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4546#endif
4547
4548 /*
4549 * First check if the guest register value is already in a host register.
4550 */
4551 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4552 {
4553 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4554 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4555 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4556 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4557
4558 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4559 {
4560 /*
4561 * We only do readonly use here, so easy compared to the other
4562 * variant of this code.
4563 */
4564 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4565 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4566 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4567 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4568 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4569
4570#ifdef VBOX_STRICT
4571 /* Strict builds: Check that the value is correct. */
4572 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4573#else
4574 RT_NOREF(poff);
4575#endif
4576 return idxReg;
4577 }
4578 }
4579
4580 return UINT8_MAX;
4581}
4582
4583
4584/**
4585 * Allocates argument registers for a function call.
4586 *
4587 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4588 * need to check the return value.
4589 * @param pReNative The native recompile state.
4590 * @param off The current code buffer offset.
4591 * @param cArgs The number of arguments the function call takes.
4592 */
4593DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4594{
4595 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4596 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4597 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4598 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4599
4600 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4601 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4602 else if (cArgs == 0)
4603        return off;
4604
4605 /*
4606     * Are we lucky and all registers are free and not shadowing anything?
4607 */
4608 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4609 for (uint32_t i = 0; i < cArgs; i++)
4610 {
4611 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4612 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4613 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4614 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4615 }
4616 /*
4617 * Okay, not lucky so we have to free up the registers.
4618 */
4619 else
4620 for (uint32_t i = 0; i < cArgs; i++)
4621 {
4622 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4623 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4624 {
4625 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4626 {
4627 case kIemNativeWhat_Var:
4628 {
4629 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4631 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4632 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4633 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4634
4635 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4636 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4637 else
4638 {
4639 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4640 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4641 }
4642 break;
4643 }
4644
4645 case kIemNativeWhat_Tmp:
4646 case kIemNativeWhat_Arg:
4647 case kIemNativeWhat_rc:
4648 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4649 default:
4650 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4651 }
4652
4653 }
4654 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4655 {
4656 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4657 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4658 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4659 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4660 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4661 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4662 }
4663 else
4664 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4665 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4666 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4667 }
4668 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4669    return off;
4670}
4671
4672
4673DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4674
4675
4676#if 0
4677/**
4678 * Frees a register assignment of any type.
4679 *
4680 * @param pReNative The native recompile state.
4681 * @param idxHstReg The register to free.
4682 *
4683 * @note Does not update variables.
4684 */
4685DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4686{
4687 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4688 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4689 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4690 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4691 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4692 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4693 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4694 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4695 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4696 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4697 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4698 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4699 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4700 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4701
4702 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4703 /* no flushing, right:
4704 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4705 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4706 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4707 */
4708}
4709#endif
4710
4711
4712/**
4713 * Frees a temporary register.
4714 *
4715 * Any shadow copies of guest registers assigned to the host register will not
4716 * be flushed by this operation.
4717 */
4718DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4719{
4720 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4721 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4722 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4723 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4724 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4725}
4726
4727
4728/**
4729 * Frees a temporary immediate register.
4730 *
4731 * It is assumed that the caller has not modified the register, so it still holds
4732 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4733 */
4734DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4735{
4736 iemNativeRegFreeTmp(pReNative, idxHstReg);
4737}
4738
4739
4740/**
4741 * Frees a register assigned to a variable.
4742 *
4743 * The register will be disassociated from the variable.
4744 */
4745DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4746{
4747 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4748 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4749 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4751    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4752
4753 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4754 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4755 if (!fFlushShadows)
4756 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4757 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4758 else
4759 {
4760 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4761 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4762 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4763 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4764 uint64_t fGstRegShadows = fGstRegShadowsOld;
4765 while (fGstRegShadows)
4766 {
4767 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4768 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4769
4770 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4771 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4772 }
4773 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4774 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4775 }
4776}
4777
4778
4779/**
4780 * Called right before emitting a call instruction to move anything important
4781 * out of call-volatile registers, free and flush the call-volatile registers,
4782 * optionally freeing argument variables.
4783 *
4784 * @returns New code buffer offset; throws VBox status code on failure.
4785 * @param pReNative The native recompile state.
4786 * @param off The code buffer offset.
4787 * @param cArgs The number of arguments the function call takes.
4788 *                      It is presumed that the host register part of these has
4789 * been allocated as such already and won't need moving,
4790 * just freeing.
4791 * @param fKeepVars Mask of variables that should keep their register
4792 * assignments. Caller must take care to handle these.
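 *
 * Typical call sequence (a sketch of how the call emitters further down use
 * this; pfnWorker stands in for the actual helper):
 * @code
 *      off = iemNativeRegFlushPendingWrites(pReNative, off);
 *      iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
 *      //... load the call argument registers ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnWorker);
 *      off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 * @endcode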
4793 */
4794DECL_HIDDEN_THROW(uint32_t)
4795iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4796{
4797 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4798
4799 /* fKeepVars will reduce this mask. */
4800 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4801
4802 /*
4803 * Move anything important out of volatile registers.
4804 */
4805 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4806 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4807 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4808#ifdef IEMNATIVE_REG_FIXED_TMP0
4809 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4810#endif
4811#ifdef IEMNATIVE_REG_FIXED_TMP1
4812 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4813#endif
4814#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4815 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4816#endif
4817 & ~g_afIemNativeCallRegs[cArgs];
4818
4819 fRegsToMove &= pReNative->Core.bmHstRegs;
4820 if (!fRegsToMove)
4821 { /* likely */ }
4822 else
4823 {
4824 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4825 while (fRegsToMove != 0)
4826 {
4827 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4828 fRegsToMove &= ~RT_BIT_32(idxReg);
4829
4830 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4831 {
4832 case kIemNativeWhat_Var:
4833 {
4834 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4835 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4836 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4837 Assert(pVar->idxReg == idxReg);
4838 if (!(RT_BIT_32(idxVar) & fKeepVars))
4839 {
4840 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4841 idxVar, pVar->enmKind, pVar->idxReg));
4842 if (pVar->enmKind != kIemNativeVarKind_Stack)
4843 pVar->idxReg = UINT8_MAX;
4844 else
4845 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4846 }
4847 else
4848 fRegsToFree &= ~RT_BIT_32(idxReg);
4849 continue;
4850 }
4851
4852 case kIemNativeWhat_Arg:
4853 AssertMsgFailed(("What?!?: %u\n", idxReg));
4854 continue;
4855
4856 case kIemNativeWhat_rc:
4857 case kIemNativeWhat_Tmp:
4858 AssertMsgFailed(("Missing free: %u\n", idxReg));
4859 continue;
4860
4861 case kIemNativeWhat_FixedTmp:
4862 case kIemNativeWhat_pVCpuFixed:
4863 case kIemNativeWhat_pCtxFixed:
4864 case kIemNativeWhat_PcShadow:
4865 case kIemNativeWhat_FixedReserved:
4866 case kIemNativeWhat_Invalid:
4867 case kIemNativeWhat_End:
4868 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4869 }
4870 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4871 }
4872 }
4873
4874 /*
4875 * Do the actual freeing.
4876 */
4877 if (pReNative->Core.bmHstRegs & fRegsToFree)
4878 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4879 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4880 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4881
4882 /* If there are guest register shadows in any call-volatile register, we
4883       have to clear the corresponding guest register masks for each register. */
4884 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4885 if (fHstRegsWithGstShadow)
4886 {
4887 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4888 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4889 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4890 do
4891 {
4892 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4893 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4894
4895 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4896 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4897 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4898 } while (fHstRegsWithGstShadow != 0);
4899 }
4900
4901 return off;
4902}
4903
4904
4905/**
4906 * Flushes a set of guest register shadow copies.
4907 *
4908 * This is usually done after calling a threaded function or a C-implementation
4909 * of an instruction.
4910 *
4911 * @param pReNative The native recompile state.
4912 * @param fGstRegs Set of guest registers to flush.
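 *
 * Example (the mask shown is an assumption; callers typically build it via
 * iemNativeCImplFlagsToGuestShadowFlushMask or simply pass UINT64_MAX):
 * @code
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
 * @endcode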
4913 */
4914DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4915{
4916 /*
4917 * Reduce the mask by what's currently shadowed
4918 */
4919 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4920 fGstRegs &= bmGstRegShadowsOld;
4921 if (fGstRegs)
4922 {
4923 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4924 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4925 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4926 if (bmGstRegShadowsNew)
4927 {
4928 /*
4929 * Partial.
4930 */
4931 do
4932 {
4933 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4934 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4935 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4936 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4937 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4938
4939 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4940 fGstRegs &= ~fInThisHstReg;
4941 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4942 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4943 if (!fGstRegShadowsNew)
4944 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4945 } while (fGstRegs != 0);
4946 }
4947 else
4948 {
4949 /*
4950 * Clear all.
4951 */
4952 do
4953 {
4954 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4955 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4956 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4957 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4958 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4959
4960 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4961 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4962 } while (fGstRegs != 0);
4963 pReNative->Core.bmHstRegsWithGstShadow = 0;
4964 }
4965 }
4966}
4967
4968
4969/**
4970 * Flushes guest register shadow copies held by a set of host registers.
4971 *
4972 * This is used with the TLB lookup code for ensuring that we don't carry on
4973 * with any guest shadows in volatile registers, as these will get corrupted by
4974 * a TLB miss.
4975 *
4976 * @param pReNative The native recompile state.
4977 * @param fHstRegs Set of host registers to flush guest shadows for.
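 *
 * Example (a sketch; the volatile register mask is the typical argument as
 * per the description above):
 * @code
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 * @endcode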
4978 */
4979DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4980{
4981 /*
4982 * Reduce the mask by what's currently shadowed.
4983 */
4984 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4985 fHstRegs &= bmHstRegsWithGstShadowOld;
4986 if (fHstRegs)
4987 {
4988 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4989 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4990 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4991 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4992 if (bmHstRegsWithGstShadowNew)
4993 {
4994 /*
4995 * Partial (likely).
4996 */
4997 uint64_t fGstShadows = 0;
4998 do
4999 {
5000 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5001 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5002 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5003 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5004
5005 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5006 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5007 fHstRegs &= ~RT_BIT_32(idxHstReg);
5008 } while (fHstRegs != 0);
5009 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5010 }
5011 else
5012 {
5013 /*
5014 * Clear all.
5015 */
5016 do
5017 {
5018 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5019 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5020 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5021 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5022
5023 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5024 fHstRegs &= ~RT_BIT_32(idxHstReg);
5025 } while (fHstRegs != 0);
5026 pReNative->Core.bmGstRegShadows = 0;
5027 }
5028 }
5029}
5030
5031
5032/**
5033 * Restores guest shadow copies in volatile registers.
5034 *
5035 * This is used after calling a helper function (think TLB miss) to restore the
5036 * register state of volatile registers.
5037 *
5038 * @param pReNative The native recompile state.
5039 * @param off The code buffer offset.
5040 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5041 * be active (allocated) w/o asserting. Hack.
5042 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5043 * iemNativeVarRestoreVolatileRegsPostHlpCall()
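 *
 * Call sketch (the zero mask is an assumption; see the referenced helpers for
 * the full save/restore sequence around a helper call):
 * @code
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0); // no active shadows allowed
 * @endcode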
5044 */
5045DECL_HIDDEN_THROW(uint32_t)
5046iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5047{
5048 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5049 if (fHstRegs)
5050 {
5051 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5052 do
5053 {
5054 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5055
5056            /* It's not fatal if a register is active holding a variable that is
5057               shadowing a guest register, ASSUMING all pending guest register
5058               writes were flushed prior to the helper call. However, we'll be
5059               emitting duplicate restores, so it wastes code space. */
5060 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5061 RT_NOREF(fHstRegsActiveShadows);
5062
5063 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5064 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5065 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5066 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5067
5068 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5069 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5070
5071 fHstRegs &= ~RT_BIT_32(idxHstReg);
5072 } while (fHstRegs != 0);
5073 }
5074 return off;
5075}
5076
5077
5078#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5079# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5080static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5081{
5082 /* Compare the shadow with the context value, they should match. */
5083 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5084 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5085 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5086 return off;
5087}
5088# endif
5089
5090/**
5091 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5092 */
5093static uint32_t
5094iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5095{
5096 if (pReNative->Core.offPc)
5097 {
5098# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5099 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5100 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5101# endif
5102
5103# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5104 /* Allocate a temporary PC register. */
5105 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5106
5107 /* Perform the addition and store the result. */
5108 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5109 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5110
5111 /* Free but don't flush the PC register. */
5112 iemNativeRegFreeTmp(pReNative, idxPcReg);
5113# else
5114 /* Compare the shadow with the context value, they should match. */
5115 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5116 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5117# endif
5118
5119 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5120 pReNative->Core.offPc = 0;
5121 pReNative->Core.cInstrPcUpdateSkipped = 0;
5122 }
5123# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5124 else
5125 {
5126 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5127 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5128 }
5129# endif
5130
5131 return off;
5132}
5133#endif
5134
5135
5136/**
5137 * Flushes delayed write of a specific guest register.
5138 *
5139 * This must be called prior to calling CImpl functions and any helpers that use
5140 * the guest state (like raising exceptions) and such.
5141 *
5142 * This optimization has not yet been implemented. The first target would be
5143 * RIP updates, since these are the most common ones.
5144 */
5145DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5146 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
5147{
5148#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5149 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
5150#endif
5151 RT_NOREF(pReNative, enmClass, idxReg);
5152 return off;
5153}
5154
5155
5156/**
5157 * Flushes any delayed guest register writes.
5158 *
5159 * This must be called prior to calling CImpl functions and any helpers that use
5160 * the guest state (like raising exceptions) and such.
5161 *
5162 * Currently only delayed RIP updates are handled (when
5163 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is defined), as these are the most common ones.
5164 */
5165DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/)
5166{
5167#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5168    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5169 off = iemNativeEmitPcWriteback(pReNative, off);
5170#else
5171 RT_NOREF(pReNative, fGstShwExcept);
5172#endif
5173
5174 return off;
5175}
5176
5177
5178#ifdef VBOX_STRICT
5179/**
5180 * Does internal register allocator sanity checks.
5181 */
5182static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5183{
5184 /*
5185 * Iterate host registers building a guest shadowing set.
5186 */
5187 uint64_t bmGstRegShadows = 0;
5188 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5189 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5190 while (bmHstRegsWithGstShadow)
5191 {
5192 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5193 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5194 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5195
5196 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5197 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5198 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5199 bmGstRegShadows |= fThisGstRegShadows;
5200 while (fThisGstRegShadows)
5201 {
5202 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5203 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5204 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5205 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5206 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5207 }
5208 }
5209 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5210 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5211 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5212
5213 /*
5214 * Now the other way around, checking the guest to host index array.
5215 */
5216 bmHstRegsWithGstShadow = 0;
5217 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5218 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5219 while (bmGstRegShadows)
5220 {
5221 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5222 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5223 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5224
5225 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5226 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5227 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5228 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5229 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5230 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5231 }
5232 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5233 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5234 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5235}
5236#endif
5237
5238
5239/*********************************************************************************************************************************
5240* Code Emitters (larger snippets) *
5241*********************************************************************************************************************************/
5242
5243/**
5244 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5245 * extending to 64-bit width.
5246 *
5247 * @returns New code buffer offset on success; throws VBox status code on failure.
5248 * @param   pReNative   The native recompile state.
5249 * @param off The current code buffer position.
5250 * @param idxHstReg The host register to load the guest register value into.
5251 * @param enmGstReg The guest register to load.
5252 *
5253 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5254 * that is something the caller needs to do if applicable.
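 *
 * Pairing sketch with the explicit shadow marking mentioned in the note above
 * (mirrors the tail of iemNativeRegAllocTmpForGuestReg):
 * @code
 *      off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
 *      iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, off);
 * @endcode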
5255 */
5256DECL_HIDDEN_THROW(uint32_t)
5257iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5258{
5259 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
5260 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5261
5262 switch (g_aGstShadowInfo[enmGstReg].cb)
5263 {
5264 case sizeof(uint64_t):
5265 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5266 case sizeof(uint32_t):
5267 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5268 case sizeof(uint16_t):
5269 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5270#if 0 /* not present in the table. */
5271 case sizeof(uint8_t):
5272 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5273#endif
5274 default:
5275 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5276 }
5277}
5278
5279
5280#ifdef VBOX_STRICT
5281/**
5282 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5283 *
5284 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5285 * Trashes EFLAGS on AMD64.
5286 */
5287static uint32_t
5288iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5289{
5290# ifdef RT_ARCH_AMD64
5291 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5292
5293 /* rol reg64, 32 */
5294 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5295 pbCodeBuf[off++] = 0xc1;
5296 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5297 pbCodeBuf[off++] = 32;
5298
5299 /* test reg32, ffffffffh */
5300 if (idxReg >= 8)
5301 pbCodeBuf[off++] = X86_OP_REX_B;
5302 pbCodeBuf[off++] = 0xf7;
5303 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5304 pbCodeBuf[off++] = 0xff;
5305 pbCodeBuf[off++] = 0xff;
5306 pbCodeBuf[off++] = 0xff;
5307 pbCodeBuf[off++] = 0xff;
5308
5309 /* je/jz +1 */
5310 pbCodeBuf[off++] = 0x74;
5311 pbCodeBuf[off++] = 0x01;
5312
5313 /* int3 */
5314 pbCodeBuf[off++] = 0xcc;
5315
5316 /* rol reg64, 32 */
5317 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5318 pbCodeBuf[off++] = 0xc1;
5319 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5320 pbCodeBuf[off++] = 32;
5321
5322# elif defined(RT_ARCH_ARM64)
5323 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5324 /* lsr tmp0, reg64, #32 */
5325 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5326 /* cbz tmp0, +1 */
5327 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5328 /* brk #0x1100 */
5329 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5330
5331# else
5332# error "Port me!"
5333# endif
5334 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5335 return off;
5336}
5337#endif /* VBOX_STRICT */
5338
5339
5340#ifdef VBOX_STRICT
5341/**
5342 * Emitting code that checks that the content of register @a idxReg is the same
5343 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5344 * instruction if that's not the case.
5345 *
5346 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5347 * Trashes EFLAGS on AMD64.
5348 */
5349static uint32_t
5350iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5351{
5352# ifdef RT_ARCH_AMD64
5353 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5354
5355 /* cmp reg, [mem] */
5356 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5357 {
5358 if (idxReg >= 8)
5359 pbCodeBuf[off++] = X86_OP_REX_R;
5360 pbCodeBuf[off++] = 0x38;
5361 }
5362 else
5363 {
5364 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5365 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5366 else
5367 {
5368 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5369 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5370 else
5371 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5372 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5373 if (idxReg >= 8)
5374 pbCodeBuf[off++] = X86_OP_REX_R;
5375 }
5376 pbCodeBuf[off++] = 0x39;
5377 }
5378 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5379
5380 /* je/jz +1 */
5381 pbCodeBuf[off++] = 0x74;
5382 pbCodeBuf[off++] = 0x01;
5383
5384 /* int3 */
5385 pbCodeBuf[off++] = 0xcc;
5386
5387 /* For values smaller than the register size, we must check that the rest
5388 of the register is all zeros. */
5389 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5390 {
5391 /* test reg64, imm32 */
5392 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5393 pbCodeBuf[off++] = 0xf7;
5394 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5395 pbCodeBuf[off++] = 0;
5396 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5397 pbCodeBuf[off++] = 0xff;
5398 pbCodeBuf[off++] = 0xff;
5399
5400 /* je/jz +1 */
5401 pbCodeBuf[off++] = 0x74;
5402 pbCodeBuf[off++] = 0x01;
5403
5404 /* int3 */
5405 pbCodeBuf[off++] = 0xcc;
5406 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5407 }
5408 else
5409 {
5410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5411 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5412 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5413 }
5414
5415# elif defined(RT_ARCH_ARM64)
5416 /* mov TMP0, [gstreg] */
5417 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5418
5419 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5420 /* sub tmp0, tmp0, idxReg */
5421 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5422 /* cbz tmp0, +1 */
5423 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5424 /* brk #0x1000+enmGstReg */
5425 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5426 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5427
5428# else
5429# error "Port me!"
5430# endif
5431 return off;
5432}
5433#endif /* VBOX_STRICT */
5434
5435
5436#ifdef VBOX_STRICT
5437/**
5438 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
5439 * important bits.
5440 *
5441 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5442 * Trashes EFLAGS on AMD64.
5443 */
5444static uint32_t
5445iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
5446{
5447 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5448 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
5449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
5450 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
5451
5452# ifdef RT_ARCH_AMD64
5453 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5454
5455 /* je/jz +1 */
5456 pbCodeBuf[off++] = 0x74;
5457 pbCodeBuf[off++] = 0x01;
5458
5459 /* int3 */
5460 pbCodeBuf[off++] = 0xcc;
5461
5462# elif defined(RT_ARCH_ARM64)
5463 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5464
5465 /* b.eq +1 */
5466 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
5467 /* brk #0x2000 */
5468 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
5469
5470# else
5471# error "Port me!"
5472# endif
5473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5474
5475 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5476 return off;
5477}
5478#endif /* VBOX_STRICT */
5479
5480
5481/**
5482 * Emits code for checking the return code of a call and rcPassUp, returning
5483 * from the code if either is non-zero.
5484 */
5485DECL_HIDDEN_THROW(uint32_t)
5486iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
5487{
5488#ifdef RT_ARCH_AMD64
5489 /*
5490 * AMD64: eax = call status code.
5491 */
5492
5493 /* edx = rcPassUp */
5494 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5495# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5496 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
5497# endif
5498
5499 /* edx = eax | rcPassUp */
5500 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5501 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
5502 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
5503 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5504
5505 /* Jump to non-zero status return path. */
5506 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
5507
5508 /* done. */
5509
5510#elif defined(RT_ARCH_ARM64)
5511 /*
5512 * ARM64: w0 = call status code.
5513 */
5514# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5515 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
5516# endif
5517 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
5518
5519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5520
5521 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
5522
5523 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
5524 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
5525 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
5526
5527#else
5528# error "port me"
5529#endif
5530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5531 RT_NOREF_PV(idxInstr);
5532 return off;
5533}
5534
5535
5536/**
5537 * Emits code to check if the content of @a idxAddrReg is a canonical address,
5538 * raising a \#GP(0) if it isn't.
5539 *
5540 * @returns New code buffer offset; throws VBox status code on failure.
5541 * @param pReNative The native recompile state.
5542 * @param off The code buffer offset.
5543 * @param idxAddrReg The host register with the address to check.
5544 * @param idxInstr The current instruction.
5545 */
5546DECL_HIDDEN_THROW(uint32_t)
5547iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
5548{
5549 /*
5550 * Make sure we don't have any outstanding guest register writes as we may
5551     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5552 */
5553 off = iemNativeRegFlushPendingWrites(pReNative, off);
5554
5555#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5556 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5557#else
5558 RT_NOREF(idxInstr);
5559#endif
5560
5561#ifdef RT_ARCH_AMD64
5562 /*
5563 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
5564 * return raisexcpt();
5565     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
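     *
     * Why this works: a canonical 48-bit address has bits 63:47 all equal, so the
     * high dword (a_u64Addr >> 32) lies in [0x00000000, 0x00007fff] or in
     * [0xffff8000, 0xffffffff]. Adding 0x8000 with 32-bit wrap-around maps both
     * ranges below 0x10000, so the final 16-bit shift yields zero exactly for
     * canonical addresses.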
5566 */
5567 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5568
5569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
5570 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
5571 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
5572 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
5573 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5574
5575 iemNativeRegFreeTmp(pReNative, iTmpReg);
5576
5577#elif defined(RT_ARCH_ARM64)
5578 /*
5579 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
5580 * return raisexcpt();
5581 * ----
5582 * mov x1, 0x800000000000
5583 * add x1, x0, x1
5584 * cmp xzr, x1, lsr 48
5585 * b.ne .Lraisexcpt
5586 */
5587 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5588
5589 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
5590 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
5591 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
5592 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5593
5594 iemNativeRegFreeTmp(pReNative, iTmpReg);
5595
5596#else
5597# error "Port me"
5598#endif
5599 return off;
5600}
5601
5602
5603/**
5604 * Emits code to check that the content of @a idxAddrReg is within the limit
5605 * of CS, raising a \#GP(0) if it isn't.
5606 *
5607 * @returns New code buffer offset; throws VBox status code on error.
5608 * @param pReNative The native recompile state.
5609 * @param off The code buffer offset.
5610 * @param idxAddrReg The host register (32-bit) with the address to
5611 * check.
5612 * @param idxInstr The current instruction.
5613 */
5614DECL_HIDDEN_THROW(uint32_t)
5615iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5616 uint8_t idxAddrReg, uint8_t idxInstr)
5617{
5618 /*
5619 * Make sure we don't have any outstanding guest register writes as we may
5620     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
5621 */
5622 off = iemNativeRegFlushPendingWrites(pReNative, off);
5623
5624#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5625 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5626#else
5627 RT_NOREF(idxInstr);
5628#endif
5629
5630 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
5631 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
5632 kIemNativeGstRegUse_ReadOnly);
5633
5634 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
5635 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
5636
5637 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
5638 return off;
5639}
5640
5641
5642/**
5643 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
5644 *
5645 * @returns The flush mask.
5646 * @param fCImpl The IEM_CIMPL_F_XXX flags.
5647 * @param fGstShwFlush The starting flush mask.
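 *
 * Example (mirrors the use in iemNativeEmitCImplCall below):
 * @code
 *      fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
 *                                                               fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
 * @endcode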
5648 */
5649DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
5650{
5651 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
5652 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
5653 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
5654 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
5655 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
5656 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
5657 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
5658 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
5659 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
5660 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
5661 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
5662 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
5663 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
5664 return fGstShwFlush;
5665}
5666
5667
5668/**
5669 * Emits a call to a CImpl function or something similar.
5670 */
5671DECL_HIDDEN_THROW(uint32_t)
5672iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
5673 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
5674{
5675 /* Writeback everything. */
5676 off = iemNativeRegFlushPendingWrites(pReNative, off);
5677
5678 /*
5679     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
5680 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
5681 */
5682 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
5683 fGstShwFlush
5684 | RT_BIT_64(kIemNativeGstReg_Pc)
5685 | RT_BIT_64(kIemNativeGstReg_EFlags));
5686 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
5687
5688 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5689
5690 /*
5691 * Load the parameters.
5692 */
5693#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
5694    /* Special-case the hidden VBOXSTRICTRC pointer. */
5695 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5696 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5697 if (cAddParams > 0)
5698 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
5699 if (cAddParams > 1)
5700 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
5701 if (cAddParams > 2)
5702 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
5703 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5704
5705#else
5706 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5707 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5708 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
5709 if (cAddParams > 0)
5710 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
5711 if (cAddParams > 1)
5712 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
5713 if (cAddParams > 2)
5714# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
5715 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
5716# else
5717 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
5718# endif
5719#endif
5720
5721 /*
5722 * Make the call.
5723 */
5724 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
5725
5726#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5727 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5728#endif
5729
5730 /*
5731 * Check the status code.
5732 */
5733 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
5734}
5735
5736
5737/**
5738 * Emits a call to a threaded worker function.
5739 */
5740DECL_HIDDEN_THROW(uint32_t)
5741iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5742{
5743 /* We don't know what the threaded function is doing so we must flush all pending writes. */
5744 off = iemNativeRegFlushPendingWrites(pReNative, off);
5745
5746 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
5747 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
5748
5749#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5750 /* The threaded function may throw / long jmp, so set current instruction
5751 number if we're counting. */
5752 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5753#endif
5754
5755 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
5756
5757#ifdef RT_ARCH_AMD64
5758 /* Load the parameters and emit the call. */
5759# ifdef RT_OS_WINDOWS
5760# ifndef VBOXSTRICTRC_STRICT_ENABLED
5761 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
5762 if (cParams > 0)
5763 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
5764 if (cParams > 1)
5765 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
5766 if (cParams > 2)
5767 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
5768# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
5769 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
5770 if (cParams > 0)
5771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
5772 if (cParams > 1)
5773 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
5774 if (cParams > 2)
5775 {
5776 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
5777 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
5778 }
5779 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
5780# endif /* VBOXSTRICTRC_STRICT_ENABLED */
5781# else
5782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
5783 if (cParams > 0)
5784 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
5785 if (cParams > 1)
5786 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
5787 if (cParams > 2)
5788 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
5789# endif
5790
5791 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5792
5793# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
5794 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
5795# endif
5796
5797#elif defined(RT_ARCH_ARM64)
5798 /*
5799 * ARM64:
5800 */
5801 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5802 if (cParams > 0)
5803 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
5804 if (cParams > 1)
5805 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
5806 if (cParams > 2)
5807 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
5808
5809 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
5810
5811#else
5812# error "port me"
5813#endif
5814
5815 /*
5816 * Check the status code.
5817 */
5818 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
5819
5820 return off;
5821}
5822
5823#ifdef VBOX_WITH_STATISTICS
5824/**
5825 * Emits code to update the thread call statistics.
5826 */
5827DECL_INLINE_THROW(uint32_t)
5828iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
5829{
5830 /*
5831 * Update threaded function stats.
5832 */
5833 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
5834 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
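 /* Each threaded function has its own uint32_t counter in VMCPU (acThreadedFuncStats); the code
    emitted below simply increments the entry for pCallEntry->enmFunction. */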
5835# if defined(RT_ARCH_ARM64)
5836 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
5837 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
5838 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
5839 iemNativeRegFreeTmp(pReNative, idxTmp1);
5840 iemNativeRegFreeTmp(pReNative, idxTmp2);
5841# else
5842 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
5843# endif
5844 return off;
5845}
5846#endif /* VBOX_WITH_STATISTICS */
5847
5848
5849/**
5850 * Emits the code at the CheckBranchMiss label.
5851 */
5852static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5853{
5854 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
5855 if (idxLabel != UINT32_MAX)
5856 {
5857 iemNativeLabelDefine(pReNative, idxLabel, off);
5858
5859 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
5860 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5861 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
5862
5863 /* jump back to the return sequence. */
5864 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5865 }
5866 return off;
5867}
5868
5869
5870/**
5871 * Emits the code at the NeedCsLimChecking label.
5872 */
5873static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5874{
5875 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
5876 if (idxLabel != UINT32_MAX)
5877 {
5878 iemNativeLabelDefine(pReNative, idxLabel, off);
5879
5880 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
5881 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5882 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
5883
5884 /* jump back to the return sequence. */
5885 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5886 }
5887 return off;
5888}
5889
5890
5891/**
5892 * Emits the code at the ObsoleteTb label.
5893 */
5894static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5895{
5896 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
5897 if (idxLabel != UINT32_MAX)
5898 {
5899 iemNativeLabelDefine(pReNative, idxLabel, off);
5900
5901 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
5902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5903 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
5904
5905 /* jump back to the return sequence. */
5906 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5907 }
5908 return off;
5909}
5910
5911
5912/**
5913 * Emits the code at the RaiseGP0 label.
5914 */
5915static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5916{
5917 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
5918 if (idxLabel != UINT32_MAX)
5919 {
5920 iemNativeLabelDefine(pReNative, idxLabel, off);
5921
5922 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
5923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5924 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
5925
5926 /* jump back to the return sequence. */
5927 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5928 }
5929 return off;
5930}
5931
5932
5933/**
5934 * Emits the code at the RaiseNm label.
5935 */
5936static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5937{
5938 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
5939 if (idxLabel != UINT32_MAX)
5940 {
5941 iemNativeLabelDefine(pReNative, idxLabel, off);
5942
5943 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
5944 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5945 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
5946
5947 /* jump back to the return sequence. */
5948 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5949 }
5950 return off;
5951}
5952
5953
5954/**
5955 * Emits the code at the RaiseUd label.
5956 */
5957static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5958{
5959 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
5960 if (idxLabel != UINT32_MAX)
5961 {
5962 iemNativeLabelDefine(pReNative, idxLabel, off);
5963
5964 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
5965 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5966 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
5967
5968 /* jump back to the return sequence. */
5969 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5970 }
5971 return off;
5972}
5973
5974
5975/**
5976 * Emits the code at the RaiseMf label.
5977 */
5978static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
5979{
5980 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
5981 if (idxLabel != UINT32_MAX)
5982 {
5983 iemNativeLabelDefine(pReNative, idxLabel, off);
5984
5985 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
5986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5987 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
5988
5989 /* jump back to the return sequence. */
5990 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
5991 }
5992 return off;
5993}
5994
5995
5996/**
5997 * Emits the code at the RaiseXf label.
5998 */
5999static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6000{
6001 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6002 if (idxLabel != UINT32_MAX)
6003 {
6004 iemNativeLabelDefine(pReNative, idxLabel, off);
6005
6006 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6007 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6008 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6009
6010 /* jump back to the return sequence. */
6011 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6012 }
6013 return off;
6014}
6015
6016
6017/**
6018 * Emits the code at the ReturnWithFlags label (returns
6019 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6020 */
6021static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6022{
6023 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6024 if (idxLabel != UINT32_MAX)
6025 {
6026 iemNativeLabelDefine(pReNative, idxLabel, off);
6027
6028 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6029
6030 /* jump back to the return sequence. */
6031 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6032 }
6033 return off;
6034}
6035
6036
6037/**
6038 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6039 */
6040static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6041{
6042 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6043 if (idxLabel != UINT32_MAX)
6044 {
6045 iemNativeLabelDefine(pReNative, idxLabel, off);
6046
6047 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6048
6049 /* jump back to the return sequence. */
6050 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6051 }
6052 return off;
6053}
6054
6055
6056/**
6057 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6058 */
6059static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6060{
6061 /*
6062 * Generate the rc + rcPassUp fiddling code if needed.
6063 */
6064 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6065 if (idxLabel != UINT32_MAX)
6066 {
6067 iemNativeLabelDefine(pReNative, idxLabel, off);
6068
6069 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6070#ifdef RT_ARCH_AMD64
6071# ifdef RT_OS_WINDOWS
6072# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6073 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6074# endif
6075 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6076 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6077# else
6078 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6079 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6080# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6081 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6082# endif
6083# endif
6084# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6085 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6086# endif
6087
6088#else
6089 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6090 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6091 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6092#endif
6093
6094 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6095 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6096 }
6097 return off;
6098}
6099
6100
6101/**
6102 * Emits a standard epilog.
6103 */
6104static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6105{
6106 *pidxReturnLabel = UINT32_MAX;
6107
6108 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6109 off = iemNativeRegFlushPendingWrites(pReNative, off);
6110
6111 /*
6112 * Successful return, so clear the return register (eax, w0).
6113 */
6114 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6115
6116 /*
6117 * Define label for common return point.
6118 */
6119 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6120 *pidxReturnLabel = idxReturn;
6121
6122 /*
6123 * Restore registers and return.
6124 */
6125#ifdef RT_ARCH_AMD64
6126 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6127
6128 /* Reposition rsp at the r15 restore point. */
6129 pbCodeBuf[off++] = X86_OP_REX_W;
6130 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6131 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6132 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6133
6134 /* Pop non-volatile registers and return */
6135 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6136 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6137 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6138 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6139 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6140 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6141 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6142 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6143# ifdef RT_OS_WINDOWS
6144 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6145 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6146# endif
6147 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6148 pbCodeBuf[off++] = 0xc9; /* leave */
6149 pbCodeBuf[off++] = 0xc3; /* ret */
6150 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6151
6152#elif defined(RT_ARCH_ARM64)
6153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6154
6155 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6156 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6157 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6158 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6159 IEMNATIVE_FRAME_VAR_SIZE / 8);
6160 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6161 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6162 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6163 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6164 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6165 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6166 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6167 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6168 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6169 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6170 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6171 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6172
6173 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6174 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6175 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6176 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6177
6178 /* retab / ret */
6179# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6180 if (1)
6181 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6182 else
6183# endif
6184 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6185
6186#else
6187# error "port me"
6188#endif
6189 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6190
6191 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6192}
6193
6194
6195/**
6196 * Emits a standard prolog.
6197 */
6198static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6199{
6200#ifdef RT_ARCH_AMD64
6201 /*
6202 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6203 * reserving 64 bytes for stack variables plus 4 non-register argument
6204 * slots.  Fixed register assignment: xBX = pVCpu;
6205 *
6206 * Since we always do the same register spilling, we can use the same
6207 * unwind description for all the code.
6208 */
6209 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6210 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6211 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6212 pbCodeBuf[off++] = 0x8b;
6213 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6214 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6215 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6216# ifdef RT_OS_WINDOWS
6217 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6218 pbCodeBuf[off++] = 0x8b;
6219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6220 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6221 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6222# else
6223 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6224 pbCodeBuf[off++] = 0x8b;
6225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6226# endif
6227 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6228 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6229 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6230 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6231 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6232 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6233 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6234 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6235
6236# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6237 /* Save the frame pointer. */
6238 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6239# endif
6240
6241 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6242 X86_GREG_xSP,
6243 IEMNATIVE_FRAME_ALIGN_SIZE
6244 + IEMNATIVE_FRAME_VAR_SIZE
6245 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6246 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6247 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6248 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6249 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
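 /* Rough sketch of the resulting frame (grows downwards): return address, saved rbp (rbp points
  * at it), then rbx, [rsi and rdi on Windows,] r12..r15, and below that the alignment padding,
  * the variable area and the stack/shadow argument slots allocated by the sub just above. */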
6250
6251#elif defined(RT_ARCH_ARM64)
6252 /*
6253 * We set up a stack frame exactly like on x86, only we have to push the
6254 * return address our selves here. We save all non-volatile registers.
6255 * return address ourselves here.  We save all non-volatile registers.
6256 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6257
6258 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been unable
6259 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6260 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether
6261 * it's in any way conditional, so we just emit this instruction now and hope for the best... */
6262 /* pacibsp */
6263 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6264# endif
6265
6266 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6267 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6268 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6269 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6270 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6271 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6272 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6273 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6274 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6275 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6276 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6277 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6278 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6279 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6280 /* Save the BP and LR (ret address) registers at the top of the frame. */
6281 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6282 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6283 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6284 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6285 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6286 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6287
6288 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6289 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
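 /* Rough sketch of the register save area (IEMNATIVE_FRAME_SAVE_REG_SIZE bytes): x19/x20 at the
  * bottom, x21..x28 above them, BP and LR in the top pair (BP points at that pair); the sub just
  * above then allocates IEMNATIVE_FRAME_VAR_SIZE bytes of variable space below x19/x20. */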
6290
6291 /* mov x28, x0 */
6292 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6293 /* mov x27, x1 */
6294 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6295
6296# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6297 /* Save the frame pointer. */
6298 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6299 ARMV8_A64_REG_X2);
6300# endif
6301
6302#else
6303# error "port me"
6304#endif
6305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6306 return off;
6307}
6308
6309
6310
6311
6312/*********************************************************************************************************************************
6313* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
6314*********************************************************************************************************************************/
6315
6316#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
6317 { \
6318 Assert(pReNative->Core.bmVars == 0); \
6319 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
6320 Assert(pReNative->Core.bmStack == 0); \
6321 pReNative->fMc = (a_fMcFlags); \
6322 pReNative->fCImpl = (a_fCImplFlags); \
6323 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
6324
6325/** We have to get to the end in recompilation mode, as otherwise we won't
6326 * generate code for all the IEM_MC_IF_XXX branches. */
6327#define IEM_MC_END() \
6328 iemNativeVarFreeAll(pReNative); \
6329 } return off
6330
6331
6332
6333/*********************************************************************************************************************************
6334* Native Emitter Support. *
6335*********************************************************************************************************************************/
6336
6337
6338#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
6339
6340#define IEM_MC_NATIVE_ELSE() } else {
6341
6342#define IEM_MC_NATIVE_ENDIF() } ((void)0)
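/* Hypothetical usage sketch (the emitter and variable names below are made up for illustration,
 * and RT_ARCH_VAL_XXX are assumed to be the IPRT architecture mask constants): an MC block would
 * wrap an architecture specific emitter roughly like
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmitSomeOp, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... fall back to the generic C implementation ...
 *      IEM_MC_NATIVE_ENDIF();
 * so only hosts in the a_fSupportedHosts mask take the hand written native code path. */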
6343
6344
6345#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
6346 off = a_fnEmitter(pReNative, off)
6347
6348#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
6349 off = a_fnEmitter(pReNative, off, (a0))
6350
6351#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
6352 off = a_fnEmitter(pReNative, off, (a0), (a1))
6353
6354#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
6355 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
6356
6357#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
6358 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
6359
6360#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
6361 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
6362
6363#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
6364 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
6365
6366#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
6367 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
6368
6369#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
6370 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
6371
6372
6373
6374/*********************************************************************************************************************************
6375* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
6376*********************************************************************************************************************************/
6377
6378#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
6379 pReNative->fMc = 0; \
6380 pReNative->fCImpl = (a_fFlags); \
6381 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
6382
6383
6384#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
6385 pReNative->fMc = 0; \
6386 pReNative->fCImpl = (a_fFlags); \
6387 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
6388
6389DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6390 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6391 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
6392{
6393 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
6394}
6395
6396
6397#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
6398 pReNative->fMc = 0; \
6399 pReNative->fCImpl = (a_fFlags); \
6400 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6401 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
6402
6403DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6404 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6405 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
6406{
6407 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
6408}
6409
6410
6411#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
6412 pReNative->fMc = 0; \
6413 pReNative->fCImpl = (a_fFlags); \
6414 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
6415 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
6416
6417DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6418 uint8_t idxInstr, uint64_t a_fGstShwFlush,
6419 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
6420 uint64_t uArg2)
6421{
6422 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
6423}
6424
6425
6426
6427/*********************************************************************************************************************************
6428* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
6429*********************************************************************************************************************************/
6430
6431/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
6432 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
6433DECL_INLINE_THROW(uint32_t)
6434iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6435{
6436 /*
6437 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
6438 * return with a special status code and make the execution loop deal with
6439 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
6440 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
6441 * could continue w/o interruption, it probably will drop into the
6442 * debugger, so it's not worth the effort of trying to service it here; we
6443 * just lump it in with the handling of the others.
6444 *
6445 * To simplify the code and the register state management even more (wrt the
6446 * immediate in the AND operation), we always update the flags and skip the
6447 * extra check and its associated conditional jump.
6448 */
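 /* Roughly equivalent guest-level logic for what is emitted below:
  *      if (EFLAGS & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
  *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;
  *      EFLAGS &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW); */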
6449 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
6450 <= UINT32_MAX);
6451#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6452 AssertMsg( pReNative->idxCurCall == 0
6453 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
6454 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
6455#endif
6456
6457 /*
6458 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
6459 * any pending register writes must be flushed.
6460 */
6461 off = iemNativeRegFlushPendingWrites(pReNative, off);
6462
6463 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6464 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6465 true /*fSkipLivenessAssert*/);
6466 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
6467 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
6468 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
6469 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
6470 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6471
6472 /* Free but don't flush the EFLAGS register. */
6473 iemNativeRegFreeTmp(pReNative, idxEflReg);
6474
6475 return off;
6476}
6477
6478
6479 /** Emits the per-status finishing code: nothing for VINF_SUCCESS (the dummy case), a jump to the ReturnBreak label for VINF_IEM_REEXEC_BREAK. */
6480template<int const a_rcNormal>
6481DECL_FORCE_INLINE(uint32_t)
6482iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6483{
6484 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
6485 if (a_rcNormal != VINF_SUCCESS)
6486 {
6487#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6488 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6489#else
6490 RT_NOREF_PV(idxInstr);
6491#endif
6492
6493 /* As this code returns from the TB any pending register writes must be flushed. */
6494 off = iemNativeRegFlushPendingWrites(pReNative, off);
6495
6496 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
6497 }
6498 return off;
6499}
6500
6501
6502#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
6503 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6504 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6505
6506#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6507 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6508 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6509 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6510
6511/** Same as iemRegAddToRip64AndFinishingNoFlags. */
6512DECL_INLINE_THROW(uint32_t)
6513iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6514{
6515#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6516# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6517 if (!pReNative->Core.offPc)
6518 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6519# endif
6520
6521 /* Allocate a temporary PC register. */
6522 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6523
6524 /* Perform the addition and store the result. */
6525 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
6526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6527
6528 /* Free but don't flush the PC register. */
6529 iemNativeRegFreeTmp(pReNative, idxPcReg);
6530#endif
6531
6532#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6533 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6534
6535 pReNative->Core.offPc += cbInstr;
6536# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6537 off = iemNativePcAdjustCheck(pReNative, off);
6538# endif
6539 if (pReNative->cCondDepth)
6540 off = iemNativeEmitPcWriteback(pReNative, off);
6541 else
6542 pReNative->Core.cInstrPcUpdateSkipped++;
6543#endif
6544
6545 return off;
6546}
6547
6548
6549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
6550 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6552
6553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6554 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6557
6558/** Same as iemRegAddToEip32AndFinishingNoFlags. */
6559DECL_INLINE_THROW(uint32_t)
6560iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6561{
6562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6564 if (!pReNative->Core.offPc)
6565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6566# endif
6567
6568 /* Allocate a temporary PC register. */
6569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6570
6571 /* Perform the addition and store the result. */
6572 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6574
6575 /* Free but don't flush the PC register. */
6576 iemNativeRegFreeTmp(pReNative, idxPcReg);
6577#endif
6578
6579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6580 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6581
6582 pReNative->Core.offPc += cbInstr;
6583# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6584 off = iemNativePcAdjustCheck(pReNative, off);
6585# endif
6586 if (pReNative->cCondDepth)
6587 off = iemNativeEmitPcWriteback(pReNative, off);
6588 else
6589 pReNative->Core.cInstrPcUpdateSkipped++;
6590#endif
6591
6592 return off;
6593}
6594
6595
6596#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
6597 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6598 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6599
6600#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
6601 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
6602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6604
6605/** Same as iemRegAddToIp16AndFinishingNoFlags. */
6606DECL_INLINE_THROW(uint32_t)
6607iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
6608{
6609#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
6610# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6611 if (!pReNative->Core.offPc)
6612 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6613# endif
6614
6615 /* Allocate a temporary PC register. */
6616 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6617
6618 /* Perform the addition and store the result. */
6619 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
6620 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6621 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6622
6623 /* Free but don't flush the PC register. */
6624 iemNativeRegFreeTmp(pReNative, idxPcReg);
6625#endif
6626
6627#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6628 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6629
6630 pReNative->Core.offPc += cbInstr;
6631# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
6632 off = iemNativePcAdjustCheck(pReNative, off);
6633# endif
6634 if (pReNative->cCondDepth)
6635 off = iemNativeEmitPcWriteback(pReNative, off);
6636 else
6637 pReNative->Core.cInstrPcUpdateSkipped++;
6638#endif
6639
6640 return off;
6641}
6642
6643
6644
6645/*********************************************************************************************************************************
6646* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
6647*********************************************************************************************************************************/
6648
6649#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6650 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6651 (a_enmEffOpSize), pCallEntry->idxInstr); \
6652 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6653
6654#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6655 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6656 (a_enmEffOpSize), pCallEntry->idxInstr); \
6657 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6658 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6659
6660#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
6661 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6662 IEMMODE_16BIT, pCallEntry->idxInstr); \
6663 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6664
6665#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6666 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6667 IEMMODE_16BIT, pCallEntry->idxInstr); \
6668 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6669 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6670
6671#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
6672 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6673 IEMMODE_64BIT, pCallEntry->idxInstr); \
6674 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6675
6676#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6677 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6678 IEMMODE_64BIT, pCallEntry->idxInstr); \
6679 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6680 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6681
6682/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
6683 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
6684 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
6685DECL_INLINE_THROW(uint32_t)
6686iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6687 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6688{
6689 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
6690
6691 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6692 off = iemNativeRegFlushPendingWrites(pReNative, off);
6693
6694#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6695 Assert(pReNative->Core.offPc == 0);
6696
6697 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6698#endif
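 /* What is emitted below corresponds roughly to:
  *      RIP += (int64_t)offDisp + cbInstr;
  *      64-bit opsize: raise #GP(0) if the new RIP isn't canonical;
  *      16-bit opsize: truncate the new RIP to 16 bits. */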
6699
6700 /* Allocate a temporary PC register. */
6701 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6702
6703 /* Perform the addition. */
6704 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
6705
6706 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
6707 {
6708 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6709 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6710 }
6711 else
6712 {
6713 /* Just truncate the result to 16-bit IP. */
6714 Assert(enmEffOpSize == IEMMODE_16BIT);
6715 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6716 }
6717 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6718
6719 /* Free but don't flush the PC register. */
6720 iemNativeRegFreeTmp(pReNative, idxPcReg);
6721
6722 return off;
6723}
6724
6725
6726#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6727 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6728 (a_enmEffOpSize), pCallEntry->idxInstr); \
6729 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6730
6731#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
6732 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
6733 (a_enmEffOpSize), pCallEntry->idxInstr); \
6734 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6735 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6736
6737#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
6738 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6739 IEMMODE_16BIT, pCallEntry->idxInstr); \
6740 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6741
6742#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6743 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
6744 IEMMODE_16BIT, pCallEntry->idxInstr); \
6745 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6746 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6747
6748#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
6749 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6750 IEMMODE_32BIT, pCallEntry->idxInstr); \
6751 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6752
6753#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6754 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
6755 IEMMODE_32BIT, pCallEntry->idxInstr); \
6756 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6757 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6758
6759/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
6760 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
6761 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
6762DECL_INLINE_THROW(uint32_t)
6763iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
6764 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
6765{
6766 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
6767
6768 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6769 off = iemNativeRegFlushPendingWrites(pReNative, off);
6770
6771#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6772 Assert(pReNative->Core.offPc == 0);
6773
6774 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6775#endif
6776
6777 /* Allocate a temporary PC register. */
6778 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6779
6780 /* Perform the addition. */
6781 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6782
6783 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
6784 if (enmEffOpSize == IEMMODE_16BIT)
6785 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6786
6787 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
6788/** @todo we can skip this in 32-bit FLAT mode. */
6789 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6790
6791 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6792
6793 /* Free but don't flush the PC register. */
6794 iemNativeRegFreeTmp(pReNative, idxPcReg);
6795
6796 return off;
6797}
6798
6799
6800#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
6801 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6802 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6803
6804#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
6805 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
6806 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6807 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6808
6809#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
6810 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6811 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6812
6813#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
6814 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
6815 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6816 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6817
6818#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
6819 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6820 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6821
6822#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
6823 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
6824 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
6825 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
6826
6827/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
6828DECL_INLINE_THROW(uint32_t)
6829iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6830 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
6831{
6832 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6833 off = iemNativeRegFlushPendingWrites(pReNative, off);
6834
6835#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6836 Assert(pReNative->Core.offPc == 0);
6837
6838 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6839#endif
6840
6841 /* Allocate a temporary PC register. */
6842 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6843
6844 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
6845 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
6846 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
6847 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6848 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6849
6850 /* Free but don't flush the PC register. */
6851 iemNativeRegFreeTmp(pReNative, idxPcReg);
6852
6853 return off;
6854}
6855
6856
6857
6858/*********************************************************************************************************************************
6859* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
6860*********************************************************************************************************************************/
6861
6862/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
6863#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
6864 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6865
6866/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
6867#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
6868 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6869
6870/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
6871#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
6872 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
6873
6874/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
6875 * clears flags. */
6876#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
6877 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
6878 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6879
6880/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
6881 * clears flags. */
6882#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
6883 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
6884 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6885
6886/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
6887 * clears flags. */
6888#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
6889 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
6890 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6891
6892#undef IEM_MC_SET_RIP_U16_AND_FINISH
6893
6894
6895/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
6896#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
6897 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6898
6899/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
6900#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
6901 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
6902
6903/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
6904 * clears flags. */
6905#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
6906 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
6907 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6908
6909/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
6910 * and clears flags. */
6911#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
6912 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
6913 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6914
6915#undef IEM_MC_SET_RIP_U32_AND_FINISH
6916
6917
6918/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
6919#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
6920 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
6921
6922/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
6923 * and clears flags. */
6924#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
6925 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
6926 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
6927
6928#undef IEM_MC_SET_RIP_U64_AND_FINISH
6929
6930
6931/** Same as iemRegRipJumpU16AndFinishNoFlags,
6932 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
6933DECL_INLINE_THROW(uint32_t)
6934iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
6935 uint8_t idxInstr, uint8_t cbVar)
6936{
6937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
6938 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
6939
6940 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
6941 off = iemNativeRegFlushPendingWrites(pReNative, off);
6942
6943#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6944 Assert(pReNative->Core.offPc == 0);
6945
6946 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
6947#endif
6948
6949 /* Get a register with the new PC loaded from idxVarPc.
6950 Note! This ASSUMES that the high bits of the GPR are zeroed. */
6951 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
6952
6953 /* Check limit (may #GP(0) + exit TB). */
6954 if (!f64Bit)
6955/** @todo we can skip this test in FLAT 32-bit mode. */
6956 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6957 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
6958 else if (cbVar > sizeof(uint32_t))
6959 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
6960
6961 /* Store the result. */
6962 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6963
6964 iemNativeVarRegisterRelease(pReNative, idxVarPc);
6965 /** @todo implicitly free the variable? */
6966
6967 return off;
6968}
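
/* Illustrative sketch (comment only): at guest level the code emitted above
   corresponds roughly to the following; the helper names follow the
   interpreted IEM code and are given for orientation only:
        uint64_t const uNewPc = <value of idxVarPc>;
        if (!f64Bit)                                        // 16/32-bit: CS limit check
        {
            if (uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
                return iemRaiseGeneralProtectionFault0(pVCpu);
        }
        else if (cbVar > sizeof(uint32_t) && !IEM_IS_CANONICAL(uNewPc))
            return iemRaiseGeneralProtectionFault0(pVCpu);
        pVCpu->cpum.GstCtx.rip = uNewPc;
   The native code jumps to shared RaiseGp0 tail code instead of returning. */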
6969
6970
6971
6972/*********************************************************************************************************************************
6973* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
6974*********************************************************************************************************************************/
6975
6976#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
6977 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
6978
6979/**
6980 * Emits code to check if a \#NM exception should be raised.
6981 *
6982 * @returns New code buffer offset, UINT32_MAX on failure.
6983 * @param pReNative The native recompile state.
6984 * @param off The code buffer offset.
6985 * @param idxInstr The current instruction.
6986 */
6987DECL_INLINE_THROW(uint32_t)
6988iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6989{
6990 /*
6991 * Make sure we don't have any outstanding guest register writes as we may
6992 * raise an #NM and all guest registers must be up to date in CPUMCTX.
6993 *
6994 * @todo r=aeichner Can we postpone this to the RaiseNm path?
6995 */
6996 off = iemNativeRegFlushPendingWrites(pReNative, off);
6997
6998#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6999 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7000#else
7001 RT_NOREF(idxInstr);
7002#endif
7003
7004 /* Allocate a temporary CR0 register. */
7005 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7006 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7007
7008 /*
7009 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
7010 * return raisexcpt();
7011 */
7012 /* Test and jump. */
7013 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
7014
7015 /* Free but don't flush the CR0 register. */
7016 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7017
7018 return off;
7019}
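
/* Illustrative sketch (comment only) of the check emitted above, in guest
   terms (helper name as used by the interpreted IEM code):
        if (pVCpu->cpum.GstCtx.cr0 & (X86_CR0_EM | X86_CR0_TS))
            return iemRaiseDeviceNotAvailable(pVCpu);
   except that the native code branches to the shared RaiseNm label. */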
7020
7021
7022#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
7023 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
7024
7025/**
7026 * Emits code to check if a \#MF exception should be raised.
7027 *
7028 * @returns New code buffer offset, UINT32_MAX on failure.
7029 * @param pReNative The native recompile state.
7030 * @param off The code buffer offset.
7031 * @param idxInstr The current instruction.
7032 */
7033DECL_INLINE_THROW(uint32_t)
7034iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7035{
7036 /*
7037 * Make sure we don't have any outstanding guest register writes as we may
7038 * raise an #MF and all guest registers must be up to date in CPUMCTX.
7039 *
7040 * @todo r=aeichner Can we postpone this to the RaiseMf path?
7041 */
7042 off = iemNativeRegFlushPendingWrites(pReNative, off);
7043
7044#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7045 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7046#else
7047 RT_NOREF(idxInstr);
7048#endif
7049
7050 /* Allocate a temporary FSW register. */
7051 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
7052 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
7053
7054 /*
7055 * if ((FSW & X86_FSW_ES) != 0)
7056 * return raisexcpt();
7057 */
7058 /* Test and jump. */
7059 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
7060
7061 /* Free but don't flush the FSW register. */
7062 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
7063
7064 return off;
7065}
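
/* Illustrative sketch (comment only) of the check emitted above; the exact
   FSW location in CPUMCTX is given schematically:
        if (pVCpu->cpum.GstCtx.XState.x87.FSW & X86_FSW_ES)
            return iemRaiseMathFault(pVCpu);
   The native code branches to the shared RaiseMf label instead. */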
7066
7067
7068#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
7069 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7070
7071/**
7072 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
7073 *
7074 * @returns New code buffer offset, UINT32_MAX on failure.
7075 * @param pReNative The native recompile state.
7076 * @param off The code buffer offset.
7077 * @param idxInstr The current instruction.
7078 */
7079DECL_INLINE_THROW(uint32_t)
7080iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7081{
7082 /*
7083 * Make sure we don't have any outstanding guest register writes as we may
7084 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7085 *
7086 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7087 */
7088 off = iemNativeRegFlushPendingWrites(pReNative, off);
7089
7090#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7091 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7092#else
7093 RT_NOREF(idxInstr);
7094#endif
7095
7096 /* Allocate a temporary CR0 and CR4 register. */
7097 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7098 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7099 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7100 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7101
7102 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
7103 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h), but check that it has some
7104 * actual performance benefit first. */
7105 /*
7106 * if (cr0 & X86_CR0_EM)
7107 * return raisexcpt();
7108 */
7109 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
7110 /*
7111 * if (!(cr4 & X86_CR4_OSFXSR))
7112 * return raisexcpt();
7113 */
7114 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
7115 /*
7116 * if (cr0 & X86_CR0_TS)
7117 * return raisexcpt();
7118 */
7119 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
7120
7121 /* Free but don't flush the CR0 and CR4 register. */
7122 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7123 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7124
7125 return off;
7126}
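
/* Illustrative sketch (comment only) of the three tests emitted above, in the
   same order, matching IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT in IEMMc.h:
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)            -> RaiseUd (#UD)
        if (!(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))     -> RaiseUd (#UD)
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS)            -> RaiseNm (#NM)
 */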
7127
7128
7129#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
7130 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
7131
7132/**
7133 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
7134 *
7135 * @returns New code buffer offset, UINT32_MAX on failure.
7136 * @param pReNative The native recompile state.
7137 * @param off The code buffer offset.
7138 * @param idxInstr The current instruction.
7139 */
7140DECL_INLINE_THROW(uint32_t)
7141iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7142{
7143 /*
7144 * Make sure we don't have any outstanding guest register writes as we may
7145 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
7146 *
7147 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
7148 */
7149 off = iemNativeRegFlushPendingWrites(pReNative, off);
7150
7151#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7152 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7153#else
7154 RT_NOREF(idxInstr);
7155#endif
7156
7157 /* Allocate a temporary CR0, CR4 and XCR0 register. */
7158 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
7159 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7160 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
7161 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
7162 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7163
7164#if 1
7165 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */
7166#endif
7167
7168 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
7169 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h), but check that it has some
7170 * actual performance benefit first. */
7171 /*
7172 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
7173 * return raisexcpt();
7174 */
7175 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
7176 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
7177 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
7178 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7179
7180 /*
7181 * if (!(cr4 & X86_CR4_OSXSAVE))
7182 * return raisexcpt();
7183 */
7184 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
7185 /*
7186 * if (cr0 & X86_CR0_TS)
7187 * return raisexcpt();
7188 */
7189 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
7190
7191 /* Free but don't flush the CR0, CR4 and XCR0 register. */
7192 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
7193 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7194 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
7195
7196 return off;
7197}
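
/* Illustrative sketch (comment only) of the checks emitted above; xcr0 stands
   for the guest XCR0 value wherever CPUMCTX keeps it:
        if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)) -> RaiseUd (#UD)
        if (!(cr4 & X86_CR4_OSXSAVE))                                            -> RaiseUd (#UD)
        if (cr0 & X86_CR0_TS)                                                    -> RaiseNm (#NM)
   matching IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT in IEMMc.h. */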
7198
7199
7200#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
7201 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
7202
7203/**
7204 * Emits code to raise a SIMD floating point exception (\#XF) or \#UD, as appropriate.
7205 *
7206 * @returns New code buffer offset, UINT32_MAX on failure.
7207 * @param pReNative The native recompile state.
7208 * @param off The code buffer offset.
7209 * @param idxInstr The current instruction.
7210 */
7211DECL_INLINE_THROW(uint32_t)
7212iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7213{
7214 /*
7215 * Make sure we don't have any outstanding guest register writes as we may
7216 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
7217 *
7218 * @todo r=aeichner Can we postpone this to the RaiseXf/RaiseUd path?
7219 */
7220 off = iemNativeRegFlushPendingWrites(pReNative, off);
7221
7222#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7223 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7224#else
7225 RT_NOREF(idxInstr);
7226#endif
7227
7228 /* Allocate a temporary CR4 register. */
7229 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
7230 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
7231 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
7232
7233 /*
7234 * if (cr4 & X86_CR4_OSXMMEEXCPT)
7235 * return raisexcpt(); (#XF)
7236 */
7237 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
7238
7239 /* Otherwise raise the \#UD exception unconditionally. */
7240 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
7241
7242 /* Free but don't flush the CR4 register. */
7243 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
7244
7245 return off;
7246}
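
/* Illustrative sketch (comment only) of the raise emitted above (helper names
   as used by the interpreted IEM code):
        if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
            return iemRaiseSimdFpException(pVCpu);      // #XF
        return iemRaiseUndefinedOpcode(pVCpu);          // #UD
 */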
7247
7248
7249
7250/*********************************************************************************************************************************
7251* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
7252*********************************************************************************************************************************/
7253
7254/**
7255 * Pushes an IEM_MC_IF_XXX onto the condition stack.
7256 *
7257 * @returns Pointer to the condition stack entry on success, NULL on failure
7258 * (too many nestings)
7259 */
7260DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
7261{
7262#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7263 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
7264#endif
7265
7266 uint32_t const idxStack = pReNative->cCondDepth;
7267 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
7268
7269 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
7270 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
7271
7272 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
7273 pEntry->fInElse = false;
7274 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
7275 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
7276
7277 return pEntry;
7278}
7279
7280
7281/**
7282 * Start of the if-block, snapshotting the register and variable state.
7283 */
7284DECL_INLINE_THROW(void)
7285iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
7286{
7287 Assert(offIfBlock != UINT32_MAX);
7288 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7289 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7290 Assert(!pEntry->fInElse);
7291
7292 /* Define the start of the IF block if requested or for disassembly purposes. */
7293 if (idxLabelIf != UINT32_MAX)
7294 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
7295#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7296 else
7297 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
7298#else
7299 RT_NOREF(offIfBlock);
7300#endif
7301
7302#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7303 Assert(pReNative->Core.offPc == 0);
7304#endif
7305
7306 /* Copy the initial state so we can restore it in the 'else' block. */
7307 pEntry->InitialState = pReNative->Core;
7308}
7309
7310
7311#define IEM_MC_ELSE() } while (0); \
7312 off = iemNativeEmitElse(pReNative, off); \
7313 do {
7314
7315/** Emits code related to IEM_MC_ELSE. */
7316DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7317{
7318 /* Check sanity and get the conditional stack entry. */
7319 Assert(off != UINT32_MAX);
7320 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7321 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7322 Assert(!pEntry->fInElse);
7323
7324 /* Jump to the endif */
7325 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
7326
7327 /* Define the else label and enter the else part of the condition. */
7328 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7329 pEntry->fInElse = true;
7330
7331#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7332 Assert(pReNative->Core.offPc == 0);
7333#endif
7334
7335 /* Snapshot the core state so we can do a merge at the endif and restore
7336 the snapshot we took at the start of the if-block. */
7337 pEntry->IfFinalState = pReNative->Core;
7338 pReNative->Core = pEntry->InitialState;
7339
7340 return off;
7341}
7342
7343
7344#define IEM_MC_ENDIF() } while (0); \
7345 off = iemNativeEmitEndIf(pReNative, off)
7346
7347/** Emits code related to IEM_MC_ENDIF. */
7348DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7349{
7350 /* Check sanity and get the conditional stack entry. */
7351 Assert(off != UINT32_MAX);
7352 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
7353 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
7354
7355#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7356 Assert(pReNative->Core.offPc == 0);
7357#endif
7358
7359 /*
7360 * Now we need to find common ground between the current core state and the
7361 * one at the end of the other branch. Use the lowest common denominator and
7362 * just drop anything that isn't the same in both states.
7363 */
7364 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
7365 * which is why we're doing this at the end of the else-block.
7366 * But we'd need more info about future use for that to be worth the effort. */
7367 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
7368 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
7369 {
7370 /* shadow guest stuff first. */
7371 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
7372 if (fGstRegs)
7373 {
7374 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
7375 do
7376 {
7377 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
7378 fGstRegs &= ~RT_BIT_64(idxGstReg);
7379
7380 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
7381 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
7382 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
7383 {
7384 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
7385 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
7386 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
7387 }
7388 } while (fGstRegs);
7389 }
7390 else
7391 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
7392
7393 /* Check variables next. For now we must require them to be identical
7394 or stuff we can recreate. */
7395 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
7396 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
7397 if (fVars)
7398 {
7399 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
7400 do
7401 {
7402 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
7403 fVars &= ~RT_BIT_32(idxVar);
7404
7405 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
7406 {
7407 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
7408 continue;
7409 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
7410 {
7411 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7412 if (idxHstReg != UINT8_MAX)
7413 {
7414 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7415 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7416 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
7417 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7418 }
7419 continue;
7420 }
7421 }
7422 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
7423 continue;
7424
7425 /* Irreconcilable, so drop it. */
7426 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7427 if (idxHstReg != UINT8_MAX)
7428 {
7429 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7430 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7431 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
7432 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7433 }
7434 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
7435 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7436 } while (fVars);
7437 }
7438
7439 /* Finally, check that the host register allocations match. */
7440 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
7441 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
7442 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
7443 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
7444 }
7445
7446 /*
7447 * Define the endif label and maybe the else one if we're still in the 'if' part.
7448 */
7449 if (!pEntry->fInElse)
7450 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
7451 else
7452 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
7453 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
7454
7455 /* Pop the conditional stack. */
7456 pReNative->cCondDepth -= 1;
7457
7458 return off;
7459}
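
/* Illustrative sketch (comment only) of how the condition macros above appear
   in a generated recompiler function; the body statements are schematic:
        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
            ... emitters for the then-branch ...
        } IEM_MC_ELSE() {
            ... emitters for the else-branch ...
        } IEM_MC_ENDIF();
   The do/while(0) fragments in the macros keep the braces balanced while
   iemNativeCondPushIf, iemNativeEmitElse and iemNativeEmitEndIf maintain the
   condition stack and reconcile the register/variable state at the join. */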
7460
7461
7462#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
7463 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
7464 do {
7465
7466/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
7467DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7468{
7469 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7470
7471 /* Get the eflags. */
7472 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7473 kIemNativeGstRegUse_ReadOnly);
7474
7475 /* Test and jump. */
7476 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7477
7478 /* Free but don't flush the EFlags register. */
7479 iemNativeRegFreeTmp(pReNative, idxEflReg);
7480
7481 /* Make a copy of the core state now as we start the if-block. */
7482 iemNativeCondStartIfBlock(pReNative, off);
7483
7484 return off;
7485}
7486
7487
7488#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
7489 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
7490 do {
7491
7492/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
7493DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
7494{
7495 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7496
7497 /* Get the eflags. */
7498 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7499 kIemNativeGstRegUse_ReadOnly);
7500
7501 /* Test and jump. */
7502 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
7503
7504 /* Free but don't flush the EFlags register. */
7505 iemNativeRegFreeTmp(pReNative, idxEflReg);
7506
7507 /* Make a copy of the core state now as we start the if-block. */
7508 iemNativeCondStartIfBlock(pReNative, off);
7509
7510 return off;
7511}
7512
7513
7514#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
7515 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
7516 do {
7517
7518/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
7519DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7520{
7521 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7522
7523 /* Get the eflags. */
7524 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7525 kIemNativeGstRegUse_ReadOnly);
7526
7527 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7528 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7529
7530 /* Test and jump. */
7531 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7532
7533 /* Free but don't flush the EFlags register. */
7534 iemNativeRegFreeTmp(pReNative, idxEflReg);
7535
7536 /* Make a copy of the core state now as we start the if-block. */
7537 iemNativeCondStartIfBlock(pReNative, off);
7538
7539 return off;
7540}
7541
7542
7543#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
7544 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
7545 do {
7546
7547/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
7548DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
7549{
7550 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7551
7552 /* Get the eflags. */
7553 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7554 kIemNativeGstRegUse_ReadOnly);
7555
7556 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7557 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7558
7559 /* Test and jump. */
7560 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7561
7562 /* Free but don't flush the EFlags register. */
7563 iemNativeRegFreeTmp(pReNative, idxEflReg);
7564
7565 /* Make a copy of the core state now as we start the if-block. */
7566 iemNativeCondStartIfBlock(pReNative, off);
7567
7568 return off;
7569}
7570
7571
7572#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
7573 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
7574 do {
7575
7576#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
7577 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
7578 do {
7579
7580/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
7581DECL_INLINE_THROW(uint32_t)
7582iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7583 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7584{
7585 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7586
7587 /* Get the eflags. */
7588 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7589 kIemNativeGstRegUse_ReadOnly);
7590
7591 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7592 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7593
7594 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7595 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7596 Assert(iBitNo1 != iBitNo2);
7597
7598#ifdef RT_ARCH_AMD64
7599 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
7600
7601 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7602 if (iBitNo1 > iBitNo2)
7603 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7604 else
7605 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7606 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7607
7608#elif defined(RT_ARCH_ARM64)
7609 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7611
7612 /* and tmpreg, eflreg, #1<<iBitNo1 */
7613 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7614
7615 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7616 if (iBitNo1 > iBitNo2)
7617 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7618 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7619 else
7620 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7621 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7622
7623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7624
7625#else
7626# error "Port me"
7627#endif
7628
7629 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7630 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7631 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7632
7633 /* Free but don't flush the EFlags and tmp registers. */
7634 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7635 iemNativeRegFreeTmp(pReNative, idxEflReg);
7636
7637 /* Make a copy of the core state now as we start the if-block. */
7638 iemNativeCondStartIfBlock(pReNative, off);
7639
7640 return off;
7641}
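
/* Worked example (comment only) of the bit trick above, e.g. for
   IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) with SF = bit 7, OF = bit 11:
        uTmp   = fEfl & RT_BIT_32(7);   // isolate SF
        uTmp <<= 11 - 7;                // move it to the OF position
        uTmp  ^= fEfl;                  // XOR with the real OF
   Bit 11 of uTmp is now set exactly when SF != OF, so a single bit test
   decides which way to branch. */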
7642
7643
7644#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
7645 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
7646 do {
7647
7648#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
7649 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
7650 do {
7651
7652/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
7653 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
7654DECL_INLINE_THROW(uint32_t)
7655iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
7656 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
7657{
7658 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7659
7660 /* We need an if-block label for the non-inverted variant. */
7661 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
7662 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
7663
7664 /* Get the eflags. */
7665 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7666 kIemNativeGstRegUse_ReadOnly);
7667
7668 /* Translate the flag masks to bit numbers. */
7669 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7670 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7671
7672 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
7673 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
7674 Assert(iBitNo1 != iBitNo);
7675
7676 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
7677 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
7678 Assert(iBitNo2 != iBitNo);
7679 Assert(iBitNo2 != iBitNo1);
7680
7681#ifdef RT_ARCH_AMD64
7682 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
7683#elif defined(RT_ARCH_ARM64)
7684 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7685#endif
7686
7687 /* Check for the lone bit first. */
7688 if (!fInverted)
7689 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
7690 else
7691 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
7692
7693 /* Then extract and compare the other two bits. */
7694#ifdef RT_ARCH_AMD64
7695 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7696 if (iBitNo1 > iBitNo2)
7697 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
7698 else
7699 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
7700 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
7701
7702#elif defined(RT_ARCH_ARM64)
7703 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7704
7705 /* and tmpreg, eflreg, #1<<iBitNo1 */
7706 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
7707
7708 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
7709 if (iBitNo1 > iBitNo2)
7710 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7711 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
7712 else
7713 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
7714 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
7715
7716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7717
7718#else
7719# error "Port me"
7720#endif
7721
7722 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
7723 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
7724 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
7725
7726 /* Free but don't flush the EFlags and tmp registers. */
7727 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7728 iemNativeRegFreeTmp(pReNative, idxEflReg);
7729
7730 /* Make a copy of the core state now as we start the if-block. */
7731 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
7732
7733 return off;
7734}
7735
7736
7737#define IEM_MC_IF_CX_IS_NZ() \
7738 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
7739 do {
7740
7741/** Emits code for IEM_MC_IF_CX_IS_NZ. */
7742DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7743{
7744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7745
7746 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7747 kIemNativeGstRegUse_ReadOnly);
7748 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
7749 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7750
7751 iemNativeCondStartIfBlock(pReNative, off);
7752 return off;
7753}
7754
7755
7756#define IEM_MC_IF_ECX_IS_NZ() \
7757 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
7758 do {
7759
7760#define IEM_MC_IF_RCX_IS_NZ() \
7761 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
7762 do {
7763
7764/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
7765DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7766{
7767 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7768
7769 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7770 kIemNativeGstRegUse_ReadOnly);
7771 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
7772 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7773
7774 iemNativeCondStartIfBlock(pReNative, off);
7775 return off;
7776}
7777
7778
7779#define IEM_MC_IF_CX_IS_NOT_ONE() \
7780 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
7781 do {
7782
7783/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
7784DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7785{
7786 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7787
7788 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7789 kIemNativeGstRegUse_ReadOnly);
7790#ifdef RT_ARCH_AMD64
7791 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7792#else
7793 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7794 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7795 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7796#endif
7797 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7798
7799 iemNativeCondStartIfBlock(pReNative, off);
7800 return off;
7801}
7802
7803
7804#define IEM_MC_IF_ECX_IS_NOT_ONE() \
7805 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
7806 do {
7807
7808#define IEM_MC_IF_RCX_IS_NOT_ONE() \
7809 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
7810 do {
7811
7812/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
7813DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
7814{
7815 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7816
7817 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7818 kIemNativeGstRegUse_ReadOnly);
7819 if (f64Bit)
7820 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7821 else
7822 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7823 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7824
7825 iemNativeCondStartIfBlock(pReNative, off);
7826 return off;
7827}
7828
7829
7830#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7831 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
7832 do {
7833
7834#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7835 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
7836 do {
7837
7838/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
7839 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7840DECL_INLINE_THROW(uint32_t)
7841iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
7842{
7843 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7844
7845 /* We have to load both RCX and EFLAGS before we can start branching,
7846 otherwise we'll end up in the else-block with an inconsistent
7847 register allocator state.
7848 Doing EFLAGS first as it's more likely to be loaded, right? */
7849 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7850 kIemNativeGstRegUse_ReadOnly);
7851 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7852 kIemNativeGstRegUse_ReadOnly);
7853
7854 /** @todo we could reduce this to a single branch instruction by spending a
7855 * temporary register and some setnz stuff. Not sure if loops are
7856 * worth it. */
7857 /* Check CX. */
7858#ifdef RT_ARCH_AMD64
7859 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7860#else
7861 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7862 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
7863 iemNativeRegFreeTmp(pReNative, idxTmpReg);
7864#endif
7865
7866 /* Check the EFlags bit. */
7867 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7868 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7869 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7870 !fCheckIfSet /*fJmpIfSet*/);
7871
7872 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7873 iemNativeRegFreeTmp(pReNative, idxEflReg);
7874
7875 iemNativeCondStartIfBlock(pReNative, off);
7876 return off;
7877}
7878
7879
7880#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7881 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
7882 do {
7883
7884#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7885 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
7886 do {
7887
7888#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
7889 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
7890 do {
7891
7892#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
7893 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
7894 do {
7895
7896/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
7897 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
7898 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
7899 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
7900DECL_INLINE_THROW(uint32_t)
7901iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7902 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
7903{
7904 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
7905
7906 /* We have to load both RCX and EFLAGS before we can start branching,
7907 otherwise we'll end up in the else-block with an inconsistent
7908 register allocator state.
7909 Doing EFLAGS first as it's more likely to be loaded, right? */
7910 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7911 kIemNativeGstRegUse_ReadOnly);
7912 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
7913 kIemNativeGstRegUse_ReadOnly);
7914
7915 /** @todo we could reduce this to a single branch instruction by spending a
7916 * temporary register and some setnz stuff. Not sure if loops are
7917 * worth it. */
7918 /* Check RCX/ECX. */
7919 if (f64Bit)
7920 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7921 else
7922 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
7923
7924 /* Check the EFlags bit. */
7925 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
7926 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
7927 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
7928 !fCheckIfSet /*fJmpIfSet*/);
7929
7930 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
7931 iemNativeRegFreeTmp(pReNative, idxEflReg);
7932
7933 iemNativeCondStartIfBlock(pReNative, off);
7934 return off;
7935}
7936
7937
7938
7939/*********************************************************************************************************************************
7940* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7941*********************************************************************************************************************************/
7942/** Number of hidden arguments for CIMPL calls.
7943 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
7944#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
7945# define IEM_CIMPL_HIDDEN_ARGS 3
7946#else
7947# define IEM_CIMPL_HIDDEN_ARGS 2
7948#endif
7949
7950#define IEM_MC_NOREF(a_Name) \
7951 RT_NOREF_PV(a_Name)
7952
7953#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
7954 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
7955
7956#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
7957 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
7958
7959#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
7960 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
7961
7962#define IEM_MC_LOCAL(a_Type, a_Name) \
7963 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
7964
7965#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
7966 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
7967
7968
7969/**
7970 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
7971 */
7972DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
7973{
7974 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
7975 return IEM_CIMPL_HIDDEN_ARGS;
7976 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
7977 return 1;
7978 return 0;
7979}
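
/* Illustrative sketch (comment only): a C-impl call has the rough shape
        VBOXSTRICTRC iemCImpl_xxx(PVMCPUCC pVCpu, uint8_t cbInstr, <explicit args>...);
   i.e. two hidden arguments before the IEM_MC_ARG ones, with the MSC/AMD64
   convention adding a third hidden one for returning VBOXSTRICTRC by
   reference; AIMPL-with-FXSTATE calls only get the FXSTATE pointer. */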
7980
7981
7982/**
7983 * Internal work that allocates a variable with kind set to
7984 * kIemNativeVarKind_Invalid and no current stack allocation.
7985 *
7986 * The kind will either be set by the caller or later when the variable is first
7987 * assigned a value.
7988 *
7989 * @returns Unpacked index.
7990 * @internal
7991 */
7992static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7993{
7994 Assert(cbType > 0 && cbType <= 64);
7995 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7996 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7997 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7998 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7999 pReNative->Core.aVars[idxVar].cbVar = cbType;
8000 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8001 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8002 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
8003 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
8004 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
8005 pReNative->Core.aVars[idxVar].fRegAcquired = false;
8006 pReNative->Core.aVars[idxVar].u.uValue = 0;
8007 return idxVar;
8008}
8009
8010
8011/**
8012 * Internal work that allocates an argument variable w/o setting enmKind.
8013 *
8014 * @returns Unpacked index.
8015 * @internal
8016 */
8017static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8018{
8019 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
8020 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8021 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
8022
8023 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
8024 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
8025 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
8026 return idxVar;
8027}
8028
8029
8030/**
8031 * Gets the stack slot for a stack variable, allocating one if necessary.
8032 *
8033 * Calling this function implies that the stack slot will contain a valid
8034 * variable value. The caller deals with any register currently assigned to the
8035 * variable, typically by spilling it into the stack slot.
8036 *
8037 * @returns The stack slot number.
8038 * @param pReNative The recompiler state.
8039 * @param idxVar The variable.
8040 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
8041 */
8042DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8043{
8044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8045 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8046 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8047
8048 /* Already got a slot? */
8049 uint8_t const idxStackSlot = pVar->idxStackSlot;
8050 if (idxStackSlot != UINT8_MAX)
8051 {
8052 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
8053 return idxStackSlot;
8054 }
8055
8056 /*
8057 * A single slot is easy to allocate.
8058 * Allocate them from the top end, closest to BP, to reduce the displacement.
8059 */
8060 if (pVar->cbVar <= sizeof(uint64_t))
8061 {
8062 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8063 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8064 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
8065 pVar->idxStackSlot = (uint8_t)iSlot;
8066 Log11(("iemNativeVarSetKindToStack: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
8067 return (uint8_t)iSlot;
8068 }
8069
8070 /*
8071 * We need more than one stack slot.
8072 *
8073 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
8074 */
8075 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
8076 Assert(pVar->cbVar <= 64);
8077 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
8078 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
8079 uint32_t bmStack = ~pReNative->Core.bmStack;
8080 while (bmStack != UINT32_MAX)
8081 {
8082/** @todo allocate from the top to reduce BP displacement. */
8083 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
8084 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8085 if (!(iSlot & fBitAlignMask))
8086 {
8087 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
8088 {
8089 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
8090 pVar->idxStackSlot = (uint8_t)iSlot;
8091 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8092 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
8093 return (uint8_t)iSlot;
8094 }
8095 }
8096 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
8097 }
8098 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8099}
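
/* Worked example (comment only) of the multi-slot masks above, for a 16 byte
   (128-bit) variable:
        fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(16) - 4) - 1 = 1
        fBitAllocMask = RT_BIT_32((16 + 7) >> 3) - 1            = 3
   i.e. two adjacent 8-byte slots starting at an even slot index; a 64 byte
   variable gives masks 7 and 0xff (eight slots, 64 byte aligned). */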
8100
8101
8102/**
8103 * Changes the variable to a stack variable.
8104 *
8105 * Currently this is only possible to do the first time the variable is used;
8106 * switching later can be implemented but hasn't been done.
8107 *
8108 * @param pReNative The recompiler state.
8109 * @param idxVar The variable.
8110 * @throws VERR_IEM_VAR_IPE_2
8111 */
8112static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8113{
8114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8115 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8116 if (pVar->enmKind != kIemNativeVarKind_Stack)
8117 {
8118 /* We could in theory transition from immediate to stack as well, but it
8119 would involve the caller doing work storing the value on the stack. So,
8120 till that's required we only allow transition from invalid. */
8121 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8122 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8123 pVar->enmKind = kIemNativeVarKind_Stack;
8124
8125 /* Note! We don't allocate a stack slot here, that's only done when a
8126 slot is actually needed to hold a variable value. */
8127 }
8128}
8129
8130
8131/**
8132 * Sets the variable to a constant (immediate) value.
8133 *
8134 * This does not require stack storage as we know the value and can always
8135 * reload it, unless of course it's referenced.
8136 *
8137 * @param pReNative The recompiler state.
8138 * @param idxVar The variable.
8139 * @param uValue The immediate value.
8140 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8141 */
8142static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8143{
8144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8145 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8146 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8147 {
8148 /* Only simple transitions for now. */
8149 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8150 pVar->enmKind = kIemNativeVarKind_Immediate;
8151 }
8152 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8153
8154 pVar->u.uValue = uValue;
8155 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8156 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8157 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8158}
8159
8160
8161/**
8162 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8163 *
8164 * This does not require stack storage as we know the value and can always
8165 * reload it. Loading is postponed till needed.
8166 *
8167 * @param pReNative The recompiler state.
8168 * @param idxVar The variable. Unpacked.
8169 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8170 *
8171 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8172 * @internal
8173 */
8174static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8175{
8176 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8177 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8178
8179 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8180 {
8181 /* Only simple transitions for now. */
8182 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8183 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8184 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8185 }
8186 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8187
8188 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8189
8190 /* Update the other variable, ensure it's a stack variable. */
8191 /** @todo handle variables with const values... that'll go boom now. */
8192 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8193 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8194}
8195
8196
8197/**
8198 * Sets the variable to a reference (pointer) to a guest register reference.
8199 *
8200 * This does not require stack storage as we know the value and can always
8201 * reload it. Loading is postponed till needed.
8202 *
8203 * @param pReNative The recompiler state.
8204 * @param idxVar The variable.
8205 * @param enmRegClass The class guest registers to reference.
8206 * @param idxReg The register within @a enmRegClass to reference.
8207 *
8208 * @throws VERR_IEM_VAR_IPE_2
8209 */
8210static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8211 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8212{
8213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8214 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8215
8216 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8217 {
8218 /* Only simple transitions for now. */
8219 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8220 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8221 }
8222 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8223
8224 pVar->u.GstRegRef.enmClass = enmRegClass;
8225 pVar->u.GstRegRef.idx = idxReg;
8226}
8227
8228
8229DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8230{
8231 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8232}
8233
8234
8235DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8236{
8237 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8238
8239 /* Since we're using a generic uint64_t value type, we must truncate it if
8240 the variable is smaller, otherwise we may end up with a too large value when
8241 scaling up an imm8 w/ sign-extension.
8242
8243 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
8244 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
8245 register parameters to have bits 16 and up set to zero. Instead of
8246 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
8247 CF value in the result. */
8248 switch (cbType)
8249 {
8250 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8251 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8252 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8253 }
8254 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8255 return idxVar;
8256}
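
/* Worked example (comment only) of the truncation above for the 16-bit case
   mentioned in the comment:
        uint64_t uValue = UINT64_C(0xffffffffffffffff);  // 0xffff after sign-extension
        uValue &= UINT64_C(0xffff);                      // cbType == sizeof(uint16_t)
   leaving 0x000000000000ffff, which satisfies the clang expectation of zeroed
   upper bits for a 16-bit parameter described above. */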
8257
8258
8259DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8260{
8261 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8262 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8263 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8264 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8265 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8266 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8267
8268 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8269 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8270 return idxArgVar;
8271}
8272
8273
8274DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8275{
8276 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8277 /* Don't set to stack now, leave that to the first use as for instance
8278 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8279 return idxVar;
8280}
8281
8282
8283DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8284{
8285 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8286
8287 /* Since we're using a generic uint64_t value type, we must truncate it if
8288 the variable is smaller, otherwise we may end up with a too large value when
8289 scaling up an imm8 w/ sign-extension. */
8290 switch (cbType)
8291 {
8292 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8293 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8294 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8295 }
8296 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8297 return idxVar;
8298}
8299
8300
8301/**
8302 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8303 * fixed till we call iemNativeVarRegisterRelease.
8304 *
8305 * @returns The host register number.
8306 * @param pReNative The recompiler state.
8307 * @param idxVar The variable.
8308 * @param poff Pointer to the instruction buffer offset.
8309 *                      Used in case a register needs to be freed up or the
8310 *                      value needs to be loaded off the stack.
8311 * @param fInitialized Set if the variable must already have been initialized.
8312 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8313 * the case.
8314 * @param idxRegPref Preferred register number or UINT8_MAX.
8315 */
8316DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8317 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8318{
8319 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8320 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8321 Assert(pVar->cbVar <= 8);
8322 Assert(!pVar->fRegAcquired);
8323
8324 uint8_t idxReg = pVar->idxReg;
8325 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8326 {
8327 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8328 && pVar->enmKind < kIemNativeVarKind_End);
8329 pVar->fRegAcquired = true;
8330 return idxReg;
8331 }
8332
8333 /*
8334 * If the kind of variable has not yet been set, default to 'stack'.
8335 */
8336 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8337 && pVar->enmKind < kIemNativeVarKind_End);
8338 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8339 iemNativeVarSetKindToStack(pReNative, idxVar);
8340
8341    /*
8342     * We have to allocate a register for the variable, even if it's a stack one,
8343     * as we don't know if there are modifications being made to it before it's
8344     * finalized (todo: analyze and insert hints about that?).
8345     *
8346     * If we can, we try to get the correct register for argument variables. This
8347     * assumes that most argument variables are fetched as close as possible
8348     * to the actual call, so that there aren't any interfering hidden calls
8349     * (memory accesses, etc.) in between.
8350     *
8351     * If we cannot, or it's a local variable, we make sure no argument registers
8352     * that will be used by this MC block will be allocated here, and we always
8353     * prefer non-volatile registers to avoid needing to spill stuff for internal
8354     * calls.
8355     */
8356    /** @todo Have the python script detect too early argument value fetches and
8357     *        warn about hidden calls causing less optimal code to be generated. */
8358
8359 uint8_t const uArgNo = pVar->uArgNo;
8360 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8361 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8362 {
8363 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8364 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8365 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8366 }
8367 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8368 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8369 {
8370 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8371 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8372 & ~pReNative->Core.bmHstRegsWithGstShadow
8373 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8374 & fNotArgsMask;
8375 if (fRegs)
8376 {
8377 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
8378 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8379 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8380 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8381 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8382 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8383 }
8384 else
8385 {
8386 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8387 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8388 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8389 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8390 }
8391 }
8392 else
8393 {
8394 idxReg = idxRegPref;
8395 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8396 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8397 }
8398 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8399 pVar->idxReg = idxReg;
8400
8401 /*
8402 * Load it off the stack if we've got a stack slot.
8403 */
8404 uint8_t const idxStackSlot = pVar->idxStackSlot;
8405 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8406 {
8407 Assert(fInitialized);
8408 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8409 switch (pVar->cbVar)
8410 {
8411 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8412 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8413 case 3: AssertFailed(); RT_FALL_THRU();
8414 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8415 default: AssertFailed(); RT_FALL_THRU();
8416 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8417 }
8418 }
8419 else
8420 {
8421 Assert(idxStackSlot == UINT8_MAX);
8422 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8423 }
8424 pVar->fRegAcquired = true;
8425 return idxReg;
8426}
8427
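/* A rough usage sketch (hedged; idxVar, idxOtherReg and the particular emit
   helper are illustrative assumptions, not taken from a real caller):

       uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true); // fInitialized
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherReg, idxVarReg);            // use the value
       iemNativeVarRegisterRelease(pReNative, idxVar);                                       // let the allocator have it back
 */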
8428
8429/**
8430 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8431 * guest register.
8432 *
8433 * This function makes sure there is a register for it and sets it to be the
8434 * current shadow copy of @a enmGstReg.
8435 *
8436 * @returns The host register number.
8437 * @param pReNative The recompiler state.
8438 * @param idxVar The variable.
8439 * @param enmGstReg The guest register this variable will be written to
8440 * after this call.
8441 * @param poff Pointer to the instruction buffer offset.
8442 * In case a register needs to be freed up or if the
8443 * variable content needs to be loaded off the stack.
8444 *
8445 * @note We DO NOT expect @a idxVar to be an argument variable,
8446 *        because this function can only be used in the commit stage of an
8447 *        instruction.
8448 */
8449DECL_HIDDEN_THROW(uint8_t)
8450iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8451{
8452 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8453 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8454 Assert(!pVar->fRegAcquired);
8455 AssertMsgStmt( pVar->cbVar <= 8
8456 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8457 || pVar->enmKind == kIemNativeVarKind_Stack),
8458 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8459 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8461
8462 /*
8463 * This shouldn't ever be used for arguments, unless it's in a weird else
8464 * branch that doesn't do any calling and even then it's questionable.
8465 *
8466 * However, in case someone writes crazy wrong MC code and does register
8467 * updates before making calls, just use the regular register allocator to
8468 * ensure we get a register suitable for the intended argument number.
8469 */
8470 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8471
8472 /*
8473 * If there is already a register for the variable, we transfer/set the
8474 * guest shadow copy assignment to it.
8475 */
8476 uint8_t idxReg = pVar->idxReg;
8477 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8478 {
8479 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8480 {
8481 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8482 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8483 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8484 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8485 }
8486 else
8487 {
8488 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8489 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8491 }
8492 /** @todo figure this one out. We need some way of making sure the register isn't
8493 * modified after this point, just in case we start writing crappy MC code. */
8494 pVar->enmGstReg = enmGstReg;
8495 pVar->fRegAcquired = true;
8496 return idxReg;
8497 }
8498 Assert(pVar->uArgNo == UINT8_MAX);
8499
8500 /*
8501     * Because this is supposed to be the commit stage, we just tag along with the
8502 * temporary register allocator and upgrade it to a variable register.
8503 */
8504 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8505 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8506 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8507 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8508 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8509 pVar->idxReg = idxReg;
8510
8511 /*
8512 * Now we need to load the register value.
8513 */
8514 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8515 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8516 else
8517 {
8518 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8519 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8520 switch (pVar->cbVar)
8521 {
8522 case sizeof(uint64_t):
8523 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8524 break;
8525 case sizeof(uint32_t):
8526 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8527 break;
8528 case sizeof(uint16_t):
8529 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8530 break;
8531 case sizeof(uint8_t):
8532 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8533 break;
8534 default:
8535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8536 }
8537 }
8538
8539 pVar->fRegAcquired = true;
8540 return idxReg;
8541}
8542
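/* A minimal sketch of the intended commit-stage use (hedged; the guest register
   picked here is an illustrative assumption):

       uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar,
                                                                        IEMNATIVEGSTREG_GPR(X86_GREG_xBX), &off);
       // ... emit the result calculation into idxVarReg; the register is now the
       //     shadow copy of the guest register, so a later flush writes it back ...
       iemNativeVarRegisterRelease(pReNative, idxVar);
 */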
8543
8544/**
8545 * Sets the host register for @a idxVarRc to @a idxReg.
8546 *
8547 * The register must not be allocated. Any guest register shadowing will be
8548 * implicitly dropped by this call.
8549 *
8550 * The variable must not have any register associated with it (causes
8551 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
8552 * implied.
8553 *
8554 * @returns idxReg
8555 * @param pReNative The recompiler state.
8556 * @param idxVar The variable.
8557 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
8558 * @param off For recording in debug info.
8559 *
8560 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
8561 */
8562DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
8563{
8564 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8565 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8566 Assert(!pVar->fRegAcquired);
8567 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8568 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
8569 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
8570
8571 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
8572 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8573
8574 iemNativeVarSetKindToStack(pReNative, idxVar);
8575 pVar->idxReg = idxReg;
8576
8577 return idxReg;
8578}
8579
8580
8581/**
8582 * A convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
8583 */
8584DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8585 uint8_t idxReg, uint32_t *poff)
8586{
8587 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
8588 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
8589 return idxReg;
8590}
8591
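/* Hedged sketch of the typical return value pattern (mirrors the AIMPL call
   emitter further down; pfnAImpl and idxVarRc are assumed to exist):

       off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
       iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
       // ... use the value in the acquired register ...
       iemNativeVarRegisterRelease(pReNative, idxVarRc);
 */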
8592
8593/**
8594 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8595 *
8596 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8597 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8598 * requirement of flushing anything in volatile host registers when making a
8599 * call.
8600 *
8601 * @returns New @a off value.
8602 * @param pReNative The recompiler state.
8603 * @param off The code buffer position.
8604 * @param fHstRegsNotToSave Set of registers not to save & restore.
8605 */
8606DECL_HIDDEN_THROW(uint32_t)
8607iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8608{
8609 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8610 if (fHstRegs)
8611 {
8612 do
8613 {
8614 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8615 fHstRegs &= ~RT_BIT_32(idxHstReg);
8616
8617 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8618 {
8619 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8620 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8621 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8622 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8623 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8624 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8625 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8626 {
8627 case kIemNativeVarKind_Stack:
8628 {
8629 /* Temporarily spill the variable register. */
8630 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8631 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8632 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8633 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8634 continue;
8635 }
8636
8637 case kIemNativeVarKind_Immediate:
8638 case kIemNativeVarKind_VarRef:
8639 case kIemNativeVarKind_GstRegRef:
8640 /* It is weird to have any of these loaded at this point. */
8641 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8642 continue;
8643
8644 case kIemNativeVarKind_End:
8645 case kIemNativeVarKind_Invalid:
8646 break;
8647 }
8648 AssertFailed();
8649 }
8650 else
8651 {
8652 /*
8653 * Allocate a temporary stack slot and spill the register to it.
8654 */
8655 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8656 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8657 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8658 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8659 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8660 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8661 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8662 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8663 }
8664 } while (fHstRegs);
8665 }
8666 return off;
8667}
8668
8669
8670/**
8671 * Emit code to restore volatile registers after a call to a helper.
8672 *
8673 * @returns New @a off value.
8674 * @param pReNative The recompiler state.
8675 * @param off The code buffer position.
8676 * @param fHstRegsNotToSave Set of registers not to save & restore.
8677 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8678 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8679 */
8680DECL_HIDDEN_THROW(uint32_t)
8681iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8682{
8683 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8684 if (fHstRegs)
8685 {
8686 do
8687 {
8688 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8689 fHstRegs &= ~RT_BIT_32(idxHstReg);
8690
8691 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8692 {
8693 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8694 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8695 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8696 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8697 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8698 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8699 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8700 {
8701 case kIemNativeVarKind_Stack:
8702 {
8703 /* Unspill the variable register. */
8704 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8705 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8706 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8707 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8708 continue;
8709 }
8710
8711 case kIemNativeVarKind_Immediate:
8712 case kIemNativeVarKind_VarRef:
8713 case kIemNativeVarKind_GstRegRef:
8714 /* It is weird to have any of these loaded at this point. */
8715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8716 continue;
8717
8718 case kIemNativeVarKind_End:
8719 case kIemNativeVarKind_Invalid:
8720 break;
8721 }
8722 AssertFailed();
8723 }
8724 else
8725 {
8726 /*
8727 * Restore from temporary stack slot.
8728 */
8729 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8730 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8731 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8732 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8733
8734 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8735 }
8736 } while (fHstRegs);
8737 }
8738 return off;
8739}
8740
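/* Hedged sketch of how the save/restore pair brackets a helper call (the helper
   pointer and the zero mask are assumptions for illustration only):

       uint32_t const fHstRegsNotToSave = 0;                                     // save/restore everything volatile
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);  // hypothetical helper
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */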
8741
8742/**
8743 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8744 *
8745 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8746 *
8747 * ASSUMES that @a idxVar is valid and unpacked.
8748 */
8749DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8750{
8751 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8752 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8753 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8754 {
8755 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8756 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8757 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8758 Assert(cSlots > 0);
8759 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8760 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8761 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8762 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8763 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8764 }
8765 else
8766 Assert(idxStackSlot == UINT8_MAX);
8767}
8768
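/* Worked example (hedged): cbVar == 8 gives cSlots == 1 and fAllocMask == 0x1,
   so a single bit is cleared at idxStackSlot in bmStack; a hypothetical 16 byte
   variable would give cSlots == 2 and fAllocMask == 0x3. */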
8769
8770/**
8771 * Worker that frees a single variable.
8772 *
8773 * ASSUMES that @a idxVar is valid and unpacked.
8774 */
8775DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8776{
8777 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8778 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8779 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8780
8781 /* Free the host register first if any assigned. */
8782 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8783 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8784 {
8785 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8786 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8787 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8788 }
8789
8790 /* Free argument mapping. */
8791 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8792 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8793 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8794
8795 /* Free the stack slots. */
8796 iemNativeVarFreeStackSlots(pReNative, idxVar);
8797
8798 /* Free the actual variable. */
8799 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8800 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8801}
8802
8803
8804/**
8805 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8806 */
8807DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8808{
8809 while (bmVars != 0)
8810 {
8811 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8812 bmVars &= ~RT_BIT_32(idxVar);
8813
8814#if 1 /** @todo optimize by simplifying this later... */
8815 iemNativeVarFreeOneWorker(pReNative, idxVar);
8816#else
8817 /* Only need to free the host register, the rest is done as bulk updates below. */
8818 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8819 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8820 {
8821 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8822 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8823 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8824 }
8825#endif
8826 }
8827#if 0 /** @todo optimize by simplifying this later... */
8828 pReNative->Core.bmVars = 0;
8829 pReNative->Core.bmStack = 0;
8830 pReNative->Core.u64ArgVars = UINT64_MAX;
8831#endif
8832}
8833
8834
8835/**
8836 * This is called by IEM_MC_END() to clean up all variables.
8837 */
8838DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
8839{
8840 uint32_t const bmVars = pReNative->Core.bmVars;
8841 if (bmVars != 0)
8842 iemNativeVarFreeAllSlow(pReNative, bmVars);
8843 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8844 Assert(pReNative->Core.bmStack == 0);
8845}
8846
8847
8848#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
8849
8850/**
8851 * This is called by IEM_MC_FREE_LOCAL.
8852 */
8853DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8854{
8855 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8856 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
8857 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8858}
8859
8860
8861#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
8862
8863/**
8864 * This is called by IEM_MC_FREE_ARG.
8865 */
8866DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8867{
8868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8869 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
8870 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
8871}
8872
8873
8874#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
8875
8876/**
8877 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
8878 */
8879DECL_INLINE_THROW(uint32_t)
8880iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
8881{
8882 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
8883 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
8884 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8885 Assert( pVarDst->cbVar == sizeof(uint16_t)
8886 || pVarDst->cbVar == sizeof(uint32_t));
8887
8888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
8889 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
8890 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
8891 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
8892 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8893
8894 Assert(pVarDst->cbVar < pVarSrc->cbVar);
8895
8896 /*
8897 * Special case for immediates.
8898 */
8899 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
8900 {
8901 switch (pVarDst->cbVar)
8902 {
8903 case sizeof(uint16_t):
8904 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
8905 break;
8906 case sizeof(uint32_t):
8907 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
8908 break;
8909 default: AssertFailed(); break;
8910 }
8911 }
8912 else
8913 {
8914 /*
8915 * The generic solution for now.
8916 */
8917 /** @todo optimize this by having the python script make sure the source
8918 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
8919 * statement. Then we could just transfer the register assignments. */
8920 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
8921 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
8922 switch (pVarDst->cbVar)
8923 {
8924 case sizeof(uint16_t):
8925 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
8926 break;
8927 case sizeof(uint32_t):
8928 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
8929 break;
8930 default: AssertFailed(); break;
8931 }
8932 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
8933 iemNativeVarRegisterRelease(pReNative, idxVarDst);
8934 }
8935 return off;
8936}
8937
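/* Worked example of the immediate special case above (hedged illustration):

       // IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src) with u32Src being the constant
       // 0x12345678 just re-creates the destination as a constant, no code emitted:
       iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)UINT32_C(0x12345678)); // == 0x5678
 */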
8938
8939
8940/*********************************************************************************************************************************
8941* Emitters for IEM_MC_CALL_CIMPL_XXX *
8942*********************************************************************************************************************************/
8943
8944/**
8945 * Emits code to load a reference to the given guest register into @a idxGprDst.
8946 */
8947DECL_INLINE_THROW(uint32_t)
8948iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8949 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8950{
8951#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8952 /** @todo If we ever gonna allow referencing the RIP register we need to update guest value here. */
8953#endif
8954
8955 /*
8956 * Get the offset relative to the CPUMCTX structure.
8957 */
8958 uint32_t offCpumCtx;
8959 switch (enmClass)
8960 {
8961 case kIemNativeGstRegRef_Gpr:
8962 Assert(idxRegInClass < 16);
8963 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8964 break;
8965
8966        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8967 Assert(idxRegInClass < 4);
8968 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8969 break;
8970
8971 case kIemNativeGstRegRef_EFlags:
8972 Assert(idxRegInClass == 0);
8973 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8974 break;
8975
8976 case kIemNativeGstRegRef_MxCsr:
8977 Assert(idxRegInClass == 0);
8978 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8979 break;
8980
8981 case kIemNativeGstRegRef_FpuReg:
8982 Assert(idxRegInClass < 8);
8983 AssertFailed(); /** @todo what kind of indexing? */
8984 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8985 break;
8986
8987 case kIemNativeGstRegRef_MReg:
8988 Assert(idxRegInClass < 8);
8989 AssertFailed(); /** @todo what kind of indexing? */
8990 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8991 break;
8992
8993 case kIemNativeGstRegRef_XReg:
8994 Assert(idxRegInClass < 16);
8995 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8996 break;
8997
8998 default:
8999 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
9000 }
9001
9002 /*
9003 * Load the value into the destination register.
9004 */
9005#ifdef RT_ARCH_AMD64
9006 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
9007
9008#elif defined(RT_ARCH_ARM64)
9009 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9010 Assert(offCpumCtx < 4096);
9011 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
9012
9013#else
9014# error "Port me!"
9015#endif
9016
9017 return off;
9018}
9019
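/* Hedged example of what the helper above produces: for kIemNativeGstRegRef_Gpr
   and idxRegInClass == X86_GREG_xBX, offCpumCtx becomes
   RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]) and on arm64 a single ADD of that
   immediate to IEMNATIVE_REG_FIXED_PCPUMCTX is emitted:

       off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                            kIemNativeGstRegRef_Gpr, X86_GREG_xBX);
 */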
9020
9021/**
9022 * Common code for CIMPL and AIMPL calls.
9023 *
9024 * These are calls that use argument variables and such.  They should not be
9025 * confused with internal calls required to implement an MC operation,
9026 * like a TLB load and similar.
9027 *
9028 * Upon return all that is left to do is to load any hidden arguments and
9029 * perform the call. All argument variables are freed.
9030 *
9031 * @returns New code buffer offset; throws VBox status code on error.
9032 * @param pReNative The native recompile state.
9033 * @param off The code buffer offset.
9034 * @param   cArgs           The total number of arguments (includes hidden
9035 * count).
9036 * @param cHiddenArgs The number of hidden arguments. The hidden
9037 * arguments must not have any variable declared for
9038 * them, whereas all the regular arguments must
9039 * (tstIEMCheckMc ensures this).
9040 */
9041DECL_HIDDEN_THROW(uint32_t)
9042iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
9043{
9044#ifdef VBOX_STRICT
9045 /*
9046 * Assert sanity.
9047 */
9048 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
9049 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
9050 for (unsigned i = 0; i < cHiddenArgs; i++)
9051 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
9052 for (unsigned i = cHiddenArgs; i < cArgs; i++)
9053 {
9054 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
9055 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
9056 }
9057 iemNativeRegAssertSanity(pReNative);
9058#endif
9059
9060 /* We don't know what the called function makes use of, so flush any pending register writes. */
9061 off = iemNativeRegFlushPendingWrites(pReNative, off);
9062
9063 /*
9064 * Before we do anything else, go over variables that are referenced and
9065 * make sure they are not in a register.
9066 */
9067 uint32_t bmVars = pReNative->Core.bmVars;
9068 if (bmVars)
9069 {
9070 do
9071 {
9072 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9073 bmVars &= ~RT_BIT_32(idxVar);
9074
9075 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
9076 {
9077 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
9078 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
9079 {
9080 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9081 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9082 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9083 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9084 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9085
9086 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9087 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9088 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9089 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9090 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9091 }
9092 }
9093 } while (bmVars != 0);
9094#if 0 //def VBOX_STRICT
9095 iemNativeRegAssertSanity(pReNative);
9096#endif
9097 }
9098
9099 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9100
9101 /*
9102 * First, go over the host registers that will be used for arguments and make
9103 * sure they either hold the desired argument or are free.
9104 */
9105 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9106 {
9107 for (uint32_t i = 0; i < cRegArgs; i++)
9108 {
9109 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9110 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9111 {
9112 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9113 {
9114 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9116 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9117 Assert(pVar->idxReg == idxArgReg);
9118 uint8_t const uArgNo = pVar->uArgNo;
9119 if (uArgNo == i)
9120                    { /* perfect */ }
9121 /* The variable allocator logic should make sure this is impossible,
9122 except for when the return register is used as a parameter (ARM,
9123 but not x86). */
9124#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9125 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9126 {
9127# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9128# error "Implement this"
9129# endif
9130 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9131 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9132 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9134 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9135 }
9136#endif
9137 else
9138 {
9139 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9140
9141 if (pVar->enmKind == kIemNativeVarKind_Stack)
9142 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9143 else
9144 {
9145 /* just free it, can be reloaded if used again */
9146 pVar->idxReg = UINT8_MAX;
9147 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9148 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9149 }
9150 }
9151 }
9152 else
9153 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9154 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9155 }
9156 }
9157#if 0 //def VBOX_STRICT
9158 iemNativeRegAssertSanity(pReNative);
9159#endif
9160 }
9161
9162 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9163
9164#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9165 /*
9166 * If there are any stack arguments, make sure they are in their place as well.
9167 *
9168     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9169     * the caller) will be loading it later and it must be free (see the first loop).
9170 */
9171 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9172 {
9173 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9174 {
9175 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9176 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9177 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9178 {
9179 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9180 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9181 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9182 pVar->idxReg = UINT8_MAX;
9183 }
9184 else
9185 {
9186 /* Use ARG0 as temp for stuff we need registers for. */
9187 switch (pVar->enmKind)
9188 {
9189 case kIemNativeVarKind_Stack:
9190 {
9191 uint8_t const idxStackSlot = pVar->idxStackSlot;
9192 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9193 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9194 iemNativeStackCalcBpDisp(idxStackSlot));
9195 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9196 continue;
9197 }
9198
9199 case kIemNativeVarKind_Immediate:
9200 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9201 continue;
9202
9203 case kIemNativeVarKind_VarRef:
9204 {
9205 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9206 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9207 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9208 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9209 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9210 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9211 {
9212 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9213 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9214 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9215 }
9216 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9217 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9218 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9219 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9220 continue;
9221 }
9222
9223 case kIemNativeVarKind_GstRegRef:
9224 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9225 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9226 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9227 continue;
9228
9229 case kIemNativeVarKind_Invalid:
9230 case kIemNativeVarKind_End:
9231 break;
9232 }
9233 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9234 }
9235 }
9236# if 0 //def VBOX_STRICT
9237 iemNativeRegAssertSanity(pReNative);
9238# endif
9239 }
9240#else
9241 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9242#endif
9243
9244 /*
9245 * Make sure the argument variables are loaded into their respective registers.
9246 *
9247 * We can optimize this by ASSUMING that any register allocations are for
9248     * registers that have already been loaded and are ready. The previous step
9249 * saw to that.
9250 */
9251 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9252 {
9253 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9254 {
9255 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9256 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9257 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9258 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9259 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9260 else
9261 {
9262 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9263 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9264 {
9265 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9266 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9267 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9268 | RT_BIT_32(idxArgReg);
9269 pVar->idxReg = idxArgReg;
9270 }
9271 else
9272 {
9273 /* Use ARG0 as temp for stuff we need registers for. */
9274 switch (pVar->enmKind)
9275 {
9276 case kIemNativeVarKind_Stack:
9277 {
9278 uint8_t const idxStackSlot = pVar->idxStackSlot;
9279 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9280 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9281 continue;
9282 }
9283
9284 case kIemNativeVarKind_Immediate:
9285 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9286 continue;
9287
9288 case kIemNativeVarKind_VarRef:
9289 {
9290 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9291 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9292 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9293 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9294 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9295 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9296 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9297 {
9298 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9299 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9300 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9301 }
9302 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9303 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9304 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9305 continue;
9306 }
9307
9308 case kIemNativeVarKind_GstRegRef:
9309 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9310 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9311 continue;
9312
9313 case kIemNativeVarKind_Invalid:
9314 case kIemNativeVarKind_End:
9315 break;
9316 }
9317 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9318 }
9319 }
9320 }
9321#if 0 //def VBOX_STRICT
9322 iemNativeRegAssertSanity(pReNative);
9323#endif
9324 }
9325#ifdef VBOX_STRICT
9326 else
9327 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9328 {
9329 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9330 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9331 }
9332#endif
9333
9334 /*
9335 * Free all argument variables (simplified).
9336 * Their lifetime always expires with the call they are for.
9337 */
9338 /** @todo Make the python script check that arguments aren't used after
9339 * IEM_MC_CALL_XXXX. */
9340    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9341     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9342 * an argument value. There is also some FPU stuff. */
9343 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9344 {
9345 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9346 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9347
9348 /* no need to free registers: */
9349 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9350 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9351 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9352 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9353 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9354 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9355
9356 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9357 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9358 iemNativeVarFreeStackSlots(pReNative, idxVar);
9359 }
9360 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9361
9362 /*
9363 * Flush volatile registers as we make the call.
9364 */
9365 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9366
9367 return off;
9368}
9369
9370
9371/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
9372DECL_HIDDEN_THROW(uint32_t)
9373iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
9374 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
9375
9376{
9377 /*
9378 * Do all the call setup and cleanup.
9379 */
9380 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
9381
9382 /*
9383 * Load the two or three hidden arguments.
9384 */
9385#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9386 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
9387 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9388 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
9389#else
9390 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9391 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
9392#endif
9393
9394 /*
9395 * Make the call and check the return code.
9396 *
9397     * Shadow PC copies are always flushed here; other stuff depends on flags.
9398     * Segment and general purpose registers are explicitly flushed via the
9399 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
9400 * macros.
9401 */
9402 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
9403#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9404 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
9405#endif
9406 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
9407 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
9408 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
9409 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
9410
9411 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
9412}
9413
9414
9415#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
9416 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
9417
9418/** Emits code for IEM_MC_CALL_CIMPL_1. */
9419DECL_INLINE_THROW(uint32_t)
9420iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9421 uintptr_t pfnCImpl, uint8_t idxArg0)
9422{
9423 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9424 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
9425}
9426
9427
9428#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
9429 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
9430
9431/** Emits code for IEM_MC_CALL_CIMPL_2. */
9432DECL_INLINE_THROW(uint32_t)
9433iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9434 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
9435{
9436 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9437 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9438 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
9439}
9440
9441
9442#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
9443 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9444 (uintptr_t)a_pfnCImpl, a0, a1, a2)
9445
9446/** Emits code for IEM_MC_CALL_CIMPL_3. */
9447DECL_INLINE_THROW(uint32_t)
9448iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9449 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9450{
9451 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9452 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9453 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9454 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
9455}
9456
9457
9458#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
9459 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9460 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
9461
9462/** Emits code for IEM_MC_CALL_CIMPL_4. */
9463DECL_INLINE_THROW(uint32_t)
9464iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9465 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9466{
9467 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9468 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9469 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9470 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9471 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
9472}
9473
9474
9475#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
9476 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
9477 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
9478
9479/** Emits code for IEM_MC_CALL_CIMPL_5. */
9480DECL_INLINE_THROW(uint32_t)
9481iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
9482 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
9483{
9484 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
9485 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
9486 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
9487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
9488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
9489 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
9490}
9491
9492
9493/** Recompiler debugging: Flush guest register shadow copies. */
9494#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
9495
9496
9497
9498/*********************************************************************************************************************************
9499* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
9500*********************************************************************************************************************************/
9501
9502/**
9503 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
9504 */
9505DECL_INLINE_THROW(uint32_t)
9506iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9507 uintptr_t pfnAImpl, uint8_t cArgs)
9508{
9509 if (idxVarRc != UINT8_MAX)
9510 {
9511 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
9512 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
9513 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
9514 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
9515 }
9516
9517 /*
9518 * Do all the call setup and cleanup.
9519 */
9520 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
9521
9522 /*
9523 * Make the call and update the return code variable if we've got one.
9524 */
9525 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9526 if (idxVarRc != UINT8_MAX)
9527 {
9528off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
9529 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
9530 }
9531
9532 return off;
9533}
9534
9535
9536
9537#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
9538 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
9539
9540#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
9541 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
9542
9543/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
9544DECL_INLINE_THROW(uint32_t)
9545iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
9546{
9547 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
9548}
9549
9550
9551#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
9552 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
9553
9554#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
9555 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
9556
9557/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
9558DECL_INLINE_THROW(uint32_t)
9559iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
9560{
9561 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9562 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
9563}
9564
9565
9566#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
9567 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
9568
9569#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
9570 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
9571
9572/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
9573DECL_INLINE_THROW(uint32_t)
9574iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9575 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9576{
9577 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9578 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9579 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
9580}
9581
9582
9583#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
9584 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
9585
9586#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
9587 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
9588
9589/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
9590DECL_INLINE_THROW(uint32_t)
9591iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9592 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9593{
9594 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9595 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9596 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9597 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
9598}
9599
9600
9601#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
9602 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9603
9604#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
9605 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
9606
9607/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
9608DECL_INLINE_THROW(uint32_t)
9609iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
9610 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
9611{
9612 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
9613 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
9614 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
9615 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
9616 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
9617}
9618
9619
9620
9621/*********************************************************************************************************************************
9622* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
9623*********************************************************************************************************************************/
9624
9625#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
9626 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
9627
9628#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9629 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
9630
9631#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9632 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
9633
9634#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9635 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
9636
9637
9638/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
9639 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
9640DECL_INLINE_THROW(uint32_t)
9641iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
9642{
9643 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9644 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9645 Assert(iGRegEx < 20);
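    /* Note: iGRegEx uses the threaded-function encoding: 0..15 selects the low byte
       of GPR0..GPR15, while 16..19 selects the high byte (AH, CH, DH, BH) of GPR0..GPR3. */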
9646
9647 /* Same discussion as in iemNativeEmitFetchGregU16 */
9648 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9649 kIemNativeGstRegUse_ReadOnly);
9650
9651 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9652 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9653
9654 /* The value is zero-extended to the full 64-bit host register width. */
9655 if (iGRegEx < 16)
9656 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9657 else
9658 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9659
9660 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9661 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9662 return off;
9663}
9664
9665
9666#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
9667 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
9668
9669#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
9670 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
9671
9672#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
9673 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
9674
9675/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
9676DECL_INLINE_THROW(uint32_t)
9677iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
9678{
9679 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9680 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9681 Assert(iGRegEx < 20);
9682
9683 /* Same discussion as in iemNativeEmitFetchGregU16 */
9684 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9685 kIemNativeGstRegUse_ReadOnly);
9686
9687 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9688 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9689
9690 if (iGRegEx < 16)
9691 {
9692 switch (cbSignExtended)
9693 {
9694 case sizeof(uint16_t):
9695 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9696 break;
9697 case sizeof(uint32_t):
9698 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9699 break;
9700 case sizeof(uint64_t):
9701 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
9702 break;
9703 default: AssertFailed(); break;
9704 }
9705 }
9706 else
9707 {
9708 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
9709 switch (cbSignExtended)
9710 {
9711 case sizeof(uint16_t):
9712 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9713 break;
9714 case sizeof(uint32_t):
9715 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9716 break;
9717 case sizeof(uint64_t):
9718 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
9719 break;
9720 default: AssertFailed(); break;
9721 }
9722 }
9723
9724 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9725 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9726 return off;
9727}
9728
9729
9730
9731#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
9732 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
9733
9734#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
9735 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9736
9737#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
9738 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9739
9740/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
9741DECL_INLINE_THROW(uint32_t)
9742iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9743{
9744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9746 Assert(iGReg < 16);
9747
9748 /*
 9749 * We can either just load the low 16 bits of the GPR into a host register
9750 * for the variable, or we can do so via a shadow copy host register. The
9751 * latter will avoid having to reload it if it's being stored later, but
9752 * will waste a host register if it isn't touched again. Since we don't
 9753 * know what's going to happen, we choose the latter for now.
9754 */
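    /* Either way the variable ends up holding the low 16 bits of the guest register,
       zero-extended to the full width of the host register assigned to it. */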
9755 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9756 kIemNativeGstRegUse_ReadOnly);
9757
9758 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9759 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9760 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9761 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9762
9763 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9764 return off;
9765}
9766
9767
9768#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
9769 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
9770
9771#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
9772 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
9773
9774/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
9775DECL_INLINE_THROW(uint32_t)
9776iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
9777{
9778 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9779 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
9780 Assert(iGReg < 16);
9781
9782 /*
 9783 * We can either just load the low 16 bits of the GPR into a host register
9784 * for the variable, or we can do so via a shadow copy host register. The
9785 * latter will avoid having to reload it if it's being stored later, but
9786 * will waste a host register if it isn't touched again. Since we don't
 9787 * know what's going to happen, we choose the latter for now.
9788 */
9789 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9790 kIemNativeGstRegUse_ReadOnly);
9791
9792 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9793 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9794 if (cbSignExtended == sizeof(uint32_t))
9795 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9796 else
9797 {
9798 Assert(cbSignExtended == sizeof(uint64_t));
9799 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
9800 }
9801 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9802
9803 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9804 return off;
9805}
9806
9807
9808#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
9809 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
9810
9811#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
9812 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
9813
9814/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
9815DECL_INLINE_THROW(uint32_t)
9816iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
9817{
9818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
9820 Assert(iGReg < 16);
9821
9822 /*
 9823 * We can either just load the low 32 bits of the GPR into a host register
9824 * for the variable, or we can do so via a shadow copy host register. The
9825 * latter will avoid having to reload it if it's being stored later, but
9826 * will waste a host register if it isn't touched again. Since we don't
 9827 * know what's going to happen, we choose the latter for now.
9828 */
9829 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9830 kIemNativeGstRegUse_ReadOnly);
9831
9832 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9834 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9835 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9836
9837 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9838 return off;
9839}
9840
9841
9842#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
9843 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
9844
9845/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
9846DECL_INLINE_THROW(uint32_t)
9847iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9848{
9849 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9850 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9851 Assert(iGReg < 16);
9852
9853 /*
 9854 * We can either just load the low 32 bits of the GPR into a host register
9855 * for the variable, or we can do so via a shadow copy host register. The
9856 * latter will avoid having to reload it if it's being stored later, but
9857 * will waste a host register if it isn't touched again. Since we don't
 9858 * know what's going to happen, we choose the latter for now.
9859 */
9860 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9861 kIemNativeGstRegUse_ReadOnly);
9862
9863 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9864 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9865 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
9866 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9867
9868 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9869 return off;
9870}
9871
9872
9873#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
9874 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9875
9876#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
9877 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
9878
9879/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
9880 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
9881DECL_INLINE_THROW(uint32_t)
9882iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
9883{
9884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9885 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9886 Assert(iGReg < 16);
9887
9888 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
9889 kIemNativeGstRegUse_ReadOnly);
9890
9891 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9892 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
9894 /** @todo name the register a shadow one already? */
9895 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9896
9897 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
9898 return off;
9899}
9900
9901
9902
9903/*********************************************************************************************************************************
9904* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
9905*********************************************************************************************************************************/
9906
9907#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
9908 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
9909
9910/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
9911DECL_INLINE_THROW(uint32_t)
9912iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
9913{
9914 Assert(iGRegEx < 20);
9915 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
9916 kIemNativeGstRegUse_ForUpdate);
9917#ifdef RT_ARCH_AMD64
9918 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
9919
9920 /* To the lowest byte of the register: mov r8, imm8 */
9921 if (iGRegEx < 16)
9922 {
9923 if (idxGstTmpReg >= 8)
9924 pbCodeBuf[off++] = X86_OP_REX_B;
9925 else if (idxGstTmpReg >= 4)
9926 pbCodeBuf[off++] = X86_OP_REX;
9927 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9928 pbCodeBuf[off++] = u8Value;
9929 }
 9930 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise we rotate. */
9931 else if (idxGstTmpReg < 4)
9932 {
9933 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
9934 pbCodeBuf[off++] = u8Value;
9935 }
9936 else
9937 {
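        /* Bits 15:8 of a register are only directly addressable as AH/CH/DH/BH, i.e.
           when the host register index is 0..3.  For any other host register we rotate
           the value right by 8, write the low byte and rotate it back. */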
9938 /* ror reg64, 8 */
9939 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9940 pbCodeBuf[off++] = 0xc1;
9941 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
9942 pbCodeBuf[off++] = 8;
9943
9944 /* mov reg8, imm8 */
9945 if (idxGstTmpReg >= 8)
9946 pbCodeBuf[off++] = X86_OP_REX_B;
9947 else if (idxGstTmpReg >= 4)
9948 pbCodeBuf[off++] = X86_OP_REX;
9949 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
9950 pbCodeBuf[off++] = u8Value;
9951
9952 /* rol reg64, 8 */
9953 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
9954 pbCodeBuf[off++] = 0xc1;
9955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
9956 pbCodeBuf[off++] = 8;
9957 }
9958
9959#elif defined(RT_ARCH_ARM64)
9960 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
9961 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9962 if (iGRegEx < 16)
9963 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
9964 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
9965 else
9966 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
9967 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
9968 iemNativeRegFreeTmp(pReNative, idxImmReg);
9969
9970#else
9971# error "Port me!"
9972#endif
9973
9974 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9975
9976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
9977
9978 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
9979 return off;
9980}
9981
9982
9983#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
9984 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
9985
9986/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
9987DECL_INLINE_THROW(uint32_t)
9988iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
9989{
9990 Assert(iGRegEx < 20);
9991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
9992
9993 /*
 9994 * If it's a constant value (unlikely), we treat this as an
9995 * IEM_MC_STORE_GREG_U8_CONST statement.
9996 */
9997 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
9998 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
9999 { /* likely */ }
10000 else
10001 {
10002 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10003 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10004 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
10005 }
10006
10007 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10008 kIemNativeGstRegUse_ForUpdate);
10009 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
10010
10011#ifdef RT_ARCH_AMD64
10012 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
10013 if (iGRegEx < 16)
10014 {
10015 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
10016 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
10017 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
10018 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
10019 pbCodeBuf[off++] = X86_OP_REX;
10020 pbCodeBuf[off++] = 0x8a;
10021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
10022 }
10023 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise we rotate. */
10024 else if (idxGstTmpReg < 4 && idxVarReg < 4)
10025 {
10026 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
10027 pbCodeBuf[off++] = 0x8a;
10028 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
10029 }
10030 else
10031 {
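        /* Same high-byte rotate trick as in iemNativeEmitStoreGregU8Const above. */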
10032 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
10033
10034 /* ror reg64, 8 */
10035 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10036 pbCodeBuf[off++] = 0xc1;
10037 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10038 pbCodeBuf[off++] = 8;
10039
10040 /* mov reg8, reg8(r/m) */
10041 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
10042 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
10043 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
10044 pbCodeBuf[off++] = X86_OP_REX;
10045 pbCodeBuf[off++] = 0x8a;
10046 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
10047
10048 /* rol reg64, 8 */
10049 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10050 pbCodeBuf[off++] = 0xc1;
10051 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10052 pbCodeBuf[off++] = 8;
10053 }
10054
10055#elif defined(RT_ARCH_ARM64)
10056 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
10057 or
10058 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
10059 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10060 if (iGRegEx < 16)
10061 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
10062 else
10063 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
10064
10065#else
10066# error "Port me!"
10067#endif
10068 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10069
10070 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10071
10072 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
10073 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10074 return off;
10075}
10076
10077
10078
10079#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
10080 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
10081
10082/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
10085{
10086 Assert(iGReg < 16);
10087 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10088 kIemNativeGstRegUse_ForUpdate);
10089#ifdef RT_ARCH_AMD64
10090 /* mov reg16, imm16 */
10091 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
10092 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10093 if (idxGstTmpReg >= 8)
10094 pbCodeBuf[off++] = X86_OP_REX_B;
10095 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
10096 pbCodeBuf[off++] = RT_BYTE1(uValue);
10097 pbCodeBuf[off++] = RT_BYTE2(uValue);
10098
10099#elif defined(RT_ARCH_ARM64)
10100 /* movk xdst, #uValue, lsl #0 */
10101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10102 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
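    /* MOVK only replaces the 16 bits at the given position and keeps the rest of the
       register, which is exactly the merge-into-bits-15:0 semantics needed here. */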
10103
10104#else
10105# error "Port me!"
10106#endif
10107
10108 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10109
10110 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10111 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10112 return off;
10113}
10114
10115
10116#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
10117 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
10118
10119/** Emits code for IEM_MC_STORE_GREG_U16. */
10120DECL_INLINE_THROW(uint32_t)
10121iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10122{
10123 Assert(iGReg < 16);
10124 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10125
10126 /*
10127 * If it's a constant value (unlikely), we treat this as an
10128 * IEM_MC_STORE_GREG_U16_CONST statement.
10129 */
10130 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10131 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10132 { /* likely */ }
10133 else
10134 {
10135 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10136 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10137 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
10138 }
10139
10140 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10141 kIemNativeGstRegUse_ForUpdate);
10142
10143#ifdef RT_ARCH_AMD64
10144 /* mov reg16, reg16 or [mem16] */
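    /* If the value variable currently lives in a host register we do a plain reg,reg
       move; otherwise we load the 16-bit value straight from its stack slot. */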
10145 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10146 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10147 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10148 {
10149 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
10150 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
10151 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
10152 pbCodeBuf[off++] = 0x8b;
10153 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
10154 }
10155 else
10156 {
10157 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
10158 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10159 if (idxGstTmpReg >= 8)
10160 pbCodeBuf[off++] = X86_OP_REX_R;
10161 pbCodeBuf[off++] = 0x8b;
10162 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
10163 }
10164
10165#elif defined(RT_ARCH_ARM64)
10166 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
10167 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
10168 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10169 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
10170 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10171
10172#else
10173# error "Port me!"
10174#endif
10175
10176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10177
10178 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10179 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10180 return off;
10181}
10182
10183
10184#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
10185 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
10186
10187/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
10188DECL_INLINE_THROW(uint32_t)
10189iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
10190{
10191 Assert(iGReg < 16);
10192 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10193 kIemNativeGstRegUse_ForFullWrite);
10194 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10195 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10196 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10197 return off;
10198}
10199
10200
10201#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
10202 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
10203
10204/** Emits code for IEM_MC_STORE_GREG_U32. */
10205DECL_INLINE_THROW(uint32_t)
10206iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10207{
10208 Assert(iGReg < 16);
10209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10210
10211 /*
10212 * If it's a constant value (unlikely), we treat this as an
10213 * IEM_MC_STORE_GREG_U32_CONST statement.
10214 */
10215 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10216 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10217 { /* likely */ }
10218 else
10219 {
10220 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10221 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10222 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
10223 }
10224
10225 /*
10226 * For the rest we allocate a guest register for the variable and write
10227 * it to the CPUMCTX structure.
10228 */
10229 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10230 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10231#ifdef VBOX_STRICT
10232 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
10233#endif
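    /* A 32-bit GPR write zero-extends into bits 63:32, so the value just stored must
       have a clear upper half; the strict check above asserts exactly that. */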
10234 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10235 return off;
10236}
10237
10238
10239#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
10240 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
10241
10242/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
10243DECL_INLINE_THROW(uint32_t)
10244iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
10245{
10246 Assert(iGReg < 16);
10247 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10248 kIemNativeGstRegUse_ForFullWrite);
10249 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
10250 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10251 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10252 return off;
10253}
10254
10255
10256#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
10257 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
10258
10259/** Emits code for IEM_MC_STORE_GREG_U64. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
10262{
10263 Assert(iGReg < 16);
10264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
10265
10266 /*
10267 * If it's a constant value (unlikely), we treat this as an
10268 * IEM_MC_STORE_GREG_U64_CONST statement.
10269 */
10270 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
10271 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
10272 { /* likely */ }
10273 else
10274 {
10275 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
10276 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
10277 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
10278 }
10279
10280 /*
10281 * For the rest we allocate a guest register for the variable and write
10282 * it to the CPUMCTX structure.
10283 */
10284 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
10285 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10286 iemNativeVarRegisterRelease(pReNative, idxValueVar);
10287 return off;
10288}
10289
10290
10291#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
10292 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
10293
10294/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
10295DECL_INLINE_THROW(uint32_t)
10296iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
10297{
10298 Assert(iGReg < 16);
10299 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10300 kIemNativeGstRegUse_ForUpdate);
10301 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
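    /* A 32-bit move of the register onto itself zero-extends it, i.e. it clears
       bits 63:32 while leaving the low dword unchanged. */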
10302 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10303 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10304 return off;
10305}
10306
10307
10308/*********************************************************************************************************************************
10309* General purpose register manipulation (add, sub). *
10310*********************************************************************************************************************************/
10311
10312#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
10313 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
10314
10315/** Emits code for IEM_MC_ADD_GREG_U16. */
10316DECL_INLINE_THROW(uint32_t)
10317iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
10318{
10319 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10320 kIemNativeGstRegUse_ForUpdate);
10321
10322#ifdef RT_ARCH_AMD64
10323 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10324 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10325 if (idxGstTmpReg >= 8)
10326 pbCodeBuf[off++] = X86_OP_REX_B;
10327 if (uAddend == 1)
10328 {
10329 pbCodeBuf[off++] = 0xff; /* inc */
10330 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10331 }
10332 else
10333 {
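        /* add r16, imm16: opcode 0x81 /0 with a two byte immediate thanks to the
           operand-size prefix emitted above. */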
10334 pbCodeBuf[off++] = 0x81;
10335 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10336 pbCodeBuf[off++] = uAddend;
10337 pbCodeBuf[off++] = 0;
10338 }
10339
10340#else
10341 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10342 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10343
10344 /* add tmp, gstgrp, uAddend */
10345 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
10346
10347 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
10348 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
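    /* Only bits 15:0 of the sum are merged back, so the upper bits of the guest
       register stay untouched as required for 16-bit operand size. */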
10349
10350 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10351#endif
10352
10353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10354
10355 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10356
10357 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10358 return off;
10359}
10360
10361
10362#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
10363 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10364
10365#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
10366 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10367
10368/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
10369DECL_INLINE_THROW(uint32_t)
10370iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
10371{
10372 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10373 kIemNativeGstRegUse_ForUpdate);
10374
10375#ifdef RT_ARCH_AMD64
10376 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10377 if (f64Bit)
10378 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10379 else if (idxGstTmpReg >= 8)
10380 pbCodeBuf[off++] = X86_OP_REX_B;
10381 if (uAddend == 1)
10382 {
10383 pbCodeBuf[off++] = 0xff; /* inc */
10384 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10385 }
10386 else if (uAddend < 128)
10387 {
10388 pbCodeBuf[off++] = 0x83; /* add */
10389 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10390 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10391 }
10392 else
10393 {
10394 pbCodeBuf[off++] = 0x81; /* add */
10395 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10396 pbCodeBuf[off++] = RT_BYTE1(uAddend);
10397 pbCodeBuf[off++] = 0;
10398 pbCodeBuf[off++] = 0;
10399 pbCodeBuf[off++] = 0;
10400 }
10401
10402#else
10403 /* add gstgrp, gstgrp, uAddend */
10404 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10405 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
10406
10407#endif
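    /* For the 32-bit variant both code sequences zero-extend the result into
       bits 63:32, matching the guest behaviour of 32-bit GPR writes. */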
10408
10409 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10410
10411 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10412
10413 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10414 return off;
10415}
10416
10417
10418
10419#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
10420 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
10421
10422/** Emits code for IEM_MC_SUB_GREG_U16. */
10423DECL_INLINE_THROW(uint32_t)
10424iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
10425{
10426 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10427 kIemNativeGstRegUse_ForUpdate);
10428
10429#ifdef RT_ARCH_AMD64
10430 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
10431 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10432 if (idxGstTmpReg >= 8)
10433 pbCodeBuf[off++] = X86_OP_REX_B;
10434 if (uSubtrahend == 1)
10435 {
10436 pbCodeBuf[off++] = 0xff; /* dec */
10437 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10438 }
10439 else
10440 {
10441 pbCodeBuf[off++] = 0x81;
10442 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10443 pbCodeBuf[off++] = uSubtrahend;
10444 pbCodeBuf[off++] = 0;
10445 }
10446
10447#else
10448 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
10449 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10450
10451 /* sub tmp, gstgrp, uSubtrahend */
10452 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
10453
10454 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
10455 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
10456
10457 iemNativeRegFreeTmp(pReNative, idxTmpReg);
10458#endif
10459
10460 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10461
10462 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10463
10464 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10465 return off;
10466}
10467
10468
10469#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
10470 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
10471
10472#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
10473 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
10474
10475/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
10476DECL_INLINE_THROW(uint32_t)
10477iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
10478{
10479 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10480 kIemNativeGstRegUse_ForUpdate);
10481
10482#ifdef RT_ARCH_AMD64
10483 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
10484 if (f64Bit)
10485 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
10486 else if (idxGstTmpReg >= 8)
10487 pbCodeBuf[off++] = X86_OP_REX_B;
10488 if (uSubtrahend == 1)
10489 {
10490 pbCodeBuf[off++] = 0xff; /* dec */
10491 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10492 }
10493 else if (uSubtrahend < 128)
10494 {
10495 pbCodeBuf[off++] = 0x83; /* sub */
10496 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10497 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10498 }
10499 else
10500 {
10501 pbCodeBuf[off++] = 0x81; /* sub */
10502 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
10503 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
10504 pbCodeBuf[off++] = 0;
10505 pbCodeBuf[off++] = 0;
10506 pbCodeBuf[off++] = 0;
10507 }
10508
10509#else
10510 /* sub tmp, gstgrp, uSubtrahend */
10511 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
10512 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
10513
10514#endif
10515
10516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10517
10518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
10519
10520 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10521 return off;
10522}
10523
10524
10525/*********************************************************************************************************************************
10526* Local variable manipulation (add, sub, and, or). *
10527*********************************************************************************************************************************/
10528
10529#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
10530 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10531
10532#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
10533 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10534
10535#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
10536 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10537
10538#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
10539 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10540
10541/** Emits code for AND'ing a local and a constant value. */
10542DECL_INLINE_THROW(uint32_t)
10543iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10544{
10545#ifdef VBOX_STRICT
10546 switch (cbMask)
10547 {
10548 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10549 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10550 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10551 case sizeof(uint64_t): break;
10552 default: AssertFailedBreak();
10553 }
10554#endif
10555
10556 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10557 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10558
10559 if (cbMask <= sizeof(uint32_t))
10560 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
10561 else
10562 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
10563
10564 iemNativeVarRegisterRelease(pReNative, idxVar);
10565 return off;
10566}
10567
10568
10569#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
10570 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
10571
10572#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
10573 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
10574
10575#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
10576 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
10577
10578#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
10579 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
10580
10581/** Emits code for OR'ing a local and a constant value. */
10582DECL_INLINE_THROW(uint32_t)
10583iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
10584{
10585#ifdef VBOX_STRICT
10586 switch (cbMask)
10587 {
10588 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
10589 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
10590 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
10591 case sizeof(uint64_t): break;
10592 default: AssertFailedBreak();
10593 }
10594#endif
10595
10596 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10597 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
10598
10599 if (cbMask <= sizeof(uint32_t))
10600 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
10601 else
10602 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
10603
10604 iemNativeVarRegisterRelease(pReNative, idxVar);
10605 return off;
10606}
10607
10608
10609#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
10610 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
10611
10612#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
10613 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
10614
10615#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
10616 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
10617
10618/** Emits code for reversing the byte order in a local value. */
10619DECL_INLINE_THROW(uint32_t)
10620iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
10621{
10622 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
10623 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
10624
10625 switch (cbLocal)
10626 {
10627 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
10628 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
10629 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
10630 default: AssertFailedBreak();
10631 }
10632
10633 iemNativeVarRegisterRelease(pReNative, idxVar);
10634 return off;
10635}
10636
10637
10638
10639/*********************************************************************************************************************************
10640* EFLAGS *
10641*********************************************************************************************************************************/
10642
10643#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10644# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
10645#else
10646# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
10647 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
10648
10649DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
10650{
10651 if (fEflOutput)
10652 {
10653 PVMCPUCC const pVCpu = pReNative->pVCpu;
10654# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10655 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
10656 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
10657 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
10658# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10659 if (fEflOutput & (a_fEfl)) \
10660 { \
10661 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
10662 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10663 else \
10664 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10665 } else do { } while (0)
10666# else
10667 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
10668 IEMLIVENESSBIT const LivenessClobbered =
10669 {
10670 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10671 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10672 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10673 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10674 };
10675 IEMLIVENESSBIT const LivenessDelayable =
10676 {
10677 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
10678 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
10679 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
10680 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
10681 };
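        /* Rough classification: flags that are written without being read again are
           skippable; flags whose only other use is a potential exception or call could
           have their calculation delayed; everything else is required. */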
10682# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
10683 if (fEflOutput & (a_fEfl)) \
10684 { \
10685 if (LivenessClobbered.a_fLivenessMember) \
10686 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
10687 else if (LivenessDelayable.a_fLivenessMember) \
10688 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
10689 else \
10690 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
10691 } else do { } while (0)
10692# endif
10693 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
10694 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
10695 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
10696 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
10697 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
10698 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
10699 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
10700# undef CHECK_FLAG_AND_UPDATE_STATS
10701 }
10702 RT_NOREF(fEflInput);
10703}
10704#endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
10705
10706#undef IEM_MC_FETCH_EFLAGS /* should not be used */
10707#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10708 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
10709
10710/** Handles IEM_MC_FETCH_EFLAGS_EX. */
10711DECL_INLINE_THROW(uint32_t)
10712iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
10713 uint32_t fEflInput, uint32_t fEflOutput)
10714{
10715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
10716 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10717 RT_NOREF(fEflInput, fEflOutput);
10718
10719#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10720# ifdef VBOX_STRICT
10721 if ( pReNative->idxCurCall != 0
10722 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
10723 {
10724 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
10725 uint32_t const fBoth = fEflInput | fEflOutput;
10726# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
10727 AssertMsg( !(fBoth & (a_fEflConst)) \
10728 || (!(fEflInput & (a_fEflConst)) \
10729 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10730 : !(fEflOutput & (a_fEflConst)) \
10731 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
10732 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
10733 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
10734 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
10735 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
10736 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
10737 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
10738 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
10739 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
10740 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
10741# undef ASSERT_ONE_EFL
10742 }
10743# endif
10744#endif
10745
10746 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
10747 * the existing shadow copy. */
10748 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
10749 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10750 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
10751 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10752 return off;
10753}
10754
10755
10756
10757/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
10758 * start using it with custom native code emission (inlining assembly
10759 * instruction helpers). */
10760#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
10761#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
10762 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10763 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
10764
10765/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
10766DECL_INLINE_THROW(uint32_t)
10767iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
10768{
10769 RT_NOREF(fEflOutput);
10770 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
10771 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
10772
10773#ifdef VBOX_STRICT
10774 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
10775 uint32_t offFixup = off;
10776 off = iemNativeEmitJnzToFixed(pReNative, off, off);
10777 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
10778 iemNativeFixupFixedJump(pReNative, offFixup, off);
10779
10780 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
10781 offFixup = off;
10782 off = iemNativeEmitJzToFixed(pReNative, off, off);
10783 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
10784 iemNativeFixupFixedJump(pReNative, offFixup, off);
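    /* The two checks above assert that the reserved-must-be-one bit (bit 1) is set and
       that no reserved-must-be-zero bits are set, breaking (0x2001/0x2002) if the
       EFLAGS value being committed looks bogus. */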
10785
10786 /** @todo validate that only bits in the fEflOutput mask changed. */
10787#endif
10788
10789 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
10790 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
10791 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
10792 return off;
10793}
10794
10795
10796
10797/*********************************************************************************************************************************
10798* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
10799*********************************************************************************************************************************/
10800
10801#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
10802 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
10803
10804#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
10805 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
10806
10807#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
10808 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
10809
10810
10811/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
10812 * IEM_MC_FETCH_SREG_ZX_U64. */
10813DECL_INLINE_THROW(uint32_t)
10814iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
10815{
10816 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
10818 Assert(iSReg < X86_SREG_COUNT);
10819
10820 /*
10821 * For now, we will not create a shadow copy of a selector. The rationale
10822 * is that since we do not recompile the popping and loading of segment
10823 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
10824 * pushing and moving to registers, there is only a small chance that the
10825 * shadow copy will be accessed again before the register is reloaded. One
10826 * scenario would be nested calls in 16-bit code, but I doubt it's worth
10827 * the extra register pressure atm.
10828 *
10829 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
10830 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
10831 * store scenario covered at present (r160730).
10832 */
10833 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10834 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10835 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
10836 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10837 return off;
10838}
10839
10840
10841
10842/*********************************************************************************************************************************
10843* Register references. *
10844*********************************************************************************************************************************/
10845
10846#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
10847 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
10848
10849#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
10850 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
10851
10852/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
10853DECL_INLINE_THROW(uint32_t)
10854iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
10855{
10856 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
10857 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10858 Assert(iGRegEx < 20);
10859
10860 if (iGRegEx < 16)
10861 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10862 else
10863 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
10864
10865 /* If we've delayed writing back the register value, flush it now. */
10866 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
10867
10868 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10869 if (!fConst)
10870 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
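    /* (The caller may modify the guest register through the reference, which would
       leave any host register shadow copy stale.) */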
10871
10872 return off;
10873}
10874
10875#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
10876 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
10877
10878#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
10879 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
10880
10881#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
10882 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
10883
10884#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
10885 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
10886
10887#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
10888 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
10889
10890#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
10891 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
10892
10893#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
10894 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
10895
10896#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
10897 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
10898
10899#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
10900 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
10901
10902#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
10903 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
10904
10905/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
10906DECL_INLINE_THROW(uint32_t)
10907iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
10908{
10909 Assert(iGReg < 16);
10910 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
10911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10912
10913 /* If we've delayed writing back the register value, flush it now. */
10914 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
10915
10916 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10917 if (!fConst)
10918 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
10919
10920 return off;
10921}
10922
10923
10924#undef IEM_MC_REF_EFLAGS /* should not be used. */
10925#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
10926 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
10927 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
10928
10929/** Handles IEM_MC_REF_EFLAGS. */
10930DECL_INLINE_THROW(uint32_t)
10931iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10932{
10933 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
10934 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10935
10936 /* If we've delayed writing back the register value, flush it now. */
10937 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
10938
10939 /* If there is a shadow copy of guest EFLAGS, flush it now. */
10940 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
10941
10942 return off;
10943}
10944
10945
10946/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
10947 * different code from the threaded recompiler, maybe it would be helpful. For now
10948 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
10949#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
10950
10951
10952#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
10953 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
10954
10955#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
10956 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
10957
10958#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
10959 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
10960
10961/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
10962DECL_INLINE_THROW(uint32_t)
10963iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
10964{
10965 Assert(iXReg < 16);
10966 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
10967 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10968
10969 /* If we've delayed writing back the register value, flush it now. */
10970 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
10971
10972 /** @todo r=aeichner This needs to be done as soon as we shadow SSE registers in host registers; we still
10973 * need to figure out the semantics of how this is tracked.
10974 * For now this is safe though as the reference will directly operate on the CPUMCTX
10975 * structure so the value can't get out of sync.
10976 */
10977#if 0
10978 /* If it's not a const reference we need to flush the shadow copy of the register now. */
10979 if (!fConst)
10980 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_XREG(iXReg)));
10981#else
10982 RT_NOREF(fConst);
10983#endif
10984
10985 return off;
10986}
10987
10988
10989#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
10990 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
10991
10992/** Handles IEM_MC_REF_MXCSR. */
10993DECL_INLINE_THROW(uint32_t)
10994iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
10995{
10996 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
10997 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
10998
10999 /* If we've delayed writing back the register value, flush it now. */
11000 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
11001
11002 /* If there is a shadow copy of guest MXCSR, flush it now. */
11003 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
11004
11005 return off;
11006}
11007
11008
11009
11010/*********************************************************************************************************************************
11011* Effective Address Calculation *
11012*********************************************************************************************************************************/
11013#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
11014 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
11015
11016/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
11017 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
11018DECL_INLINE_THROW(uint32_t)
11019iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
11020 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
11021{
11022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11023
11024 /*
11025 * Handle the disp16 form with no registers first.
11026 *
11027 * Convert to an immediate value, as that'll delay the register allocation
11028 * and assignment till the memory access / call / whatever and we can use
11029 * a more appropriate register (or none at all).
11030 */
11031 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
11032 {
11033 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
11034 return off;
11035 }
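    /* Example: bRm=0x06 (mod=0, rm=6) with u16Disp=0x1234 is the pure [disp16]
       form, so the result variable simply becomes the constant 0x1234 and no
       code needs to be emitted here. */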
11036
11037 /* Determine the displacement. */
11038 uint16_t u16EffAddr;
11039 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11040 {
11041 case 0: u16EffAddr = 0; break;
11042 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
11043 case 2: u16EffAddr = u16Disp; break;
11044 default: AssertFailedStmt(u16EffAddr = 0);
11045 }
11046
11047 /* Determine the registers involved. */
11048 uint8_t idxGstRegBase;
11049 uint8_t idxGstRegIndex;
11050 switch (bRm & X86_MODRM_RM_MASK)
11051 {
11052 case 0:
11053 idxGstRegBase = X86_GREG_xBX;
11054 idxGstRegIndex = X86_GREG_xSI;
11055 break;
11056 case 1:
11057 idxGstRegBase = X86_GREG_xBX;
11058 idxGstRegIndex = X86_GREG_xDI;
11059 break;
11060 case 2:
11061 idxGstRegBase = X86_GREG_xBP;
11062 idxGstRegIndex = X86_GREG_xSI;
11063 break;
11064 case 3:
11065 idxGstRegBase = X86_GREG_xBP;
11066 idxGstRegIndex = X86_GREG_xDI;
11067 break;
11068 case 4:
11069 idxGstRegBase = X86_GREG_xSI;
11070 idxGstRegIndex = UINT8_MAX;
11071 break;
11072 case 5:
11073 idxGstRegBase = X86_GREG_xDI;
11074 idxGstRegIndex = UINT8_MAX;
11075 break;
11076 case 6:
11077 idxGstRegBase = X86_GREG_xBP;
11078 idxGstRegIndex = UINT8_MAX;
11079 break;
11080#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
11081 default:
11082#endif
11083 case 7:
11084 idxGstRegBase = X86_GREG_xBX;
11085 idxGstRegIndex = UINT8_MAX;
11086 break;
11087 }
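    /* The table above is the classic 16-bit ModR/M encoding: rm=0..3 select the
       BX+SI, BX+DI, BP+SI and BP+DI pairs, while rm=4..7 select SI, DI, BP and BX
       alone (rm=6 with mod=0 was already handled as the pure disp16 form). */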
11088
11089 /*
11090 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
11091 */
11092 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11093 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11094 kIemNativeGstRegUse_ReadOnly);
11095 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
11096 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11097 kIemNativeGstRegUse_ReadOnly)
11098 : UINT8_MAX;
11099#ifdef RT_ARCH_AMD64
11100 if (idxRegIndex == UINT8_MAX)
11101 {
11102 if (u16EffAddr == 0)
11103 {
11104 /* movxz ret, base */
11105 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
11106 }
11107 else
11108 {
11109 /* lea ret32, [base64 + disp32] */
11110 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11111 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11112 if (idxRegRet >= 8 || idxRegBase >= 8)
11113 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11114 pbCodeBuf[off++] = 0x8d;
11115 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11116 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
11117 else
11118 {
11119 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
11120 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11121 }
11122 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11123 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11124 pbCodeBuf[off++] = 0;
11125 pbCodeBuf[off++] = 0;
11126 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11127
11128 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11129 }
11130 }
11131 else
11132 {
11133 /* lea ret32, [index64 + base64 (+ disp32)] */
11134 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11135 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11136 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11137 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11138 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11139 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11140 pbCodeBuf[off++] = 0x8d;
11141 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
11142 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11143 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
11144 if (bMod == X86_MOD_MEM4)
11145 {
11146 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
11147 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
11148 pbCodeBuf[off++] = 0;
11149 pbCodeBuf[off++] = 0;
11150 }
11151 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11152 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
11153 }
11154
11155#elif defined(RT_ARCH_ARM64)
11156 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11157 if (u16EffAddr == 0)
11158 {
11159 if (idxRegIndex == UINT8_MAX)
11160 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
11161 else
11162 {
11163 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
11164 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11165 }
11166 }
11167 else
11168 {
11169 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
11170 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
11171 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
11172 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11173 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
11174 else
11175 {
11176 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
11177 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11178 }
11179 if (idxRegIndex != UINT8_MAX)
11180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
11181 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
11182 }
11183
11184#else
11185# error "port me"
11186#endif
11187
11188 if (idxRegIndex != UINT8_MAX)
11189 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11190 iemNativeRegFreeTmp(pReNative, idxRegBase);
11191 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11192 return off;
11193}
11194
11195
11196#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
11197 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
11198
11199/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
11200 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
11201DECL_INLINE_THROW(uint32_t)
11202iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
11203 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
11204{
11205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11206
11207 /*
11208 * Handle the disp32 form with no registers first.
11209 *
11210 * Convert to an immediate value, as that'll delay the register allocation
11211 * and assignment till the memory access / call / whatever and we can use
11212 * a more appropriate register (or none at all).
11213 */
11214 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11215 {
11216 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
11217 return off;
11218 }
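    /* Example: bRm=0x05 (mod=0, rm=5) is the pure [disp32] form, so the result
       becomes a constant and no code is emitted here either. */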
11219
11220 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
11221 uint32_t u32EffAddr = 0;
11222 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11223 {
11224 case 0: break;
11225 case 1: u32EffAddr = (int8_t)u32Disp; break;
11226 case 2: u32EffAddr = u32Disp; break;
11227 default: AssertFailed();
11228 }
11229
11230 /* Get the register (or SIB) value. */
11231 uint8_t idxGstRegBase = UINT8_MAX;
11232 uint8_t idxGstRegIndex = UINT8_MAX;
11233 uint8_t cShiftIndex = 0;
11234 switch (bRm & X86_MODRM_RM_MASK)
11235 {
11236 case 0: idxGstRegBase = X86_GREG_xAX; break;
11237 case 1: idxGstRegBase = X86_GREG_xCX; break;
11238 case 2: idxGstRegBase = X86_GREG_xDX; break;
11239 case 3: idxGstRegBase = X86_GREG_xBX; break;
11240 case 4: /* SIB */
11241 {
11242 /* index w/ scaling. */
11243 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11244 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11245 {
11246 case 0: idxGstRegIndex = X86_GREG_xAX; break;
11247 case 1: idxGstRegIndex = X86_GREG_xCX; break;
11248 case 2: idxGstRegIndex = X86_GREG_xDX; break;
11249 case 3: idxGstRegIndex = X86_GREG_xBX; break;
11250 case 4: cShiftIndex = 0; /*no index*/ break;
11251 case 5: idxGstRegIndex = X86_GREG_xBP; break;
11252 case 6: idxGstRegIndex = X86_GREG_xSI; break;
11253 case 7: idxGstRegIndex = X86_GREG_xDI; break;
11254 }
11255
11256 /* base */
11257 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
11258 {
11259 case 0: idxGstRegBase = X86_GREG_xAX; break;
11260 case 1: idxGstRegBase = X86_GREG_xCX; break;
11261 case 2: idxGstRegBase = X86_GREG_xDX; break;
11262 case 3: idxGstRegBase = X86_GREG_xBX; break;
11263 case 4:
11264 idxGstRegBase = X86_GREG_xSP;
11265 u32EffAddr += uSibAndRspOffset >> 8;
11266 break;
11267 case 5:
11268 if ((bRm & X86_MODRM_MOD_MASK) != 0)
11269 idxGstRegBase = X86_GREG_xBP;
11270 else
11271 {
11272 Assert(u32EffAddr == 0);
11273 u32EffAddr = u32Disp;
11274 }
11275 break;
11276 case 6: idxGstRegBase = X86_GREG_xSI; break;
11277 case 7: idxGstRegBase = X86_GREG_xDI; break;
11278 }
11279 break;
11280 }
11281 case 5: idxGstRegBase = X86_GREG_xBP; break;
11282 case 6: idxGstRegBase = X86_GREG_xSI; break;
11283 case 7: idxGstRegBase = X86_GREG_xDI; break;
11284 }
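    /* SIB decoding example: a SIB byte of 0x98 gives scale=2 (index shifted left
       by 2, i.e. *4), index=3 (EBX) and base=0 (EAX), so the effective address
       works out to EAX + EBX*4 + disp. */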
11285
11286 /*
11287 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11288 * the start of the function.
11289 */
11290 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11291 {
11292 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
11293 return off;
11294 }
11295
11296 /*
11297 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11298 */
11299 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11300 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11301 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11302 kIemNativeGstRegUse_ReadOnly);
11303 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11304 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11305 kIemNativeGstRegUse_ReadOnly);
11306
11307 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11308 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11309 {
11310 idxRegBase = idxRegIndex;
11311 idxRegIndex = UINT8_MAX;
11312 }
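    /* With no base register and no scaling, the index register behaves exactly like
       a base register, so the swap lets the simpler base-only paths below handle it. */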
11313
11314#ifdef RT_ARCH_AMD64
11315 if (idxRegIndex == UINT8_MAX)
11316 {
11317 if (u32EffAddr == 0)
11318 {
11319 /* mov ret, base */
11320 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11321 }
11322 else
11323 {
11324 /* lea ret32, [base64 + disp32] */
11325 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11326 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11327 if (idxRegRet >= 8 || idxRegBase >= 8)
11328 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
11329 pbCodeBuf[off++] = 0x8d;
11330 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11331 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11332 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11333 else
11334 {
11335 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11336 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11337 }
11338 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11339 if (bMod == X86_MOD_MEM4)
11340 {
11341 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11342 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11343 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11344 }
11345 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11346 }
11347 }
11348 else
11349 {
11350 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11351 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11352 if (idxRegBase == UINT8_MAX)
11353 {
11354 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
11355 if (idxRegRet >= 8 || idxRegIndex >= 8)
11356 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11357 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11358 pbCodeBuf[off++] = 0x8d;
11359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11360 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11361 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11362 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11363 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11364 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11365 }
11366 else
11367 {
11368 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11369 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11370 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11371 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11372 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
11373 pbCodeBuf[off++] = 0x8d;
11374 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11375 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11376 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11377 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11378 if (bMod != X86_MOD_MEM0)
11379 {
11380 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11381 if (bMod == X86_MOD_MEM4)
11382 {
11383 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11384 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11385 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11386 }
11387 }
11388 }
11389 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11390 }
11391
11392#elif defined(RT_ARCH_ARM64)
11393 if (u32EffAddr == 0)
11394 {
11395 if (idxRegIndex == UINT8_MAX)
11396 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11397 else if (idxRegBase == UINT8_MAX)
11398 {
11399 if (cShiftIndex == 0)
11400 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
11401 else
11402 {
11403 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11404 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
11405 }
11406 }
11407 else
11408 {
11409 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11410 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11411 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11412 }
11413 }
11414 else
11415 {
11416 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
11417 {
11418 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11419 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
11420 }
11421 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
11422 {
11423 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11424 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
11425 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
11426 }
11427 else
11428 {
11429 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
11430 if (idxRegBase != UINT8_MAX)
11431 {
11432 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11433 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
11434 }
11435 }
11436 if (idxRegIndex != UINT8_MAX)
11437 {
11438 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11439 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11440 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
11441 }
11442 }
11443
11444#else
11445# error "port me"
11446#endif
11447
11448 if (idxRegIndex != UINT8_MAX)
11449 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11450 if (idxRegBase != UINT8_MAX)
11451 iemNativeRegFreeTmp(pReNative, idxRegBase);
11452 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11453 return off;
11454}
11455
11456
11457#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11458 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11459 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11460
11461#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11462 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11463 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
11464
11465#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
11466 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
11467 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
11468
11469/**
11470 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
11471 *
11472 * @returns New off.
11473 * @param pReNative The native recompile state.
11474 * @param off The code buffer offset.
11475 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
11476 * bit 4 to REX.X. The two bits are part of the
11477 * REG sub-field, which isn't needed in this
11478 * function.
11479 * @param uSibAndRspOffset Two parts:
11480 * - The first 8 bits make up the SIB byte.
11481 * - The next 8 bits are the fixed RSP/ESP offset
11482 * in case of a pop [xSP].
11483 * @param u32Disp The displacement byte/word/dword, if any.
11484 * @param cbInstr The size of the fully decoded instruction. Used
11485 * for RIP relative addressing.
11486 * @param idxVarRet The result variable number.
11487 * @param f64Bit Whether to use a 64-bit or 32-bit address size
11488 * when calculating the address.
11489 *
11490 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
11491 */
11492DECL_INLINE_THROW(uint32_t)
11493iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
11494 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
11495{
11496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
11497
11498 /*
11499 * Special case the rip + disp32 form first.
11500 */
11501 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
11502 {
11503#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
11504 /* Need to take the current PC offset into account for the displacement; no need to flush here
11505 * as the PC is only read and no branching or helper calls are involved. */
11506 u32Disp += pReNative->Core.offPc;
11507#endif
11508
11509 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11510 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
11511 kIemNativeGstRegUse_ReadOnly);
11512#ifdef RT_ARCH_AMD64
11513 if (f64Bit)
11514 {
11515 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
11516 if ((int32_t)offFinalDisp == offFinalDisp)
11517 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
11518 else
11519 {
11520 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
11521 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
11522 }
11523 }
11524 else
11525 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
11526
11527#elif defined(RT_ARCH_ARM64)
11528 if (f64Bit)
11529 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11530 (int64_t)(int32_t)u32Disp + cbInstr);
11531 else
11532 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
11533 (int32_t)u32Disp + cbInstr);
11534
11535#else
11536# error "Port me!"
11537#endif
11538 iemNativeRegFreeTmp(pReNative, idxRegPc);
11539 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11540 return off;
11541 }
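    /* Note: RIP relative addressing is relative to the *next* instruction, which is
       why cbInstr (the full instruction length) is added to the PC value above. */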
11542
11543 /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
11544 int64_t i64EffAddr = 0;
11545 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
11546 {
11547 case 0: break;
11548 case 1: i64EffAddr = (int8_t)u32Disp; break;
11549 case 2: i64EffAddr = (int32_t)u32Disp; break;
11550 default: AssertFailed();
11551 }
11552
11553 /* Get the register (or SIB) value. */
11554 uint8_t idxGstRegBase = UINT8_MAX;
11555 uint8_t idxGstRegIndex = UINT8_MAX;
11556 uint8_t cShiftIndex = 0;
11557 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
11558 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
11559 else /* SIB: */
11560 {
11561 /* index w/ scaling. */
11562 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
11563 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
11564 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
11565 if (idxGstRegIndex == 4)
11566 {
11567 /* no index */
11568 cShiftIndex = 0;
11569 idxGstRegIndex = UINT8_MAX;
11570 }
11571
11572 /* base */
11573 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
11574 if (idxGstRegBase == 4)
11575 {
11576 /* pop [rsp] hack */
11577 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
11578 }
11579 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
11580 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
11581 {
11582 /* mod=0 and base=5 -> disp32, no base reg. */
11583 Assert(i64EffAddr == 0);
11584 i64EffAddr = (int32_t)u32Disp;
11585 idxGstRegBase = UINT8_MAX;
11586 }
11587 }
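    /* Example: bRmEx=0x0c (rm=4 -> SIB, REX.B set) with a SIB base field of 1 yields
       idxGstRegBase = 1 | 8 = 9, i.e. R9; REX.X (bRmEx bit 4) extends the SIB index
       field in the same way. */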
11588
11589 /*
11590 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
11591 * the start of the function.
11592 */
11593 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
11594 {
11595 if (f64Bit)
11596 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
11597 else
11598 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
11599 return off;
11600 }
11601
11602 /*
11603 * Now emit code that calculates:
11604 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11605 * or if !f64Bit:
11606 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
11607 */
11608 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
11609 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
11610 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
11611 kIemNativeGstRegUse_ReadOnly);
11612 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
11613 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
11614 kIemNativeGstRegUse_ReadOnly);
11615
11616 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
11617 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
11618 {
11619 idxRegBase = idxRegIndex;
11620 idxRegIndex = UINT8_MAX;
11621 }
11622
11623#ifdef RT_ARCH_AMD64
11624 uint8_t bFinalAdj;
11625 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
11626 bFinalAdj = 0; /* likely */
11627 else
11628 {
11629 /* pop [rsp] with a problematic disp32 value. Split out the
11630 RSP offset and add it separately afterwards (bFinalAdj). */
11631 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
11632 Assert(idxGstRegBase == X86_GREG_xSP);
11633 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
11634 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
11635 Assert(bFinalAdj != 0);
11636 i64EffAddr -= bFinalAdj;
11637 Assert((int32_t)i64EffAddr == i64EffAddr);
11638 }
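    /* I.e. for the pop [rsp] case the extra RSP offset can push the displacement outside
       the signed 32-bit range that LEA can encode, so it is subtracted here and added
       back as a separate instruction (bFinalAdj) after the LEA below. */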
11639 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
11640//pReNative->pInstrBuf[off++] = 0xcc;
11641
11642 if (idxRegIndex == UINT8_MAX)
11643 {
11644 if (u32EffAddr == 0)
11645 {
11646 /* mov ret, base */
11647 if (f64Bit)
11648 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
11649 else
11650 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
11651 }
11652 else
11653 {
11654 /* lea ret, [base + disp32] */
11655 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
11656 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11657 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
11658 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11659 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11660 | (f64Bit ? X86_OP_REX_W : 0);
11661 pbCodeBuf[off++] = 0x8d;
11662 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11663 if (idxRegBase != X86_GREG_x12 /*SIB*/)
11664 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
11665 else
11666 {
11667 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11668 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
11669 }
11670 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11671 if (bMod == X86_MOD_MEM4)
11672 {
11673 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11674 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11675 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11676 }
11677 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11678 }
11679 }
11680 else
11681 {
11682 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
11683 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
11684 if (idxRegBase == UINT8_MAX)
11685 {
11686 /* lea ret, [(index64 << cShiftIndex) + disp32] */
11687 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
11688 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11689 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11690 | (f64Bit ? X86_OP_REX_W : 0);
11691 pbCodeBuf[off++] = 0x8d;
11692 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
11693 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
11694 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11695 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11696 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11697 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11698 }
11699 else
11700 {
11701 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
11702 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
11703 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
11704 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
11705 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
11706 | (f64Bit ? X86_OP_REX_W : 0);
11707 pbCodeBuf[off++] = 0x8d;
11708 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
11709 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
11710 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
11711 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
11712 if (bMod != X86_MOD_MEM0)
11713 {
11714 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
11715 if (bMod == X86_MOD_MEM4)
11716 {
11717 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
11718 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
11719 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
11720 }
11721 }
11722 }
11723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11724 }
11725
11726 if (!bFinalAdj)
11727 { /* likely */ }
11728 else
11729 {
11730 Assert(f64Bit);
11731 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
11732 }
11733
11734#elif defined(RT_ARCH_ARM64)
11735 if (i64EffAddr == 0)
11736 {
11737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11738 if (idxRegIndex == UINT8_MAX)
11739 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
11740 else if (idxRegBase != UINT8_MAX)
11741 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
11742 f64Bit, false /*fSetFlags*/, cShiftIndex);
11743 else
11744 {
11745 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
11746 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
11747 }
11748 }
11749 else
11750 {
11751 if (f64Bit)
11752 { /* likely */ }
11753 else
11754 i64EffAddr = (int32_t)i64EffAddr;
11755
11756 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
11757 {
11758 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11759 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
11760 }
11761 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
11762 {
11763 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11764 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
11765 }
11766 else
11767 {
11768 if (f64Bit)
11769 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
11770 else
11771 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
11772 if (idxRegBase != UINT8_MAX)
11773 {
11774 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11775 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
11776 }
11777 }
11778 if (idxRegIndex != UINT8_MAX)
11779 {
11780 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11781 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
11782 f64Bit, false /*fSetFlags*/, cShiftIndex);
11783 }
11784 }
11785
11786#else
11787# error "port me"
11788#endif
11789
11790 if (idxRegIndex != UINT8_MAX)
11791 iemNativeRegFreeTmp(pReNative, idxRegIndex);
11792 if (idxRegBase != UINT8_MAX)
11793 iemNativeRegFreeTmp(pReNative, idxRegBase);
11794 iemNativeVarRegisterRelease(pReNative, idxVarRet);
11795 return off;
11796}
11797
11798
11799/*********************************************************************************************************************************
11800* TLB Lookup. *
11801*********************************************************************************************************************************/
11802
11803/**
11804 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
11805 */
11806DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
11807{
11808 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
11809 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
11810 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
11811 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
11812
11813 /* Do the lookup manually. */
11814 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
11815 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
11816 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
11817 if (RT_LIKELY(pTlbe->uTag == uTag))
11818 {
11819 /*
11820 * Check TLB page table level access flags.
11821 */
11822 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
11823 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
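        /* Note: IEM_GET_CPL() + 1 equals 4 only when CPL is 3, so fNoUser matches
           IEMTLBE_F_PT_NO_USER exactly for user-mode accesses (see the AssertCompile
           above) and is zero for supervisor accesses. */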
11824 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
11825 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
11826 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
11827 | IEMTLBE_F_PG_UNASSIGNED
11828 | IEMTLBE_F_PT_NO_ACCESSED
11829 | fNoWriteNoDirty | fNoUser);
11830 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
11831 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
11832 {
11833 /*
11834 * Return the address.
11835 */
11836 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
11837 if ((uintptr_t)pbAddr == uResult)
11838 return;
11839 RT_NOREF(cbMem);
11840 AssertFailed();
11841 }
11842 else
11843 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
11844 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
11845 }
11846 else
11847 AssertFailed();
11848 RT_BREAKPOINT();
11849}
11850
11851/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
11852
11853
11854/*********************************************************************************************************************************
11855* Memory fetches and stores common *
11856*********************************************************************************************************************************/
11857
11858typedef enum IEMNATIVEMITMEMOP
11859{
11860 kIemNativeEmitMemOp_Store = 0,
11861 kIemNativeEmitMemOp_Fetch,
11862 kIemNativeEmitMemOp_Fetch_Zx_U16,
11863 kIemNativeEmitMemOp_Fetch_Zx_U32,
11864 kIemNativeEmitMemOp_Fetch_Zx_U64,
11865 kIemNativeEmitMemOp_Fetch_Sx_U16,
11866 kIemNativeEmitMemOp_Fetch_Sx_U32,
11867 kIemNativeEmitMemOp_Fetch_Sx_U64
11868} IEMNATIVEMITMEMOP;
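/* The _Zx_/_Sx_ variants zero- or sign-extend the fetched value to the named
   destination width (U16/U32/U64); plain Fetch loads the value at its natural size. */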
11869
11870/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
11871 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
11872 * (with iSegReg = UINT8_MAX). */
11873DECL_INLINE_THROW(uint32_t)
11874iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
11875 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
11876 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
11877{
11878 /*
11879 * Assert sanity.
11880 */
11881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
11882 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
11883 Assert( enmOp != kIemNativeEmitMemOp_Store
11884 || pVarValue->enmKind == kIemNativeVarKind_Immediate
11885 || pVarValue->enmKind == kIemNativeVarKind_Stack);
11886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
11887 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
11888 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
11889 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
11890 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11891 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
11892 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
11893 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
11894#ifdef VBOX_STRICT
11895 if (iSegReg == UINT8_MAX)
11896 {
11897 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
11898 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
11899 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
11900 switch (cbMem)
11901 {
11902 case 1:
11903 Assert( pfnFunction
11904 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
11905 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11906 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11907 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11908 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
11909 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
11910 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
11911 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
11912 : UINT64_C(0xc000b000a0009000) ));
11913 break;
11914 case 2:
11915 Assert( pfnFunction
11916 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
11917 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11918 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11919 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
11920 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
11921 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
11922 : UINT64_C(0xc000b000a0009000) ));
11923 break;
11924 case 4:
11925 Assert( pfnFunction
11926 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
11927 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11928 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
11929 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
11930 : UINT64_C(0xc000b000a0009000) ));
11931 break;
11932 case 8:
11933 Assert( pfnFunction
11934 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
11935 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
11936 : UINT64_C(0xc000b000a0009000) ));
11937 break;
11938 }
11939 }
11940 else
11941 {
11942 Assert(iSegReg < 6);
11943 switch (cbMem)
11944 {
11945 case 1:
11946 Assert( pfnFunction
11947 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
11948 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
11949 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11950 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11951 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
11952 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
11953 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
11954 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
11955 : UINT64_C(0xc000b000a0009000) ));
11956 break;
11957 case 2:
11958 Assert( pfnFunction
11959 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
11960 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
11961 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11962 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
11963 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
11964 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
11965 : UINT64_C(0xc000b000a0009000) ));
11966 break;
11967 case 4:
11968 Assert( pfnFunction
11969 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
11970 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
11971 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
11972 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
11973 : UINT64_C(0xc000b000a0009000) ));
11974 break;
11975 case 8:
11976 Assert( pfnFunction
11977 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
11978 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
11979 : UINT64_C(0xc000b000a0009000) ));
11980 break;
11981 }
11982 }
11983#endif
11984
11985#ifdef VBOX_STRICT
11986 /*
11987 * Check that the fExec flags we've got make sense.
11988 */
11989 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
11990#endif
11991
11992 /*
11993 * To keep things simple we have to commit any pending writes first as we
11994 * may end up making calls.
11995 */
11996 /** @todo we could postpone this till we make the call and reload the
11997 * registers after returning from the call. Not sure if that's sensible or
11998 * not, though. */
11999#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12000 off = iemNativeRegFlushPendingWrites(pReNative, off);
12001#else
12002 /* The program counter is treated differently for now. */
12003 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
12004#endif
12005
12006#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12007 /*
12008 * Move/spill/flush stuff out of call-volatile registers.
12009 * This is the easy way out. We could contain this to the tlb-miss branch
12010 * by saving and restoring active stuff here.
12011 */
12012 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
12013#endif
12014
12015 /*
12016 * Define labels and allocate the result register (trying for the return
12017 * register if we can).
12018 */
12019 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12020 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
12021 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
12022 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
12023 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
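    /* Trying for IEMNATIVE_CALL_RET_GREG here means the value returned by the TLB-miss
       helper call already sits in the variable's register, so no extra move is needed
       on that path (see the fetch result handling after the call below). */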
12024 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
12025 uint8_t const idxRegValueStore = !TlbState.fSkip
12026 && enmOp == kIemNativeEmitMemOp_Store
12027 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12028 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
12029 : UINT8_MAX;
12030 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12031 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12032 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12033 : UINT32_MAX;
12034
12035 /*
12036 * Jump to the TLB lookup code.
12037 */
12038 if (!TlbState.fSkip)
12039 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12040
12041 /*
12042 * TlbMiss:
12043 *
12044 * Call helper to do the fetching.
12045 * We flush all guest register shadow copies here.
12046 */
12047 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
12048
12049#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12050 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12051#else
12052 RT_NOREF(idxInstr);
12053#endif
12054
12055#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12056 if (pReNative->Core.offPc)
12057 {
12058 /*
12059 * Update the program counter but restore it at the end of the TlbMiss branch.
12060 * This should allow delaying more program counter updates for the TlbLookup and hit paths
12061 * which are hopefully much more frequent, reducing the number of memory accesses.
12062 */
12063 /* Allocate a temporary PC register. */
12064 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
12065
12066 /* Perform the addition and store the result. */
12067 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
12068 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
12069
12070 /* Free and flush the PC register. */
12071 iemNativeRegFreeTmp(pReNative, idxPcReg);
12072 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
12073 }
12074#endif
12075
12076#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12077 /* Save variables in volatile registers. */
12078 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12079 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
12080 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
12081 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12082#endif
12083
12084 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
12085 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
12086 if (enmOp == kIemNativeEmitMemOp_Store)
12087 {
12088 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
12089 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
12090#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12091 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12092#else
12093 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
12094 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
12095#endif
12096 }
12097
12098 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
12099 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
12100#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12101 fVolGregMask);
12102#else
12103 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
12104#endif
12105
12106 if (iSegReg != UINT8_MAX)
12107 {
12108 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
12109 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
12110 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
12111 }
12112
12113 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12114 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12115
12116 /* Done setting up parameters, make the call. */
12117 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12118
12119 /*
12120 * Put the result in the right register if this is a fetch.
12121 */
12122 if (enmOp != kIemNativeEmitMemOp_Store)
12123 {
12124 Assert(idxRegValueFetch == pVarValue->idxReg);
12125 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
12126 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
12127 }
12128
12129#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12130 /* Restore variables and guest shadow registers to volatile registers. */
12131 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12132 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12133#endif
12134
12135#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12136 if (pReNative->Core.offPc)
12137 {
12138 /*
12139 * Time to restore the program counter to its original value.
12140 */
12141 /* Allocate a temporary PC register. */
12142 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
12143
12144 /* Restore the original value. */
12145 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
12146 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
12147
12148 /* Free and flush the PC register. */
12149 iemNativeRegFreeTmp(pReNative, idxPcReg);
12150 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
12151 }
12152#endif
12153
12154#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12155 if (!TlbState.fSkip)
12156 {
12157 /* end of TlbMiss - Jump to the done label. */
12158 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12159 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12160
12161 /*
12162 * TlbLookup:
12163 */
12164 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
12165 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
12166 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
12167
12168 /*
12169 * Emit code to do the actual storing / fetching.
12170 */
12171 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12172# ifdef VBOX_WITH_STATISTICS
12173 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12174 enmOp == kIemNativeEmitMemOp_Store
12175 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
12176 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
12177# endif
12178 switch (enmOp)
12179 {
12180 case kIemNativeEmitMemOp_Store:
12181 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
12182 {
12183 switch (cbMem)
12184 {
12185 case 1:
12186 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12187 break;
12188 case 2:
12189 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12190 break;
12191 case 4:
12192 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12193 break;
12194 case 8:
12195 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
12196 break;
12197 default:
12198 AssertFailed();
12199 }
12200 }
12201 else
12202 {
12203 switch (cbMem)
12204 {
12205 case 1:
12206 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
12207 idxRegMemResult, TlbState.idxReg1);
12208 break;
12209 case 2:
12210 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12211 idxRegMemResult, TlbState.idxReg1);
12212 break;
12213 case 4:
12214 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12215 idxRegMemResult, TlbState.idxReg1);
12216 break;
12217 case 8:
12218 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
12219 idxRegMemResult, TlbState.idxReg1);
12220 break;
12221 default:
12222 AssertFailed();
12223 }
12224 }
12225 break;
12226
12227 case kIemNativeEmitMemOp_Fetch:
12228 case kIemNativeEmitMemOp_Fetch_Zx_U16:
12229 case kIemNativeEmitMemOp_Fetch_Zx_U32:
12230 case kIemNativeEmitMemOp_Fetch_Zx_U64:
12231 switch (cbMem)
12232 {
12233 case 1:
12234 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12235 break;
12236 case 2:
12237 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12238 break;
12239 case 4:
12240 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12241 break;
12242 case 8:
12243 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12244 break;
12245 default:
12246 AssertFailed();
12247 }
12248 break;
12249
12250 case kIemNativeEmitMemOp_Fetch_Sx_U16:
12251 Assert(cbMem == 1);
12252 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12253 break;
12254
12255 case kIemNativeEmitMemOp_Fetch_Sx_U32:
12256 Assert(cbMem == 1 || cbMem == 2);
12257 if (cbMem == 1)
12258 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12259 else
12260 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12261 break;
12262
12263 case kIemNativeEmitMemOp_Fetch_Sx_U64:
12264 switch (cbMem)
12265 {
12266 case 1:
12267 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12268 break;
12269 case 2:
12270 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12271 break;
12272 case 4:
12273 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
12274 break;
12275 default:
12276 AssertFailed();
12277 }
12278 break;
12279
12280 default:
12281 AssertFailed();
12282 }
12283
12284 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12285
12286 /*
12287 * TlbDone:
12288 */
12289 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12290
12291 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
12292
12293# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
12294 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
12295 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12296# endif
12297 }
12298#else
12299 RT_NOREF(fAlignMask, idxLabelTlbMiss);
12300#endif
12301
12302 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
12303 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12304 return off;
12305}
12306
12307
12308
12309/*********************************************************************************************************************************
12310* Memory fetches (IEM_MEM_FETCH_XXX). *
12311*********************************************************************************************************************************/
12312
12313/* 8-bit segmented: */
12314#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
12315 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
12316 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12317 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12318
12319#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12320 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12321 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12322 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12323
12324#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12325 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12326 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12327 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12328
12329#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12330 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12331 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12332 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
12333
12334#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12335 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12336 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12337 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12338
12339#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12340 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12341 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12342 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12343
12344#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12345 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12346 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12347 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12348
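/*
 * Illustrative sketch only - u32Dst, iSeg and GCPtrEff below are hypothetical
 * placeholders for the variable indexes supplied by the generated MC block.
 * The fetch macros above are thin textual wrappers around
 * iemNativeEmitMemFetchStoreDataCommon(); e.g. IEM_MC_FETCH_MEM_U8_ZX_U32
 * expands to nothing more than:
 *
 *      off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u32Dst, iSeg, GCPtrEff,
 *                                                 sizeof(uint8_t), 0, kIemNativeEmitMemOp_Fetch_Zx_U32,
 *                                                 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr);
 *
 * so the variants only differ in operand size, alignment mask, extension mode
 * and TLB-miss helper.
 */
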
12349/* 16-bit segmented: */
12350#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
12351 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12352 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12353 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12354
12355#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12356 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
12357 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12358 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12359
12360#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12361 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12362 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12363 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12364
12365#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12366 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12367 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12368 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
12369
12370#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12371 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12372 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12373 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12374
12375#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12376 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12377 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12378 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12379
12380
12381/* 32-bit segmented: */
12382#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
12383 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12384 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12385 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12386
12387#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
12388 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
12389 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12390 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12391
12392#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12393 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12394 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12395 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
12396
12397#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12398 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12399 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12400 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12401
12402
12403/* 64-bit segmented: */
12404#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
12405 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
12406 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12407 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
12408
12409
12410
12411/* 8-bit flat: */
12412#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
12413 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
12414 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
12415 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12416
12417#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
12418 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12419 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
12420 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12421
12422#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
12423 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12424 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12425 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12426
12427#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
12428 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12429 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12430 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
12431
12432#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
12433 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12434 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
12435 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
12436
12437#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
12438 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12439 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12440 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
12441
12442#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
12443 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12444 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12445 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
12446
12447
12448/* 16-bit flat: */
12449#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
12450 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12451 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12452 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12453
12454#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
12455 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
12456 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
12457 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
12458
12459#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
12460 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12461 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
12462 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12463
12464#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
12465 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12466 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12467 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
12468
12469#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
12470 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12471 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
12472 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
12473
12474#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
12475 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12476 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12477 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
12478
12479/* 32-bit flat: */
12480#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
12481 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12482 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12483 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12484
12485#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
12486 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
12487 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
12488 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
12489
12490#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
12491 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12492 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
12493 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
12494
12495#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
12496 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12497 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
12498 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
12499
12500/* 64-bit flat: */
12501#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
12502 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
12503 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
12504 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
12505
12506
12507
12508/*********************************************************************************************************************************
12509* Memory stores (IEM_MEM_STORE_XXX). *
12510*********************************************************************************************************************************/
12511
12512#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
12513 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
12514 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12515 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12516
12517#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
12518 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
12519 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12520 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12521
12522#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
12523 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
12524 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12525 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12526
12527#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
12528 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
12529 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12530 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12531
12532
12533#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
12534 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
12535 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
12536 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12537
12538#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
12539 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
12540 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
12541 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12542
12543#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
12544 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
12545 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
12546 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12547
12548#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
12549 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
12550 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
12551 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12552
12553
12554#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
12555 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
12556 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
12557
12558#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
12559 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
12560 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
12561
12562#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
12563 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
12564 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
12565
12566#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
12567 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
12568 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
12569
12570
12571#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
12572 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
12573 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
12574
12575#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
12576 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
12577 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
12578
12579#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
12580 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
12581 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
12582
12583#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
12584 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
12585 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
12586
12587/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
12588 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
12589DECL_INLINE_THROW(uint32_t)
12590iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
12591 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
12592{
12593 /*
12594 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
12595 * to do the grunt work.
12596 */
12597 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
12598 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
12599 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
12600 pfnFunction, idxInstr);
12601 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
12602 return off;
12603}
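
/*
 * Illustrative sketch only - the values 0x1234, X86_SREG_ES and GCPtrEff are
 * hypothetical examples.  A constant store such as
 * IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEff, 0x1234) reaches the helper
 * above as:
 *
 *      off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0x1234, X86_SREG_ES, GCPtrEff,
 *                                                 sizeof(uint16_t), (uintptr_t)iemNativeHlpMemStoreDataU16,
 *                                                 pCallEntry->idxInstr);
 *
 * which wraps the constant in a temporary constant variable, hands it to
 * iemNativeEmitMemFetchStoreDataCommon() as an ordinary store, and then frees
 * the temporary again.
 */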
12604
12605
12606
12607/*********************************************************************************************************************************
12608* Stack Accesses. *
12609*********************************************************************************************************************************/
12610/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
12611#define IEM_MC_PUSH_U16(a_u16Value) \
12612 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12613 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
12614#define IEM_MC_PUSH_U32(a_u32Value) \
12615 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12616 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
12617#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
12618 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
12619 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
12620#define IEM_MC_PUSH_U64(a_u64Value) \
12621 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
12622 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
12623
12624#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
12625 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
12626 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12627#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
12628 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
12629 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
12630#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
12631 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
12632 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
12633
12634#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
12635 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
12636 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
12637#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
12638 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
12639 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
12640
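/*
 * Worked example of the cBitsVarAndFlat packing used above (illustrative only):
 * IEM_MC_FLAT64_PUSH_U64 passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0), i.e. 0x00004040,
 * which iemNativeEmitStackPush() below decodes as:
 *
 *      cbMem     = RT_BYTE1(0x00004040) / 8  = 8      (64-bit value)
 *      cBitsFlat = RT_BYTE2(0x00004040)      = 64     (flat 64-bit stack)
 *      fIsSegReg = RT_BYTE3(0x00004040) != 0 = false
 *
 * The third byte is only non-zero for the *_SREG variants, letting the emitter
 * apply the special segment-register push handling.
 */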
12641
12642DECL_FORCE_INLINE_THROW(uint32_t)
12643iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12644{
12645 /* Use16BitSp: */
12646#ifdef RT_ARCH_AMD64
12647 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
12648 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12649#else
12650 /* sub regeff, regrsp, #cbMem */
12651 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
12652 /* and regeff, regeff, #0xffff */
12653 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
12654 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
12655 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
12656 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
12657#endif
12658 return off;
12659}
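
/*
 * Numeric sketch of the 16-bit SP update above (illustrative only): with
 * RSP = 0x0000123400000002 and cbMem = 4, the emitted code computes an
 * effective stack address of 0xfffe (16-bit wrap-around) and updates only
 * bits 15:0 of RSP, leaving it at 0x000012340000fffe.
 */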
12660
12661
12662DECL_FORCE_INLINE(uint32_t)
12663iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
12664{
12665 /* Use32BitSp: */
12666 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
12667 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
12668 return off;
12669}
12670
12671
12672/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
12673DECL_INLINE_THROW(uint32_t)
12674iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
12675 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
12676{
12677 /*
12678 * Assert sanity.
12679 */
12680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12681 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12682#ifdef VBOX_STRICT
12683 if (RT_BYTE2(cBitsVarAndFlat) != 0)
12684 {
12685 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12686 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12687 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12688 Assert( pfnFunction
12689 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12690 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
12691 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
12692 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
12693 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
12694 : UINT64_C(0xc000b000a0009000) ));
12695 }
12696 else
12697 Assert( pfnFunction
12698 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
12699 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
12700 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
12701 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
12702 : UINT64_C(0xc000b000a0009000) ));
12703#endif
12704
12705#ifdef VBOX_STRICT
12706 /*
12707 * Check that the fExec flags we've got make sense.
12708 */
12709 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12710#endif
12711
12712 /*
12713 * To keep things simple we have to commit any pending writes first as we
12714 * may end up making calls.
12715 */
12716 /** @todo we could postpone this till we make the call and reload the
12717 * registers after returning from the call. Not sure if that's sensible or
12718 * not, though. */
12719 off = iemNativeRegFlushPendingWrites(pReNative, off);
12720
12721 /*
12722 * First we calculate the new RSP and the effective stack pointer value.
12723 * For 64-bit mode and flat 32-bit these two are the same.
12724 * (Code structure is very similar to that of PUSH)
12725 */
12726 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
12727 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
12728 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
12729 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
12730 ? cbMem : sizeof(uint16_t);
12731 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
12732 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
12733 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
12734 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
12735 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
12736 if (cBitsFlat != 0)
12737 {
12738 Assert(idxRegEffSp == idxRegRsp);
12739 Assert(cBitsFlat == 32 || cBitsFlat == 64);
12740 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
12741 if (cBitsFlat == 64)
12742 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
12743 else
12744 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
12745 }
12746 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
12747 {
12748 Assert(idxRegEffSp != idxRegRsp);
12749 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
12750 kIemNativeGstRegUse_ReadOnly);
12751#ifdef RT_ARCH_AMD64
12752 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12753#else
12754 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12755#endif
12756 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
12757 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
12758 offFixupJumpToUseOtherBitSp = off;
12759 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12760 {
12761 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
12762 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12763 }
12764 else
12765 {
12766 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
12767 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12768 }
12769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12770 }
12771 /* SpUpdateEnd: */
12772 uint32_t const offLabelSpUpdateEnd = off;
12773
12774 /*
12775 * Okay, now prepare for the TLB lookup and jump to the lookup code (or
12776 * straight to TlbMiss if we're skipping the lookup).
12777 */
12778 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
12779 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
12780 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
12781 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
12782 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
12783 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
12784 : UINT32_MAX;
12785 uint8_t const idxRegValue = !TlbState.fSkip
12786 && pVarValue->enmKind != kIemNativeVarKind_Immediate
12787 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
12788 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
12789 : UINT8_MAX;
12790 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
12791
12792
12793 if (!TlbState.fSkip)
12794 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
12795 else
12796 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
12797
12798 /*
12799 * Use16BitSp:
12800 */
12801 if (cBitsFlat == 0)
12802 {
12803#ifdef RT_ARCH_AMD64
12804 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
12805#else
12806 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
12807#endif
12808 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
12809 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
12810 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12811 else
12812 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
12813 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
12814 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12815 }
12816
12817 /*
12818 * TlbMiss:
12819 *
12820 * Call helper to do the pushing.
12821 */
12822 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
12823
12824#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
12825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
12826#else
12827 RT_NOREF(idxInstr);
12828#endif
12829
12830 /* Save variables in volatile registers. */
12831 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
12832 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
12833 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
12834 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
12835 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
12836
12837 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
12838 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
12839 {
12840 /* Swap them using ARG0 as temp register: */
12841 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
12842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
12843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
12844 }
12845 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
12846 {
12847 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
12848 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
12849 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
12850
12851 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
12852 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
12853 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12854 }
12855 else
12856 {
12857 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
12858 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
12859
12860 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
12861 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
12862 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
12863 }
12864
12865 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
12866 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
12867
12868 /* Done setting up parameters, make the call. */
12869 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
12870
12871 /* Restore variables and guest shadow registers to volatile registers. */
12872 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
12873 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
12874
12875#ifdef IEMNATIVE_WITH_TLB_LOOKUP
12876 if (!TlbState.fSkip)
12877 {
12878 /* end of TlbMiss - Jump to the done label. */
12879 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
12880 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
12881
12882 /*
12883 * TlbLookup:
12884 */
12885 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
12886 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
12887
12888 /*
12889 * Emit code to do the actual storing / fetching.
12890 */
12891 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
12892# ifdef VBOX_WITH_STATISTICS
12893 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
12894 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
12895# endif
12896 if (idxRegValue != UINT8_MAX)
12897 {
12898 switch (cbMemAccess)
12899 {
12900 case 2:
12901 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12902 break;
12903 case 4:
12904 if (!fIsIntelSeg)
12905 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12906 else
12907 {
12908 /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
12909 PUSH FS in real mode, so we have to try to emulate that here.
12910 We borrow the now unused idxReg1 from the TLB lookup code here. */
12911 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
12912 kIemNativeGstReg_EFlags);
12913 if (idxRegEfl != UINT8_MAX)
12914 {
12915#ifdef RT_ARCH_AMD64
12916 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
12917 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12918 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12919#else
12920 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
12921 off, TlbState.idxReg1, idxRegEfl,
12922 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12923#endif
12924 iemNativeRegFreeTmp(pReNative, idxRegEfl);
12925 }
12926 else
12927 {
12928 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
12929 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
12930 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
12931 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
12932 }
12933 /* ASSUMES the upper half of idxRegValue is ZERO. */
12934 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
12935 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
12936 }
12937 break;
12938 case 8:
12939 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
12940 break;
12941 default:
12942 AssertFailed();
12943 }
12944 }
12945 else
12946 {
12947 switch (cbMemAccess)
12948 {
12949 case 2:
12950 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
12951 idxRegMemResult, TlbState.idxReg1);
12952 break;
12953 case 4:
12954 Assert(!fIsSegReg);
12955 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
12956 idxRegMemResult, TlbState.idxReg1);
12957 break;
12958 case 8:
12959 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
12960 break;
12961 default:
12962 AssertFailed();
12963 }
12964 }
12965
12966 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
12967 TlbState.freeRegsAndReleaseVars(pReNative);
12968
12969 /*
12970 * TlbDone:
12971 *
12972 * Commit the new RSP value.
12973 */
12974 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
12975 }
12976#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
12977
12978 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
12979 iemNativeRegFreeTmp(pReNative, idxRegRsp);
12980 if (idxRegEffSp != idxRegRsp)
12981 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
12982
12983 /* The value variable is implicitly flushed. */
12984 if (idxRegValue != UINT8_MAX)
12985 iemNativeVarRegisterRelease(pReNative, idxVarValue);
12986 iemNativeVarFreeLocal(pReNative, idxVarValue);
12987
12988 return off;
12989}
12990
12991
12992
12993/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
12994#define IEM_MC_POP_GREG_U16(a_iGReg) \
12995 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
12996 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
12997#define IEM_MC_POP_GREG_U32(a_iGReg) \
12998 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
12999 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
13000#define IEM_MC_POP_GREG_U64(a_iGReg) \
13001 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13002 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
13003
13004#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
13005 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13006 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
13007#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
13008 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13009 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
13010
13011#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
13012 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13013 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
13014#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
13015 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13016 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
13017
13018
13019DECL_FORCE_INLINE_THROW(uint32_t)
13020iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
13021 uint8_t idxRegTmp)
13022{
13023 /* Use16BitSp: */
13024#ifdef RT_ARCH_AMD64
13025 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13026 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13027 RT_NOREF(idxRegTmp);
13028#else
13029 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
13030 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
13031 /* add tmp, regrsp, #cbMem */
13032 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
13033 /* and tmp, tmp, #0xffff */
13034 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13035 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
13036 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
13037 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
13038#endif
13039 return off;
13040}
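
/*
 * Numeric sketch of the 16-bit SP update above (illustrative only): with
 * SP = 0xfffe and cbMem = 2, the value is read from effective address 0xfffe
 * (the old SP) while the new SP wraps around to 0x0000; as for PUSH, only
 * bits 15:0 of RSP are modified.
 */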
13041
13042
13043DECL_FORCE_INLINE(uint32_t)
13044iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13045{
13046 /* Use32BitSp: */
13047 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13048 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13049 return off;
13050}
13051
13052
13053/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
13054DECL_INLINE_THROW(uint32_t)
13055iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
13056 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13057{
13058 /*
13059 * Assert sanity.
13060 */
13061 Assert(idxGReg < 16);
13062#ifdef VBOX_STRICT
13063 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13064 {
13065 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13066 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13067 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13068 Assert( pfnFunction
13069 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
13070 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
13071 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
13072 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
13073 : UINT64_C(0xc000b000a0009000) ));
13074 }
13075 else
13076 Assert( pfnFunction
13077 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
13078 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
13079 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
13080 : UINT64_C(0xc000b000a0009000) ));
13081#endif
13082
13083#ifdef VBOX_STRICT
13084 /*
13085 * Check that the fExec flags we've got make sense.
13086 */
13087 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13088#endif
13089
13090 /*
13091 * To keep things simple we have to commit any pending writes first as we
13092 * may end up making calls.
13093 */
13094 off = iemNativeRegFlushPendingWrites(pReNative, off);
13095
13096 /*
13097 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
13098 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
13099 * directly as the effective stack pointer.
13100 * (Code structure is very similar to that of PUSH)
13101 */
13102 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13103 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13104 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13105 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13106 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13107 /** @todo can do a better job picking the register here. For cbMem >= 4 this
13108 * will be the resulting register value. */
13109 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
13110
13111 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13112 if (cBitsFlat != 0)
13113 {
13114 Assert(idxRegEffSp == idxRegRsp);
13115 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13116 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13117 }
13118 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13119 {
13120 Assert(idxRegEffSp != idxRegRsp);
13121 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13122 kIemNativeGstRegUse_ReadOnly);
13123#ifdef RT_ARCH_AMD64
13124 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13125#else
13126 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13127#endif
13128 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13129 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13130 offFixupJumpToUseOtherBitSp = off;
13131 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13132 {
13133/** @todo can skip idxRegRsp updating when popping ESP. */
13134 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13135 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13136 }
13137 else
13138 {
13139 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13140 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13141 }
13142 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13143 }
13144 /* SpUpdateEnd: */
13145 uint32_t const offLabelSpUpdateEnd = off;
13146
13147 /*
13148 * Okay, now prepare for the TLB lookup and jump to the lookup code (or
13149 * straight to TlbMiss if we're skipping the lookup).
13150 */
13151 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13152 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
13153 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13154 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13155 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13156 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13157 : UINT32_MAX;
13158
13159 if (!TlbState.fSkip)
13160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13161 else
13162 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13163
13164 /*
13165 * Use16BitSp:
13166 */
13167 if (cBitsFlat == 0)
13168 {
13169#ifdef RT_ARCH_AMD64
13170 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13171#else
13172 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13173#endif
13174 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13175 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13176 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
13177 else
13178 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13179 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13181 }
13182
13183 /*
13184 * TlbMiss:
13185 *
13186 * Call helper to do the popping.
13187 */
13188 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13189
13190#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13191 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13192#else
13193 RT_NOREF(idxInstr);
13194#endif
13195
13196 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13197 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13198 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
13199 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13200
13201
13202 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
13203 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13204 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13205
13206 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13208
13209 /* Done setting up parameters, make the call. */
13210 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13211
13212 /* Move the return register content to idxRegMemResult. */
13213 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13214 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13215
13216 /* Restore variables and guest shadow registers to volatile registers. */
13217 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13218 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13219
13220#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13221 if (!TlbState.fSkip)
13222 {
13223 /* end of TlbMiss - Jump to the done label. */
13224 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13225 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13226
13227 /*
13228 * TlbLookup:
13229 */
13230 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
13231 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13232
13233 /*
13234 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
13235 */
13236 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13237# ifdef VBOX_WITH_STATISTICS
13238 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13239 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13240# endif
13241 switch (cbMem)
13242 {
13243 case 2:
13244 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13245 break;
13246 case 4:
13247 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13248 break;
13249 case 8:
13250 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
13251 break;
13252 default:
13253 AssertFailed();
13254 }
13255
13256 TlbState.freeRegsAndReleaseVars(pReNative);
13257
13258 /*
13259 * TlbDone:
13260 *
13261 * Set the new RSP value (FLAT accesses need to calculate it first) and
13262 * commit the popped register value.
13263 */
13264 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13265 }
13266#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13267
13268 if (idxGReg != X86_GREG_xSP)
13269 {
13270 /* Set the register. */
13271 if (cbMem >= sizeof(uint32_t))
13272 {
13273#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
13274 AssertMsg( pReNative->idxCurCall == 0
13275 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
13276 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
13277#endif
13278 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
13279 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
13280 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13281 }
13282 else
13283 {
13284 Assert(cbMem == sizeof(uint16_t));
13285 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
13286 kIemNativeGstRegUse_ForUpdate);
13287 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
13288 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
13289 iemNativeRegFreeTmp(pReNative, idxRegDst);
13290 }
13291
13292 /* Complete RSP calculation for FLAT mode. */
13293 if (idxRegEffSp == idxRegRsp)
13294 {
13295 if (cBitsFlat == 64)
13296 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13297 else
13298 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13299 }
13300 }
13301 else
13302 {
13303 /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
13304 if (cbMem == sizeof(uint64_t))
13305 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
13306 else if (cbMem == sizeof(uint32_t))
13307 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
13308 else
13309 {
13310 if (idxRegEffSp == idxRegRsp)
13311 {
13312 if (cBitsFlat == 64)
13313 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
13314 else
13315 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
13316 }
13317 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
13318 }
13319 }
13320 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
13321
13322 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13323 if (idxRegEffSp != idxRegRsp)
13324 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13325 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13326
13327 return off;
13328}
13329
13330
13331
13332/*********************************************************************************************************************************
13333* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
13334*********************************************************************************************************************************/
13335
13336#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13338 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13339 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
13340
13341#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13342 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13343 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13344 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
13345
13346#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13347 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13348 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13349 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
13350
13351#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13352 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13353 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13354 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
13355
13356
13357#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13359 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13360 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
13361
13362#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13363 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13364 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13365 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
13366
13367#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13368 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13369 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13370 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13371
13372#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13373 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13374 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13375 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
13376
13377#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13378 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
13379 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13380 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
13381
13382
13383#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13384 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13385 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13386 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
13387
13388#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13389 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13390 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13391 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
13392
13393#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13394 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13395 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13396 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13397
13398#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13399 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13400 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13401 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
13402
13403#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13404 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
13405 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13406 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
13407
13408
13409#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13410 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13411 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13412 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
13413
13414#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13415 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13416 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13417 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
13418#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13419 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13420 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13421 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13422
13423#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13424 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13425 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13426 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
13427
13428#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13429 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
13430 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13431 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
13432
13433
13434#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13435 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13436 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13437 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
13438
13439#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13440 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
13441 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13442 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
13443
13444
13445#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13446 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13447 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13448 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
13449
13450#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13451 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13452 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13453 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
13454
13455#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13456 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13457 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13458 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
13459
13460#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
13461 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
13462 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13463 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
13464
13465
13466
13467#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13468 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13469 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
13470 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
13471
13472#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13473 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13474 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
13475 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
13476
13477#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13478 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13479 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
13480 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
13481
13482#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
13483 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13484 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
13485 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
13486
13487
13488#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13489 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13490 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13491 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
13492
13493#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13495 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13496 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
13497
13498#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13499 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13500 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13501 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13502
13503#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
13504 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13505 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13506 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
13507
13508#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
13509 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
13510 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
13511 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
13512
13513
13514#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13515 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13516 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13517 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
13518
13519#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13520 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13521 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13522 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
13523
13524#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13525 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13526 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13527 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13528
13529#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
13530 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13531 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13532 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
13533
13534#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
13535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
13536 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
13537 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
13538
13539
13540#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13541 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13542 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13543 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
13544
13545#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13546 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13547 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13548 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
13549
13550#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13551 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13552 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13553 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13554
13555#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
13556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13557 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13558 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
13559
13560#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
13561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
13562 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13563 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
13564
13565
13566#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
13567 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13568 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
13569 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
13570
13571#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
13572 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
13573 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
13574 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
13575
13576
13577#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13578 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13579 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13580 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
13581
13582#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13583 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13584 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13585 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
13586
13587#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13588 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13589 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13590 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
13591
13592#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
13593 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
13594 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
13595 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
13596
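/*
 * Illustrative only, not part of the recompiler: the fAlignMask values above
 * are the usual natural-alignment test masks. A mask of sizeof(type) - 1 has
 * all the low bits set, so a guest address is misaligned for that access size
 * exactly when (GCPtrMem & fAlignMask) != 0; byte accesses therefore pass 0.
 * A minimal sketch with a hypothetical helper name, kept out of the build:
 */
#if 0
static bool iemExampleIsMisaligned(RTGCPTR GCPtrMem, uint8_t fAlignMask)
{
    /* Any of the low bits being set means the address is not naturally
       aligned for the access size encoded by the mask. */
    return (GCPtrMem & fAlignMask) != 0;
}
#endif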
13597
13598DECL_INLINE_THROW(uint32_t)
13599iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
13600 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
13601 uintptr_t pfnFunction, uint8_t idxInstr)
13602{
13603 /*
13604 * Assert sanity.
13605 */
13606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
13607 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
13608 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
13609 && pVarMem->cbVar == sizeof(void *),
13610 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13611
13612 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13613 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13614 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
13615 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
13616 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13617
13618 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
13619 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
13620 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
13621 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
13622 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
13623
13624 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
13625
13626 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
13627
13628#ifdef VBOX_STRICT
13629# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
13630 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
13631 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
13632 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
13633 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
13634# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
13635 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
13636 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
13637 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
13638
13639 if (iSegReg == UINT8_MAX)
13640 {
13641 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13642 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13643 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13644 switch (cbMem)
13645 {
13646 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
13647 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
13648 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
13649 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
13650 case 10:
13651 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
13652 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
13653 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13654 break;
13655 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
13656# if 0
13657 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
13658 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
13659# endif
13660 default: AssertFailed(); break;
13661 }
13662 }
13663 else
13664 {
13665 Assert(iSegReg < 6);
13666 switch (cbMem)
13667 {
13668 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
13669 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
13670 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
13671 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
13672 case 10:
13673 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
13674 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
13675 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
13676 break;
13677 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
13678# if 0
13679 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
13680 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
13681# endif
13682 default: AssertFailed(); break;
13683 }
13684 }
13685# undef IEM_MAP_HLP_FN
13686# undef IEM_MAP_HLP_FN_NO_AT
13687#endif
13688
13689#ifdef VBOX_STRICT
13690 /*
13691 * Check that the fExec flags we've got make sense.
13692 */
13693 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13694#endif
13695
13696 /*
13697 * To keep things simple we have to commit any pending writes first as we
13698 * may end up making calls.
13699 */
13700 off = iemNativeRegFlushPendingWrites(pReNative, off);
13701
13702#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13703 /*
13704 * Move/spill/flush stuff out of call-volatile registers.
13705 * This is the easy way out. We could confine this to the tlb-miss branch
13706 * by saving and restoring active stuff here.
13707 */
13708 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
13709 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13710#endif
13711
13712 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
13713 while the tlb-miss codepath will temporarily put it on the stack.
13714 Set the type to stack here so we don't need to do it twice below. */
13715 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
13716 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
13717 /** @todo use a tmp register from TlbState, since they'll be free after tlb
13718 * lookup is done. */
13719
13720 /*
13721 * Define labels and allocate the result register (trying for the return
13722 * register if we can).
13723 */
13724 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13725 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13726 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
13727 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
13728 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
13729 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13730 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13731 : UINT32_MAX;
13732//off=iemNativeEmitBrk(pReNative, off, 0);
13733 /*
13734 * Jump to the TLB lookup code.
13735 */
13736 if (!TlbState.fSkip)
13737 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13738
13739 /*
13740 * TlbMiss:
13741 *
13742 * Call helper to do the fetching.
13743 * We flush all guest register shadow copies here.
13744 */
13745 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13746
13747#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13748 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13749#else
13750 RT_NOREF(idxInstr);
13751#endif
13752
13753#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13754 /* Save variables in volatile registers. */
13755 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
13756 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13757#endif
13758
13759 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
13760 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
13761#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13762 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13763#else
13764 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13765#endif
13766
13767 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
13768 if (iSegReg != UINT8_MAX)
13769 {
13770 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13771 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
13772 }
13773
13774 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
13775 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
13776 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
13777
13778 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13779 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13780
13781 /* Done setting up parameters, make the call. */
13782 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13783
13784 /*
13785 * Put the output in the right registers.
13786 */
13787 Assert(idxRegMemResult == pVarMem->idxReg);
13788 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
13789 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
13790
13791#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13792 /* Restore variables and guest shadow registers to volatile registers. */
13793 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13794 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13795#endif
13796
13797 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
13798 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
13799
13800#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13801 if (!TlbState.fSkip)
13802 {
13803 /* End of TlbMiss - jump to the done label. */
13804 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13805 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13806
13807 /*
13808 * TlbLookup:
13809 */
13810 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
13811 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13812# ifdef VBOX_WITH_STATISTICS
13813 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
13814 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
13815# endif
13816
13817 /* [idxVarUnmapInfo] = 0; */
13818 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
13819
13820 /*
13821 * TlbDone:
13822 */
13823 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13824
13825 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13826
13827# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13828 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13829 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13830# endif
13831 }
13832#else
13833 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
13834#endif
13835
13836 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13837 iemNativeVarRegisterRelease(pReNative, idxVarMem);
13838
13839 return off;
13840}
13841
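/*
 * A rough sketch (for orientation only) of the code layout that
 * iemNativeEmitMemMapCommon emits when the TLB lookup isn't skipped,
 * derived from the emitter above and heavily simplified:
 *
 *         jmp    TlbLookup_N           ; try the inline fast path first
 *     TlbMiss_N:                       ; slow path, created at the current offset
 *         call   pfnFunction           ; one of the iemNativeHlpMemMapData helpers
 *         ...                          ; load bUnmapInfo back from its stack slot
 *         jmp    TlbDone_N
 *     TlbLookup_N:                     ; inline lookup, result in idxRegMemResult
 *         ...
 *         mov    idxRegUnmapInfo, 0    ; a TLB hit needs no unmap work
 *     TlbDone_N:
 *
 * When TlbState.fSkip is set, the inline lookup and the jumps are omitted and
 * only the miss-path code is emitted.
 */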
13842
13843#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
13844 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
13845 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
13846
13847#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
13848 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
13849 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
13850
13851#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
13852 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
13853 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
13854
13855#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
13856 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
13857 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
13858
13859DECL_INLINE_THROW(uint32_t)
13860iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
13861 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
13862{
13863 /*
13864 * Assert sanity.
13865 */
13866 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
13867#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
13868 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
13869#endif
13870 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
13871 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
13872 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
13873#ifdef VBOX_STRICT
13874 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
13875 {
13876 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
13877 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
13878 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
13879 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
13880 case IEM_ACCESS_TYPE_WRITE:
13881 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
13882 case IEM_ACCESS_TYPE_READ:
13883 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
13884 default: AssertFailed();
13885 }
13886#else
13887 RT_NOREF(fAccess);
13888#endif
13889
13890 /*
13891 * To keep things simple we have to commit any pending writes first as we
13892 * may end up making calls (there shouldn't be any at this point, so this
13893 * is just for consistency).
13894 */
13895 /** @todo we could postpone this till we make the call and reload the
13896 * registers after returning from the call. Not sure if that's sensible or
13897 * not, though. */
13898 off = iemNativeRegFlushPendingWrites(pReNative, off);
13899
13900 /*
13901 * Move/spill/flush stuff out of call-volatile registers.
13902 *
13903 * We exclude any register holding the bUnmapInfo variable, as we'll be
13904 * checking it after returning from the call and will free it afterwards.
13905 */
13906 /** @todo save+restore active registers and maybe guest shadows in miss
13907 * scenario. */
13908 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
13909
13910 /*
13911 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
13912 * to call the unmap helper function.
13913 *
13914 * The likelihood of it being zero is higher than for the TLB hit when doing
13915 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
13916 * access should also end up with a mapping that won't need special unmapping.
13917 */
13918 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
13919 * should speed up things for the pure interpreter as well when TLBs
13920 * are enabled. */
13921#ifdef RT_ARCH_AMD64
13922 if (pVarUnmapInfo->idxReg == UINT8_MAX)
13923 {
13924 /* test byte [rbp - xxx], 0ffh */
13925 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
13926 pbCodeBuf[off++] = 0xf6;
13927 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
13928 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
13929 pbCodeBuf[off++] = 0xff;
13930 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13931 }
13932 else
13933#endif
13934 {
13935 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
13936 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
13937 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
13938 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
13939 }
13940 uint32_t const offJmpFixup = off;
13941 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
13942
13943 /*
13944 * Call the unmap helper function.
13945 */
13946#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
13947 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13948#else
13949 RT_NOREF(idxInstr);
13950#endif
13951
13952 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
13953 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
13954 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13955
13956 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13957 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13958
13959 /* Done setting up parameters, make the call. */
13960 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13961
13962 /* The bUnmapInfo variable is implicitly freed by these MCs. */
13963 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
13964
13965 /*
13966 * Done, just fixup the jump for the non-call case.
13967 */
13968 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
13969
13970 return off;
13971}
13972
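/*
 * Illustrative only: in the MC blocks each IEM_MC_MEM_MAP_* above is paired
 * with a matching IEM_MC_MEM_COMMIT_AND_UNMAP_* of the same access type,
 * roughly along these lines (placeholder variable names, not copied from any
 * actual instruction body):
 *
 *     uint16_t *pu16Dst;
 *     uint8_t   bUnmapInfo;
 *     IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iSegReg, GCPtrEff);
 *     ...                                    modify the mapped word here
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * which the recompiler turns into the iemNativeEmitMemMapCommon and
 * iemNativeEmitMemCommitAndUnmap calls defined above.
 */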
13973
13974
13975/*********************************************************************************************************************************
13976* State and Exceptions *
13977*********************************************************************************************************************************/
13978
13979#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13980#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13981
13982#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13983#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13984#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13985
13986#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13987#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
13988#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
13989
13990
13991DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
13992{
13993 /** @todo this needs a lot more work later. */
13994 RT_NOREF(pReNative, fForChange);
13995 return off;
13996}
13997
13998
13999
14000/*********************************************************************************************************************************
14001* Emitters for FPU related operations. *
14002*********************************************************************************************************************************/
14003
14004#define IEM_MC_FETCH_FCW(a_u16Fcw) \
14005 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
14006
14007/** Emits code for IEM_MC_FETCH_FCW. */
14008DECL_INLINE_THROW(uint32_t)
14009iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
14010{
14011 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
14012 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
14013
14014 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
14015
14016 /* Allocate a temporary FCW register. */
14017 /** @todo eliminate extra register */
14018 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
14019 kIemNativeGstRegUse_ReadOnly);
14020
14021 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
14022
14023 /* Free but don't flush the FCW register. */
14024 iemNativeRegFreeTmp(pReNative, idxFcwReg);
14025 iemNativeVarRegisterRelease(pReNative, idxDstVar);
14026
14027 return off;
14028}
14029
14030
14031#define IEM_MC_FETCH_FSW(a_u16Fsw) \
14032 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
14033
14034/** Emits code for IEM_MC_FETCH_FSW. */
14035DECL_INLINE_THROW(uint32_t)
14036iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
14037{
14038 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
14039 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
14040
14041 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
14042 /* Allocate a temporary FSW register. */
14043 /** @todo eliminate extra register */
14044 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
14045 kIemNativeGstRegUse_ReadOnly);
14046
14047 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
14048
14049 /* Free but don't flush the FSW register. */
14050 iemNativeRegFreeTmp(pReNative, idxFswReg);
14051 iemNativeVarRegisterRelease(pReNative, idxDstVar);
14052
14053 return off;
14054}
14055
14056
14057
14058/*********************************************************************************************************************************
14059* The native code generator functions for each MC block. *
14060*********************************************************************************************************************************/
14061
14062/*
14063 * Include instruction emitters.
14064 */
14065#include "target-x86/IEMAllN8veEmit-x86.h"
14066
14067/*
14068 * Include g_apfnIemNativeRecompileFunctions and associated functions.
14069 *
14070 * This should probably live in its own file later, but let's see what the
14071 * compile times turn out to be first.
14072 */
14073#include "IEMNativeFunctions.cpp.h"
14074
14075
14076
14077/*********************************************************************************************************************************
14078* Recompiler Core. *
14079*********************************************************************************************************************************/
14080
14081
14082/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
14083static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
14084{
14085 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
14086 pDis->cbCachedInstr += cbMaxRead;
14087 RT_NOREF(cbMinRead);
14088 return VERR_NO_DATA;
14089}
14090
14091
14092DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
14093{
14094 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
14095 {
14096#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
14097 ENTRY(fLocalForcedActions),
14098 ENTRY(iem.s.rcPassUp),
14099 ENTRY(iem.s.fExec),
14100 ENTRY(iem.s.pbInstrBuf),
14101 ENTRY(iem.s.uInstrBufPc),
14102 ENTRY(iem.s.GCPhysInstrBuf),
14103 ENTRY(iem.s.cbInstrBufTotal),
14104 ENTRY(iem.s.idxTbCurInstr),
14105#ifdef VBOX_WITH_STATISTICS
14106 ENTRY(iem.s.StatNativeTlbHitsForFetch),
14107 ENTRY(iem.s.StatNativeTlbHitsForStore),
14108 ENTRY(iem.s.StatNativeTlbHitsForStack),
14109 ENTRY(iem.s.StatNativeTlbHitsForMapped),
14110 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
14111 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
14112 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
14113 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
14114#endif
14115 ENTRY(iem.s.DataTlb.aEntries),
14116 ENTRY(iem.s.DataTlb.uTlbRevision),
14117 ENTRY(iem.s.DataTlb.uTlbPhysRev),
14118 ENTRY(iem.s.DataTlb.cTlbHits),
14119 ENTRY(iem.s.CodeTlb.aEntries),
14120 ENTRY(iem.s.CodeTlb.uTlbRevision),
14121 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
14122 ENTRY(iem.s.CodeTlb.cTlbHits),
14123 ENTRY(pVMR3),
14124 ENTRY(cpum.GstCtx.rax),
14125 ENTRY(cpum.GstCtx.ah),
14126 ENTRY(cpum.GstCtx.rcx),
14127 ENTRY(cpum.GstCtx.ch),
14128 ENTRY(cpum.GstCtx.rdx),
14129 ENTRY(cpum.GstCtx.dh),
14130 ENTRY(cpum.GstCtx.rbx),
14131 ENTRY(cpum.GstCtx.bh),
14132 ENTRY(cpum.GstCtx.rsp),
14133 ENTRY(cpum.GstCtx.rbp),
14134 ENTRY(cpum.GstCtx.rsi),
14135 ENTRY(cpum.GstCtx.rdi),
14136 ENTRY(cpum.GstCtx.r8),
14137 ENTRY(cpum.GstCtx.r9),
14138 ENTRY(cpum.GstCtx.r10),
14139 ENTRY(cpum.GstCtx.r11),
14140 ENTRY(cpum.GstCtx.r12),
14141 ENTRY(cpum.GstCtx.r13),
14142 ENTRY(cpum.GstCtx.r14),
14143 ENTRY(cpum.GstCtx.r15),
14144 ENTRY(cpum.GstCtx.es.Sel),
14145 ENTRY(cpum.GstCtx.es.u64Base),
14146 ENTRY(cpum.GstCtx.es.u32Limit),
14147 ENTRY(cpum.GstCtx.es.Attr),
14148 ENTRY(cpum.GstCtx.cs.Sel),
14149 ENTRY(cpum.GstCtx.cs.u64Base),
14150 ENTRY(cpum.GstCtx.cs.u32Limit),
14151 ENTRY(cpum.GstCtx.cs.Attr),
14152 ENTRY(cpum.GstCtx.ss.Sel),
14153 ENTRY(cpum.GstCtx.ss.u64Base),
14154 ENTRY(cpum.GstCtx.ss.u32Limit),
14155 ENTRY(cpum.GstCtx.ss.Attr),
14156 ENTRY(cpum.GstCtx.ds.Sel),
14157 ENTRY(cpum.GstCtx.ds.u64Base),
14158 ENTRY(cpum.GstCtx.ds.u32Limit),
14159 ENTRY(cpum.GstCtx.ds.Attr),
14160 ENTRY(cpum.GstCtx.fs.Sel),
14161 ENTRY(cpum.GstCtx.fs.u64Base),
14162 ENTRY(cpum.GstCtx.fs.u32Limit),
14163 ENTRY(cpum.GstCtx.fs.Attr),
14164 ENTRY(cpum.GstCtx.gs.Sel),
14165 ENTRY(cpum.GstCtx.gs.u64Base),
14166 ENTRY(cpum.GstCtx.gs.u32Limit),
14167 ENTRY(cpum.GstCtx.gs.Attr),
14168 ENTRY(cpum.GstCtx.rip),
14169 ENTRY(cpum.GstCtx.eflags),
14170 ENTRY(cpum.GstCtx.uRipInhibitInt),
14171#undef ENTRY
14172 };
14173#ifdef VBOX_STRICT
14174 static bool s_fOrderChecked = false;
14175 if (!s_fOrderChecked)
14176 {
14177 s_fOrderChecked = true;
14178 uint32_t offPrev = s_aMembers[0].off;
14179 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
14180 {
14181 Assert(s_aMembers[i].off > offPrev);
14182 offPrev = s_aMembers[i].off;
14183 }
14184 }
14185#endif
14186
14187 /*
14188 * Binary lookup.
14189 */
14190 unsigned iStart = 0;
14191 unsigned iEnd = RT_ELEMENTS(s_aMembers);
14192 for (;;)
14193 {
14194 unsigned const iCur = iStart + (iEnd - iStart) / 2;
14195 uint32_t const offCur = s_aMembers[iCur].off;
14196 if (off < offCur)
14197 {
14198 if (iCur != iStart)
14199 iEnd = iCur;
14200 else
14201 break;
14202 }
14203 else if (off > offCur)
14204 {
14205 if (iCur + 1 < iEnd)
14206 iStart = iCur + 1;
14207 else
14208 break;
14209 }
14210 else
14211 return s_aMembers[iCur].pszName;
14212 }
14213#ifdef VBOX_WITH_STATISTICS
14214 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
14215 return "iem.s.acThreadedFuncStats[iFn]";
14216#endif
14217 return NULL;
14218}
14219
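/*
 * For orientation: the native-code annotation further down uses this to turn
 * a displacement off the fixed pVCpu base register into a member name, e.g.
 * (assuming the offset falls inside the table)
 *
 *     iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip))
 *
 * yields "cpum.GstCtx.rip", while offsets not covered by s_aMembers return NULL.
 */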
14220
14221/**
14222 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
14223 * @returns pszBuf.
14224 * @param fFlags The flags.
14225 * @param pszBuf The output buffer.
14226 * @param cbBuf The output buffer size. At least 32 bytes.
14227 */
14228DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
14229{
14230 Assert(cbBuf >= 32);
14231 static RTSTRTUPLE const s_aModes[] =
14232 {
14233 /* [00] = */ { RT_STR_TUPLE("16BIT") },
14234 /* [01] = */ { RT_STR_TUPLE("32BIT") },
14235 /* [02] = */ { RT_STR_TUPLE("!2!") },
14236 /* [03] = */ { RT_STR_TUPLE("!3!") },
14237 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
14238 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
14239 /* [06] = */ { RT_STR_TUPLE("!6!") },
14240 /* [07] = */ { RT_STR_TUPLE("!7!") },
14241 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
14242 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
14243 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
14244 /* [0b] = */ { RT_STR_TUPLE("!b!") },
14245 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
14246 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
14247 /* [0e] = */ { RT_STR_TUPLE("!e!") },
14248 /* [0f] = */ { RT_STR_TUPLE("!f!") },
14249 /* [10] = */ { RT_STR_TUPLE("!10!") },
14250 /* [11] = */ { RT_STR_TUPLE("!11!") },
14251 /* [12] = */ { RT_STR_TUPLE("!12!") },
14252 /* [13] = */ { RT_STR_TUPLE("!13!") },
14253 /* [14] = */ { RT_STR_TUPLE("!14!") },
14254 /* [15] = */ { RT_STR_TUPLE("!15!") },
14255 /* [16] = */ { RT_STR_TUPLE("!16!") },
14256 /* [17] = */ { RT_STR_TUPLE("!17!") },
14257 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
14258 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
14259 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
14260 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
14261 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
14262 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
14263 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
14264 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
14265 };
14266 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
14267 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
14268 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
14269
14270 pszBuf[off++] = ' ';
14271 pszBuf[off++] = 'C';
14272 pszBuf[off++] = 'P';
14273 pszBuf[off++] = 'L';
14274 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
14275 Assert(off < 32);
14276
14277 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
14278
14279 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
14280 {
14281 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
14282 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
14283 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
14284 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
14285 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
14286 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
14287 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
14288 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
14289 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
14290 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
14291 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
14292 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
14293 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
14294 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
14295 };
14296 if (fFlags)
14297 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
14298 if (s_aFlags[i].fFlag & fFlags)
14299 {
14300 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
14301 pszBuf[off++] = ' ';
14302 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
14303 off += s_aFlags[i].cchName;
14304 fFlags &= ~s_aFlags[i].fFlag;
14305 if (!fFlags)
14306 break;
14307 }
14308 pszBuf[off] = '\0';
14309
14310 return pszBuf;
14311}
14312
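/*
 * Typical usage (see the TB info dump in iemNativeDisassembleTb below): the
 * caller supplies a scratch buffer of at least 32 bytes and prints or logs
 * the result, e.g.
 *
 *     char szBuf[128];
 *     pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n",
 *                     pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 */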
14313
14314DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
14315{
14316 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
14317#if defined(RT_ARCH_AMD64)
14318 static const char * const a_apszMarkers[] =
14319 {
14320 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
14321 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
14322 };
14323#endif
14324
14325 char szDisBuf[512];
14326 DISSTATE Dis;
14327 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
14328 uint32_t const cNative = pTb->Native.cInstructions;
14329 uint32_t offNative = 0;
14330#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14331 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
14332#endif
14333 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14334 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14335 : DISCPUMODE_64BIT;
14336#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14337 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
14338#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14339 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
14340#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
14341# error "Port me"
14342#else
14343 csh hDisasm = ~(size_t)0;
14344# if defined(RT_ARCH_AMD64)
14345 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
14346# elif defined(RT_ARCH_ARM64)
14347 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
14348# else
14349# error "Port me"
14350# endif
14351 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
14352
14353 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
14354 //Assert(rcCs == CS_ERR_OK);
14355#endif
14356
14357 /*
14358 * Print TB info.
14359 */
14360 pHlp->pfnPrintf(pHlp,
14361 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
14362 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
14363 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
14364 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
14365#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
14366 if (pDbgInfo && pDbgInfo->cEntries > 1)
14367 {
14368 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
14369
14370 /*
14371 * This disassembly is driven by the debug info which follows the native
14372 * code and indicates where the next guest instruction starts, where
14373 * labels are, and other such things.
14374 */
14375 uint32_t idxThreadedCall = 0;
14376 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
14377 uint8_t idxRange = UINT8_MAX;
14378 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
14379 uint32_t offRange = 0;
14380 uint32_t offOpcodes = 0;
14381 uint32_t const cbOpcodes = pTb->cbOpcodes;
14382 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
14383 uint32_t const cDbgEntries = pDbgInfo->cEntries;
14384 uint32_t iDbgEntry = 1;
14385 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
14386
14387 while (offNative < cNative)
14388 {
14389 /* If we're at or have passed the point where the next chunk of debug
14390 info starts, process it. */
14391 if (offDbgNativeNext <= offNative)
14392 {
14393 offDbgNativeNext = UINT32_MAX;
14394 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
14395 {
14396 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
14397 {
14398 case kIemTbDbgEntryType_GuestInstruction:
14399 {
14400 /* Did the exec flag change? */
14401 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
14402 {
14403 pHlp->pfnPrintf(pHlp,
14404 " fExec change %#08x -> %#08x %s\n",
14405 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14406 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
14407 szDisBuf, sizeof(szDisBuf)));
14408 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
14409 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
14410 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
14411 : DISCPUMODE_64BIT;
14412 }
14413
14414 /* New opcode range? We need to handle a spurious debug info entry here for cases
14415 where the compilation was aborted before the opcode was recorded and the actual
14416 instruction was translated to a threaded call. This may happen when we run out
14417 of ranges, or when some complicated interrupts/FFs are found to be pending or
14418 similar. So, we just deal with it here rather than in the compiler code as it
14419 is a lot simpler to do here. */
14420 if ( idxRange == UINT8_MAX
14421 || idxRange >= cRanges
14422 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
14423 {
14424 idxRange += 1;
14425 if (idxRange < cRanges)
14426 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
14427 else
14428 continue;
14429 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
14430 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
14431 + (pTb->aRanges[idxRange].idxPhysPage == 0
14432 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14433 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
14434 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14435 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
14436 pTb->aRanges[idxRange].idxPhysPage);
14437 GCPhysPc += offRange;
14438 }
14439
14440 /* Disassemble the instruction. */
14441 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
14442 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
14443 uint32_t cbInstr = 1;
14444 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14445 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
14446 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14447 if (RT_SUCCESS(rc))
14448 {
14449 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14450 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14451 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14452 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14453
14454 static unsigned const s_offMarker = 55;
14455 static char const s_szMarker[] = " ; <--- guest";
14456 if (cch < s_offMarker)
14457 {
14458 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
14459 cch = s_offMarker;
14460 }
14461 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
14462 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
14463
14464 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
14465 }
14466 else
14467 {
14468 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
14469 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
14470 cbInstr = 1;
14471 }
14472 GCPhysPc += cbInstr;
14473 offOpcodes += cbInstr;
14474 offRange += cbInstr;
14475 continue;
14476 }
14477
14478 case kIemTbDbgEntryType_ThreadedCall:
14479 pHlp->pfnPrintf(pHlp,
14480 " Call #%u to %s (%u args) - %s\n",
14481 idxThreadedCall,
14482 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14483 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
14484 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
14485 idxThreadedCall++;
14486 continue;
14487
14488 case kIemTbDbgEntryType_GuestRegShadowing:
14489 {
14490 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
14491 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
14492 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
14493 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
14494 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14495 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
14496 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
14497 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
14498 else
14499 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
14500 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
14501 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
14502 continue;
14503 }
14504
14505 case kIemTbDbgEntryType_Label:
14506 {
14507 const char *pszName = "what_the_fudge";
14508 const char *pszComment = "";
14509 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
14510 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
14511 {
14512 case kIemNativeLabelType_Return:
14513 pszName = "Return";
14514 break;
14515 case kIemNativeLabelType_ReturnBreak:
14516 pszName = "ReturnBreak";
14517 break;
14518 case kIemNativeLabelType_ReturnWithFlags:
14519 pszName = "ReturnWithFlags";
14520 break;
14521 case kIemNativeLabelType_NonZeroRetOrPassUp:
14522 pszName = "NonZeroRetOrPassUp";
14523 break;
14524 case kIemNativeLabelType_RaiseGp0:
14525 pszName = "RaiseGp0";
14526 break;
14527 case kIemNativeLabelType_RaiseNm:
14528 pszName = "RaiseNm";
14529 break;
14530 case kIemNativeLabelType_RaiseUd:
14531 pszName = "RaiseUd";
14532 break;
14533 case kIemNativeLabelType_RaiseMf:
14534 pszName = "RaiseMf";
14535 break;
14536 case kIemNativeLabelType_RaiseXf:
14537 pszName = "RaiseXf";
14538 break;
14539 case kIemNativeLabelType_ObsoleteTb:
14540 pszName = "ObsoleteTb";
14541 break;
14542 case kIemNativeLabelType_NeedCsLimChecking:
14543 pszName = "NeedCsLimChecking";
14544 break;
14545 case kIemNativeLabelType_CheckBranchMiss:
14546 pszName = "CheckBranchMiss";
14547 break;
14548 case kIemNativeLabelType_If:
14549 pszName = "If";
14550 fNumbered = true;
14551 break;
14552 case kIemNativeLabelType_Else:
14553 pszName = "Else";
14554 fNumbered = true;
14555 pszComment = " ; regs state restored pre-if-block";
14556 break;
14557 case kIemNativeLabelType_Endif:
14558 pszName = "Endif";
14559 fNumbered = true;
14560 break;
14561 case kIemNativeLabelType_CheckIrq:
14562 pszName = "CheckIrq_CheckVM";
14563 fNumbered = true;
14564 break;
14565 case kIemNativeLabelType_TlbLookup:
14566 pszName = "TlbLookup";
14567 fNumbered = true;
14568 break;
14569 case kIemNativeLabelType_TlbMiss:
14570 pszName = "TlbMiss";
14571 fNumbered = true;
14572 break;
14573 case kIemNativeLabelType_TlbDone:
14574 pszName = "TlbDone";
14575 fNumbered = true;
14576 break;
14577 case kIemNativeLabelType_Invalid:
14578 case kIemNativeLabelType_End:
14579 break;
14580 }
14581 if (fNumbered)
14582 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
14583 else
14584 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
14585 continue;
14586 }
14587
14588 case kIemTbDbgEntryType_NativeOffset:
14589 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
14590 Assert(offDbgNativeNext > offNative);
14591 break;
14592
14593#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
14594 case kIemTbDbgEntryType_DelayedPcUpdate:
14595 pHlp->pfnPrintf(pHlp,
14596 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
14597 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
14598 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
14599 continue;
14600#endif
14601
14602 default:
14603 AssertFailed();
14604 }
14605 iDbgEntry++;
14606 break;
14607 }
14608 }
14609
14610 /*
14611 * Disassemble the next native instruction.
14612 */
14613 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14614# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14615 uint32_t cbInstr = sizeof(paNative[0]);
14616 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14617 if (RT_SUCCESS(rc))
14618 {
14619# if defined(RT_ARCH_AMD64)
14620 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14621 {
14622 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14623 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14624 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14625 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14626 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14627 uInfo & 0x8000 ? "recompiled" : "todo");
14628 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14629 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14630 else
14631 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14632 }
14633 else
14634# endif
14635 {
14636 const char *pszAnnotation = NULL;
14637# ifdef RT_ARCH_AMD64
14638 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14639 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14640 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14641 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14642 PCDISOPPARAM pMemOp;
14643 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
14644 pMemOp = &Dis.Param1;
14645 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
14646 pMemOp = &Dis.Param2;
14647 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
14648 pMemOp = &Dis.Param3;
14649 else
14650 pMemOp = NULL;
14651 if ( pMemOp
14652 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
14653 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
14654 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
14655 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
14656
14657#elif defined(RT_ARCH_ARM64)
14658 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14659 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14660 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14661# else
14662# error "Port me"
14663# endif
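 /* Append the vCPU-field annotation at a fixed column when the buffer has room for it. */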
14664 if (pszAnnotation)
14665 {
14666 static unsigned const s_offAnnotation = 55;
14667 size_t const cchAnnotation = strlen(pszAnnotation);
14668 size_t cchDis = strlen(szDisBuf);
14669 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
14670 {
14671 if (cchDis < s_offAnnotation)
14672 {
14673 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
14674 cchDis = s_offAnnotation;
14675 }
14676 szDisBuf[cchDis++] = ' ';
14677 szDisBuf[cchDis++] = ';';
14678 szDisBuf[cchDis++] = ' ';
14679 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
14680 }
14681 }
14682 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14683 }
14684 }
14685 else
14686 {
14687# if defined(RT_ARCH_AMD64)
14688 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14689 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14690# elif defined(RT_ARCH_ARM64)
14691 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14692# else
14693# error "Port me"
14694# endif
14695 cbInstr = sizeof(paNative[0]);
14696 }
14697 offNative += cbInstr / sizeof(paNative[0]);
14698
14699# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14700 cs_insn *pInstr;
14701 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14702 (uintptr_t)pNativeCur, 1, &pInstr);
14703 if (cInstrs > 0)
14704 {
14705 Assert(cInstrs == 1);
14706 const char *pszAnnotation = NULL;
14707# if defined(RT_ARCH_ARM64)
14708 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
14709 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
14710 {
14711 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
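 /* E.g. capstone formats these operands as "w0, [x28, #0x5a0]"; x28 addresses pVCpu directly,
  * while x27 addresses pVCpu->cpum.GstCtx. */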
14712 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
14713 char *psz = strchr(pInstr->op_str, '[');
14714 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
14715 {
14716 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
14717 int32_t off = -1;
14718 psz += 4;
14719 if (*psz == ']')
14720 off = 0;
14721 else if (*psz == ',')
14722 {
14723 psz = RTStrStripL(psz + 1);
14724 if (*psz == '#')
14725 off = RTStrToInt32(&psz[1]);
14726 /** @todo deal with index registers and LSL as well... */
14727 }
14728 if (off >= 0)
14729 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
14730 }
14731 }
14732# endif
14733
14734 size_t const cchOp = strlen(pInstr->op_str);
14735# if defined(RT_ARCH_AMD64)
14736 if (pszAnnotation)
14737 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
14738 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
14739 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14740 else
14741 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14742 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14743
14744# else
14745 if (pszAnnotation)
14746 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
14747 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
14748 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
14749 else
14750 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14751 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14752# endif
14753 offNative += pInstr->size / sizeof(*pNativeCur);
14754 cs_free(pInstr, cInstrs);
14755 }
14756 else
14757 {
14758# if defined(RT_ARCH_AMD64)
14759 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14760 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14761# else
14762 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14763# endif
14764 offNative++;
14765 }
14766# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14767 }
14768 }
14769 else
14770#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
14771 {
14772 /*
14773 * No debug info, just disassemble the x86 code and then the native code.
14774 *
14775 * First the guest code:
14776 */
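 /* A range with idxPhysPage == 0 lies on the TB's own first physical page; other ranges reference
  * one of the extra pages recorded in aGCPhysPages[]. */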
14777 for (unsigned i = 0; i < pTb->cRanges; i++)
14778 {
14779 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
14780 + (pTb->aRanges[i].idxPhysPage == 0
14781 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
14782 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
14783 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
14784 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
14785 unsigned off = pTb->aRanges[i].offOpcodes;
14786 /** @todo This doesn't work when crossing pages! */
14787 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
14788 while (off < cbOpcodes)
14789 {
14790 uint32_t cbInstr = 1;
14791 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
14792 &pTb->pabOpcodes[off], cbOpcodes - off,
14793 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
14794 if (RT_SUCCESS(rc))
14795 {
14796 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14797 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14798 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14799 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14800 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
14801 GCPhysPc += cbInstr;
14802 off += cbInstr;
14803 }
14804 else
14805 {
14806 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
14807 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
14808 break;
14809 }
14810 }
14811 }
14812
14813 /*
14814 * Then the native code:
14815 */
14816 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
14817 while (offNative < cNative)
14818 {
14819 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
14820# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14821 uint32_t cbInstr = sizeof(paNative[0]);
14822 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
14823 if (RT_SUCCESS(rc))
14824 {
14825# if defined(RT_ARCH_AMD64)
14826 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
14827 {
14828 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
14829 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
14830 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
14831 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
14832 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
14833 uInfo & 0x8000 ? "recompiled" : "todo");
14834 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
14835 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
14836 else
14837 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
14838 }
14839 else
14840# endif
14841 {
14842# ifdef RT_ARCH_AMD64
14843 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
14844 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
14845 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14846 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14847# elif defined(RT_ARCH_ARM64)
14848 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
14849 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
14850 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
14851# else
14852# error "Port me"
14853# endif
14854 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
14855 }
14856 }
14857 else
14858 {
14859# if defined(RT_ARCH_AMD64)
14860 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
14861 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
14862# else
14863 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
14864# endif
14865 cbInstr = sizeof(paNative[0]);
14866 }
14867 offNative += cbInstr / sizeof(paNative[0]);
14868
14869# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14870 cs_insn *pInstr;
14871 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
14872 (uintptr_t)pNativeCur, 1, &pInstr);
14873 if (cInstrs > 0)
14874 {
14875 Assert(cInstrs == 1);
14876# if defined(RT_ARCH_AMD64)
14877 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
14878 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
14879# else
14880 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
14881 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
14882# endif
14883 offNative += pInstr->size / sizeof(*pNativeCur);
14884 cs_free(pInstr, cInstrs);
14885 }
14886 else
14887 {
14888# if defined(RT_ARCH_AMD64)
14889 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
14890 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
14891# else
14892 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
14893# endif
14894 offNative++;
14895 }
14896# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
14897 }
14898 }
14899
14900#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
14901 /* Cleanup. */
14902 cs_close(&hDisasm);
14903#endif
14904}
14905
14906
14907/**
14908 * Recompiles the given threaded TB into a native one.
14909 *
14910 * In case of failure the translation block will be returned as-is.
14911 *
14912 * @returns pTb.
14913 * @param pVCpu The cross context virtual CPU structure of the calling
14914 * thread.
14915 * @param pTb The threaded translation block to recompile to native.
14916 */
14917DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
14918{
14919 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
14920
14921 /*
14922 * The first time through, we allocate the recompiler state; on subsequent calls
14923 * we just need to reset it before using it again.
14924 */
14925 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
14926 if (RT_LIKELY(pReNative))
14927 iemNativeReInit(pReNative, pTb);
14928 else
14929 {
14930 pReNative = iemNativeInit(pVCpu, pTb);
14931 AssertReturn(pReNative, pTb);
14932 }
14933
14934#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14935 /*
14936 * First do liveness analysis. This is done backwards.
14937 */
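 /* The last entry is seeded as 'all unused'; walking backwards, entry [i - 1] is then derived from
  * entry [i] by the liveness function of call [i], or by the generic exception/call initializer when
  * no dedicated liveness function exists for that threaded function. */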
14938 {
14939 uint32_t idxCall = pTb->Thrd.cCalls;
14940 if (idxCall <= pReNative->cLivenessEntriesAlloc)
14941 { /* likely */ }
14942 else
14943 {
14944 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
14945 while (idxCall > cAlloc)
14946 cAlloc *= 2;
14947 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
14948 AssertReturn(pvNew, pTb);
14949 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
14950 pReNative->cLivenessEntriesAlloc = cAlloc;
14951 }
14952 AssertReturn(idxCall > 0, pTb);
14953 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
14954
14955 /* The initial (final) entry. */
14956 idxCall--;
14957 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
14958
14959 /* Loop backwards through the calls and fill in the other entries. */
14960 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
14961 while (idxCall > 0)
14962 {
14963 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
14964 if (pfnLiveness)
14965 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
14966 else
14967 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
14968 pCallEntry--;
14969 idxCall--;
14970 }
14971
14972# ifdef VBOX_WITH_STATISTICS
14973 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
14974 to 'clobbered' rather than 'input'. */
14975 /** @todo */
14976# endif
14977 }
14978#endif
14979
14980 /*
14981 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
14982 * for aborting if an error happens.
14983 */
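 /* Conceptually (a hedged sketch, not the actual macro bodies) the pattern below amounts to:
  *      if ((rc = setjmp(JmpBuf)) == 0)
  *      {   ...emit prolog, per-call code and epilog, longjmp'ing out on failure...   }
  *      else
  *      {   ...log the status code and return the unmodified threaded pTb...   }
  */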
14984 uint32_t cCallsLeft = pTb->Thrd.cCalls;
14985#ifdef LOG_ENABLED
14986 uint32_t const cCallsOrg = cCallsLeft;
14987#endif
14988 uint32_t off = 0;
14989 int rc = VINF_SUCCESS;
14990 IEMNATIVE_TRY_SETJMP(pReNative, rc)
14991 {
14992 /*
14993 * Emit prolog code (fixed).
14994 */
14995 off = iemNativeEmitProlog(pReNative, off);
14996
14997 /*
14998 * Convert the calls to native code.
14999 */
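 /* Each threaded call either has a dedicated native recompiler in g_apfnIemNativeRecompileFunctions
  * or, failing that, is emitted as a plain call to its threaded helper function. */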
15000#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15001 int32_t iGstInstr = -1;
15002#endif
15003#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
15004 uint32_t cThreadedCalls = 0;
15005 uint32_t cRecompiledCalls = 0;
15006#endif
15007#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
15008 uint32_t idxCurCall = 0;
15009#endif
15010 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
15011 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
15012 while (cCallsLeft-- > 0)
15013 {
15014 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
15015#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
15016 pReNative->idxCurCall = idxCurCall;
15017#endif
15018
15019 /*
15020 * Debug info, assembly markup and statistics.
15021 */
15022#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
15023 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
15024 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
15025#endif
15026#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15027 iemNativeDbgInfoAddNativeOffset(pReNative, off);
15028 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
15029 {
15030 if (iGstInstr < (int32_t)pTb->cInstructions)
15031 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
15032 else
15033 Assert(iGstInstr == pTb->cInstructions);
15034 iGstInstr = pCallEntry->idxInstr;
15035 }
15036 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
15037#endif
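 /* The marker encodes the call index (low 15 bits), a 'recompiled' flag (bit 15) and the threaded
  * function number (high word); the TB disassembler decodes it in its OP_NOP handling. */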
15038#if defined(VBOX_STRICT)
15039 off = iemNativeEmitMarker(pReNative, off,
15040 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
15041#endif
15042#if defined(VBOX_STRICT)
15043 iemNativeRegAssertSanity(pReNative);
15044#endif
15045#ifdef VBOX_WITH_STATISTICS
15046 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
15047#endif
15048
15049 /*
15050 * Actual work.
15051 */
15052 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
15053 pfnRecom ? "(recompiled)" : "(todo)"));
15054 if (pfnRecom) /** @todo stats on this. */
15055 {
15056 off = pfnRecom(pReNative, off, pCallEntry);
15057 STAM_REL_STATS({cRecompiledCalls++;});
15058 }
15059 else
15060 {
15061 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
15062 STAM_REL_STATS({cThreadedCalls++;});
15063 }
15064 Assert(off <= pReNative->cInstrBufAlloc);
15065 Assert(pReNative->cCondDepth == 0);
15066
15067#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
15068 if (LogIs2Enabled())
15069 {
15070 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
15071# ifndef IEMLIVENESS_EXTENDED_LAYOUT
15072 static const char s_achState[] = "CUXI";
15073# else
15074 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
15075# endif
15076
15077 char szGpr[17];
15078 for (unsigned i = 0; i < 16; i++)
15079 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
15080 szGpr[16] = '\0';
15081
15082 char szSegBase[X86_SREG_COUNT + 1];
15083 char szSegLimit[X86_SREG_COUNT + 1];
15084 char szSegAttrib[X86_SREG_COUNT + 1];
15085 char szSegSel[X86_SREG_COUNT + 1];
15086 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
15087 {
15088 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
15089 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
15090 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
15091 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
15092 }
15093 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
15094 = szSegSel[X86_SREG_COUNT] = '\0';
15095
15096 char szEFlags[8];
15097 for (unsigned i = 0; i < 7; i++)
15098 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
15099 szEFlags[7] = '\0';
15100
15101 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
15102 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
15103 }
15104#endif
15105
15106 /*
15107 * Advance.
15108 */
15109 pCallEntry++;
15110#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
15111 idxCurCall++;
15112#endif
15113 }
15114
15115 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
15116 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
15117 if (!cThreadedCalls)
15118 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
15119
15120 /*
15121 * Emit the epilog code.
15122 */
15123 uint32_t idxReturnLabel;
15124 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
15125
15126 /*
15127 * Generate code for the special jump labels that were actually requested (see bmLabelTypes).
15128 */
15129 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
15130 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
15131 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
15132 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
15133 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
15134 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
15135 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
15136 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
15137 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
15138 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
15139 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
15140 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
15141 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
15142 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
15143 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
15144 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
15145 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
15146 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
15147 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
15148 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
15149 }
15150 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
15151 {
15152 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
15153 return pTb;
15154 }
15155 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
15156 Assert(off <= pReNative->cInstrBufAlloc);
15157
15158 /*
15159 * Make sure all labels have been defined.
15160 */
15161 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
15162#ifdef VBOX_STRICT
15163 uint32_t const cLabels = pReNative->cLabels;
15164 for (uint32_t i = 0; i < cLabels; i++)
15165 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
15166#endif
15167
15168 /*
15169 * Allocate executable memory, copy over the code we've generated.
15170 */
15171 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
15172 if (pTbAllocator->pDelayedFreeHead)
15173 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
15174
15175 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
15176 AssertReturn(paFinalInstrBuf, pTb);
15177 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
15178
15179 /*
15180 * Apply fixups.
15181 */
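 /* Each fixup patches the code at paFinalInstrBuf[paFixups[i].off] with the displacement to its label,
  * i.e. label.off - fixup.off + offAddend, counted in IEMNATIVEINSTR units.  E.g. an ARM64 RelImm26At0
  * fixup at instruction offset 0x20 targeting a label at 0x80 with a zero addend yields offDisp = 0x60,
  * which is written into the low 26 bits of the branch instruction while bits [31:26] are preserved. */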
15182 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
15183 uint32_t const cFixups = pReNative->cFixups;
15184 for (uint32_t i = 0; i < cFixups; i++)
15185 {
15186 Assert(paFixups[i].off < off);
15187 Assert(paFixups[i].idxLabel < cLabels);
15188 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
15189 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
15190 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
15191 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
15192 switch (paFixups[i].enmType)
15193 {
15194#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
15195 case kIemNativeFixupType_Rel32:
15196 Assert(paFixups[i].off + 4 <= off);
15197 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15198 continue;
15199
15200#elif defined(RT_ARCH_ARM64)
15201 case kIemNativeFixupType_RelImm26At0:
15202 {
15203 Assert(paFixups[i].off < off);
15204 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15205 Assert(offDisp >= -262144 && offDisp < 262144);
15206 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
15207 continue;
15208 }
15209
15210 case kIemNativeFixupType_RelImm19At5:
15211 {
15212 Assert(paFixups[i].off < off);
15213 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15214 Assert(offDisp >= -262144 && offDisp < 262144);
15215 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
15216 continue;
15217 }
15218
15219 case kIemNativeFixupType_RelImm14At5:
15220 {
15221 Assert(paFixups[i].off < off);
15222 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
15223 Assert(offDisp >= -8192 && offDisp < 8192);
15224 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
15225 continue;
15226 }
15227
15228#endif
15229 case kIemNativeFixupType_Invalid:
15230 case kIemNativeFixupType_End:
15231 break;
15232 }
15233 AssertFailed();
15234 }
15235
15236 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
15237 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
15238
15239 /*
15240 * Convert the translation block from threaded to native (free the call table, install the native code, flip the type flag).
15241 */
15242 RTMemFree(pTb->Thrd.paCalls);
15243 pTb->Native.paInstructions = paFinalInstrBuf;
15244 pTb->Native.cInstructions = off;
15245 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
15246#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15247 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
15248 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
15249#endif
15250
15251 Assert(pTbAllocator->cThreadedTbs > 0);
15252 pTbAllocator->cThreadedTbs -= 1;
15253 pTbAllocator->cNativeTbs += 1;
15254 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
15255
15256#ifdef LOG_ENABLED
15257 /*
15258 * Disassemble to the log if enabled.
15259 */
15260 if (LogIs3Enabled())
15261 {
15262 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
15263 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
15264# ifdef DEBUG_bird
15265 RTLogFlush(NULL);
15266# endif
15267 }
15268#endif
15269 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
15270
15271 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
15272 return pTb;
15273}
15274