
source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103990

Last change on this file since 103990 was 103990, checked in by vboxsync, 14 months ago

VMM/IEM: Fix the SIMD guest register value checking: cnt operates on byte elements, so the generated code would only actually check the low 64 bits instead of the whole 128 bits for matching values. Use uaddlv instead, which sums up all byte values and stores the result in the low 16 bits of the vector register; the result should be zero for matching values. bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 428.0 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103990 2024-03-21 14:23:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
 71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
161
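/* Illustrative arithmetic only (not part of the allocator): with the 128 byte
   unit above, a request is rounded up to whole units before the bitmap scan,
   e.g. a 200 byte request becomes (200 + 128 - 1) >> 7 = 2 units (256 bytes);
   a 64 MiB chunk thus holds 512K units, i.e. 8192 uint64_t bitmap words. */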
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
 262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap.
338 * So, for the sake of simplicity, they are allocated as one contiguous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
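/* A small worked example of the first-fit scan above (illustrative only): with
   cReqUnits = 3 and a toy bitmap whose low bits are 0b00011001 (bit 0 = lowest),
   ASMBitFirstClear returns bit 1; bit 2 is also clear but bit 3 is set, so
   idxAddBit stops at 2 and the scan resumes with ASMBitNextClear after bit 2,
   landing on bit 5 where bits 5..7 are all clear -- those three units are then
   marked allocated via ASMBitSetRange and handed out. */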
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * For the alternative allocator we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
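/* Typical usage of the two functions above -- a sketch of the intended flow,
   not a verbatim copy of any caller in this file (pbSrc/cbNeeded are made up):
       uint8_t * const pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
       if (pbDst)
       {
           memcpy(pbDst, pbSrc, cbNeeded);                         // emit/copy the code
           iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbNeeded);  // back to RX + icache flush
       }
   On darwin the allocation comes back read+write (see
   iemExecMemAllocatorAllocTailCode), so the ReadyForUse call is what makes it
   executable again. */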
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
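    /* Reading the table bottom-up gives the rough prologue shape it unwinds
     * (illustrative only; the real prologue is emitted by the recompiler
     * elsewhere): push RBP, RBX, RSI, RDI, R12..R15, an 8 byte stack
     * allocation, and finally RBP established as frame register at
     * FrameOffset * 16 (0x60 here) relative to RSP at that point. */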
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
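/* Example (illustrative): the data alignment factor -8 used further down encodes
   as the single byte 0x78 (((uint8_t)-8 & 0x3f) | 0x40), while 1 encodes as 0x01. */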
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
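/* Example (illustrative): 0x12 encodes as the single byte 0x12, while 0x90 needs
   two bytes, 0x90 0x01 (low 7 bits with the continuation bit set, then the rest). */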
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
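/* Example (illustrative, using the standard AMD64 DWARF register numbers):
   iemDwarfPutCfaDefCfa(Ptr, 6 /*RBP*/, 16) emits 0x0c 0x06 0x10
   (DW_CFA_def_cfa, reg, offset), and iemDwarfPutCfaOffset(Ptr, 6, 2) emits
   0x86 0x02, i.e. "register 6 is saved at CFA + 2 * data_alignment_factor". */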
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
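    /*
     * At this point abEhFrame holds, in order: the CIE (length, CIE id 0,
     * version, augmentation, alignment factors, return address column and the
     * initial CFI instructions), a single FDE covering the whole chunk, and a
     * zero length terminator entry -- i.e. a minimal .eh_frame for the chunk.
     */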
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
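        /* Worked example of the adjustment described above (illustrative only):
           a 200 byte request becomes RT_ALIGN_32(200 + 32, 64) - 32 = 224 bytes,
           so header (32) + user area (224) ends exactly on a 64 byte line and
           the next block's user area starts 64 byte aligned again. */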
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
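    /* Worked example (illustrative, assuming ASMBitLastSetU32 returns the
       1-based index of the most significant set bit): cbMax = 40 MiB gives
       cbChunk = 10 MiB, which is not a power of two and is rounded up to
       16 MiB; the rounding of cbMax below then yields 48 MiB, i.e.
       cMaxChunks = 3. A cbMax of 256 MiB or more simply uses 64 MiB chunks. */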
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
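     /* Size of the allocator structure including the variable-size aChunks array (cMaxChunks entries). */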
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
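     /* Allocation bitmaps: one bit per allocation unit, eight units per bitmap byte (hence the +3 in the shift). */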
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
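     /* Reserve space for the per-chunk eh_frame unwind info (see iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk). */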
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
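     /* The bitmap is made up of 64-bit elements, each covering 64 allocation units (hence the +6 in the shift). */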
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
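     /* Mark all chunk slots as unused; actual chunks are added on demand by iemExecMemAllocatorGrow. */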
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
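     /* Grow one chunk at a time until at least cbInitial bytes are backed by executable memory. */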
1554 while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
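     /* Credit the instructions executed in this TB before handing the status code to the common fiddling code. */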
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadedFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695 /* We set fSafeToFree to false because we're being called in the context
1696 of a TB callback function, which for native TBs means we cannot release
1697 the executable memory till we've returned our way back to iemTbExec, as
1698 that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 128-bit data w/ segmentation.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to load 256-bit data w/ segmentation.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1919 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1920#else
1921 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to load 256-bit data w/ segmentation.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1932 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1933#else
1934 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1935#endif
1936}
1937#endif
1938
1939
1940/**
1941 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1947#else
1948 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1960#else
1961 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1972 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1973#else
1974 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1975#endif
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1985 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1986#else
1987 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1988#endif
1989}
1990
1991
1992#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1993/**
1994 * Used by TB code to store unsigned 128-bit data w/ segmentation.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1999 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2000#else
2001 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to store unsigned 128-bit data w/ segmentation.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2012 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2013#else
2014 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2015#endif
2016}
2017
2018
2019/**
2020 * Used by TB code to store unsigned 256-bit data w/ segmentation.
2021 */
2022IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2023{
2024#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2025 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2026#else
2027 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2028#endif
2029}
2030
2031
2032/**
2033 * Used by TB code to store unsigned 256-bit data w/ segmentation.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2038 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2039#else
2040 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2041#endif
2042}
2043#endif
2044
2045
2046
2047/**
2048 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2053 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2054#else
2055 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2066 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2067#else
2068 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to store a 32-bit selector value onto a generic stack.
2075 *
2076 * Intel CPUs don't write a whole dword, thus the special function.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2081 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2082#else
2083 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2094 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2095#else
2096 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2107 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2108#else
2109 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114/**
2115 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2116 */
2117IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2118{
2119#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2120 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2121#else
2122 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2123#endif
2124}
2125
2126
2127/**
2128 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2133 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2134#else
2135 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140
2141/*********************************************************************************************************************************
2142* Helpers: Flat memory fetches and stores. *
2143*********************************************************************************************************************************/
2144
2145/**
2146 * Used by TB code to load unsigned 8-bit data w/ flat address.
2147 * @note Zero extending the value to 64-bit to simplify assembly.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2152 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2153#else
2154 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2161 * to 16 bits.
2162 * @note Zero extending the value to 64-bit to simplify assembly.
2163 */
2164IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2165{
2166#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2167 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2168#else
2169 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2170#endif
2171}
2172
2173
2174/**
2175 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2176 * to 32 bits.
2177 * @note Zero extending the value to 64-bit to simplify assembly.
2178 */
2179IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2180{
2181#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2182 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2183#else
2184 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2185#endif
2186}
2187
2188
2189/**
2190 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2191 * to 64 bits.
2192 */
2193IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2194{
2195#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2196 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2197#else
2198 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2199#endif
2200}
2201
2202
2203/**
2204 * Used by TB code to load unsigned 16-bit data w/ flat address.
2205 * @note Zero extending the value to 64-bit to simplify assembly.
2206 */
2207IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2208{
2209#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2210 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2211#else
2212 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2213#endif
2214}
2215
2216
2217/**
2218 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2219 * to 32 bits.
2220 * @note Zero extending the value to 64-bit to simplify assembly.
2221 */
2222IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2223{
2224#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2225 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2226#else
2227 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2228#endif
2229}
2230
2231
2232/**
2233 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2234 * to 64 bits.
2235 * @note Zero extending the value to 64-bit to simplify assembly.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2240 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2241#else
2242 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to load unsigned 32-bit data w/ flat address.
2249 * @note Zero extending the value to 64-bit to simplify assembly.
2250 */
2251IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2252{
2253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2254 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2255#else
2256 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2257#endif
2258}
2259
2260
2261/**
2262 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2263 * to 64 bits.
2264 * @note Zero extending the value to 64-bit to simplify assembly.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2269 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2270#else
2271 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276/**
2277 * Used by TB code to load unsigned 64-bit data w/ flat address.
2278 */
2279IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2280{
2281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2282 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2283#else
2284 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2285#endif
2286}
2287
2288
2289#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2290/**
2291 * Used by TB code to load unsigned 128-bit data w/ flat address.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2296 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2297#else
2298 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2299#endif
2300}
2301
2302
2303/**
2304 * Used by TB code to load unsigned 128-bit data w/ flat address.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2307{
2308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2309 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2310#else
2311 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2312#endif
2313}
2314
2315
2316/**
2317 * Used by TB code to load unsigned 128-bit data w/ flat address.
2318 */
2319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2322 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2323#else
2324 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to load unsigned 256-bit data w/ flat address.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2333{
2334#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2335 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2336#else
2337 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2338#endif
2339}
2340
2341
2342/**
2343 * Used by TB code to load unsigned 256-bit data w/ flat address.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2346{
2347#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2348 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2349#else
2350 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2351#endif
2352}
2353#endif
2354
2355
2356/**
2357 * Used by TB code to store unsigned 8-bit data w/ flat address.
2358 */
2359IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2362 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2363#else
2364 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2365#endif
2366}
2367
2368
2369/**
2370 * Used by TB code to store unsigned 16-bit data w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2375 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2376#else
2377 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to store unsigned 32-bit data w/ flat address.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2388 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2389#else
2390 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to store unsigned 64-bit data w/ flat address.
2397 */
2398IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2399{
2400#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2401 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2402#else
2403 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2404#endif
2405}
2406
2407
2408#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2409/**
2410 * Used by TB code to store unsigned 128-bit data w/ flat address.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2413{
2414#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2415 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2416#else
2417 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2418#endif
2419}
2420
2421
2422/**
2423 * Used by TB code to store unsigned 128-bit data w/ flat address.
2424 */
2425IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2426{
2427#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2428 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2429#else
2430 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2431#endif
2432}
2433
2434
2435/**
2436 * Used by TB code to store unsigned 256-bit data w/ flat address.
2437 */
2438IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2441 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2442#else
2443 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to store unsigned 256-bit data w/ flat address.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2452{
2453#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2454 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2455#else
2456 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2457#endif
2458}
2459#endif
2460
2461
2462
2463/**
2464 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2465 */
2466IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2469 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2470#else
2471 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2480{
2481#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2482 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2483#else
2484 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2485#endif
2486}
2487
2488
2489/**
2490 * Used by TB code to store a segment selector value onto a flat stack.
2491 *
2492 * Intel CPUs don't write a whole dword, thus the special function.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2495{
2496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2497 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2498#else
2499 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2500#endif
2501}
2502
2503
2504/**
2505 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2506 */
2507IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2508{
2509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2510 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2511#else
2512 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2513#endif
2514}
2515
2516
2517/**
2518 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2519 */
2520IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2521{
2522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2523 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2524#else
2525 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2526#endif
2527}
2528
2529
2530/**
2531 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2532 */
2533IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2534{
2535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2536 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2537#else
2538 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2539#endif
2540}
2541
2542
2543/**
2544 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2545 */
2546IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2549 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2550#else
2551 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2552#endif
2553}
2554
2555
2556
2557/*********************************************************************************************************************************
2558* Helpers: Segmented memory mapping. *
2559*********************************************************************************************************************************/
2560
2561/**
2562 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2563 * segmentation.
2564 */
2565IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2566 RTGCPTR GCPtrMem, uint8_t iSegReg))
2567{
2568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2569 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2570#else
2571 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#endif
2573}
2574
2575
2576/**
2577 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2620 * segmentation.
2621 */
2622IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2623 RTGCPTR GCPtrMem, uint8_t iSegReg))
2624{
2625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2626 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2627#else
2628 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#endif
2630}
2631
2632
2633/**
2634 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2635 */
2636IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2637 RTGCPTR GCPtrMem, uint8_t iSegReg))
2638{
2639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2640 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2641#else
2642 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2643#endif
2644}
2645
2646
2647/**
2648 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2649 */
2650IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2651 RTGCPTR GCPtrMem, uint8_t iSegReg))
2652{
2653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2654 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2655#else
2656 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2657#endif
2658}
2659
2660
2661/**
2662 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2663 */
2664IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2665 RTGCPTR GCPtrMem, uint8_t iSegReg))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2669#else
2670 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2677 * segmentation.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2680 RTGCPTR GCPtrMem, uint8_t iSegReg))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2684#else
2685 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2694 RTGCPTR GCPtrMem, uint8_t iSegReg))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2698#else
2699 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2706 */
2707IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2708 RTGCPTR GCPtrMem, uint8_t iSegReg))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2712#else
2713 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2722 RTGCPTR GCPtrMem, uint8_t iSegReg))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2726#else
2727 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2734 * segmentation.
2735 */
2736IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2737 RTGCPTR GCPtrMem, uint8_t iSegReg))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2741#else
2742 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2749 */
2750IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2751 RTGCPTR GCPtrMem, uint8_t iSegReg))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2755#else
2756 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2765 RTGCPTR GCPtrMem, uint8_t iSegReg))
2766{
2767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2768 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2769#else
2770 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2771#endif
2772}
2773
2774
2775/**
2776 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2777 */
2778IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2779 RTGCPTR GCPtrMem, uint8_t iSegReg))
2780{
2781#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2782 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2783#else
2784 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2785#endif
2786}
2787
2788
2789/**
2790 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2791 */
2792IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2793 RTGCPTR GCPtrMem, uint8_t iSegReg))
2794{
2795#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2796 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2797#else
2798 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2799#endif
2800}
2801
2802
2803/**
2804 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2805 */
2806IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2807 RTGCPTR GCPtrMem, uint8_t iSegReg))
2808{
2809#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2810 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2811#else
2812 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2813#endif
2814}
2815
2816
2817/**
2818 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2819 * segmentation.
2820 */
2821IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2822 RTGCPTR GCPtrMem, uint8_t iSegReg))
2823{
2824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2825 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2826#else
2827 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#endif
2829}
2830
2831
2832/**
2833 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2834 */
2835IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2836 RTGCPTR GCPtrMem, uint8_t iSegReg))
2837{
2838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2839 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2840#else
2841 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2842#endif
2843}
2844
2845
2846/**
2847 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2848 */
2849IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2850 RTGCPTR GCPtrMem, uint8_t iSegReg))
2851{
2852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2853 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2854#else
2855 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2856#endif
2857}
2858
2859
2860/**
2861 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2862 */
2863IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2864 RTGCPTR GCPtrMem, uint8_t iSegReg))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2868#else
2869 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2870#endif
2871}
2872
2873
2874/*********************************************************************************************************************************
2875* Helpers: Flat memory mapping. *
2876*********************************************************************************************************************************/
2877
2878/**
2879 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2880 * address.
2881 */
2882IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2883{
2884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2885 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2886#else
2887 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2888#endif
2889}
2890
2891
2892/**
2893 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2896{
2897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2898 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2899#else
2900 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2901#endif
2902}
2903
2904
2905/**
2906 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2907 */
2908IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2909{
2910#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2911 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2912#else
2913 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2914#endif
2915}
2916
2917
2918/**
2919 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2920 */
2921IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2922{
2923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2924 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2925#else
2926 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2927#endif
2928}
2929
2930
2931/**
2932 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2933 * address.
2934 */
2935IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2936{
2937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2938 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2939#else
2940 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2941#endif
2942}
2943
2944
2945/**
2946 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2947 */
2948IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2949{
2950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2951 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2952#else
2953 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2954#endif
2955}
2956
2957
2958/**
2959 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2962{
2963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2964 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2965#else
2966 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2967#endif
2968}
2969
2970
2971/**
2972 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2973 */
2974IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2975{
2976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2977 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2978#else
2979 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2980#endif
2981}
2982
2983
2984/**
2985 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2986 * address.
2987 */
2988IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2989{
2990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2991 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2992#else
2993 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2994#endif
2995}
2996
2997
2998/**
2999 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3000 */
3001IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3002{
3003#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3004 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3005#else
3006 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3007#endif
3008}
3009
3010
3011/**
3012 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3013 */
3014IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3015{
3016#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3017 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3018#else
3019 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3020#endif
3021}
3022
3023
3024/**
3025 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3026 */
3027IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3028{
3029#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3030 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3031#else
3032 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3033#endif
3034}
3035
3036
3037/**
3038 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3039 * address.
3040 */
3041IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3042{
3043#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3044 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3045#else
3046 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3047#endif
3048}
3049
3050
3051/**
3052 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3053 */
3054IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3055{
3056#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3057 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3058#else
3059 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3060#endif
3061}
3062
3063
3064/**
3065 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3066 */
3067IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3068{
3069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3070 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3071#else
3072 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3073#endif
3074}
3075
3076
3077/**
3078 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3079 */
3080IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3081{
3082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3083 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3084#else
3085 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3086#endif
3087}
3088
3089
3090/**
3091 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3092 */
3093IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3094{
3095#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3096 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3097#else
3098 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3099#endif
3100}
3101
3102
3103/**
3104 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3105 */
3106IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3107{
3108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3109 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3110#else
3111 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3112#endif
3113}
3114
3115
3116/**
3117 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3118 * address.
3119 */
3120IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3121{
3122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3123 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3124#else
3125 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3126#endif
3127}
3128
3129
3130/**
3131 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3132 */
3133IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3134{
3135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3136 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3137#else
3138 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3139#endif
3140}
3141
3142
3143/**
3144 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3145 */
3146IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3147{
3148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3149 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3150#else
3151 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3152#endif
3153}
3154
3155
3156/**
3157 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3158 */
3159IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3160{
3161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3162 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3163#else
3164 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3165#endif
3166}
3167
3168
3169/*********************************************************************************************************************************
3170* Helpers: Commit, rollback & unmap *
3171*********************************************************************************************************************************/
3172
3173/**
3174 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3177{
3178 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3179}
3180
3181
3182/**
3183 * Used by TB code to commit and unmap a read-write memory mapping.
3184 */
3185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3186{
3187 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3188}
3189
3190
3191/**
3192 * Used by TB code to commit and unmap a write-only memory mapping.
3193 */
3194IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3195{
3196 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3197}
3198
3199
3200/**
3201 * Used by TB code to commit and unmap a read-only memory mapping.
3202 */
3203IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3204{
3205 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3206}
3207
3208
3209/**
3210 * Reinitializes the native recompiler state.
3211 *
3212 * Called before starting a new recompile job.
3213 */
3214static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3215{
3216 pReNative->cLabels = 0;
3217 pReNative->bmLabelTypes = 0;
3218 pReNative->cFixups = 0;
3219#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3220 pReNative->pDbgInfo->cEntries = 0;
3221#endif
3222 pReNative->pTbOrg = pTb;
3223 pReNative->cCondDepth = 0;
3224 pReNative->uCondSeqNo = 0;
3225 pReNative->uCheckIrqSeqNo = 0;
3226 pReNative->uTlbSeqNo = 0;
3227
3228#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3229 pReNative->Core.offPc = 0;
3230 pReNative->Core.cInstrPcUpdateSkipped = 0;
3231#endif
3232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3233 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3234#endif
3235 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3236#if IEMNATIVE_HST_GREG_COUNT < 32
3237 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3238#endif
3239 ;
3240 pReNative->Core.bmHstRegsWithGstShadow = 0;
3241 pReNative->Core.bmGstRegShadows = 0;
3242 pReNative->Core.bmVars = 0;
3243 pReNative->Core.bmStack = 0;
3244 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3245 pReNative->Core.u64ArgVars = UINT64_MAX;
3246
3247 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3248 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3249 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3250 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3251 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3252 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3253 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3254 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3255 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3256 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3257 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3258 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3259 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3260 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3261 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3262 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3263 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3264
3265 /* Full host register reinit: */
3266 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3267 {
3268 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3269 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3270 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3271 }
3272
3273 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3274 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3275#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3276 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3277#endif
3278#ifdef IEMNATIVE_REG_FIXED_TMP0
3279 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3280#endif
3281#ifdef IEMNATIVE_REG_FIXED_TMP1
3282 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3283#endif
3284#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3285 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3286#endif
3287 );
3288 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3289 {
3290 fRegs &= ~RT_BIT_32(idxReg);
3291        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3292 }
3293
3294 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3295#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3296 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3297#endif
3298#ifdef IEMNATIVE_REG_FIXED_TMP0
3299 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3300#endif
3301#ifdef IEMNATIVE_REG_FIXED_TMP1
3302 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3303#endif
3304#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3305 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3306#endif
3307
3308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3309# ifdef RT_ARCH_ARM64
3310 /*
3311     * ARM64 only has 32 128-bit registers.  In order to support emulating 256-bit registers we statically pair
3312     * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3313     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3314     * and the register allocator assumes that it will always be free when the lower one is picked.
3315 */
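    /* Note: 0xaaaaaaaa sets every odd bit (1, 3, ..., 31), i.e. v1,v3,...,v31 are the registers reserved here,
       leaving each even register free to use the odd register right above it as its 256-bit high half. */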
3316 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3317# else
3318 uint32_t const fFixedAdditional = 0;
3319# endif
3320
3321 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3322 | fFixedAdditional
3323# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3324 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3325# endif
3326 ;
3327 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3328 pReNative->Core.bmGstSimdRegShadows = 0;
3329 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3330 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3331
3332 /* Full host register reinit: */
3333 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3334 {
3335 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3336 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3337 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3338 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3339 }
3340
3341 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3342 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3343 {
3344 fRegs &= ~RT_BIT_32(idxReg);
3345 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3346 }
3347
3348#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3349 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3350#endif
3351
3352#endif
3353
3354 return pReNative;
3355}
3356
3357
3358/**
3359 * Allocates and initializes the native recompiler state.
3360 *
3361 * This is called the first time an EMT wants to recompile something.
3362 *
3363 * @returns Pointer to the new recompiler state.
3364 * @param pVCpu The cross context virtual CPU structure of the calling
3365 * thread.
3366 * @param pTb The TB that's about to be recompiled.
3367 * @thread EMT(pVCpu)
3368 */
3369static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3370{
3371 VMCPU_ASSERT_EMT(pVCpu);
3372
3373 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3374 AssertReturn(pReNative, NULL);
3375
3376 /*
3377 * Try allocate all the buffers and stuff we need.
3378 */
3379 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3380 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3381 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3383 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3384#endif
3385 if (RT_LIKELY( pReNative->pInstrBuf
3386 && pReNative->paLabels
3387 && pReNative->paFixups)
3388#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3389 && pReNative->pDbgInfo
3390#endif
3391 )
3392 {
3393 /*
3394 * Set the buffer & array sizes on success.
3395 */
3396 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3397 pReNative->cLabelsAlloc = _8K;
3398 pReNative->cFixupsAlloc = _16K;
3399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3400 pReNative->cDbgInfoAlloc = _16K;
3401#endif
3402
3403 /* Other constant stuff: */
3404 pReNative->pVCpu = pVCpu;
3405
3406 /*
3407 * Done, just need to save it and reinit it.
3408 */
3409 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3410 return iemNativeReInit(pReNative, pTb);
3411 }
3412
3413 /*
3414 * Failed. Cleanup and return.
3415 */
3416 AssertFailed();
3417 RTMemFree(pReNative->pInstrBuf);
3418 RTMemFree(pReNative->paLabels);
3419 RTMemFree(pReNative->paFixups);
3420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3421 RTMemFree(pReNative->pDbgInfo);
3422#endif
3423 RTMemFree(pReNative);
3424 return NULL;
3425}
3426
3427
3428/**
3429 * Creates a label
3430 *
3431 * If the label does not yet have a defined position,
3432 * call iemNativeLabelDefine() later to set it.
3433 *
3434 * @returns Label ID. Throws VBox status code on failure, so no need to check
3435 * the return value.
3436 * @param pReNative The native recompile state.
3437 * @param enmType The label type.
3438 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3439 * label is not yet defined (default).
3440 * @param   uData       Data associated with the label. Only applicable to
3441 *                      certain types of labels. Default is zero.
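 *
 * A rough usage sketch (illustrative only; the label type and target offset are
 * placeholders rather than code lifted from the recompiler):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType); // position not yet known
 *      // ... emit code, eventually reaching the spot the label should point at (offTarget) ...
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 * @endcode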
3442 */
3443DECL_HIDDEN_THROW(uint32_t)
3444iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3445 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3446{
3447 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3448
3449 /*
3450 * Locate existing label definition.
3451 *
3452 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3453 * and uData is zero.
3454 */
3455 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3456 uint32_t const cLabels = pReNative->cLabels;
3457 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3458#ifndef VBOX_STRICT
3459 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3460 && offWhere == UINT32_MAX
3461 && uData == 0
3462#endif
3463 )
3464 {
3465#ifndef VBOX_STRICT
3466 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3468 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3469 if (idxLabel < pReNative->cLabels)
3470 return idxLabel;
3471#else
3472 for (uint32_t i = 0; i < cLabels; i++)
3473 if ( paLabels[i].enmType == enmType
3474 && paLabels[i].uData == uData)
3475 {
3476 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3477 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3478 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3479 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3481 return i;
3482 }
3483 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3484 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3485#endif
3486 }
3487
3488 /*
3489 * Make sure we've got room for another label.
3490 */
3491 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3492 { /* likely */ }
3493 else
3494 {
3495 uint32_t cNew = pReNative->cLabelsAlloc;
3496 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3497 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3498 cNew *= 2;
3499        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3500 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3501 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3502 pReNative->paLabels = paLabels;
3503 pReNative->cLabelsAlloc = cNew;
3504 }
3505
3506 /*
3507 * Define a new label.
3508 */
3509 paLabels[cLabels].off = offWhere;
3510 paLabels[cLabels].enmType = enmType;
3511 paLabels[cLabels].uData = uData;
3512 pReNative->cLabels = cLabels + 1;
3513
3514 Assert((unsigned)enmType < 64);
3515 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3516
3517 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3518 {
3519 Assert(uData == 0);
3520 pReNative->aidxUniqueLabels[enmType] = cLabels;
3521 }
3522
3523 if (offWhere != UINT32_MAX)
3524 {
3525#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3526 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3527 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3528#endif
3529 }
3530 return cLabels;
3531}
3532
3533
3534/**
3535 * Defines the location of an existing label.
3536 *
3537 * @param pReNative The native recompile state.
3538 * @param idxLabel The label to define.
3539 * @param offWhere The position.
3540 */
3541DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3542{
3543 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3544 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3545 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3546 pLabel->off = offWhere;
3547#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3548 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3549 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3550#endif
3551}
3552
3553
3554/**
3555 * Looks up a label.
3556 *
3557 * @returns Label ID if found, UINT32_MAX if not.
3558 */
3559static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3560 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3561{
3562 Assert((unsigned)enmType < 64);
3563 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3564 {
3565 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3566 return pReNative->aidxUniqueLabels[enmType];
3567
3568 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3569 uint32_t const cLabels = pReNative->cLabels;
3570 for (uint32_t i = 0; i < cLabels; i++)
3571 if ( paLabels[i].enmType == enmType
3572 && paLabels[i].uData == uData
3573 && ( paLabels[i].off == offWhere
3574 || offWhere == UINT32_MAX
3575 || paLabels[i].off == UINT32_MAX))
3576 return i;
3577 }
3578 return UINT32_MAX;
3579}
3580
3581
3582/**
3583 * Adds a fixup.
3584 *
3585 * @throws VBox status code (int) on failure.
3586 * @param pReNative The native recompile state.
3587 * @param offWhere The instruction offset of the fixup location.
3588 * @param idxLabel The target label ID for the fixup.
3589 * @param enmType The fixup type.
3590 * @param offAddend Fixup addend if applicable to the type. Default is 0.
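 *
 * A minimal sketch of how a fixup pairs with a forward-declared label
 * (illustrative only; offJmp, offTarget and the fixup/label types are
 * placeholders, and the branch emitter itself is not shown):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmSomeLabelType);
 *      // ... emit the branch instruction at offset offJmp, then record the fixup for it:
 *      iemNativeAddFixup(pReNative, offJmp, idxLabel, enmSomeFixupType);
 *      // ... later, once the target is emitted:
 *      iemNativeLabelDefine(pReNative, idxLabel, offTarget);
 * @endcode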
3591 */
3592DECL_HIDDEN_THROW(void)
3593iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3594 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3595{
3596 Assert(idxLabel <= UINT16_MAX);
3597 Assert((unsigned)enmType <= UINT8_MAX);
3598#ifdef RT_ARCH_ARM64
3599 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3600 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3601 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3602#endif
3603
3604 /*
3605 * Make sure we've room.
3606 */
3607 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3608 uint32_t const cFixups = pReNative->cFixups;
3609 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3610 { /* likely */ }
3611 else
3612 {
3613 uint32_t cNew = pReNative->cFixupsAlloc;
3614 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3615 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3616 cNew *= 2;
3617 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3618 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3619 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3620 pReNative->paFixups = paFixups;
3621 pReNative->cFixupsAlloc = cNew;
3622 }
3623
3624 /*
3625 * Add the fixup.
3626 */
3627 paFixups[cFixups].off = offWhere;
3628 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3629 paFixups[cFixups].enmType = enmType;
3630 paFixups[cFixups].offAddend = offAddend;
3631 pReNative->cFixups = cFixups + 1;
3632}
3633
3634
3635/**
3636 * Slow code path for iemNativeInstrBufEnsure.
3637 */
3638DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3639{
3640 /* Double the buffer size till we meet the request. */
3641 uint32_t cNew = pReNative->cInstrBufAlloc;
3642 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3643 do
3644 cNew *= 2;
3645 while (cNew < off + cInstrReq);
3646
3647 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3648#ifdef RT_ARCH_ARM64
3649 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3650#else
3651 uint32_t const cbMaxInstrBuf = _2M;
3652#endif
3653 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3654
3655 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3656 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3657
3658#ifdef VBOX_STRICT
3659 pReNative->offInstrBufChecked = off + cInstrReq;
3660#endif
3661 pReNative->cInstrBufAlloc = cNew;
3662 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3663}
3664
3665#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3666
3667/**
3668 * Grows the static debug info array used during recompilation.
3669 *
3670 * @returns Pointer to the new debug info block; throws VBox status code on
3671 * failure, so no need to check the return value.
3672 */
3673DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3674{
3675 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3676 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3677 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3678 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3679 pReNative->pDbgInfo = pDbgInfo;
3680 pReNative->cDbgInfoAlloc = cNew;
3681 return pDbgInfo;
3682}
3683
3684
3685/**
3686 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3687 */
3688DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3689{
3690 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3691 { /* likely */ }
3692 else
3693 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3694 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3695}
3696
3697
3698/**
3699 * Debug Info: Adds a native offset record, if necessary.
3700 */
3701DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3702{
3703 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3704
3705 /*
3706 * Search backwards to see if we've got a similar record already.
3707 */
3708 uint32_t idx = pDbgInfo->cEntries;
3709 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3710 while (idx-- > idxStop)
3711 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3712 {
3713 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3714 return;
3715 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3716 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3717 break;
3718 }
3719
3720 /*
3721 * Add it.
3722 */
3723 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3724 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3725 pEntry->NativeOffset.offNative = off;
3726}
3727
3728
3729/**
3730 * Debug Info: Record info about a label.
3731 */
3732static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3733{
3734 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3735 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3736 pEntry->Label.uUnused = 0;
3737 pEntry->Label.enmLabel = (uint8_t)enmType;
3738 pEntry->Label.uData = uData;
3739}
3740
3741
3742/**
3743 * Debug Info: Record info about a threaded call.
3744 */
3745static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3746{
3747 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3748 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3749 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3750 pEntry->ThreadedCall.uUnused = 0;
3751 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3752}
3753
3754
3755/**
3756 * Debug Info: Record info about a new guest instruction.
3757 */
3758static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3759{
3760 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3761 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3762 pEntry->GuestInstruction.uUnused = 0;
3763 pEntry->GuestInstruction.fExec = fExec;
3764}
3765
3766
3767/**
3768 * Debug Info: Record info about guest register shadowing.
3769 */
3770DECL_HIDDEN_THROW(void)
3771iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3772 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3773{
3774 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3775 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3776 pEntry->GuestRegShadowing.uUnused = 0;
3777 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3778 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3779 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3780}
3781
3782
3783# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3784/**
3785 * Debug Info: Record info about guest SIMD register shadowing.
3786 */
3787DECL_HIDDEN_THROW(void)
3788iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3789 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3790{
3791 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3792 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3793 pEntry->GuestSimdRegShadowing.uUnused = 0;
3794 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3795 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3796 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3797}
3798# endif
3799
3800
3801# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3802/**
3803 * Debug Info: Record info about delayed RIP updates.
3804 */
3805DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3806{
3807 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3808 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3809 pEntry->DelayedPcUpdate.offPc = offPc;
3810 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3811}
3812# endif
3813
3814#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3815
3816
3817/*********************************************************************************************************************************
3818* Register Allocator *
3819*********************************************************************************************************************************/
3820
3821/**
3822 * Register parameter indexes (indexed by argument number).
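 *
 * g_aidxIemNativeCallRegs[iArg] yields the host GPR used for passing helper
 * call argument number @c iArg (0-based).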
3823 */
3824DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3825{
3826 IEMNATIVE_CALL_ARG0_GREG,
3827 IEMNATIVE_CALL_ARG1_GREG,
3828 IEMNATIVE_CALL_ARG2_GREG,
3829 IEMNATIVE_CALL_ARG3_GREG,
3830#if defined(IEMNATIVE_CALL_ARG4_GREG)
3831 IEMNATIVE_CALL_ARG4_GREG,
3832# if defined(IEMNATIVE_CALL_ARG5_GREG)
3833 IEMNATIVE_CALL_ARG5_GREG,
3834# if defined(IEMNATIVE_CALL_ARG6_GREG)
3835 IEMNATIVE_CALL_ARG6_GREG,
3836# if defined(IEMNATIVE_CALL_ARG7_GREG)
3837 IEMNATIVE_CALL_ARG7_GREG,
3838# endif
3839# endif
3840# endif
3841#endif
3842};
3843AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3844
3845/**
3846 * Call register masks indexed by argument count.
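 *
 * g_afIemNativeCallRegs[cArgs] is the mask of host GPRs used for passing the
 * first @c cArgs arguments of a helper call.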
3847 */
3848DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3849{
3850 0,
3851 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3852 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3854 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3855 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3856#if defined(IEMNATIVE_CALL_ARG4_GREG)
3857 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3858 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3859# if defined(IEMNATIVE_CALL_ARG5_GREG)
3860 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3861 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3862# if defined(IEMNATIVE_CALL_ARG6_GREG)
3863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3866# if defined(IEMNATIVE_CALL_ARG7_GREG)
3867 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3868 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3869 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3870# endif
3871# endif
3872# endif
3873#endif
3874};
3875
3876#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3877/**
3878 * BP offset of the stack argument slots.
3879 *
3880 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3881 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
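 *
 * Example (illustrative): the BP displacement for stack argument @c iArg, with
 * iArg >= IEMNATIVE_CALL_ARG_GREG_COUNT, is
 * @code
 *      int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT];
 * @endcode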
3882 */
3883DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3884{
3885 IEMNATIVE_FP_OFF_STACK_ARG0,
3886# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3887 IEMNATIVE_FP_OFF_STACK_ARG1,
3888# endif
3889# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3890 IEMNATIVE_FP_OFF_STACK_ARG2,
3891# endif
3892# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3893 IEMNATIVE_FP_OFF_STACK_ARG3,
3894# endif
3895};
3896AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3897#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3898
3899/**
3900 * Info about shadowed guest register values.
3901 * @see IEMNATIVEGSTREG
3902 */
3903DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3904{
3905#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3910 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3911 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3912 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3913 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3918 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3919 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3920 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3921 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3922 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3923 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3924 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3925 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3926 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3927 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3928 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3929 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3930 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3931 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3932 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3933 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3934 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3935 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3936 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3937 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3938 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3939 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3940 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3941 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3942 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3943 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3944 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3945 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3946 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3947 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3948 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3949 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3950 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3951 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3952 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3953 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3954#undef CPUMCTX_OFF_AND_SIZE
3955};
3956AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3957
3958
3959/** Host CPU general purpose register names. */
3960DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3961{
3962#ifdef RT_ARCH_AMD64
3963 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3964#elif defined(RT_ARCH_ARM64)
3965 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3966 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3967#else
3968# error "port me"
3969#endif
3970};
3971
3972
3973#if 0 /* unused */
3974/**
3975 * Tries to locate a suitable register in the given register mask.
3976 *
3977 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3978 * failed.
3979 *
3980 * @returns Host register number on success, returns UINT8_MAX on failure.
3981 */
3982static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3983{
3984 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3985 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3986 if (fRegs)
3987 {
3988 /** @todo pick better here: */
3989 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3990
3991 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3992 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3993 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3994 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3995
3996 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3997 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3998 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3999 return idxReg;
4000 }
4001 return UINT8_MAX;
4002}
4003#endif /* unused */
4004
4005
4006/**
4007 * Locate a register, possibly freeing one up.
4008 *
4009 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4010 * failed.
4011 *
4012 * @returns Host register number on success. Returns UINT8_MAX if no register
4013 *          was found; the caller is supposed to deal with this and raise an
4014 *          allocation type specific status code (if desired).
4015 *
4016 * @throws  VBox status code if we run into trouble spilling a variable or
4017 *          recording debug info.  Does NOT throw anything if we're out of
4018 * registers, though.
4019 */
4020static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4021 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4022{
4023 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4024 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4025 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4026
4027 /*
4028 * Try a freed register that's shadowing a guest register.
4029 */
4030 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4031 if (fRegs)
4032 {
4033 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4034
4035#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4036 /*
4037          * When we have liveness information, we use it to kick out all shadowed
4038          * guest registers that will not be needed any more in this TB.  If we're
4039 * lucky, this may prevent us from ending up here again.
4040 *
4041 * Note! We must consider the previous entry here so we don't free
4042 * anything that the current threaded function requires (current
4043 * entry is produced by the next threaded function).
4044 */
4045 uint32_t const idxCurCall = pReNative->idxCurCall;
4046 if (idxCurCall > 0)
4047 {
4048 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4049
4050# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4051 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4052 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4053 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4054#else
4055 /* Construct a mask of the registers not in the read or write state.
4056               Note! We could skip writes, if they aren't from us, as this is just
4057 a hack to prevent trashing registers that have just been written
4058 or will be written when we retire the current instruction. */
4059 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4060 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4061 & IEMLIVENESSBIT_MASK;
4062#endif
4063            /* Merge EFLAGS: only treat EFLAGS as freeable when all the individual flag states are. */
4064 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4065 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4066 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4067 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4068 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4069
4070 /* If it matches any shadowed registers. */
4071 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4072 {
4073 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4074 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4075 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4076
4077 /* See if we've got any unshadowed registers we can return now. */
4078 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4079 if (fUnshadowedRegs)
4080 {
4081 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4082 return (fPreferVolatile
4083 ? ASMBitFirstSetU32(fUnshadowedRegs)
4084 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4085 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4086 - 1;
4087 }
4088 }
4089 }
4090#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4091
4092 unsigned const idxReg = (fPreferVolatile
4093 ? ASMBitFirstSetU32(fRegs)
4094 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4095 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4096 - 1;
4097
4098 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4099 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4100 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4101 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4102
4103 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4104 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4105 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4106 return idxReg;
4107 }
4108
4109 /*
4110 * Try free up a variable that's in a register.
4111 *
4112 * We do two rounds here, first evacuating variables we don't need to be
4113 * saved on the stack, then in the second round move things to the stack.
4114 */
4115 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4116 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4117 {
4118 uint32_t fVars = pReNative->Core.bmVars;
4119 while (fVars)
4120 {
4121 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4122 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4124            if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
            {
                fVars &= ~RT_BIT_32(idxVar); /* Clear the bit first, otherwise we would loop on this variable forever. */
4125                continue;
            }
4126#endif
4127
4128 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4129 && (RT_BIT_32(idxReg) & fRegMask)
4130 && ( iLoop == 0
4131 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4132 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4133 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4134 {
4135 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4136 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4137 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4138 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4139 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4140 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4141
4142 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4143 {
4144 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4145 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4146 }
4147
4148 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4149 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4150
4151 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4152 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4153 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4154 return idxReg;
4155 }
4156 fVars &= ~RT_BIT_32(idxVar);
4157 }
4158 }
4159
4160 return UINT8_MAX;
4161}
4162
4163
4164/**
4165 * Reassigns a variable to a different register specified by the caller.
4166 *
4167 * @returns The new code buffer position.
4168 * @param pReNative The native recompile state.
4169 * @param off The current code buffer position.
4170 * @param idxVar The variable index.
4171 * @param idxRegOld The old host register number.
4172 * @param idxRegNew The new host register number.
4173 * @param pszCaller The caller for logging.
4174 */
4175static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4176 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4177{
4178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4179 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4180#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4181 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4182#endif
4183 RT_NOREF(pszCaller);
4184
4185 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4186
4187 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4188 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4189 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4190 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4191
4192 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4193 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4194 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4195 if (fGstRegShadows)
4196 {
4197 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4198 | RT_BIT_32(idxRegNew);
4199 while (fGstRegShadows)
4200 {
4201 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4202 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4203
4204 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4205 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4206 }
4207 }
4208
4209 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4210 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4211 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4212 return off;
4213}
4214
4215
4216/**
4217 * Moves a variable to a different register or spills it onto the stack.
4218 *
4219 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4220 * kinds can easily be recreated if needed later.
4221 *
4222 * @returns The new code buffer position.
4223 * @param pReNative The native recompile state.
4224 * @param off The current code buffer position.
4225 * @param idxVar The variable index.
4226 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4227 * call-volatile registers.
4228 */
4229DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4230 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4231{
4232 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4233 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4234 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4235 Assert(!pVar->fRegAcquired);
4236
4237 uint8_t const idxRegOld = pVar->idxReg;
4238 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4239 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4240 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4241 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4242 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4243 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4244 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4245 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4246
4247
4248 /** @todo Add statistics on this.*/
4249 /** @todo Implement basic variable liveness analysis (python) so variables
4250      * can be freed immediately once no longer used.  Without it we risk
4251      * trashing registers and stack for dead variables.
4252 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4253
4254 /*
4255 * First try move it to a different register, as that's cheaper.
4256 */
4257 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4258 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4259 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4260 if (fRegs)
4261 {
4262 /* Avoid using shadow registers, if possible. */
4263 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4264 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4265 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4266 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4267 }
4268
4269 /*
4270 * Otherwise we must spill the register onto the stack.
4271 */
4272 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4273 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4274 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4275 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4276
4277 pVar->idxReg = UINT8_MAX;
4278 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4279 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4282 return off;
4283}
4284
4285
4286/**
4287 * Allocates a temporary host general purpose register.
4288 *
4289 * This may emit code to save register content onto the stack in order to free
4290 * up a register.
4291 *
4292 * @returns The host register number; throws VBox status code on failure,
4293 * so no need to check the return value.
4294 * @param pReNative The native recompile state.
4295 * @param poff Pointer to the variable with the code buffer position.
4296 * This will be update if we need to move a variable from
4297 *                      This will be updated if we need to move a variable from
4298 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4299 * registers (@c true, default) or the other way around
4300 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
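 *
 * A minimal usage sketch (illustrative only; iemNativeRegFreeTmp is assumed to
 * be the matching release function and off the current code buffer position):
 * @code
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      // ... emit code that uses idxTmpReg as scratch space ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 * @endcode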
4301 */
4302DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4303{
4304 /*
4305 * Try find a completely unused register, preferably a call-volatile one.
4306 */
4307 uint8_t idxReg;
4308 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4309 & ~pReNative->Core.bmHstRegsWithGstShadow
4310 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4311 if (fRegs)
4312 {
4313 if (fPreferVolatile)
4314 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4315 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4316 else
4317 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4318 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4319 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4320 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4321 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4322 }
4323 else
4324 {
4325 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4326 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4327 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4328 }
4329 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4330}
4331
4332
4333/**
4334 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4335 * registers.
4336 *
4337 * @returns The host register number; throws VBox status code on failure,
4338 * so no need to check the return value.
4339 * @param pReNative The native recompile state.
4340 * @param poff Pointer to the variable with the code buffer position.
4341 *                          This will be updated if we need to move a variable from
4342 * register to stack in order to satisfy the request.
4343 * @param fRegMask Mask of acceptable registers.
4344 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4345 * registers (@c true, default) or the other way around
4346 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4347 */
4348DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4349 bool fPreferVolatile /*= true*/)
4350{
4351 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4352 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4353
4354 /*
4355 * Try find a completely unused register, preferably a call-volatile one.
4356 */
4357 uint8_t idxReg;
4358 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4359 & ~pReNative->Core.bmHstRegsWithGstShadow
4360 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4361 & fRegMask;
4362 if (fRegs)
4363 {
4364 if (fPreferVolatile)
4365 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4366 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4367 else
4368 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4369 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4370 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4371 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4372 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4373 }
4374 else
4375 {
4376 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4377 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4378 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4379 }
4380 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4381}
4382
4383
4384/**
4385 * Allocates a temporary register for loading an immediate value into.
4386 *
4387 * This will emit code to load the immediate, unless there happens to be an
4388 * unused register with the value already loaded.
4389 *
4390 * The caller will not modify the returned register, it must be considered
4391 * read-only. Free using iemNativeRegFreeTmpImm.
4392 *
4393 * @returns The host register number; throws VBox status code on failure, so no
4394 * need to check the return value.
4395 * @param pReNative The native recompile state.
4396 * @param poff Pointer to the variable with the code buffer position.
4397 * @param uImm The immediate value that the register must hold upon
4398 * return.
4399 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4400 * registers (@c true, default) or the other way around
4401 * (@c false).
4402 *
4403 * @note Reusing immediate values has not been implemented yet.
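 *
 * Usage sketch (illustrative only; off is the current code buffer position):
 * @code
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... use idxRegImm strictly as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 * @endcode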
4404 */
4405DECL_HIDDEN_THROW(uint8_t)
4406iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4407{
4408 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4409 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4410 return idxReg;
4411}
4412
4413
4414/**
4415 * Allocates a temporary host general purpose register for keeping a guest
4416 * register value.
4417 *
4418 * Since we may already have a register holding the guest register value,
4419 * code will be emitted to do the loading if that's not the case. Code may also
4420 * be emitted if we have to free up a register to satify the request.
4421 *
4422 * @returns The host register number; throws VBox status code on failure, so no
4423 * need to check the return value.
4424 * @param pReNative The native recompile state.
4425 * @param poff Pointer to the variable with the code buffer
4426 *                          position. This will be updated if we need to move a
4427 * variable from register to stack in order to satisfy
4428 * the request.
4429 * @param   enmGstReg       The guest register that is to be updated.
4430 * @param enmIntendedUse How the caller will be using the host register.
4431 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4432 * register is okay (default). The ASSUMPTION here is
4433 * that the caller has already flushed all volatile
4434 * registers, so this is only applied if we allocate a
4435 * new register.
4436 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4437 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
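 *
 * Illustrative sketch only (the guest register and intended use values come
 * from this file; off is the current code buffer position):
 * @code
 *      // Get the guest EFLAGS value into a host register for reading:
 *      uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
 *                                                                kIemNativeGstRegUse_ReadOnly);
 * @endcode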
4438 */
4439DECL_HIDDEN_THROW(uint8_t)
4440iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4441 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4442 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4443{
4444 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4445#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4446 AssertMsg( fSkipLivenessAssert
4447 || pReNative->idxCurCall == 0
4448 || enmGstReg == kIemNativeGstReg_Pc
4449 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4450 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4451 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4452 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4453 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4454 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4455#endif
4456 RT_NOREF(fSkipLivenessAssert);
4457#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4458 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4459#endif
4460 uint32_t const fRegMask = !fNoVolatileRegs
4461 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4462 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4463
4464 /*
4465 * First check if the guest register value is already in a host register.
4466 */
4467 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4468 {
4469 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4470 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4471 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4472 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4473
4474 /* It's not supposed to be allocated... */
4475 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4476 {
4477 /*
4478 * If the register will trash the guest shadow copy, try find a
4479 * completely unused register we can use instead. If that fails,
4480 * we need to disassociate the host reg from the guest reg.
4481 */
4482 /** @todo would be nice to know if preserving the register is in any way helpful. */
4483 /* If the purpose is calculations, try duplicate the register value as
4484 we'll be clobbering the shadow. */
4485 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4486 && ( ~pReNative->Core.bmHstRegs
4487 & ~pReNative->Core.bmHstRegsWithGstShadow
4488 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4489 {
4490 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4491
4492 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4493
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4496 g_apszIemNativeHstRegNames[idxRegNew]));
4497 idxReg = idxRegNew;
4498 }
4499 /* If the current register matches the restrictions, go ahead and allocate
4500 it for the caller. */
4501 else if (fRegMask & RT_BIT_32(idxReg))
4502 {
4503 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4504 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4505 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4506 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4507 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4508 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4509 else
4510 {
4511 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4512 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4513 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4514 }
4515 }
4516 /* Otherwise, allocate a register that satisfies the caller and transfer
4517 the shadowing if compatible with the intended use. (This basically
4518 means the call wants a non-volatile register (RSP push/pop scenario).) */
4519 else
4520 {
4521 Assert(fNoVolatileRegs);
4522 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4523 !fNoVolatileRegs
4524 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4525 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4526 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4527 {
4528 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4529 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4530 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4531 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4532 }
4533 else
4534 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4535 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4536 g_apszIemNativeHstRegNames[idxRegNew]));
4537 idxReg = idxRegNew;
4538 }
4539 }
4540 else
4541 {
4542 /*
4543 * Oops. Shadowed guest register already allocated!
4544 *
4545 * Allocate a new register, copy the value and, if updating, the
4546 * guest shadow copy assignment to the new register.
4547 */
4548 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4549 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4550 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4551 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4552
4553 /** @todo share register for readonly access. */
4554 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4555 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4556
4557 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4558 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4559
4560 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4561 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4562 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4563 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4564 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4565 else
4566 {
4567 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4568 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4569 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4570 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4571 }
4572 idxReg = idxRegNew;
4573 }
4574 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4575
4576#ifdef VBOX_STRICT
4577 /* Strict builds: Check that the value is correct. */
4578 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4579#endif
4580
4581 return idxReg;
4582 }
4583
4584 /*
4585 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4586 */
4587 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4588
4589 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4590 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4591
4592 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4593 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4594 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4595 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4596
4597 return idxRegNew;
4598}
4599
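/*
 * Illustrative usage sketch only (enmGstReg and the update code are placeholders):
 * a caller wanting to modify a guest register value typically does something like
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmGstReg,
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      ... emit code updating idxReg and writing the result back to the guest context ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 *
 * The other intended-use values (kIemNativeGstRegUse_ReadOnly, _Calculation and
 * _ForFullWrite) select the corresponding code paths in the function above.
 */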
4600
4601/**
4602 * Allocates a temporary host general purpose register that already holds the
4603 * given guest register value.
4604 *
4605 * The use case for this function is places where the shadowing state cannot be
4606 * modified due to branching and such. This will fail if we don't have a
4607 * current shadow copy handy or if it's incompatible. The only code that will
4608 * be emitted here is value checking code in strict builds.
4609 *
4610 * The intended use can only be readonly!
4611 *
4612 * @returns The host register number, UINT8_MAX if not present.
4613 * @param pReNative The native recompile state.
4614 * @param poff Pointer to the instruction buffer offset.
4615 * Will be updated in strict builds if a register is
4616 * found.
4617 * @param enmGstReg The guest register that is to be used (read-only).
4618 * @note In strict builds, this may throw instruction buffer growth failures.
4619 * Non-strict builds will not throw anything.
4620 * @sa iemNativeRegAllocTmpForGuestReg
4621 */
4622DECL_HIDDEN_THROW(uint8_t)
4623iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4624{
4625 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4626#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4627 AssertMsg( pReNative->idxCurCall == 0
4628 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4629 || enmGstReg == kIemNativeGstReg_Pc,
4630 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4631#endif
4632
4633 /*
4634 * First check if the guest register value is already in a host register.
4635 */
4636 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4637 {
4638 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4639 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4640 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4641 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4642
4643 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4644 {
4645 /*
4646 * We only do readonly use here, so easy compared to the other
4647 * variant of this code.
4648 */
4649 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4650 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4651 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4652 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4653 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4654
4655#ifdef VBOX_STRICT
4656 /* Strict builds: Check that the value is correct. */
4657 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4658#else
4659 RT_NOREF(poff);
4660#endif
4661 return idxReg;
4662 }
4663 }
4664
4665 return UINT8_MAX;
4666}
4667
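/*
 * Illustrative usage sketch only: since this variant never emits loads, callers must
 * cope with UINT8_MAX (the PC register is used purely as an example here):
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxPcReg != UINT8_MAX)
 *      {
 *          ... use idxPcReg read-only ...
 *          iemNativeRegFreeTmp(pReNative, idxPcReg);
 *      }
 *      else
 *          ... fall back to a path that does not need the value in a host register ...
 */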
4668
4669/**
4670 * Allocates argument registers for a function call.
4671 *
4672 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4673 * need to check the return value.
4674 * @param pReNative The native recompile state.
4675 * @param off The current code buffer offset.
4676 * @param cArgs The number of arguments the function call takes.
4677 */
4678DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4679{
4680 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4681 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4682 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4683 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4684
4685 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4686 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4687 else if (cArgs == 0)
4688 return off;
4689
4690 /*
4691 * Do we get lucky and all the registers are free and not shadowing anything?
4692 */
4693 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4694 for (uint32_t i = 0; i < cArgs; i++)
4695 {
4696 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4697 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4698 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4699 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4700 }
4701 /*
4702 * Okay, not lucky so we have to free up the registers.
4703 */
4704 else
4705 for (uint32_t i = 0; i < cArgs; i++)
4706 {
4707 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4708 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4709 {
4710 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4711 {
4712 case kIemNativeWhat_Var:
4713 {
4714 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4716 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4717 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4718 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4719#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4720 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4721#endif
4722
4723 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4724 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4725 else
4726 {
4727 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4728 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4729 }
4730 break;
4731 }
4732
4733 case kIemNativeWhat_Tmp:
4734 case kIemNativeWhat_Arg:
4735 case kIemNativeWhat_rc:
4736 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4737 default:
4738 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4739 }
4740
4741 }
4742 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4743 {
4744 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4745 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4746 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4747 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4748 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4749 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4750 }
4751 else
4752 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4753 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4754 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4755 }
4756 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4757 return off;
4758}
4759
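/*
 * Illustrative usage sketch only (uArg0 and uArg1 are placeholder immediates):
 * reserving the argument registers first and then loading them could look like
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[0], uArg0);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, g_aidxIemNativeCallRegs[1], uArg1);
 *      ... emit the actual call ...
 */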
4760
4761DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4762
4763
4764#if 0
4765/**
4766 * Frees a register assignment of any type.
4767 *
4768 * @param pReNative The native recompile state.
4769 * @param idxHstReg The register to free.
4770 *
4771 * @note Does not update variables.
4772 */
4773DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4774{
4775 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4776 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4777 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4778 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4779 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4780 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4781 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4782 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4783 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4784 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4785 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4786 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4787 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4788 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4789
4790 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4791 /* no flushing, right:
4792 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4793 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4794 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4795 */
4796}
4797#endif
4798
4799
4800/**
4801 * Frees a temporary register.
4802 *
4803 * Any shadow copies of guest registers assigned to the host register will not
4804 * be flushed by this operation.
4805 */
4806DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4807{
4808 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4809 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4810 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4811 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4812 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4813}
4814
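/*
 * Illustrative sketch only (enmGstReg is a placeholder): since the shadow association
 * survives the free, a later allocation for the same guest register can reuse it:
 *
 *      uint8_t const idxTmp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmGstReg); // loads + shadows
 *      ... read-only use of idxTmp ...
 *      iemNativeRegFreeTmp(pReNative, idxTmp);  // register is free again, the shadow copy is still recorded
 *      ... a later iemNativeRegAllocTmpForGuestReg for enmGstReg can now hit the reuse path without a reload ...
 */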
4815
4816/**
4817 * Frees a temporary immediate register.
4818 *
4819 * It is assumed that the caller has not modified the register, so it still holds
4820 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4821 */
4822DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4823{
4824 iemNativeRegFreeTmp(pReNative, idxHstReg);
4825}
4826
4827
4828/**
4829 * Frees a register assigned to a variable.
4830 *
4831 * The register will be disassociated from the variable.
4832 */
4833DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4834{
4835 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4836 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4837 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4839 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4840#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4841 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4842#endif
4843
4844 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4845 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4846 if (!fFlushShadows)
4847 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4848 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4849 else
4850 {
4851 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4852 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4853 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4854 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4855 uint64_t fGstRegShadows = fGstRegShadowsOld;
4856 while (fGstRegShadows)
4857 {
4858 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4859 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4860
4861 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4862 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4863 }
4864 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4865 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4866 }
4867}
4868
4869
4870#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4871# ifdef LOG_ENABLED
4872/** Host CPU SIMD register names. */
4873DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4874{
4875# ifdef RT_ARCH_AMD64
4876 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4877# elif RT_ARCH_ARM64
4878 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4879 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4880# else
4881# error "port me"
4882# endif
4883};
4884# endif
4885
4886
4887/**
4888 * Frees a SIMD register assigned to a variable.
4889 *
4890 * The register will be disassociated from the variable.
4891 */
4892DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4893{
4894 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4895 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4896 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4897 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4898 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4900
4901 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4902 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4903 if (!fFlushShadows)
4904 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4905 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4906 else
4907 {
4908 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4909 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4910 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4911 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4912 uint64_t fGstRegShadows = fGstRegShadowsOld;
4913 while (fGstRegShadows)
4914 {
4915 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4916 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4917
4918 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4919 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4920 }
4921 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4922 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4923 }
4924}
4925#endif
4926
4927
4928/**
4929 * Called right before emitting a call instruction to move anything important
4930 * out of call-volatile registers, free and flush the call-volatile registers,
4931 * optionally freeing argument variables.
4932 *
4933 * @returns New code buffer offset, UINT32_MAX on failure.
4934 * @param pReNative The native recompile state.
4935 * @param off The code buffer offset.
4936 * @param cArgs The number of arguments the function call takes.
4937 * It is presumed that the host register part of these have
4938 * been allocated as such already and won't need moving,
4939 * just freeing.
4940 * @param fKeepVars Mask of variables that should keep their register
4941 * assignments. Caller must take care to handle these.
4942 */
4943DECL_HIDDEN_THROW(uint32_t)
4944iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4945{
4946 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4947
4948 /* fKeepVars will reduce this mask. */
4949 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4950
4951 /*
4952 * Move anything important out of volatile registers.
4953 */
4954 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4955 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4956 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4957#ifdef IEMNATIVE_REG_FIXED_TMP0
4958 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4959#endif
4960#ifdef IEMNATIVE_REG_FIXED_TMP1
4961 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4962#endif
4963#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4964 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4965#endif
4966 & ~g_afIemNativeCallRegs[cArgs];
4967
4968 fRegsToMove &= pReNative->Core.bmHstRegs;
4969 if (!fRegsToMove)
4970 { /* likely */ }
4971 else
4972 {
4973 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4974 while (fRegsToMove != 0)
4975 {
4976 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4977 fRegsToMove &= ~RT_BIT_32(idxReg);
4978
4979 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4980 {
4981 case kIemNativeWhat_Var:
4982 {
4983 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4985 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4986 Assert(pVar->idxReg == idxReg);
4987 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4988 {
4989 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4990 idxVar, pVar->enmKind, pVar->idxReg));
4991 if (pVar->enmKind != kIemNativeVarKind_Stack)
4992 pVar->idxReg = UINT8_MAX;
4993 else
4994 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4995 }
4996 else
4997 fRegsToFree &= ~RT_BIT_32(idxReg);
4998 continue;
4999 }
5000
5001 case kIemNativeWhat_Arg:
5002 AssertMsgFailed(("What?!?: %u\n", idxReg));
5003 continue;
5004
5005 case kIemNativeWhat_rc:
5006 case kIemNativeWhat_Tmp:
5007 AssertMsgFailed(("Missing free: %u\n", idxReg));
5008 continue;
5009
5010 case kIemNativeWhat_FixedTmp:
5011 case kIemNativeWhat_pVCpuFixed:
5012 case kIemNativeWhat_pCtxFixed:
5013 case kIemNativeWhat_PcShadow:
5014 case kIemNativeWhat_FixedReserved:
5015 case kIemNativeWhat_Invalid:
5016 case kIemNativeWhat_End:
5017 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5018 }
5019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5020 }
5021 }
5022
5023 /*
5024 * Do the actual freeing.
5025 */
5026 if (pReNative->Core.bmHstRegs & fRegsToFree)
5027 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5028 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5029 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5030
5031 /* If there are guest register shadows in any call-volatile register, we
5032 have to clear the corresponding guest register masks for each register. */
5033 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5034 if (fHstRegsWithGstShadow)
5035 {
5036 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5037 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5038 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5039 do
5040 {
5041 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5042 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5043
5044 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5045 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5046 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5047 } while (fHstRegsWithGstShadow != 0);
5048 }
5049
5050 return off;
5051}
5052
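/*
 * Rough ordering sketch only (the call emission itself is omitted): a helper call
 * site would typically do
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *      ... load the argument registers and emit the actual call instruction ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 *
 * using iemNativeRegFlushGuestShadows() on the affected guest registers instead of
 * the restore when the callee may have modified their values.
 */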
5053
5054/**
5055 * Flushes a set of guest register shadow copies.
5056 *
5057 * This is usually done after calling a threaded function or a C-implementation
5058 * of an instruction.
5059 *
5060 * @param pReNative The native recompile state.
5061 * @param fGstRegs Set of guest registers to flush.
5062 */
5063DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5064{
5065 /*
5066 * Reduce the mask by what's currently shadowed
5067 */
5068 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5069 fGstRegs &= bmGstRegShadowsOld;
5070 if (fGstRegs)
5071 {
5072 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5073 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5074 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5075 if (bmGstRegShadowsNew)
5076 {
5077 /*
5078 * Partial.
5079 */
5080 do
5081 {
5082 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5083 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5084 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5085 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5086 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5087
5088 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5089 fGstRegs &= ~fInThisHstReg;
5090 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5091 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5092 if (!fGstRegShadowsNew)
5093 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5094 } while (fGstRegs != 0);
5095 }
5096 else
5097 {
5098 /*
5099 * Clear all.
5100 */
5101 do
5102 {
5103 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5104 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5105 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5106 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5107 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5108
5109 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5110 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5111 } while (fGstRegs != 0);
5112 pReNative->Core.bmHstRegsWithGstShadow = 0;
5113 }
5114 }
5115}
5116
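/*
 * Illustrative sketch only: e.g. after emitting a call that rewrites the guest RIP,
 * the now stale PC shadow would be dropped with
 *
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 */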
5117
5118/**
5119 * Flushes guest register shadow copies held by a set of host registers.
5120 *
5121 * This is used with the TLB lookup code for ensuring that we don't carry on
5122 * with any guest shadows in volatile registers, as these will get corrupted by
5123 * a TLB miss.
5124 *
5125 * @param pReNative The native recompile state.
5126 * @param fHstRegs Set of host registers to flush guest shadows for.
5127 */
5128DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5129{
5130 /*
5131 * Reduce the mask by what's currently shadowed.
5132 */
5133 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5134 fHstRegs &= bmHstRegsWithGstShadowOld;
5135 if (fHstRegs)
5136 {
5137 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5138 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5139 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5140 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5141 if (bmHstRegsWithGstShadowNew)
5142 {
5143 /*
5144 * Partial (likely).
5145 */
5146 uint64_t fGstShadows = 0;
5147 do
5148 {
5149 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5150 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5151 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5152 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5153
5154 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5155 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5156 fHstRegs &= ~RT_BIT_32(idxHstReg);
5157 } while (fHstRegs != 0);
5158 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5159 }
5160 else
5161 {
5162 /*
5163 * Clear all.
5164 */
5165 do
5166 {
5167 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5168 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5169 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5170 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5171
5172 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5173 fHstRegs &= ~RT_BIT_32(idxHstReg);
5174 } while (fHstRegs != 0);
5175 pReNative->Core.bmGstRegShadows = 0;
5176 }
5177 }
5178}
5179
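/*
 * Illustrative sketch only: the TLB lookup code mentioned above would drop all
 * shadows living in call-volatile registers before the possible TLB-miss call:
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */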
5180
5181/**
5182 * Restores guest shadow copies in volatile registers.
5183 *
5184 * This is used after calling a helper function (think TLB miss) to restore the
5185 * register state of volatile registers.
5186 *
5187 * @param pReNative The native recompile state.
5188 * @param off The code buffer offset.
5189 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5190 * be active (allocated) w/o asserting. Hack.
5191 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5192 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5193 */
5194DECL_HIDDEN_THROW(uint32_t)
5195iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5196{
5197 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5198 if (fHstRegs)
5199 {
5200 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5201 do
5202 {
5203 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5204
5205 /* It's not fatal if a register is active holding a variable that is
5206 shadowing a guest register, ASSUMING all pending guest register
5207 writes were flushed prior to the helper call. However, we'll be
5208 emitting duplicate restores, so it wastes code space. */
5209 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5210 RT_NOREF(fHstRegsActiveShadows);
5211
5212 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5213 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5214 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5215 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5216
5217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5218 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5219
5220 fHstRegs &= ~RT_BIT_32(idxHstReg);
5221 } while (fHstRegs != 0);
5222 }
5223 return off;
5224}
5225
5226
5227
5228
5229/*********************************************************************************************************************************
5230* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5231*********************************************************************************************************************************/
5232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5233
5234/**
5235 * Info about shadowed guest SIMD register values.
5236 * @see IEMNATIVEGSTSIMDREG
5237 */
5238static struct
5239{
5240 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5241 uint32_t offXmm;
5242 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5243 uint32_t offYmm;
5244 /** Name (for logging). */
5245 const char *pszName;
5246} const g_aGstSimdShadowInfo[] =
5247{
5248#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5249 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5250 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5251 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5252 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5253 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5254 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5255 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5256 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5257 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5258 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5259 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5260 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5261 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5262 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5263 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5264 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5265 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5266#undef CPUMCTX_OFF_AND_SIZE
5267};
5268AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5269
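/*
 * Illustrative sketch only: the offsets in this table are what the SIMD load/store
 * emitters use to address the guest register halves in the VMCPU structure, e.g.
 * (mirroring iemNativeSimdRegFlushPendingWrite below):
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
 */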
5270
5271/**
5272 * Frees a temporary SIMD register.
5273 *
5274 * Any shadow copies of guest registers assigned to the host register will not
5275 * be flushed by this operation.
5276 */
5277DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5278{
5279 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5280 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5281 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5282 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5283 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5284}
5285
5286
5287/**
5288 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
5289 *
5290 * @returns New code buffer offset.
5291 * @param pReNative The native recompile state.
5292 * @param off Current code buffer position.
5293 * @param enmGstSimdReg The guest SIMD register to flush.
5294 */
5295DECL_HIDDEN_THROW(uint32_t)
5296iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5297{
5298 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5299
5300 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5301 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5302 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5303 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5304
5305 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5306 {
5307 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5308 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5309 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5310 }
5311
5312 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5313 {
5314 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5315 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5316 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5317 }
5318
5319 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5320 return off;
5321}
5322
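/*
 * Illustrative sketch only (iYReg is a placeholder index): any code about to read the
 * guest SIMD register directly from the context structure must flush first:
 *
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(iYReg));
 */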
5323
5324/**
5325 * Locate a register, possibly freeing one up.
5326 *
5327 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5328 * failed.
5329 *
5330 * @returns Host register number on success. Returns UINT8_MAX if no registers
5331 * are found; the caller is supposed to deal with this and raise an
5332 * allocation type specific status code (if desired).
5333 *
5334 * @throws VBox status code if we run into trouble spilling a variable or
5335 * recording debug info. Does NOT throw anything if we're out of
5336 * registers, though.
5337 */
5338static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5339 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5340{
5341 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5342 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5343 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5344
5345 /*
5346 * Try a freed register that's shadowing a guest register.
5347 */
5348 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5349 if (fRegs)
5350 {
5351 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5352
5353#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5354 /*
5355 * When we have liveness information, we use it to kick out all shadowed
5356 * guest registers that will not be needed any more in this TB. If we're
5357 * lucky, this may prevent us from ending up here again.
5358 *
5359 * Note! We must consider the previous entry here so we don't free
5360 * anything that the current threaded function requires (current
5361 * entry is produced by the next threaded function).
5362 */
5363 uint32_t const idxCurCall = pReNative->idxCurCall;
5364 if (idxCurCall > 0)
5365 {
5366 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5367
5368# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5369 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5370 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5371 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5372#else
5373 /* Construct a mask of the registers not in the read or write state.
5374 Note! We could skip writes, if they aren't from us, as this is just
5375 a hack to prevent trashing registers that have just been written
5376 or will be written when we retire the current instruction. */
5377 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5378 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5379 & IEMLIVENESSBIT_MASK;
5380#endif
5381 /* If it matches any shadowed registers. */
5382 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5383 {
5384 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5385 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5386 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5387
5388 /* See if we've got any unshadowed registers we can return now. */
5389 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5390 if (fUnshadowedRegs)
5391 {
5392 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5393 return (fPreferVolatile
5394 ? ASMBitFirstSetU32(fUnshadowedRegs)
5395 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5396 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5397 - 1;
5398 }
5399 }
5400 }
5401#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5402
5403 unsigned const idxReg = (fPreferVolatile
5404 ? ASMBitFirstSetU32(fRegs)
5405 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5406 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5407 - 1;
5408
5409 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5410 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5411 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5412 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5413
5414 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5415 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5416 uint32_t idxGstSimdReg = 0;
5417 do
5418 {
5419 if (fGstRegShadows & 0x1)
5420 {
5421 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5422 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5423 }
5424 idxGstSimdReg++;
5425 fGstRegShadows >>= 1;
5426 } while (fGstRegShadows);
5427
5428 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5429 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5430 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5431 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5432 return idxReg;
5433 }
5434
5435 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5436
5437 /*
5438 * Try free up a variable that's in a register.
5439 *
5440 * We do two rounds here, first evacuating variables we don't need to be
5441 * saved on the stack, then in the second round moving things to the stack.
5442 */
5443 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5444 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5445 {
5446 uint32_t fVars = pReNative->Core.bmVars;
5447 while (fVars)
5448 {
5449 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5450 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5451 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
5452 { fVars &= ~RT_BIT_32(idxVar); continue; /* Clear the bit first, or the while loop above never terminates. */ }
5453
5454 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5455 && (RT_BIT_32(idxReg) & fRegMask)
5456 && ( iLoop == 0
5457 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5458 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5459 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5460 {
5461 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5462 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5463 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5464 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5465 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5466 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5467
5468 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5469 {
5470 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5471 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5472 }
5473
5474 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5475 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5476
5477 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5478 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5479 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5480 return idxReg;
5481 }
5482 fVars &= ~RT_BIT_32(idxVar);
5483 }
5484 }
5485
5486 AssertFailed();
5487 return UINT8_MAX;
5488}
5489
5490
5491/**
5492 * Flushes a set of guest register shadow copies.
5493 *
5494 * This is usually done after calling a threaded function or a C-implementation
5495 * of an instruction.
5496 *
5497 * @param pReNative The native recompile state.
5498 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5499 */
5500DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5501{
5502 /*
5503 * Reduce the mask by what's currently shadowed
5504 */
5505 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5506 fGstSimdRegs &= bmGstSimdRegShadows;
5507 if (fGstSimdRegs)
5508 {
5509 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5510 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5511 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5512 if (bmGstSimdRegShadowsNew)
5513 {
5514 /*
5515 * Partial.
5516 */
5517 do
5518 {
5519 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5520 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5521 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5522 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5523 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5524 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5525
5526 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5527 fGstSimdRegs &= ~fInThisHstReg;
5528 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5529 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5530 if (!fGstRegShadowsNew)
5531 {
5532 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5533 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5534 }
5535 } while (fGstSimdRegs != 0);
5536 }
5537 else
5538 {
5539 /*
5540 * Clear all.
5541 */
5542 do
5543 {
5544 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5545 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5546 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5547 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5548 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5549 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5550
5551 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5552 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5553 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5554 } while (fGstSimdRegs != 0);
5555 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5556 }
5557 }
5558}
5559
5560
5561/**
5562 * Allocates a temporary host SIMD register.
5563 *
5564 * This may emit code to save register content onto the stack in order to free
5565 * up a register.
5566 *
5567 * @returns The host register number; throws VBox status code on failure,
5568 * so no need to check the return value.
5569 * @param pReNative The native recompile state.
5570 * @param poff Pointer to the variable with the code buffer position.
5571 * This will be updated if we need to move a variable from
5572 * register to stack in order to satisfy the request.
5573 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5574 * registers (@c true, default) or the other way around
5575 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5576 */
5577DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5578{
5579 /*
5580 * Try find a completely unused register, preferably a call-volatile one.
5581 */
5582 uint8_t idxSimdReg;
5583 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5584 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5585 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5586 if (fRegs)
5587 {
5588 if (fPreferVolatile)
5589 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5590 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5591 else
5592 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5593 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5594 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5595 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5596
5597 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5598 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5599 }
5600 else
5601 {
5602 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5603 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5604 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5605 }
5606
5607 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5608 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5609}
5610
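/*
 * Illustrative usage sketch only (the SIMD operations themselves are placeholders):
 *
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      ... emit SIMD instructions using idxSimdTmp as scratch ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */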
5611
5612/**
5613 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5614 * registers.
5615 *
5616 * @returns The host register number; throws VBox status code on failure,
5617 * so no need to check the return value.
5618 * @param pReNative The native recompile state.
5619 * @param poff Pointer to the variable with the code buffer position.
5620 * This will be updated if we need to move a variable from
5621 * register to stack in order to satisfy the request.
5622 * @param fRegMask Mask of acceptable registers.
5623 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5624 * registers (@c true, default) or the other way around
5625 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5626 */
5627DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5628 bool fPreferVolatile /*= true*/)
5629{
5630 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5631 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5632
5633 /*
5634 * Try find a completely unused register, preferably a call-volatile one.
5635 */
5636 uint8_t idxSimdReg;
5637 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5638 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5639 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5640 & fRegMask;
5641 if (fRegs)
5642 {
5643 if (fPreferVolatile)
5644 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5645 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5646 else
5647 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5648 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5649 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5650 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5651
5652 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5653 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5654 }
5655 else
5656 {
5657 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5658 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5659 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5660 }
5661
5662 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5663 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5664}
5665
5666
5667/**
5668 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5669 *
5670 * @param pReNative The native recompile state.
5671 * @param idxHstSimdReg The host SIMD register to update the state for.
5672 * @param enmLoadSz The load size to set.
5673 */
5674DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5675 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5676{
5677 /* Everything valid already? -> nothing to do. */
5678 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5679 return;
5680
5681 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5682 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5683 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5684 {
5685 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5686 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5687 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5688 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5689 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5690 }
5691}
5692
5693
5694static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5695 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5696{
5697 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5698 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5699 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5700 {
5701# ifdef RT_ARCH_ARM64
5702 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5703 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5704# endif
5705
5706 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5707 {
5708 switch (enmLoadSzDst)
5709 {
5710 case kIemNativeGstSimdRegLdStSz_256:
5711 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5712 break;
5713 case kIemNativeGstSimdRegLdStSz_Low128:
5714 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5715 break;
5716 case kIemNativeGstSimdRegLdStSz_High128:
5717 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5718 break;
5719 default:
5720 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5721 }
5722
5723 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5724 }
5725 }
5726 else
5727 {
5728        /* Complicated case where the source currently holds only part of the required value; to be implemented later. */
5729 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5730 }
5731
5732 return off;
5733}
5734
5735
5736/**
5737 * Allocates a temporary host SIMD register for keeping a guest
5738 * SIMD register value.
5739 *
5740 * Since we may already have a register holding the guest register value,
5741 * code will be emitted to do the loading if that's not the case. Code may also
5742 * be emitted if we have to free up a register to satisfy the request.
5743 *
5744 * @returns The host register number; throws VBox status code on failure, so no
5745 * need to check the return value.
5746 * @param pReNative The native recompile state.
5747 * @param poff Pointer to the variable with the code buffer
5748 *                      position. This will be updated if we need to move a
5749 * variable from register to stack in order to satisfy
5750 * the request.
5751 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5752 * @param enmIntendedUse How the caller will be using the host register.
5753 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5754 * register is okay (default). The ASSUMPTION here is
5755 * that the caller has already flushed all volatile
5756 * registers, so this is only applied if we allocate a
5757 * new register.
5758 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5759 */
5760DECL_HIDDEN_THROW(uint8_t)
5761iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5762 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5763 bool fNoVolatileRegs /*= false*/)
5764{
5765 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5766#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5767 AssertMsg( pReNative->idxCurCall == 0
5768 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5769 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5770 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5771 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5772 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5773 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5774#endif
5775#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5776 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5777#endif
5778 uint32_t const fRegMask = !fNoVolatileRegs
5779 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5780 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5781
5782 /*
5783 * First check if the guest register value is already in a host register.
5784 */
5785 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5786 {
5787 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5788 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5789 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5790 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5791
5792 /* It's not supposed to be allocated... */
5793 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5794 {
5795 /*
5796 * If the register will trash the guest shadow copy, try find a
5797 * completely unused register we can use instead. If that fails,
5798 * we need to disassociate the host reg from the guest reg.
5799 */
5800 /** @todo would be nice to know if preserving the register is in any way helpful. */
5801            /* If the purpose is calculations, try to duplicate the register value as
5802 we'll be clobbering the shadow. */
5803 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5804 && ( ~pReNative->Core.bmHstSimdRegs
5805 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5806 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5807 {
5808 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5809
5810 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5811
5812 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5813 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5814 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5815 idxSimdReg = idxRegNew;
5816 }
5817 /* If the current register matches the restrictions, go ahead and allocate
5818 it for the caller. */
5819 else if (fRegMask & RT_BIT_32(idxSimdReg))
5820 {
5821 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5822 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5823 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5824 {
5825 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5826 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5827 else
5828 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5829 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5830 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5831 }
5832 else
5833 {
5834 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5835 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5836 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5837 }
5838 }
5839 /* Otherwise, allocate a register that satisfies the caller and transfer
5840 the shadowing if compatible with the intended use. (This basically
5841               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5842 else
5843 {
5844 Assert(fNoVolatileRegs);
5845 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5846 !fNoVolatileRegs
5847 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5848 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5849 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5850 {
5851 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5852                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5853 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5854 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5855 }
5856 else
5857 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5858 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5859 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5860 idxSimdReg = idxRegNew;
5861 }
5862 }
5863 else
5864 {
5865 /*
5866 * Oops. Shadowed guest register already allocated!
5867 *
5868 * Allocate a new register, copy the value and, if updating, the
5869 * guest shadow copy assignment to the new register.
5870 */
5871 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5872 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5873 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5874 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5875
5876 /** @todo share register for readonly access. */
5877 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5878 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5879
5880 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5881 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5882 else
5883 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5884
5885 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5886 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5887 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5888 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5889 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5890 else
5891 {
5892 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5893 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5894 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5895 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5896 }
5897 idxSimdReg = idxRegNew;
5898 }
5899 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5900
5901#ifdef VBOX_STRICT
5902 /* Strict builds: Check that the value is correct. */
5903 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5904 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5905#endif
5906
5907 return idxSimdReg;
5908 }
5909
5910 /*
5911      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5912 */
5913 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5914
5915 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5916 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5917 else
5918 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5919
5920 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5921 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5922
5923 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5924 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5925
5926 return idxRegNew;
5927}
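/*
 * Usage sketch (hypothetical caller): fetching the low 128 bits of guest XMM0
 * read-only; the emitter code in between is omitted and iemNativeSimdRegFreeTmp
 * is assumed to be the matching release function.
 *
 *     uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                           IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                           kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                           kIemNativeGstRegUse_ReadOnly);
 *     ... emit code that reads host SIMD register idxHstSimdReg ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);
 */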
5928
5929#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5930
5931
5932
5933/*********************************************************************************************************************************
5934* Code emitters for flushing pending guest register writes and sanity checks *
5935*********************************************************************************************************************************/
5936
5937#ifdef VBOX_STRICT
5938/**
5939 * Does internal register allocator sanity checks.
5940 */
5941DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5942{
5943 /*
5944 * Iterate host registers building a guest shadowing set.
5945 */
5946 uint64_t bmGstRegShadows = 0;
5947 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5948 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5949 while (bmHstRegsWithGstShadow)
5950 {
5951 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5952 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5953 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5954
5955 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5956 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5957 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5958 bmGstRegShadows |= fThisGstRegShadows;
5959 while (fThisGstRegShadows)
5960 {
5961 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5962 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5963 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5964 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5965 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5966 }
5967 }
5968 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5969 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5970 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5971
5972 /*
5973 * Now the other way around, checking the guest to host index array.
5974 */
5975 bmHstRegsWithGstShadow = 0;
5976 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5977 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5978 while (bmGstRegShadows)
5979 {
5980 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5981 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5982 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5983
5984 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5985 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5986 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5987 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5988 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5989 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5990 }
5991 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5992 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5993 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5994}
5995#endif /* VBOX_STRICT */
5996
5997
5998/**
5999 * Flushes any delayed guest register writes.
6000 *
6001 * This must be called prior to calling CImpl functions and any helpers that use
6002 * the guest state (like raising exceptions) and such.
6003 *
6004 * This optimization has not yet been implemented. The first target would be
6005 * RIP updates, since these are the most common ones.
6006 */
6007DECL_HIDDEN_THROW(uint32_t)
6008iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
6009{
6010#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6011 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6012 off = iemNativeEmitPcWriteback(pReNative, off);
6013#else
6014 RT_NOREF(pReNative, fGstShwExcept);
6015#endif
6016
6017#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6018 /** @todo r=bird: There must be a quicker way to check if anything needs
6019 * doing and then call simd function to do the flushing */
6020 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6021 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6022 {
6023 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6024 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6025
6026 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6027 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6028
6029 if ( fFlushShadows
6030 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6031 {
6032 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6033
6034 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6035 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6036 }
6037 }
6038#else
6039 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6040#endif
6041
6042 return off;
6043}
6044
6045
6046#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6047/**
6048 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6049 */
6050DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6051{
6052 Assert(pReNative->Core.offPc);
6053# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6054 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6055 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6056# endif
6057
6058# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6059 /* Allocate a temporary PC register. */
6060 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6061
6062 /* Perform the addition and store the result. */
6063 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6064 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6065
6066 /* Free but don't flush the PC register. */
6067 iemNativeRegFreeTmp(pReNative, idxPcReg);
6068# else
6069 /* Compare the shadow with the context value, they should match. */
6070 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6071 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6072# endif
6073
6074 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6075 pReNative->Core.offPc = 0;
6076 pReNative->Core.cInstrPcUpdateSkipped = 0;
6077
6078 return off;
6079}
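/*
 * Conceptually the code emitted above performs (a sketch, not the actual host
 * instructions):
 *
 *     pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;
 *
 * after which offPc and cInstrPcUpdateSkipped are reset so the following
 * instructions accumulate a fresh delta.
 */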
6080#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6081
6082
6083/*********************************************************************************************************************************
6084* Code Emitters (larger snippets) *
6085*********************************************************************************************************************************/
6086
6087/**
6088 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6089 * extending to 64-bit width.
6090 *
6091 * @returns New code buffer offset on success, UINT32_MAX on failure.
6092 * @param   pReNative   The native recompile state.
6093 * @param off The current code buffer position.
6094 * @param idxHstReg The host register to load the guest register value into.
6095 * @param enmGstReg The guest register to load.
6096 *
6097 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6098 * that is something the caller needs to do if applicable.
6099 */
6100DECL_HIDDEN_THROW(uint32_t)
6101iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6102{
6103 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6104 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6105
6106 switch (g_aGstShadowInfo[enmGstReg].cb)
6107 {
6108 case sizeof(uint64_t):
6109 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6110 case sizeof(uint32_t):
6111 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6112 case sizeof(uint16_t):
6113 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6114#if 0 /* not present in the table. */
6115 case sizeof(uint8_t):
6116 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6117#endif
6118 default:
6119 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6120 }
6121}
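/*
 * Illustrative call (mirrors the strict-build value check further down): load
 * the guest RIP shadow into the fixed temporary without establishing shadowing:
 *
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 */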
6122
6123
6124#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6125/**
6126 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6127 *
6128 * @returns New code buffer offset on success, UINT32_MAX on failure.
6129 * @param pReNative The recompiler state.
6130 * @param off The current code buffer position.
6131 * @param idxHstSimdReg The host register to load the guest register value into.
6132 * @param enmGstSimdReg The guest register to load.
6133 * @param enmLoadSz The load size of the register.
6134 *
6135 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6136 * that is something the caller needs to do if applicable.
6137 */
6138DECL_HIDDEN_THROW(uint32_t)
6139iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6140 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6141{
6142 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6143
6144 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6145 switch (enmLoadSz)
6146 {
6147 case kIemNativeGstSimdRegLdStSz_256:
6148 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6149 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6150 case kIemNativeGstSimdRegLdStSz_Low128:
6151 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6152 case kIemNativeGstSimdRegLdStSz_High128:
6153 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6154 default:
6155 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6156 }
6157}
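/*
 * Illustrative call (hypothetical host register index): loading both halves of
 * guest YMM1 into the host SIMD register (an adjacent register pair on ARM64):
 *
 *     off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdReg,
 *                                                        IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                        kIemNativeGstSimdRegLdStSz_256);
 */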
6158#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6159
6160#ifdef VBOX_STRICT
6161
6162/**
6163 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6164 *
6165 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6166 * Trashes EFLAGS on AMD64.
6167 */
6168DECL_HIDDEN_THROW(uint32_t)
6169iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6170{
6171# ifdef RT_ARCH_AMD64
6172 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6173
6174 /* rol reg64, 32 */
6175 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6176 pbCodeBuf[off++] = 0xc1;
6177 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6178 pbCodeBuf[off++] = 32;
6179
6180 /* test reg32, ffffffffh */
6181 if (idxReg >= 8)
6182 pbCodeBuf[off++] = X86_OP_REX_B;
6183 pbCodeBuf[off++] = 0xf7;
6184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6185 pbCodeBuf[off++] = 0xff;
6186 pbCodeBuf[off++] = 0xff;
6187 pbCodeBuf[off++] = 0xff;
6188 pbCodeBuf[off++] = 0xff;
6189
6190 /* je/jz +1 */
6191 pbCodeBuf[off++] = 0x74;
6192 pbCodeBuf[off++] = 0x01;
6193
6194 /* int3 */
6195 pbCodeBuf[off++] = 0xcc;
6196
6197 /* rol reg64, 32 */
6198 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6199 pbCodeBuf[off++] = 0xc1;
6200 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6201 pbCodeBuf[off++] = 32;
6202
6203# elif defined(RT_ARCH_ARM64)
6204 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6205 /* lsr tmp0, reg64, #32 */
6206 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6207 /* cbz tmp0, +1 */
6208 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6209 /* brk #0x1100 */
6210 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6211
6212# else
6213# error "Port me!"
6214# endif
6215 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6216 return off;
6217}
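/*
 * Summary of the sequence emitted above: it traps if bits 63:32 of idxReg are
 * non-zero.  AMD64 rotates the register by 32, tests the now-low half and hits
 * an int3 on a non-zero result (then rotates back); ARM64 shifts the register
 * right by 32 into tmp0 and brk's if the result is non-zero.
 */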
6218
6219
6220/**
6221 * Emitting code that checks that the content of register @a idxReg is the same
6222 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6223 * instruction if that's not the case.
6224 *
6225 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6226 * Trashes EFLAGS on AMD64.
6227 */
6228DECL_HIDDEN_THROW(uint32_t)
6229iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6230{
6231# ifdef RT_ARCH_AMD64
6232 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6233
6234 /* cmp reg, [mem] */
6235 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6236 {
6237 if (idxReg >= 8)
6238 pbCodeBuf[off++] = X86_OP_REX_R;
6239 pbCodeBuf[off++] = 0x38;
6240 }
6241 else
6242 {
6243 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6244 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6245 else
6246 {
6247 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6248 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6249 else
6250 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6251 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6252 if (idxReg >= 8)
6253 pbCodeBuf[off++] = X86_OP_REX_R;
6254 }
6255 pbCodeBuf[off++] = 0x39;
6256 }
6257 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6258
6259 /* je/jz +1 */
6260 pbCodeBuf[off++] = 0x74;
6261 pbCodeBuf[off++] = 0x01;
6262
6263 /* int3 */
6264 pbCodeBuf[off++] = 0xcc;
6265
6266 /* For values smaller than the register size, we must check that the rest
6267 of the register is all zeros. */
6268 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6269 {
6270 /* test reg64, imm32 */
6271 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6272 pbCodeBuf[off++] = 0xf7;
6273 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6274 pbCodeBuf[off++] = 0;
6275 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6276 pbCodeBuf[off++] = 0xff;
6277 pbCodeBuf[off++] = 0xff;
6278
6279 /* je/jz +1 */
6280 pbCodeBuf[off++] = 0x74;
6281 pbCodeBuf[off++] = 0x01;
6282
6283 /* int3 */
6284 pbCodeBuf[off++] = 0xcc;
6285 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6286 }
6287 else
6288 {
6289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6290 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6291 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6292 }
6293
6294# elif defined(RT_ARCH_ARM64)
6295 /* mov TMP0, [gstreg] */
6296 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6297
6298 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6299 /* sub tmp0, tmp0, idxReg */
6300 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6301 /* cbz tmp0, +1 */
6302 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6303 /* brk #0x1000+enmGstReg */
6304 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6306
6307# else
6308# error "Port me!"
6309# endif
6310 return off;
6311}
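/*
 * Usage sketch (idxPcReg is a hypothetical host register shadowing the guest RIP):
 *
 *     off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxPcReg, kIemNativeGstReg_Pc);
 *
 * The emitted code traps (int3 / brk) if the host register no longer matches
 * the CPUMCTX copy of the guest register.
 */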
6312
6313
6314# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6315# ifdef RT_ARCH_AMD64
6316/**
6317 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6318 */
6319DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6320{
6321 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6322 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6323 if (idxSimdReg >= 8)
6324 pbCodeBuf[off++] = X86_OP_REX_R;
6325 pbCodeBuf[off++] = 0x0f;
6326 pbCodeBuf[off++] = 0x38;
6327 pbCodeBuf[off++] = 0x29;
6328 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6329
6330 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6331 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6332 pbCodeBuf[off++] = X86_OP_REX_W
6333 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6334 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6335 pbCodeBuf[off++] = 0x0f;
6336 pbCodeBuf[off++] = 0x3a;
6337 pbCodeBuf[off++] = 0x16;
6338 pbCodeBuf[off++] = 0xeb;
6339 pbCodeBuf[off++] = 0x00;
6340
6341 /* cmp tmp0, 0xffffffffffffffff. */
6342 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6343 pbCodeBuf[off++] = 0x83;
6344 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6345 pbCodeBuf[off++] = 0xff;
6346
6347 /* je/jz +1 */
6348 pbCodeBuf[off++] = 0x74;
6349 pbCodeBuf[off++] = 0x01;
6350
6351 /* int3 */
6352 pbCodeBuf[off++] = 0xcc;
6353
6354 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6355 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6356 pbCodeBuf[off++] = X86_OP_REX_W
6357 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6358 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6359 pbCodeBuf[off++] = 0x0f;
6360 pbCodeBuf[off++] = 0x3a;
6361 pbCodeBuf[off++] = 0x16;
6362 pbCodeBuf[off++] = 0xeb;
6363 pbCodeBuf[off++] = 0x01;
6364
6365 /* cmp tmp0, 0xffffffffffffffff. */
6366 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6367 pbCodeBuf[off++] = 0x83;
6368 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6369 pbCodeBuf[off++] = 0xff;
6370
6371 /* je/jz +1 */
6372 pbCodeBuf[off++] = 0x74;
6373 pbCodeBuf[off++] = 0x01;
6374
6375 /* int3 */
6376 pbCodeBuf[off++] = 0xcc;
6377
6378 return off;
6379}
6380# endif
6381
6382
6383/**
6384 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6385 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6386 * instruction if that's not the case.
6387 *
6388 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6389 * Trashes EFLAGS on AMD64.
6390 */
6391DECL_HIDDEN_THROW(uint32_t)
6392iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6393 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6394{
6395    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6396 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6397 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6398 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6399 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6400 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6401 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6402 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6403 return off;
6404
6405# ifdef RT_ARCH_AMD64
6406 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6407 {
6408 /* movdqa vectmp0, idxSimdReg */
6409 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6410
6411 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6412
6413 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6414 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6415 }
6416
6417 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6418 {
6419        /* Because CPUMCTX stores the high 128 bits separately, we have to repeat the check for the high half. */
6420 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6421
6422 /* vextracti128 vectmp0, idxSimdReg, 1 */
6423 pbCodeBuf[off++] = X86_OP_VEX3;
6424 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6425 | X86_OP_VEX3_BYTE1_X
6426 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6427 | 0x03; /* Opcode map */
6428 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6429 pbCodeBuf[off++] = 0x39;
6430 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6431 pbCodeBuf[off++] = 0x01;
6432
6433 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6434 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6435 }
6436# elif defined(RT_ARCH_ARM64)
6437 /* mov vectmp0, [gstreg] */
6438 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6439
6440 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6441 {
6442 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6443 /* eor vectmp0, vectmp0, idxSimdReg */
6444 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6445 /* uaddlv vectmp0, vectmp0.16B */
6446 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6447 /* umov tmp0, vectmp0.H[0] */
6448 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6449 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6450 /* cbz tmp0, +1 */
6451 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6452 /* brk #0x1000+enmGstReg */
6453 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6454 }
6455
6456 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6457 {
6458 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6459 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6460 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6461 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6462 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6463 /* umov tmp0, (vectmp0 + 1).H[0] */
6464 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6465 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6466 /* cbz tmp0, +1 */
6467 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6468 /* brk #0x1000+enmGstReg */
6469 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6470 }
6471
6472# else
6473# error "Port me!"
6474# endif
6475
6476 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6477 return off;
6478}
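/*
 * The ARM64 path above boils down to the following per 128-bit half (a sketch
 * of the emitted instructions, register names symbolic):
 *
 *     eor     vectmp0.16b, vectmp0.16b, v<idxSimdReg>.16b  ; all-zero iff host and CPUMCTX values match
 *     uaddlv  h<vectmp0>, vectmp0.16b                      ; horizontal byte sum ends up in lane H[0]
 *     umov    w<tmp0>, vectmp0.h[0]
 *     cbz     w<tmp0>, past the brk                        ; skip the trap when the sum is zero
 *     brk     #0x1000 + enmGstSimdReg
 */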
6479# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6480
6481
6482/**
6483 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6484 * important bits.
6485 *
6486 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6487 * Trashes EFLAGS on AMD64.
6488 */
6489DECL_HIDDEN_THROW(uint32_t)
6490iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6491{
6492 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6493 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6494 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6495 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6496
6497#ifdef RT_ARCH_AMD64
6498 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6499
6500 /* je/jz +1 */
6501 pbCodeBuf[off++] = 0x74;
6502 pbCodeBuf[off++] = 0x01;
6503
6504 /* int3 */
6505 pbCodeBuf[off++] = 0xcc;
6506
6507# elif defined(RT_ARCH_ARM64)
6508 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6509
6510 /* b.eq +1 */
6511 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6512 /* brk #0x2000 */
6513 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6514
6515# else
6516# error "Port me!"
6517# endif
6518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6519
6520 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6521 return off;
6522}
6523
6524#endif /* VBOX_STRICT */
6525
6526
6527#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6528/**
6529 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6530 */
6531DECL_HIDDEN_THROW(uint32_t)
6532iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6533{
6534 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6535
6536 fEflNeeded &= X86_EFL_STATUS_BITS;
6537 if (fEflNeeded)
6538 {
6539# ifdef RT_ARCH_AMD64
6540 /* test dword [pVCpu + offVCpu], imm32 */
6541 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6542 if (fEflNeeded <= 0xff)
6543 {
6544 pCodeBuf[off++] = 0xf6;
6545 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6546 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6547 }
6548 else
6549 {
6550 pCodeBuf[off++] = 0xf7;
6551 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6552 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6553 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6554 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6555 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6556 }
6557 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6558
6559# else
6560 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6561 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6562 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6563# ifdef RT_ARCH_ARM64
6564 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6565 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6566# else
6567# error "Port me!"
6568# endif
6569 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6570# endif
6571 }
6572 return off;
6573}
6574#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6575
6576
6577/**
6578 * Emits code for checking the return code of a call and rcPassUp, returning
6579 * from the generated code if either is non-zero.
6580 */
6581DECL_HIDDEN_THROW(uint32_t)
6582iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6583{
6584#ifdef RT_ARCH_AMD64
6585 /*
6586 * AMD64: eax = call status code.
6587 */
6588
6589 /* edx = rcPassUp */
6590 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6591# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6592 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6593# endif
6594
6595 /* edx = eax | rcPassUp */
6596 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6597 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6598 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6599 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6600
6601 /* Jump to non-zero status return path. */
6602 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6603
6604 /* done. */
6605
6606#elif RT_ARCH_ARM64
6607 /*
6608 * ARM64: w0 = call status code.
6609 */
6610# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6611 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6612# endif
6613 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6614
6615 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6616
6617 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6618
6619 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6620 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6621 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6622
6623#else
6624# error "port me"
6625#endif
6626 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6627 RT_NOREF_PV(idxInstr);
6628 return off;
6629}
6630
6631
6632/**
6633 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6634 * raising a \#GP(0) if it isn't.
6635 *
6636 * @returns New code buffer offset, UINT32_MAX on failure.
6637 * @param pReNative The native recompile state.
6638 * @param off The code buffer offset.
6639 * @param idxAddrReg The host register with the address to check.
6640 * @param idxInstr The current instruction.
6641 */
6642DECL_HIDDEN_THROW(uint32_t)
6643iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6644{
6645 /*
6646 * Make sure we don't have any outstanding guest register writes as we may
6647      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6648 */
6649 off = iemNativeRegFlushPendingWrites(pReNative, off);
6650
6651#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6652 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6653#else
6654 RT_NOREF(idxInstr);
6655#endif
6656
6657#ifdef RT_ARCH_AMD64
6658 /*
6659 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6660 * return raisexcpt();
6661      * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
6662 */
6663 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6664
6665 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6666 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6667 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6668 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6669 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6670
6671 iemNativeRegFreeTmp(pReNative, iTmpReg);
6672
6673#elif defined(RT_ARCH_ARM64)
6674 /*
6675 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6676 * return raisexcpt();
6677 * ----
6678 * mov x1, 0x800000000000
6679 * add x1, x0, x1
6680 * cmp xzr, x1, lsr 48
6681 * b.ne .Lraisexcpt
6682 */
6683 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6684
6685 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6686 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6687 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6688 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6689
6690 iemNativeRegFreeTmp(pReNative, iTmpReg);
6691
6692#else
6693# error "Port me"
6694#endif
6695 return off;
6696}
6697
6698
6699/**
6700 * Emits code to check that the content of @a idxAddrReg is within the limit
6701 * of CS, raising a \#GP(0) if it isn't.
6702 *
6703 * @returns New code buffer offset; throws VBox status code on error.
6704 * @param pReNative The native recompile state.
6705 * @param off The code buffer offset.
6706 * @param idxAddrReg The host register (32-bit) with the address to
6707 * check.
6708 * @param idxInstr The current instruction.
6709 */
6710DECL_HIDDEN_THROW(uint32_t)
6711iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6712 uint8_t idxAddrReg, uint8_t idxInstr)
6713{
6714 /*
6715 * Make sure we don't have any outstanding guest register writes as we may
6716      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6717 */
6718 off = iemNativeRegFlushPendingWrites(pReNative, off);
6719
6720#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6721 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6722#else
6723 RT_NOREF(idxInstr);
6724#endif
6725
6726 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6727 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6728 kIemNativeGstRegUse_ReadOnly);
6729
6730 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6731 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6732
6733 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6734 return off;
6735}
6736
6737
6738/**
6739 * Emits a call to a CImpl function or something similar.
6740 */
6741DECL_HIDDEN_THROW(uint32_t)
6742iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6743 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6744{
6745 /* Writeback everything. */
6746 off = iemNativeRegFlushPendingWrites(pReNative, off);
6747
6748 /*
6749      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6750 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6751 */
6752 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6753 fGstShwFlush
6754 | RT_BIT_64(kIemNativeGstReg_Pc)
6755 | RT_BIT_64(kIemNativeGstReg_EFlags));
6756 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6757
6758 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6759
6760 /*
6761 * Load the parameters.
6762 */
6763#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6764     /* Special-case the hidden VBOXSTRICTRC pointer. */
6765 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6766 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6767 if (cAddParams > 0)
6768 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6769 if (cAddParams > 1)
6770 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6771 if (cAddParams > 2)
6772 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6773 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6774
6775#else
6776 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6777 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6778 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6779 if (cAddParams > 0)
6780 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6781 if (cAddParams > 1)
6782 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6783 if (cAddParams > 2)
6784# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6785 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6786# else
6787 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6788# endif
6789#endif
6790
6791 /*
6792 * Make the call.
6793 */
6794 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6795
6796#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6797 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6798#endif
6799
6800 /*
6801 * Check the status code.
6802 */
6803 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6804}
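/*
 * Usage sketch (hypothetical values): deferring an instruction to a CImpl
 * worker with a single extra parameter; pfnCImpl, cbInstr and fGstShwFlush are
 * whatever the caller's MC block dictates.
 *
 *     off = iemNativeEmitCImplCall(pReNative, off, idxInstr, fGstShwFlush,
 *                                  (uintptr_t)pfnCImpl, cbInstr, 1 /*cAddParams*/,
 *                                  uParam0, 0 /*uParam1*/, 0 /*uParam2*/);
 */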
6805
6806
6807/**
6808 * Emits a call to a threaded worker function.
6809 */
6810DECL_HIDDEN_THROW(uint32_t)
6811iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6812{
6813 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6814
6815 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6816 off = iemNativeRegFlushPendingWrites(pReNative, off);
6817
6818 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6819 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6820
6821#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6822 /* The threaded function may throw / long jmp, so set current instruction
6823 number if we're counting. */
6824 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6825#endif
6826
6827 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6828
6829#ifdef RT_ARCH_AMD64
6830 /* Load the parameters and emit the call. */
6831# ifdef RT_OS_WINDOWS
6832# ifndef VBOXSTRICTRC_STRICT_ENABLED
6833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6834 if (cParams > 0)
6835 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6836 if (cParams > 1)
6837 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6838 if (cParams > 2)
6839 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6840# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6841 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6842 if (cParams > 0)
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6844 if (cParams > 1)
6845 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6846 if (cParams > 2)
6847 {
6848 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6849 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6850 }
6851 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6852# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6853# else
6854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6855 if (cParams > 0)
6856 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6857 if (cParams > 1)
6858 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6859 if (cParams > 2)
6860 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6861# endif
6862
6863 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6864
6865# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6866 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6867# endif
6868
6869#elif RT_ARCH_ARM64
6870 /*
6871 * ARM64:
6872 */
6873 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6874 if (cParams > 0)
6875 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6876 if (cParams > 1)
6877 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6878 if (cParams > 2)
6879 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6880
6881 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6882
6883#else
6884# error "port me"
6885#endif
6886
6887 /*
6888 * Check the status code.
6889 */
6890 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6891
6892 return off;
6893}
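/*
 * Per threaded call entry the sequence generated above is roughly (sketch):
 *
 *     flush pending writes and guest shadows
 *     load pVCpu and up to three auParams into the calling-convention registers
 *     call g_apfnIemThreadedFunctions[pCallEntry->enmFunction]
 *     check the return code and rcPassUp (iemNativeEmitCheckCallRetAndPassUp)
 */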
6894
6895#ifdef VBOX_WITH_STATISTICS
6896/**
6897 * Emits code to update the threaded call statistics.
6898 */
6899DECL_INLINE_THROW(uint32_t)
6900iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6901{
6902 /*
6903 * Update threaded function stats.
6904 */
6905 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6906 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6907# if defined(RT_ARCH_ARM64)
6908 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6909 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6910 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6911 iemNativeRegFreeTmp(pReNative, idxTmp1);
6912 iemNativeRegFreeTmp(pReNative, idxTmp2);
6913# else
6914 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6915# endif
6916 return off;
6917}
6918#endif /* VBOX_WITH_STATISTICS */
6919
6920
6921/**
6922 * Emits the code at the ReturnWithFlags label (returns
6923 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6924 */
6925static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6926{
6927 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6928 if (idxLabel != UINT32_MAX)
6929 {
6930 iemNativeLabelDefine(pReNative, idxLabel, off);
6931
6932 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6933
6934 /* jump back to the return sequence. */
6935 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6936 }
6937 return off;
6938}
6939
6940
6941/**
6942 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6943 */
6944static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6945{
6946 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6947 if (idxLabel != UINT32_MAX)
6948 {
6949 iemNativeLabelDefine(pReNative, idxLabel, off);
6950
6951 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6952
6953 /* jump back to the return sequence. */
6954 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6955 }
6956 return off;
6957}
6958
6959
6960/**
6961 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6962 */
6963static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6964{
6965 /*
6966 * Generate the rc + rcPassUp fiddling code if needed.
6967 */
6968 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6969 if (idxLabel != UINT32_MAX)
6970 {
6971 iemNativeLabelDefine(pReNative, idxLabel, off);
6972
6973 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6974#ifdef RT_ARCH_AMD64
6975# ifdef RT_OS_WINDOWS
6976# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6977 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6978# endif
6979 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6980 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6981# else
6982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6984# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6985 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6986# endif
6987# endif
6988# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6989 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6990# endif
6991
6992#else
6993 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6995 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6996#endif
6997
6998 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6999 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7000 }
7001 return off;
7002}
7003
7004
7005/**
7006 * Emits a standard epilog.
7007 */
7008static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7009{
7010 *pidxReturnLabel = UINT32_MAX;
7011
7012 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7013 off = iemNativeRegFlushPendingWrites(pReNative, off);
7014
7015 /*
7016 * Successful return, so clear the return register (eax, w0).
7017 */
7018 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7019
7020 /*
7021 * Define label for common return point.
7022 */
7023 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7024 *pidxReturnLabel = idxReturn;
7025
7026 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7027
7028 /*
7029 * Restore registers and return.
7030 */
7031#ifdef RT_ARCH_AMD64
7032 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7033
7034 /* Reposition esp at the r15 restore point. */
7035 pbCodeBuf[off++] = X86_OP_REX_W;
7036 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7037 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7038 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7039
7040 /* Pop non-volatile registers and return */
7041 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7042 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7043 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7044 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7045 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7046 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7047 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7048 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7049# ifdef RT_OS_WINDOWS
7050 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7051 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7052# endif
7053 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7054 pbCodeBuf[off++] = 0xc9; /* leave */
7055 pbCodeBuf[off++] = 0xc3; /* ret */
7056 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7057
7058#elif RT_ARCH_ARM64
7059 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7060
7061 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7062 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7063 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7064 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7065 IEMNATIVE_FRAME_VAR_SIZE / 8);
7066 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7067 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7068 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7069 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7070 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7071 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7072 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7073 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7074 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7075 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7076 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7077 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7078
7079 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7080 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7081 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7082 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7083
7084 /* retab / ret */
7085# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7086 if (1)
7087 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7088 else
7089# endif
7090 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7091
7092#else
7093# error "port me"
7094#endif
7095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7096
7097 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7098}
7099
7100
7101/**
7102 * Emits a standard prolog.
7103 */
7104static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7105{
7106#ifdef RT_ARCH_AMD64
7107 /*
7108 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7109 * reserving 64 bytes for stack variables plus 4 non-register argument
7110 * slots. Fixed register assignment: xBX = pVCpu;
7111 *
7112 * Since we always do the same register spilling, we can use the same
7113 * unwind description for all the code.
7114 */
7115 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7116 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7117 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7118 pbCodeBuf[off++] = 0x8b;
7119 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7120 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7121 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7122# ifdef RT_OS_WINDOWS
7123 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7124 pbCodeBuf[off++] = 0x8b;
7125 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7126 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7127 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7128# else
7129 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7130 pbCodeBuf[off++] = 0x8b;
7131 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7132# endif
7133 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7134 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7135 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7136 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7137 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7138 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7139 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7140 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7141
7142# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7143 /* Save the frame pointer. */
7144 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7145# endif
7146
7147 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7148 X86_GREG_xSP,
7149 IEMNATIVE_FRAME_ALIGN_SIZE
7150 + IEMNATIVE_FRAME_VAR_SIZE
7151 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7152 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7153 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7154 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7155 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
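    /* Resulting AMD64 frame layout (illustrative sketch derived from the pushes above;
       the exact sizes come from the IEMNATIVE_FRAME_* constants):
            [rbp+8]   return address
            [rbp]     caller rbp
            [rbp-8]   caller rbx
            Windows:  rsi, rdi, r12..r15 at rbp-16 .. rbp-56 (7 pushes)
            SysV:     r12..r15 at rbp-16 .. rbp-40 (5 pushes)
            below:    alignment + variable area + stack/shadow argument slots (the sub rsp above)
       The epilog's 'lea rsp, [rbp - (gcc ? 5 : 7) * 8]' repositions rsp at the r15 slot
       before popping these registers again in reverse order. */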
7156
7157#elif RT_ARCH_ARM64
7158 /*
7159 * We set up a stack frame exactly like on x86, only we have to push the
7160 * return address ourselves here. We save all non-volatile registers.
7161 */
7162 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7163
7164# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
7165                      * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
7166                      * It's definitely the dwarf stepping code, but until that is found it's very tedious to figure out
7167                      * whether it's in any way conditional, so we just emit this instruction now and hope for the best... */
7168 /* pacibsp */
7169 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7170# endif
7171
7172 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7173 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7174 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7175 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7176 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7177 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7178 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7179 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7180 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7181 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7182 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7183 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7184 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7185 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7186 /* Save the BP and LR (ret address) registers at the top of the frame. */
7187 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7188 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7189 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7190 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7191 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7192 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7193
7194 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7196
7197 /* mov r28, r0 */
7198 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7199 /* mov r27, r1 */
7200 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7201
7202# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7203 /* Save the frame pointer. */
7204 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7205 ARMV8_A64_REG_X2);
7206# endif
7207
7208#else
7209# error "port me"
7210#endif
7211 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7212 return off;
7213}
7214
7215
7216/*********************************************************************************************************************************
7217* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7218*********************************************************************************************************************************/
7219
7220/**
7221 * Internal work that allocates a variable with kind set to
7222 * kIemNativeVarKind_Invalid and no current stack allocation.
7223 *
7224 * The kind will either be set by the caller or later when the variable is first
7225 * assigned a value.
7226 *
7227 * @returns Unpacked index.
7228 * @internal
7229 */
7230static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7231{
7232 Assert(cbType > 0 && cbType <= 64);
7233 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7234 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
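    /* Illustrative: with bmVars == 0x0007 the inverted mask is 0xfffffff8, so
       ASMBitFirstSetU32 returns 4 and idxVar becomes 3 - the lowest unused index. */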
7235 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7236 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7237 pReNative->Core.aVars[idxVar].cbVar = cbType;
7238 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7239 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7240 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7241 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7242 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7243 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7244 pReNative->Core.aVars[idxVar].u.uValue = 0;
7245#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7246 pReNative->Core.aVars[idxVar].fSimdReg = false;
7247#endif
7248 return idxVar;
7249}
7250
7251
7252/**
7253 * Internal work that allocates an argument variable w/o setting enmKind.
7254 *
7255 * @returns Unpacked index.
7256 * @internal
7257 */
7258static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7259{
7260 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7261 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7262 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7263
7264 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7265 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7266 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7267 return idxVar;
7268}
7269
7270
7271/**
7272 * Gets the stack slot for a stack variable, allocating one if necessary.
7273 *
7274 * Calling this function implies that the stack slot will contain a valid
7275 * variable value. The caller deals with any register currently assigned to the
7276 * variable, typically by spilling it into the stack slot.
7277 *
7278 * @returns The stack slot number.
7279 * @param pReNative The recompiler state.
7280 * @param idxVar The variable.
7281 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7282 */
7283DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7284{
7285 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7286 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7287 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7288
7289 /* Already got a slot? */
7290 uint8_t const idxStackSlot = pVar->idxStackSlot;
7291 if (idxStackSlot != UINT8_MAX)
7292 {
7293 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7294 return idxStackSlot;
7295 }
7296
7297 /*
7298 * A single slot is easy to allocate.
7299 * Allocate them from the top end, closest to BP, to reduce the displacement.
7300 */
7301 if (pVar->cbVar <= sizeof(uint64_t))
7302 {
7303 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7304 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7305 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7306 pVar->idxStackSlot = (uint8_t)iSlot;
7307 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7308 return (uint8_t)iSlot;
7309 }
7310
7311 /*
7312 * We need more than one stack slot.
7313 *
7314 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7315 */
7316 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7317 Assert(pVar->cbVar <= 64);
7318 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7319 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
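    /* Illustrative: a 32 byte variable yields fBitAlignMask = 3 and fBitAllocMask = 0xf,
       so the loop below looks for four consecutive free slots at a 4-slot aligned index. */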
7320 uint32_t bmStack = pReNative->Core.bmStack;
7321 while (bmStack != UINT32_MAX)
7322 {
7323 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7324 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7325 iSlot = (iSlot - 1) & ~fBitAlignMask;
7326 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7327 {
7328 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7329 pVar->idxStackSlot = (uint8_t)iSlot;
7330 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7331 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7332 return (uint8_t)iSlot;
7333 }
7334
7335 bmStack |= (fBitAllocMask << iSlot);
7336 }
7337 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7338}
7339
7340
7341/**
7342 * Changes the variable to a stack variable.
7343 *
7344 * Currently this is only possible to do the first time the variable is used;
7345 * switching later can be implemented but is not done.
7346 *
7347 * @param pReNative The recompiler state.
7348 * @param idxVar The variable.
7349 * @throws VERR_IEM_VAR_IPE_2
7350 */
7351DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7352{
7353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7354 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7355 if (pVar->enmKind != kIemNativeVarKind_Stack)
7356 {
7357 /* We could in theory transition from immediate to stack as well, but it
7358 would involve the caller doing work storing the value on the stack. So,
7359 till that's required we only allow transition from invalid. */
7360 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7361 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7362 pVar->enmKind = kIemNativeVarKind_Stack;
7363
7364 /* Note! We don't allocate a stack slot here, that's only done when a
7365 slot is actually needed to hold a variable value. */
7366 }
7367}
7368
7369
7370/**
7371 * Sets the variable to a constant (immediate) value.
7372 *
7373 * This does not require stack storage as we know the value and can always
7374 * reload it, unless of course it's referenced.
7375 *
7376 * @param pReNative The recompiler state.
7377 * @param idxVar The variable.
7378 * @param uValue The immediate value.
7379 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7380 */
7381DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7382{
7383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7384 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7385 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7386 {
7387 /* Only simple transitions for now. */
7388 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7389 pVar->enmKind = kIemNativeVarKind_Immediate;
7390 }
7391 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7392
7393 pVar->u.uValue = uValue;
7394 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7395 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7396 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7397}
7398
7399
7400/**
7401 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7402 *
7403 * This does not require stack storage as we know the value and can always
7404 * reload it. Loading is postponed till needed.
7405 *
7406 * @param pReNative The recompiler state.
7407 * @param idxVar The variable. Unpacked.
7408 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7409 *
7410 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7411 * @internal
7412 */
7413static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7414{
7415 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7416 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7417
7418 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7419 {
7420 /* Only simple transitions for now. */
7421 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7422 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7423 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7424 }
7425 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7426
7427 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7428
7429 /* Update the other variable, ensure it's a stack variable. */
7430 /** @todo handle variables with const values... that'll go boom now. */
7431 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7432 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7433}
7434
7435
7436/**
7437 * Sets the variable to a reference (pointer) to a guest register reference.
7438 *
7439 * This does not require stack storage as we know the value and can always
7440 * reload it. Loading is postponed till needed.
7441 *
7442 * @param pReNative The recompiler state.
7443 * @param idxVar The variable.
7444 * @param enmRegClass The class of guest registers to reference.
7445 * @param idxReg The register within @a enmRegClass to reference.
7446 *
7447 * @throws VERR_IEM_VAR_IPE_2
7448 */
7449DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7450 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7451{
7452 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7453 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7454
7455 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7456 {
7457 /* Only simple transitions for now. */
7458 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7459 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7460 }
7461 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7462
7463 pVar->u.GstRegRef.enmClass = enmRegClass;
7464 pVar->u.GstRegRef.idx = idxReg;
7465}
7466
7467
7468DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7469{
7470 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7471}
7472
7473
7474DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7475{
7476 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7477
7478 /* Since we're using a generic uint64_t value type, we must truncate it if
7479       the variable is smaller, otherwise we may end up with a too large value when
7480       scaling up an imm8 w/ sign-extension.
7481
7482 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7483       in the bios, bx=1) when running on arm, because clang expects 16-bit
7484 register parameters to have bits 16 and up set to zero. Instead of
7485 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7486 CF value in the result. */
7487 switch (cbType)
7488 {
7489 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7490 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7491 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7492 }
7493 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7494 return idxVar;
7495}
7496
7497
7498DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7499{
7500 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7501 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7502 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7503 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7504 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7505 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7506
7507 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7508 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7509 return idxArgVar;
7510}
7511
7512
7513DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7514{
7515 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7516 /* Don't set to stack now, leave that to the first use as for instance
7517 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7518 return idxVar;
7519}
7520
7521
7522DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7523{
7524 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7525
7526 /* Since we're using a generic uint64_t value type, we must truncate it if
7527       the variable is smaller, otherwise we may end up with a too large value when
7528       scaling up an imm8 w/ sign-extension. */
7529 switch (cbType)
7530 {
7531 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7532 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7533 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7534 }
7535 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7536 return idxVar;
7537}
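/* Illustrative (follows directly from the truncation switch above): requesting a 16-bit
   constant with an all-ones 64-bit value, e.g.
       iemNativeVarAllocConst(pReNative, sizeof(uint16_t), UINT64_C(0xffffffffffffffff)),
   stores 0xffff as the variable value, which is what a 16-bit guest operand expects. */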
7538
7539
7540/**
7541 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7542 * fixed till we call iemNativeVarRegisterRelease.
7543 *
7544 * @returns The host register number.
7545 * @param pReNative The recompiler state.
7546 * @param idxVar The variable.
7547 * @param poff Pointer to the instruction buffer offset.
7548 * In case a register needs to be freed up or the value
7549 * loaded off the stack.
7550 * @param fInitialized Set if the variable must already have been initialized.
7551 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7552 * the case.
7553 * @param idxRegPref Preferred register number or UINT8_MAX.
7554 */
7555DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7556 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7557{
7558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7559 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7560 Assert(pVar->cbVar <= 8);
7561 Assert(!pVar->fRegAcquired);
7562
7563 uint8_t idxReg = pVar->idxReg;
7564 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7565 {
7566 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7567 && pVar->enmKind < kIemNativeVarKind_End);
7568 pVar->fRegAcquired = true;
7569 return idxReg;
7570 }
7571
7572 /*
7573 * If the kind of variable has not yet been set, default to 'stack'.
7574 */
7575 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7576 && pVar->enmKind < kIemNativeVarKind_End);
7577 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7578 iemNativeVarSetKindToStack(pReNative, idxVar);
7579
7580 /*
7581 * We have to allocate a register for the variable, even if it's a stack one,
7582 * as we don't know if there are modifications being made to it before it's
7583 * finalized (todo: analyze and insert hints about that?).
7584 *
7585 * If we can, we try to get the correct register for argument variables. This
7586 * is assuming that most argument variables are fetched as close as possible
7587 * to the actual call, so that there aren't any interfering hidden calls
7588 * (memory accesses, etc.) in between.
7589 *
7590 * If we cannot, or it's a local variable, we make sure no argument registers
7591 * that will be used by this MC block are allocated here, and we always
7592 * prefer non-volatile registers to avoid having to spill stuff for internal
7593 * calls.
7594 */
7595 /** @todo Detect too early argument value fetches and warn about hidden
7596 * calls causing less optimal code to be generated in the python script. */
7597
7598 uint8_t const uArgNo = pVar->uArgNo;
7599 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7600 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7601 {
7602 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7603 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7604 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7605 }
7606 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7607 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7608 {
7609 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7610 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7611 & ~pReNative->Core.bmHstRegsWithGstShadow
7612 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7613 & fNotArgsMask;
7614 if (fRegs)
7615 {
7616 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7617 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7618 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7619 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7620 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7621 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7622 }
7623 else
7624 {
7625 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7626 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7627 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7628 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7629 }
7630 }
7631 else
7632 {
7633 idxReg = idxRegPref;
7634 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7635 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7636 }
7637 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7638 pVar->idxReg = idxReg;
7639
7640#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7641 pVar->fSimdReg = false;
7642#endif
7643
7644 /*
7645 * Load it off the stack if we've got a stack slot.
7646 */
7647 uint8_t const idxStackSlot = pVar->idxStackSlot;
7648 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7649 {
7650 Assert(fInitialized);
7651 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7652 switch (pVar->cbVar)
7653 {
7654 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7655 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7656 case 3: AssertFailed(); RT_FALL_THRU();
7657 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7658 default: AssertFailed(); RT_FALL_THRU();
7659 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7660 }
7661 }
7662 else
7663 {
7664 Assert(idxStackSlot == UINT8_MAX);
7665 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7666 }
7667 pVar->fRegAcquired = true;
7668 return idxReg;
7669}
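/* Illustrative usage sketch (assumed caller, not lifted from a specific site in this file;
   idxOtherHstReg is a hypothetical host register and the 'true' argument is fInitialized):
   acquire the variable's host register, emit code using it, then release it again with
   iemNativeVarRegisterRelease so the allocator may reuse or spill it:
       uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
       off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxOtherHstReg, idxVarReg);
 */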
7670
7671
7672#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7673/**
7674 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7675 * fixed till we call iemNativeVarRegisterRelease.
7676 *
7677 * @returns The host register number.
7678 * @param pReNative The recompiler state.
7679 * @param idxVar The variable.
7680 * @param poff Pointer to the instruction buffer offset.
7681 * In case a register needs to be freed up or the value
7682 * loaded off the stack.
7683 * @param fInitialized Set if the variable must already have been initialized.
7684 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7685 * the case.
7686 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7687 */
7688DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7689 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7690{
7691 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7692 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7693 Assert( pVar->cbVar == sizeof(RTUINT128U)
7694 || pVar->cbVar == sizeof(RTUINT256U));
7695 Assert(!pVar->fRegAcquired);
7696
7697 uint8_t idxReg = pVar->idxReg;
7698 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7699 {
7700 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7701 && pVar->enmKind < kIemNativeVarKind_End);
7702 pVar->fRegAcquired = true;
7703 return idxReg;
7704 }
7705
7706 /*
7707 * If the kind of variable has not yet been set, default to 'stack'.
7708 */
7709 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7710 && pVar->enmKind < kIemNativeVarKind_End);
7711 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7712 iemNativeVarSetKindToStack(pReNative, idxVar);
7713
7714 /*
7715 * We have to allocate a register for the variable, even if it's a stack one,
7716 * as we don't know if there are modifications being made to it before it's
7717 * finalized (todo: analyze and insert hints about that?).
7718 *
7719 * If we can, we try to get the correct register for argument variables. This
7720 * is assuming that most argument variables are fetched as close as possible
7721 * to the actual call, so that there aren't any interfering hidden calls
7722 * (memory accesses, etc.) in between.
7723 *
7724 * If we cannot, or it's a local variable, we make sure no argument registers
7725 * that will be used by this MC block are allocated here, and we always
7726 * prefer non-volatile registers to avoid having to spill stuff for internal
7727 * calls.
7728 */
7729 /** @todo Detect too early argument value fetches and warn about hidden
7730 * calls causing less optimal code to be generated in the python script. */
7731
7732 uint8_t const uArgNo = pVar->uArgNo;
7733 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7734
7735 /* SIMD is a bit simpler for now because there is no support for arguments. */
7736 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7737 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7738 {
7739 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7740 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7741 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7742 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7743 & fNotArgsMask;
7744 if (fRegs)
7745 {
7746 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7747 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7748 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7749 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7750 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7751 }
7752 else
7753 {
7754 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7755 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7756 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7757 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7758 }
7759 }
7760 else
7761 {
7762 idxReg = idxRegPref;
7763 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7764 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7765 }
7766 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7767
7768 pVar->fSimdReg = true;
7769 pVar->idxReg = idxReg;
7770
7771 /*
7772 * Load it off the stack if we've got a stack slot.
7773 */
7774 uint8_t const idxStackSlot = pVar->idxStackSlot;
7775 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7776 {
7777 Assert(fInitialized);
7778 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7779 switch (pVar->cbVar)
7780 {
7781 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7782 default: AssertFailed(); RT_FALL_THRU();
7783 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7784 }
7785 }
7786 else
7787 {
7788 Assert(idxStackSlot == UINT8_MAX);
7789 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7790 }
7791 pVar->fRegAcquired = true;
7792 return idxReg;
7793}
7794#endif
7795
7796
7797/**
7798 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7799 * guest register.
7800 *
7801 * This function makes sure there is a register for it and sets it to be the
7802 * current shadow copy of @a enmGstReg.
7803 *
7804 * @returns The host register number.
7805 * @param pReNative The recompiler state.
7806 * @param idxVar The variable.
7807 * @param enmGstReg The guest register this variable will be written to
7808 * after this call.
7809 * @param poff Pointer to the instruction buffer offset.
7810 * In case a register needs to be freed up or if the
7811 * variable content needs to be loaded off the stack.
7812 *
7813 * @note We DO NOT expect @a idxVar to be an argument variable,
7814 * because this function can only be used in the commit stage of an
7815 * instruction.
7816 */
7817DECL_HIDDEN_THROW(uint8_t)
7818iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7819{
7820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7821 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7822 Assert(!pVar->fRegAcquired);
7823 AssertMsgStmt( pVar->cbVar <= 8
7824 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7825 || pVar->enmKind == kIemNativeVarKind_Stack),
7826 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7827 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7828 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7829
7830 /*
7831 * This shouldn't ever be used for arguments, unless it's in a weird else
7832 * branch that doesn't do any calling and even then it's questionable.
7833 *
7834 * However, in case someone writes crazy wrong MC code and does register
7835 * updates before making calls, just use the regular register allocator to
7836 * ensure we get a register suitable for the intended argument number.
7837 */
7838 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7839
7840 /*
7841 * If there is already a register for the variable, we transfer/set the
7842 * guest shadow copy assignment to it.
7843 */
7844 uint8_t idxReg = pVar->idxReg;
7845 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7846 {
7847 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7848 {
7849 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7850 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7851 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7852 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7853 }
7854 else
7855 {
7856 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7857 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7858 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7859 }
7860 /** @todo figure this one out. We need some way of making sure the register isn't
7861 * modified after this point, just in case we start writing crappy MC code. */
7862 pVar->enmGstReg = enmGstReg;
7863 pVar->fRegAcquired = true;
7864 return idxReg;
7865 }
7866 Assert(pVar->uArgNo == UINT8_MAX);
7867
7868 /*
7869 * Because this is supposed to be the commit stage, we just tag along with the
7870 * temporary register allocator and upgrade the register to a variable register.
7871 */
7872 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7873 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7874 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7875 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7876 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7877 pVar->idxReg = idxReg;
7878
7879 /*
7880 * Now we need to load the register value.
7881 */
7882 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7883 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7884 else
7885 {
7886 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7887 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7888 switch (pVar->cbVar)
7889 {
7890 case sizeof(uint64_t):
7891 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7892 break;
7893 case sizeof(uint32_t):
7894 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7895 break;
7896 case sizeof(uint16_t):
7897 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7898 break;
7899 case sizeof(uint8_t):
7900 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7901 break;
7902 default:
7903 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7904 }
7905 }
7906
7907 pVar->fRegAcquired = true;
7908 return idxReg;
7909}
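/* Illustrative commit-stage sketch (assumed caller; the concrete enmGstReg value and the
   code emitted in between are placeholders): the caller acquires a register that shadows
   the guest register about to be overwritten, emits the code producing the final value in
   that register, and then releases the variable again:
       uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar, enmGstReg, &off);
       ... emit code that leaves the new guest register value in idxVarReg ...
       ... then release the variable via iemNativeVarRegisterRelease ...
 */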
7910
7911
7912/**
7913 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7914 *
7915 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7916 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7917 * requirement of flushing anything in volatile host registers when making a
7918 * call.
7919 *
7920 * @returns New @a off value.
7921 * @param pReNative The recompiler state.
7922 * @param off The code buffer position.
7923 * @param fHstRegsNotToSave Set of registers not to save & restore.
7924 */
7925DECL_HIDDEN_THROW(uint32_t)
7926iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7927{
7928 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7929 if (fHstRegs)
7930 {
7931 do
7932 {
7933 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7934 fHstRegs &= ~RT_BIT_32(idxHstReg);
7935
7936 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7937 {
7938 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7939 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7940 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7941 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7942 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7943 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7944 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7945 {
7946 case kIemNativeVarKind_Stack:
7947 {
7948 /* Temporarily spill the variable register. */
7949 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7950 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7951 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7952 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7953 continue;
7954 }
7955
7956 case kIemNativeVarKind_Immediate:
7957 case kIemNativeVarKind_VarRef:
7958 case kIemNativeVarKind_GstRegRef:
7959 /* It is weird to have any of these loaded at this point. */
7960 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7961 continue;
7962
7963 case kIemNativeVarKind_End:
7964 case kIemNativeVarKind_Invalid:
7965 break;
7966 }
7967 AssertFailed();
7968 }
7969 else
7970 {
7971 /*
7972 * Allocate a temporary stack slot and spill the register to it.
7973 */
7974 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7975 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7976 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7977 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7978 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7979 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7980 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7981 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7982 }
7983 } while (fHstRegs);
7984 }
7985#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7986 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7987 if (fHstRegs)
7988 {
7989 do
7990 {
7991 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7992 fHstRegs &= ~RT_BIT_32(idxHstReg);
7993
7994 /*
7995 * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7996 * which would be more difficult due to them spanning multiple stack slots and having different
7997 * sizes (besides, we only have a limited number of slots at the moment). Fixed temporary
7998 * registers don't need saving.
7999 */
8000 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8001 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8002 continue;
8003
8004 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8005
8006 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8007 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8008 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8009 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8010 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8011 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8012 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8013 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8014 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8015 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8016 {
8017 case kIemNativeVarKind_Stack:
8018 {
8019 /* Temporarily spill the variable register. */
8020 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8021 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8022 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8023 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8024 if (cbVar == sizeof(RTUINT128U))
8025 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8026 else
8027 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8028 continue;
8029 }
8030
8031 case kIemNativeVarKind_Immediate:
8032 case kIemNativeVarKind_VarRef:
8033 case kIemNativeVarKind_GstRegRef:
8034 /* It is weird to have any of these loaded at this point. */
8035 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8036 continue;
8037
8038 case kIemNativeVarKind_End:
8039 case kIemNativeVarKind_Invalid:
8040 break;
8041 }
8042 AssertFailed();
8043 } while (fHstRegs);
8044 }
8045#endif
8046 return off;
8047}
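/* Illustrative pairing sketch (assumed call site; pfnSomeHelper and fHstRegsNotToSave are
   placeholders): a TLB-miss style helper call is typically bracketed like this:
       off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
       off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
       off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */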
8048
8049
8050/**
8051 * Emit code to restore volatile registers after a call to a helper.
8052 *
8053 * @returns New @a off value.
8054 * @param pReNative The recompiler state.
8055 * @param off The code buffer position.
8056 * @param fHstRegsNotToSave Set of registers not to save & restore.
8057 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8058 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8059 */
8060DECL_HIDDEN_THROW(uint32_t)
8061iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8062{
8063 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8064 if (fHstRegs)
8065 {
8066 do
8067 {
8068 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8069 fHstRegs &= ~RT_BIT_32(idxHstReg);
8070
8071 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8072 {
8073 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8074 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8075 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8076 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8077 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8078 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8079 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8080 {
8081 case kIemNativeVarKind_Stack:
8082 {
8083 /* Unspill the variable register. */
8084 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8085 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8086 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8087 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8088 continue;
8089 }
8090
8091 case kIemNativeVarKind_Immediate:
8092 case kIemNativeVarKind_VarRef:
8093 case kIemNativeVarKind_GstRegRef:
8094 /* It is weird to have any of these loaded at this point. */
8095 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8096 continue;
8097
8098 case kIemNativeVarKind_End:
8099 case kIemNativeVarKind_Invalid:
8100 break;
8101 }
8102 AssertFailed();
8103 }
8104 else
8105 {
8106 /*
8107 * Restore from temporary stack slot.
8108 */
8109 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8110 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8111 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8112 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8113
8114 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8115 }
8116 } while (fHstRegs);
8117 }
8118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8119 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8120 if (fHstRegs)
8121 {
8122 do
8123 {
8124 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8125 fHstRegs &= ~RT_BIT_32(idxHstReg);
8126
8127 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8128 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8129 continue;
8130 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8131
8132 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8133 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8134 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8135 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8136 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8137 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8138 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8139 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8140 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8141 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8142 {
8143 case kIemNativeVarKind_Stack:
8144 {
8145 /* Unspill the variable register. */
8146 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8147 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8148 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8149 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8150
8151 if (cbVar == sizeof(RTUINT128U))
8152 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8153 else
8154 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8155 continue;
8156 }
8157
8158 case kIemNativeVarKind_Immediate:
8159 case kIemNativeVarKind_VarRef:
8160 case kIemNativeVarKind_GstRegRef:
8161 /* It is weird to have any of these loaded at this point. */
8162 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8163 continue;
8164
8165 case kIemNativeVarKind_End:
8166 case kIemNativeVarKind_Invalid:
8167 break;
8168 }
8169 AssertFailed();
8170 } while (fHstRegs);
8171 }
8172#endif
8173 return off;
8174}
8175
8176
8177/**
8178 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8179 *
8180 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8181 *
8182 * ASSUMES that @a idxVar is valid and unpacked.
8183 */
8184DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8185{
8186 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8187 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8188 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8189 {
8190 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8191 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8192 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
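    /* Illustrative: cbVar = 16 gives cSlots = 2 and fAllocMask = 0x3, which is shifted to
       the variable's idxStackSlot and cleared from bmStack below. */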
8193 Assert(cSlots > 0);
8194 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8195 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8196 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8197 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8198 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8199 }
8200 else
8201 Assert(idxStackSlot == UINT8_MAX);
8202}
8203
8204
8205/**
8206 * Worker that frees a single variable.
8207 *
8208 * ASSUMES that @a idxVar is valid and unpacked.
8209 */
8210DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8211{
8212 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8213 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8214 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8215
8216 /* Free the host register first if any assigned. */
8217 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8218#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8219 if ( idxHstReg != UINT8_MAX
8220 && pReNative->Core.aVars[idxVar].fSimdReg)
8221 {
8222 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8223 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8224 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8225 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8226 }
8227 else
8228#endif
8229 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8230 {
8231 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8232 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8233 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8234 }
8235
8236 /* Free argument mapping. */
8237 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8238 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8239 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8240
8241 /* Free the stack slots. */
8242 iemNativeVarFreeStackSlots(pReNative, idxVar);
8243
8244 /* Free the actual variable. */
8245 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8246 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8247}
8248
8249
8250/**
8251 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8252 */
8253DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8254{
8255 while (bmVars != 0)
8256 {
8257 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8258 bmVars &= ~RT_BIT_32(idxVar);
8259
8260#if 1 /** @todo optimize by simplifying this later... */
8261 iemNativeVarFreeOneWorker(pReNative, idxVar);
8262#else
8263 /* Only need to free the host register, the rest is done as bulk updates below. */
8264 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8265 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8266 {
8267 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8268 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8269 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8270 }
8271#endif
8272 }
8273#if 0 /** @todo optimize by simplifying this later... */
8274 pReNative->Core.bmVars = 0;
8275 pReNative->Core.bmStack = 0;
8276 pReNative->Core.u64ArgVars = UINT64_MAX;
8277#endif
8278}
8279
8280
8281
8282/*********************************************************************************************************************************
8283* Emitters for IEM_MC_CALL_CIMPL_XXX *
8284*********************************************************************************************************************************/
8285
8286/**
8287 * Emits code to load a reference to the given guest register into @a idxGprDst.
8288 */
8289DECL_HIDDEN_THROW(uint32_t)
8290iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8291 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8292{
8293#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8294 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8295#endif
8296
8297 /*
8298 * Get the offset relative to the CPUMCTX structure.
8299 */
8300 uint32_t offCpumCtx;
8301 switch (enmClass)
8302 {
8303 case kIemNativeGstRegRef_Gpr:
8304 Assert(idxRegInClass < 16);
8305 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8306 break;
8307
8308 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8309 Assert(idxRegInClass < 4);
8310 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8311 break;
8312
8313 case kIemNativeGstRegRef_EFlags:
8314 Assert(idxRegInClass == 0);
8315 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8316 break;
8317
8318 case kIemNativeGstRegRef_MxCsr:
8319 Assert(idxRegInClass == 0);
8320 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8321 break;
8322
8323 case kIemNativeGstRegRef_FpuReg:
8324 Assert(idxRegInClass < 8);
8325 AssertFailed(); /** @todo what kind of indexing? */
8326 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8327 break;
8328
8329 case kIemNativeGstRegRef_MReg:
8330 Assert(idxRegInClass < 8);
8331 AssertFailed(); /** @todo what kind of indexing? */
8332 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8333 break;
8334
8335 case kIemNativeGstRegRef_XReg:
8336 Assert(idxRegInClass < 16);
8337 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8338 break;
8339
8340 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8341 Assert(idxRegInClass == 0);
8342 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8343 break;
8344
8345 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8346 Assert(idxRegInClass == 0);
8347 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8348 break;
8349
8350 default:
8351 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8352 }
8353
8354 /*
8355      * Load the address into the destination register.
8356 */
8357#ifdef RT_ARCH_AMD64
8358 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8359
8360#elif defined(RT_ARCH_ARM64)
8361 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8362 Assert(offCpumCtx < 4096);
8363 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8364
8365#else
8366# error "Port me!"
8367#endif
8368
8369 return off;
8370}
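
/*
 * Note: the LEA (AMD64) / ADD (ARM64) emitted above simply forms the host address
 *       of the referenced CPUMCTX member: pVCpu + offsetof(VMCPUCC, cpum.GstCtx) +
 *       offCpumCtx on AMD64, or IEMNATIVE_REG_FIXED_PCPUMCTX + offCpumCtx on ARM64.
 *       A C-level sketch of that arithmetic for a GPR reference; hypothetical helper
 *       name, illustration only, not built:
 */
#if 0
static void *iemNativeExampleGprRefAddr(PVMCPUCC pVCpu, uint8_t idxGpr)
{
    uint32_t const offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxGpr]);
    /* AMD64 path: base is pVCpu, so the CPUMCTX offset within VMCPUCC is added too;
       the ARM64 path starts from the fixed CPUMCTX register and only adds offCpumCtx. */
    return (uint8_t *)pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx;
}
#endif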
8371
8372
8373/**
8374 * Common code for CIMPL and AIMPL calls.
8375 *
8376  * These are calls that use argument variables and such. They should not be
8377  * confused with internal calls required to implement an MC operation,
8378  * such as a TLB load and similar.
8379 *
8380 * Upon return all that is left to do is to load any hidden arguments and
8381 * perform the call. All argument variables are freed.
8382 *
8383 * @returns New code buffer offset; throws VBox status code on error.
8384 * @param pReNative The native recompile state.
8385 * @param off The code buffer offset.
8386  * @param   cArgs           The total number of arguments (includes hidden
8387  *                          count).
8388 * @param cHiddenArgs The number of hidden arguments. The hidden
8389 * arguments must not have any variable declared for
8390 * them, whereas all the regular arguments must
8391 * (tstIEMCheckMc ensures this).
8392 */
8393DECL_HIDDEN_THROW(uint32_t)
8394iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8395{
8396#ifdef VBOX_STRICT
8397 /*
8398 * Assert sanity.
8399 */
8400 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8401 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8402 for (unsigned i = 0; i < cHiddenArgs; i++)
8403 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8404 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8405 {
8406 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8407 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8408 }
8409 iemNativeRegAssertSanity(pReNative);
8410#endif
8411
8412 /* We don't know what the called function makes use of, so flush any pending register writes. */
8413 off = iemNativeRegFlushPendingWrites(pReNative, off);
8414
8415 /*
8416 * Before we do anything else, go over variables that are referenced and
8417 * make sure they are not in a register.
8418 */
8419 uint32_t bmVars = pReNative->Core.bmVars;
8420 if (bmVars)
8421 {
8422 do
8423 {
8424 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8425 bmVars &= ~RT_BIT_32(idxVar);
8426
8427 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8428 {
8429 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8430#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8431 if ( idxRegOld != UINT8_MAX
8432 && pReNative->Core.aVars[idxVar].fSimdReg)
8433 {
8434 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8435 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8436
8437 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8438 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8439 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8440 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8441 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8442 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8443 else
8444 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8445
8446 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8447 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8448
8449 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8450 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8451 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8452 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8453 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8454 }
8455 else
8456#endif
8457 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8458 {
8459 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8460 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8461 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8462 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8463 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8464
8465 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8466 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8467 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8468 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8469 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8470 }
8471 }
8472 } while (bmVars != 0);
8473#if 0 //def VBOX_STRICT
8474 iemNativeRegAssertSanity(pReNative);
8475#endif
8476 }
8477
8478 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8479
8480 /*
8481 * First, go over the host registers that will be used for arguments and make
8482 * sure they either hold the desired argument or are free.
8483 */
8484 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8485 {
8486 for (uint32_t i = 0; i < cRegArgs; i++)
8487 {
8488 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8489 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8490 {
8491 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8492 {
8493 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8495 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8496 Assert(pVar->idxReg == idxArgReg);
8497 uint8_t const uArgNo = pVar->uArgNo;
8498 if (uArgNo == i)
8499                     { /* perfect */ }
8500 /* The variable allocator logic should make sure this is impossible,
8501 except for when the return register is used as a parameter (ARM,
8502 but not x86). */
8503#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8504 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8505 {
8506# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8507# error "Implement this"
8508# endif
8509 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8510 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8511 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8512 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8513 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8514 }
8515#endif
8516 else
8517 {
8518 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8519
8520 if (pVar->enmKind == kIemNativeVarKind_Stack)
8521 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8522 else
8523 {
8524 /* just free it, can be reloaded if used again */
8525 pVar->idxReg = UINT8_MAX;
8526 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8527 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8528 }
8529 }
8530 }
8531 else
8532 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8533 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8534 }
8535 }
8536#if 0 //def VBOX_STRICT
8537 iemNativeRegAssertSanity(pReNative);
8538#endif
8539 }
8540
8541 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8542
8543#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8544 /*
8545 * If there are any stack arguments, make sure they are in their place as well.
8546 *
8547      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8548      * the caller) will be loading it later and it must be free (see the first loop).
8549 */
8550 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8551 {
8552 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8553 {
8554 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8555 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8556 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8557 {
8558 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8559 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8560 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8561 pVar->idxReg = UINT8_MAX;
8562 }
8563 else
8564 {
8565 /* Use ARG0 as temp for stuff we need registers for. */
8566 switch (pVar->enmKind)
8567 {
8568 case kIemNativeVarKind_Stack:
8569 {
8570 uint8_t const idxStackSlot = pVar->idxStackSlot;
8571 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8572 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8573 iemNativeStackCalcBpDisp(idxStackSlot));
8574 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8575 continue;
8576 }
8577
8578 case kIemNativeVarKind_Immediate:
8579 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8580 continue;
8581
8582 case kIemNativeVarKind_VarRef:
8583 {
8584 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8585 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8586 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8587 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8588 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8589# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8590 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8591 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8592 if ( fSimdReg
8593 && idxRegOther != UINT8_MAX)
8594 {
8595 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8596 if (cbVar == sizeof(RTUINT128U))
8597 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8598 else
8599 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8600 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8601 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8602 }
8603 else
8604# endif
8605 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8606 {
8607 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8608 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8609 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8610 }
8611 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8612 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8613 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8614 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8615 continue;
8616 }
8617
8618 case kIemNativeVarKind_GstRegRef:
8619 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8620 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8621 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8622 continue;
8623
8624 case kIemNativeVarKind_Invalid:
8625 case kIemNativeVarKind_End:
8626 break;
8627 }
8628 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8629 }
8630 }
8631# if 0 //def VBOX_STRICT
8632 iemNativeRegAssertSanity(pReNative);
8633# endif
8634 }
8635#else
8636 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8637#endif
8638
8639 /*
8640 * Make sure the argument variables are loaded into their respective registers.
8641 *
8642 * We can optimize this by ASSUMING that any register allocations are for
8643      * registers that have already been loaded and are ready. The previous step
8644 * saw to that.
8645 */
8646 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8647 {
8648 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8649 {
8650 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8651 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8652 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8653 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8654 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8655 else
8656 {
8657 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8658 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8659 {
8660 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8661 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8662 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8663 | RT_BIT_32(idxArgReg);
8664 pVar->idxReg = idxArgReg;
8665 }
8666 else
8667 {
8668 /* Use ARG0 as temp for stuff we need registers for. */
8669 switch (pVar->enmKind)
8670 {
8671 case kIemNativeVarKind_Stack:
8672 {
8673 uint8_t const idxStackSlot = pVar->idxStackSlot;
8674 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8675 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8676 continue;
8677 }
8678
8679 case kIemNativeVarKind_Immediate:
8680 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8681 continue;
8682
8683 case kIemNativeVarKind_VarRef:
8684 {
8685 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8686 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8687 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8688 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8689 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8690 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8691#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8692 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8693 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8694 if ( fSimdReg
8695 && idxRegOther != UINT8_MAX)
8696 {
8697 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8698 if (cbVar == sizeof(RTUINT128U))
8699 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8700 else
8701 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8702 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8703 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8704 }
8705 else
8706#endif
8707 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8708 {
8709 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8710 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8711 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8712 }
8713 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8714 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8715 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8716 continue;
8717 }
8718
8719 case kIemNativeVarKind_GstRegRef:
8720 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8721 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8722 continue;
8723
8724 case kIemNativeVarKind_Invalid:
8725 case kIemNativeVarKind_End:
8726 break;
8727 }
8728 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8729 }
8730 }
8731 }
8732#if 0 //def VBOX_STRICT
8733 iemNativeRegAssertSanity(pReNative);
8734#endif
8735 }
8736#ifdef VBOX_STRICT
8737 else
8738 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8739 {
8740 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8741 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8742 }
8743#endif
8744
8745 /*
8746 * Free all argument variables (simplified).
8747 * Their lifetime always expires with the call they are for.
8748 */
8749 /** @todo Make the python script check that arguments aren't used after
8750 * IEM_MC_CALL_XXXX. */
8751     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8752      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8753      *        an argument value. There is also some FPU stuff. */
8754 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8755 {
8756 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8757 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8758
8759 /* no need to free registers: */
8760 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8761 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8762 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8763 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8764 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8765 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8766
8767 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8768 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8769 iemNativeVarFreeStackSlots(pReNative, idxVar);
8770 }
8771 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8772
8773 /*
8774 * Flush volatile registers as we make the call.
8775 */
8776 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8777
8778 return off;
8779}
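
/*
 * Note: stripped of stack slots, SIMD registers, shadow copies and hidden arguments,
 *       the marshalling above boils down to two passes: first make sure no argument
 *       register is occupied by the wrong variable, then load each argument register
 *       from its variable. A deliberately simplified, standalone sketch of that idea
 *       with toy types; illustration only, not built:
 */
#if 0
typedef struct EXAMPLEARGVAR
{
    uint64_t uValue;    /* the variable's current value (think: spilled copy)  */
    uint8_t  idxReg;    /* register currently holding it, UINT8_MAX if spilled */
} EXAMPLEARGVAR;

static void iemNativeExampleMarshalArgs(EXAMPLEARGVAR *paVars, uint8_t cVars,
                                        uint8_t const *paidxArgVars, uint8_t cArgs, uint64_t *pauArgRegs)
{
    /* Pass 1: evict any variable occupying an argument register it doesn't belong in. */
    for (uint8_t idxVar = 0; idxVar < cVars; idxVar++)
        if (   paVars[idxVar].idxReg < cArgs                   /* sits in an argument register...    */
            && paidxArgVars[paVars[idxVar].idxReg] != idxVar)  /* ...but isn't that argument         */
            paVars[idxVar].idxReg = UINT8_MAX;                 /* "spill"; uValue still has the data */

    /* Pass 2: load each argument register from its variable. */
    for (uint8_t i = 0; i < cArgs; i++)
    {
        pauArgRegs[i]                  = paVars[paidxArgVars[i]].uValue;
        paVars[paidxArgVars[i]].idxReg = i;
    }
}
#endif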
8780
8781
8782
8783/*********************************************************************************************************************************
8784* TLB Lookup. *
8785*********************************************************************************************************************************/
8786
8787/**
8788 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8789 */
8790DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8791{
8792 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8793 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8794 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8795 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8796
8797 /* Do the lookup manually. */
8798 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8799 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8800 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8801 if (RT_LIKELY(pTlbe->uTag == uTag))
8802 {
8803 /*
8804 * Check TLB page table level access flags.
8805 */
8806 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8807 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8808 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8809 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8810 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8811 | IEMTLBE_F_PG_UNASSIGNED
8812 | IEMTLBE_F_PT_NO_ACCESSED
8813 | fNoWriteNoDirty | fNoUser);
8814 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8815 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8816 {
8817 /*
8818 * Return the address.
8819 */
8820 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8821 if ((uintptr_t)pbAddr == uResult)
8822 return;
8823 RT_NOREF(cbMem);
8824 AssertFailed();
8825 }
8826 else
8827 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8828 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8829 }
8830 else
8831 AssertFailed();
8832 RT_BREAKPOINT();
8833}
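
/*
 * Note: a toy model of the check above - direct mapped TLB, 4 KiB pages - to show
 *       the tag + flags/phys-rev comparison pattern. The tag and index calculations
 *       below are made up for the example and do not match the real IEMTLB_CALC_TAG
 *       / IEMTLB_TAG_TO_ENTRY macros; fCheckMask is assumed to contain the physical
 *       revision bits plus the access-deny bits of interest. Illustration only:
 */
#if 0
typedef struct EXAMPLETLBE
{
    uint64_t uTag;              /* revision | page number                    */
    uint64_t fFlagsAndPhysRev;  /* access-deny bits + physical revision bits */
    uint8_t *pbMappingR3;       /* host mapping of the guest page            */
} EXAMPLETLBE;

static uint8_t *iemNativeExampleTlbLookup(EXAMPLETLBE const *paEntries, uint64_t uTlbRevision,
                                          uint64_t uTlbPhysRev, uint64_t fCheckMask, uint64_t GCPtrFlat)
{
    uint64_t const            uTag  = uTlbRevision | (GCPtrFlat >> 12); /* toy tag: revision + page no.   */
    EXAMPLETLBE const * const pTlbe = &paEntries[uTag & 255];           /* toy: 256 direct-mapped entries */
    if (   pTlbe->uTag == uTag                                          /* right page, current revision?  */
        && (pTlbe->fFlagsAndPhysRev & fCheckMask) == uTlbPhysRev)       /* no deny bits, phys rev matches */
        return &pTlbe->pbMappingR3[GCPtrFlat & 0xfff];                  /* hit: host address of the byte  */
    return NULL;                                                        /* miss: caller takes slow path   */
}
#endif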
8834
8835/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8836
8837
8838
8839/*********************************************************************************************************************************
8840* Recompiler Core. *
8841*********************************************************************************************************************************/
8842
8843/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8844static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8845{
8846 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8847 pDis->cbCachedInstr += cbMaxRead;
8848 RT_NOREF(cbMinRead);
8849 return VERR_NO_DATA;
8850}
8851
8852
8853DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8854{
8855 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8856 {
8857#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8858 ENTRY(fLocalForcedActions),
8859 ENTRY(iem.s.rcPassUp),
8860 ENTRY(iem.s.fExec),
8861 ENTRY(iem.s.pbInstrBuf),
8862 ENTRY(iem.s.uInstrBufPc),
8863 ENTRY(iem.s.GCPhysInstrBuf),
8864 ENTRY(iem.s.cbInstrBufTotal),
8865 ENTRY(iem.s.idxTbCurInstr),
8866#ifdef VBOX_WITH_STATISTICS
8867 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8868 ENTRY(iem.s.StatNativeTlbHitsForStore),
8869 ENTRY(iem.s.StatNativeTlbHitsForStack),
8870 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8871 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8872 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8873 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8874 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8875#endif
8876 ENTRY(iem.s.DataTlb.aEntries),
8877 ENTRY(iem.s.DataTlb.uTlbRevision),
8878 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8879 ENTRY(iem.s.DataTlb.cTlbHits),
8880 ENTRY(iem.s.CodeTlb.aEntries),
8881 ENTRY(iem.s.CodeTlb.uTlbRevision),
8882 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8883 ENTRY(iem.s.CodeTlb.cTlbHits),
8884 ENTRY(pVMR3),
8885 ENTRY(cpum.GstCtx.rax),
8886 ENTRY(cpum.GstCtx.ah),
8887 ENTRY(cpum.GstCtx.rcx),
8888 ENTRY(cpum.GstCtx.ch),
8889 ENTRY(cpum.GstCtx.rdx),
8890 ENTRY(cpum.GstCtx.dh),
8891 ENTRY(cpum.GstCtx.rbx),
8892 ENTRY(cpum.GstCtx.bh),
8893 ENTRY(cpum.GstCtx.rsp),
8894 ENTRY(cpum.GstCtx.rbp),
8895 ENTRY(cpum.GstCtx.rsi),
8896 ENTRY(cpum.GstCtx.rdi),
8897 ENTRY(cpum.GstCtx.r8),
8898 ENTRY(cpum.GstCtx.r9),
8899 ENTRY(cpum.GstCtx.r10),
8900 ENTRY(cpum.GstCtx.r11),
8901 ENTRY(cpum.GstCtx.r12),
8902 ENTRY(cpum.GstCtx.r13),
8903 ENTRY(cpum.GstCtx.r14),
8904 ENTRY(cpum.GstCtx.r15),
8905 ENTRY(cpum.GstCtx.es.Sel),
8906 ENTRY(cpum.GstCtx.es.u64Base),
8907 ENTRY(cpum.GstCtx.es.u32Limit),
8908 ENTRY(cpum.GstCtx.es.Attr),
8909 ENTRY(cpum.GstCtx.cs.Sel),
8910 ENTRY(cpum.GstCtx.cs.u64Base),
8911 ENTRY(cpum.GstCtx.cs.u32Limit),
8912 ENTRY(cpum.GstCtx.cs.Attr),
8913 ENTRY(cpum.GstCtx.ss.Sel),
8914 ENTRY(cpum.GstCtx.ss.u64Base),
8915 ENTRY(cpum.GstCtx.ss.u32Limit),
8916 ENTRY(cpum.GstCtx.ss.Attr),
8917 ENTRY(cpum.GstCtx.ds.Sel),
8918 ENTRY(cpum.GstCtx.ds.u64Base),
8919 ENTRY(cpum.GstCtx.ds.u32Limit),
8920 ENTRY(cpum.GstCtx.ds.Attr),
8921 ENTRY(cpum.GstCtx.fs.Sel),
8922 ENTRY(cpum.GstCtx.fs.u64Base),
8923 ENTRY(cpum.GstCtx.fs.u32Limit),
8924 ENTRY(cpum.GstCtx.fs.Attr),
8925 ENTRY(cpum.GstCtx.gs.Sel),
8926 ENTRY(cpum.GstCtx.gs.u64Base),
8927 ENTRY(cpum.GstCtx.gs.u32Limit),
8928 ENTRY(cpum.GstCtx.gs.Attr),
8929 ENTRY(cpum.GstCtx.rip),
8930 ENTRY(cpum.GstCtx.eflags),
8931 ENTRY(cpum.GstCtx.uRipInhibitInt),
8932 ENTRY(cpum.GstCtx.cr0),
8933 ENTRY(cpum.GstCtx.cr4),
8934 ENTRY(cpum.GstCtx.aXcr[0]),
8935 ENTRY(cpum.GstCtx.aXcr[1]),
8936#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8937 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8938 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8939 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8940 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8941 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8942 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8943 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8944 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8945 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8946 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8947 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8948 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8949 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8950 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8951 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8952 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8953 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8954 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8955 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8956 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8957 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8958 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8959 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8960 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8961 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8962 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8963 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8964 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8965 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8966 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8967 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8968 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8969#endif
8970#undef ENTRY
8971 };
8972#ifdef VBOX_STRICT
8973 static bool s_fOrderChecked = false;
8974 if (!s_fOrderChecked)
8975 {
8976 s_fOrderChecked = true;
8977 uint32_t offPrev = s_aMembers[0].off;
8978 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8979 {
8980 Assert(s_aMembers[i].off > offPrev);
8981 offPrev = s_aMembers[i].off;
8982 }
8983 }
8984#endif
8985
8986 /*
8987 * Binary lookup.
8988 */
8989 unsigned iStart = 0;
8990 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8991 for (;;)
8992 {
8993 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8994 uint32_t const offCur = s_aMembers[iCur].off;
8995 if (off < offCur)
8996 {
8997 if (iCur != iStart)
8998 iEnd = iCur;
8999 else
9000 break;
9001 }
9002 else if (off > offCur)
9003 {
9004 if (iCur + 1 < iEnd)
9005 iStart = iCur + 1;
9006 else
9007 break;
9008 }
9009 else
9010 return s_aMembers[iCur].pszName;
9011 }
9012#ifdef VBOX_WITH_STATISTICS
9013 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9014 return "iem.s.acThreadedFuncStats[iFn]";
9015#endif
9016 return NULL;
9017}
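
/*
 * Note: the lookup above is an exact-match binary search over the ascending
 *       s_aMembers table (plus the threaded-function-stats range special case).
 *       The same algorithm in compact, standalone form; illustration only:
 */
#if 0
typedef struct EXAMPLEMEMBER { uint32_t off; const char *pszName; } EXAMPLEMEMBER;

static const char *iemNativeExampleOffsetToName(EXAMPLEMEMBER const *paMembers, unsigned cMembers, uint32_t off)
{
    unsigned iStart = 0;
    unsigned iEnd   = cMembers;
    while (iStart < iEnd)
    {
        unsigned const iCur = iStart + (iEnd - iStart) / 2;
        if (off < paMembers[iCur].off)
            iEnd = iCur;                    /* continue in the lower half  */
        else if (off > paMembers[iCur].off)
            iStart = iCur + 1;              /* continue in the upper half  */
        else
            return paMembers[iCur].pszName; /* exact hit                   */
    }
    return NULL;                            /* no member starts at 'off'   */
}
#endif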
9018
9019
9020DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9021{
9022 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9023#if defined(RT_ARCH_AMD64)
9024 static const char * const a_apszMarkers[] =
9025 {
9026 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9027 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9028 };
9029#endif
9030
9031 char szDisBuf[512];
9032 DISSTATE Dis;
9033 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9034 uint32_t const cNative = pTb->Native.cInstructions;
9035 uint32_t offNative = 0;
9036#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9037 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9038#endif
9039 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9040 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9041 : DISCPUMODE_64BIT;
9042#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9043 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9044#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9045 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9046#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9047# error "Port me"
9048#else
9049 csh hDisasm = ~(size_t)0;
9050# if defined(RT_ARCH_AMD64)
9051 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9052# elif defined(RT_ARCH_ARM64)
9053 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9054# else
9055# error "Port me"
9056# endif
9057 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9058
9059 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9060 //Assert(rcCs == CS_ERR_OK);
9061#endif
9062
9063 /*
9064 * Print TB info.
9065 */
9066 pHlp->pfnPrintf(pHlp,
9067 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9068 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9069 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9070 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9071#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9072 if (pDbgInfo && pDbgInfo->cEntries > 1)
9073 {
9074 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9075
9076 /*
9077          * This disassembly is driven by the debug info, which follows the native
9078          * code and indicates where the next guest instruction starts, where the
9079          * labels are, and similar details.
9080 */
9081 uint32_t idxThreadedCall = 0;
9082 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9083 uint8_t idxRange = UINT8_MAX;
9084 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9085 uint32_t offRange = 0;
9086 uint32_t offOpcodes = 0;
9087 uint32_t const cbOpcodes = pTb->cbOpcodes;
9088 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9089 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9090 uint32_t iDbgEntry = 1;
9091 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9092
9093 while (offNative < cNative)
9094 {
9095 /* If we're at or have passed the point where the next chunk of debug
9096 info starts, process it. */
9097 if (offDbgNativeNext <= offNative)
9098 {
9099 offDbgNativeNext = UINT32_MAX;
9100 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9101 {
9102 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9103 {
9104 case kIemTbDbgEntryType_GuestInstruction:
9105 {
9106 /* Did the exec flag change? */
9107 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9108 {
9109 pHlp->pfnPrintf(pHlp,
9110 " fExec change %#08x -> %#08x %s\n",
9111 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9112 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9113 szDisBuf, sizeof(szDisBuf)));
9114 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9115 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9116 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9117 : DISCPUMODE_64BIT;
9118 }
9119
9120                         /* New opcode range? We need to fend off a spurious debug info entry here for cases
9121 where the compilation was aborted before the opcode was recorded and the actual
9122 instruction was translated to a threaded call. This may happen when we run out
9123 of ranges, or when some complicated interrupts/FFs are found to be pending or
9124 similar. So, we just deal with it here rather than in the compiler code as it
9125 is a lot simpler to do here. */
9126 if ( idxRange == UINT8_MAX
9127 || idxRange >= cRanges
9128 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9129 {
9130 idxRange += 1;
9131 if (idxRange < cRanges)
9132 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9133 else
9134 continue;
9135 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9136 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9137 + (pTb->aRanges[idxRange].idxPhysPage == 0
9138 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9139 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9140 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9141 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9142 pTb->aRanges[idxRange].idxPhysPage);
9143 GCPhysPc += offRange;
9144 }
9145
9146 /* Disassemble the instruction. */
9147 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9148 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9149 uint32_t cbInstr = 1;
9150 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9151 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9152 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9153 if (RT_SUCCESS(rc))
9154 {
9155 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9156 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9157 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9158 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9159
9160 static unsigned const s_offMarker = 55;
9161 static char const s_szMarker[] = " ; <--- guest";
9162 if (cch < s_offMarker)
9163 {
9164 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9165 cch = s_offMarker;
9166 }
9167 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9168 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9169
9170 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9171 }
9172 else
9173 {
9174 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9175 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9176 cbInstr = 1;
9177 }
9178 GCPhysPc += cbInstr;
9179 offOpcodes += cbInstr;
9180 offRange += cbInstr;
9181 continue;
9182 }
9183
9184 case kIemTbDbgEntryType_ThreadedCall:
9185 pHlp->pfnPrintf(pHlp,
9186 " Call #%u to %s (%u args) - %s\n",
9187 idxThreadedCall,
9188 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9189 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9190 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9191 idxThreadedCall++;
9192 continue;
9193
9194 case kIemTbDbgEntryType_GuestRegShadowing:
9195 {
9196 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9197 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9198 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9199 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9200 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9201 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9202 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9203 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9204 else
9205 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9206 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9207 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9208 continue;
9209 }
9210
9211#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9212 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9213 {
9214 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9215 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9216 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9217 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9218 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9219 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9220 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9221 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9222 else
9223 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9224 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9225 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9226 continue;
9227 }
9228#endif
9229
9230 case kIemTbDbgEntryType_Label:
9231 {
9232 const char *pszName = "what_the_fudge";
9233 const char *pszComment = "";
9234 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9235 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9236 {
9237 case kIemNativeLabelType_Return: pszName = "Return"; break;
9238 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9239 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9240 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9241 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9242 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9243 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9244 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9245 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9246 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9247 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9248 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9249 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9250 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9251 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9252 case kIemNativeLabelType_If:
9253 pszName = "If";
9254 fNumbered = true;
9255 break;
9256 case kIemNativeLabelType_Else:
9257 pszName = "Else";
9258 fNumbered = true;
9259 pszComment = " ; regs state restored pre-if-block";
9260 break;
9261 case kIemNativeLabelType_Endif:
9262 pszName = "Endif";
9263 fNumbered = true;
9264 break;
9265 case kIemNativeLabelType_CheckIrq:
9266 pszName = "CheckIrq_CheckVM";
9267 fNumbered = true;
9268 break;
9269 case kIemNativeLabelType_TlbLookup:
9270 pszName = "TlbLookup";
9271 fNumbered = true;
9272 break;
9273 case kIemNativeLabelType_TlbMiss:
9274 pszName = "TlbMiss";
9275 fNumbered = true;
9276 break;
9277 case kIemNativeLabelType_TlbDone:
9278 pszName = "TlbDone";
9279 fNumbered = true;
9280 break;
9281 case kIemNativeLabelType_Invalid:
9282 case kIemNativeLabelType_End:
9283 break;
9284 }
9285 if (fNumbered)
9286 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9287 else
9288 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9289 continue;
9290 }
9291
9292 case kIemTbDbgEntryType_NativeOffset:
9293 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9294 Assert(offDbgNativeNext > offNative);
9295 break;
9296
9297#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9298 case kIemTbDbgEntryType_DelayedPcUpdate:
9299 pHlp->pfnPrintf(pHlp,
9300 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9301 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9302 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9303 continue;
9304#endif
9305
9306 default:
9307 AssertFailed();
9308 }
9309 iDbgEntry++;
9310 break;
9311 }
9312 }
9313
9314 /*
9315 * Disassemble the next native instruction.
9316 */
9317 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9318# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9319 uint32_t cbInstr = sizeof(paNative[0]);
9320 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9321 if (RT_SUCCESS(rc))
9322 {
9323# if defined(RT_ARCH_AMD64)
9324 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9325 {
9326 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9327 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9328 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9329 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9330 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9331 uInfo & 0x8000 ? "recompiled" : "todo");
9332 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9333 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9334 else
9335 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9336 }
9337 else
9338# endif
9339 {
9340 const char *pszAnnotation = NULL;
9341# ifdef RT_ARCH_AMD64
9342 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9343 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9344 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9345 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9346 PCDISOPPARAM pMemOp;
9347 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9348 pMemOp = &Dis.Param1;
9349 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9350 pMemOp = &Dis.Param2;
9351 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9352 pMemOp = &Dis.Param3;
9353 else
9354 pMemOp = NULL;
9355 if ( pMemOp
9356 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9357 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9358 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9359 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9360
9361#elif defined(RT_ARCH_ARM64)
9362 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9363 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9364 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9365# else
9366# error "Port me"
9367# endif
9368 if (pszAnnotation)
9369 {
9370 static unsigned const s_offAnnotation = 55;
9371 size_t const cchAnnotation = strlen(pszAnnotation);
9372 size_t cchDis = strlen(szDisBuf);
9373 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9374 {
9375 if (cchDis < s_offAnnotation)
9376 {
9377 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9378 cchDis = s_offAnnotation;
9379 }
9380 szDisBuf[cchDis++] = ' ';
9381 szDisBuf[cchDis++] = ';';
9382 szDisBuf[cchDis++] = ' ';
9383 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9384 }
9385 }
9386 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9387 }
9388 }
9389 else
9390 {
9391# if defined(RT_ARCH_AMD64)
9392 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9393 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9394# elif defined(RT_ARCH_ARM64)
9395 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9396# else
9397# error "Port me"
9398# endif
9399 cbInstr = sizeof(paNative[0]);
9400 }
9401 offNative += cbInstr / sizeof(paNative[0]);
9402
9403# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9404 cs_insn *pInstr;
9405 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9406 (uintptr_t)pNativeCur, 1, &pInstr);
9407 if (cInstrs > 0)
9408 {
9409 Assert(cInstrs == 1);
9410 const char *pszAnnotation = NULL;
9411# if defined(RT_ARCH_ARM64)
9412 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9413 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9414 {
9415                     /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9416 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9417 char *psz = strchr(pInstr->op_str, '[');
9418 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9419 {
9420 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9421 int32_t off = -1;
9422 psz += 4;
9423 if (*psz == ']')
9424 off = 0;
9425 else if (*psz == ',')
9426 {
9427 psz = RTStrStripL(psz + 1);
9428 if (*psz == '#')
9429 off = RTStrToInt32(&psz[1]);
9430 /** @todo deal with index registers and LSL as well... */
9431 }
9432 if (off >= 0)
9433 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9434 }
9435 }
9436# endif
9437
9438 size_t const cchOp = strlen(pInstr->op_str);
9439# if defined(RT_ARCH_AMD64)
9440 if (pszAnnotation)
9441 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9442 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9443 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9444 else
9445 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9446 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9447
9448# else
9449 if (pszAnnotation)
9450 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9451 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9452 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9453 else
9454 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9455 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9456# endif
9457 offNative += pInstr->size / sizeof(*pNativeCur);
9458 cs_free(pInstr, cInstrs);
9459 }
9460 else
9461 {
9462# if defined(RT_ARCH_AMD64)
9463 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9464                             pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9465# else
9466 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9467# endif
9468 offNative++;
9469 }
9470# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9471 }
9472 }
9473 else
9474#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9475 {
9476 /*
9477 * No debug info, just disassemble the x86 code and then the native code.
9478 *
9479 * First the guest code:
9480 */
9481 for (unsigned i = 0; i < pTb->cRanges; i++)
9482 {
9483 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9484 + (pTb->aRanges[i].idxPhysPage == 0
9485 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9486 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9487 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9488 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9489 unsigned off = pTb->aRanges[i].offOpcodes;
9490 /** @todo this ain't working when crossing pages! */
9491 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9492 while (off < cbOpcodes)
9493 {
9494 uint32_t cbInstr = 1;
9495 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9496 &pTb->pabOpcodes[off], cbOpcodes - off,
9497 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9498 if (RT_SUCCESS(rc))
9499 {
9500 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9501 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9502 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9503 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9504 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9505 GCPhysPc += cbInstr;
9506 off += cbInstr;
9507 }
9508 else
9509 {
9510 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9511 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9512 break;
9513 }
9514 }
9515 }
9516
9517 /*
9518 * Then the native code:
9519 */
9520 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9521 while (offNative < cNative)
9522 {
9523 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9524# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9525 uint32_t cbInstr = sizeof(paNative[0]);
9526 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9527 if (RT_SUCCESS(rc))
9528 {
9529# if defined(RT_ARCH_AMD64)
9530 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9531 {
9532 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9533 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9534 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9535 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9536 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9537 uInfo & 0x8000 ? "recompiled" : "todo");
9538 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9539 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9540 else
9541 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9542 }
9543 else
9544# endif
9545 {
9546# ifdef RT_ARCH_AMD64
9547 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9548 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9549 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9550 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9551# elif defined(RT_ARCH_ARM64)
9552 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9553 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9554 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9555# else
9556# error "Port me"
9557# endif
9558 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9559 }
9560 }
9561 else
9562 {
9563# if defined(RT_ARCH_AMD64)
9564 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9565 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9566# else
9567 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9568# endif
9569 cbInstr = sizeof(paNative[0]);
9570 }
9571 offNative += cbInstr / sizeof(paNative[0]);
9572
9573# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9574 cs_insn *pInstr;
9575 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9576 (uintptr_t)pNativeCur, 1, &pInstr);
9577 if (cInstrs > 0)
9578 {
9579 Assert(cInstrs == 1);
9580# if defined(RT_ARCH_AMD64)
9581 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9582 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9583# else
9584 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9585 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9586# endif
9587 offNative += pInstr->size / sizeof(*pNativeCur);
9588 cs_free(pInstr, cInstrs);
9589 }
9590 else
9591 {
9592# if defined(RT_ARCH_AMD64)
9593 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9594                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9595# else
9596 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9597# endif
9598 offNative++;
9599 }
9600# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9601 }
9602 }
9603
9604#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9605 /* Cleanup. */
9606 cs_close(&hDisasm);
9607#endif
9608}
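
/*
 * Note: both the " ; <--- guest" marker and the vCPU member annotations above use
 *       the same trick: pad the disassembly string with spaces up to a fixed column,
 *       then append " ; <annotation>". A standalone sketch of that formatting step;
 *       hypothetical helper, illustration only, not built:
 */
#if 0
static void iemNativeExampleAppendAnnotation(char *pszBuf, size_t cbBuf, const char *pszAnnotation)
{
    static unsigned const s_offColumn = 55;
    size_t                cch         = strlen(pszBuf);
    size_t const          cchAnn      = strlen(pszAnnotation);
    if (RT_MAX(cch, s_offColumn) + sizeof(" ; ") + cchAnn <= cbBuf)
    {
        if (cch < s_offColumn)
        {
            memset(&pszBuf[cch], ' ', s_offColumn - cch);   /* pad out to the annotation column */
            cch = s_offColumn;
        }
        pszBuf[cch++] = ' ';
        pszBuf[cch++] = ';';
        pszBuf[cch++] = ' ';
        memcpy(&pszBuf[cch], pszAnnotation, cchAnn + 1);    /* copy including the terminator */
    }
}
#endif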
9609
9610
9611/**
9612 * Recompiles the given threaded TB into a native one.
9613 *
9614 * In case of failure the translation block will be returned as-is.
9615 *
9616 * @returns pTb.
9617 * @param pVCpu The cross context virtual CPU structure of the calling
9618 * thread.
9619 * @param pTb The threaded translation to recompile to native.
9620 */
9621DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9622{
9623 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9624
9625 /*
9626      * The first time through, we allocate the recompiler state; the other times
9627      * we just need to reset it before using it again.
9628 */
9629 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9630 if (RT_LIKELY(pReNative))
9631 iemNativeReInit(pReNative, pTb);
9632 else
9633 {
9634 pReNative = iemNativeInit(pVCpu, pTb);
9635 AssertReturn(pReNative, pTb);
9636 }
9637
9638#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9639 /*
9640 * First do liveness analysis. This is done backwards.
9641 */
9642 {
9643 uint32_t idxCall = pTb->Thrd.cCalls;
9644 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9645 { /* likely */ }
9646 else
9647 {
9648 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9649 while (idxCall > cAlloc)
9650 cAlloc *= 2;
9651 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9652 AssertReturn(pvNew, pTb);
9653 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9654 pReNative->cLivenessEntriesAlloc = cAlloc;
9655 }
9656 AssertReturn(idxCall > 0, pTb);
9657 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9658
9659 /* The initial (final) entry. */
9660 idxCall--;
9661 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9662
9663 /* Loop backwards thru the calls and fill in the other entries. */
9664 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9665 while (idxCall > 0)
9666 {
9667 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9668 if (pfnLiveness)
9669 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9670 else
9671 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9672 pCallEntry--;
9673 idxCall--;
9674 }
9675
9676# ifdef VBOX_WITH_STATISTICS
9677         /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9678            to 'clobbered' rather than 'input'. */
9679 /** @todo */
9680# endif
9681 }
9682#endif
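
    /*
     * Note: the backwards walk above follows the classic liveness dataflow scheme.  With
     * simple use/def bit masks (instead of the IEMLIVENESSENTRY encoding) it would look
     * roughly like this (illustrative sketch only, hypothetical pafUses/pafDefs arrays):
     *
     *     uint32_t fLiveAfter = 0;                       // nothing is needed after the last call
     *     uint32_t idxCall    = pTb->Thrd.cCalls;
     *     while (idxCall-- > 0)
     *     {
     *         // live-before = (live-after minus what the call defines) plus what it uses
     *         uint32_t const fLiveBefore = (fLiveAfter & ~pafDefs[idxCall]) | pafUses[idxCall];
     *         pafLiveBefore[idxCall]     = fLiveBefore;
     *         fLiveAfter                 = fLiveBefore;  // becomes the live-after of the previous call
     *     }
     *
     * The real code instead calls the per-function liveness worker (or the generic
     * xcpt/call initializer) to derive the previous IEMLIVENESSENTRY from the current one.
     */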
9683
9684 /*
9685 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9686 * for aborting if an error happens.
9687 */
9688 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9689#ifdef LOG_ENABLED
9690 uint32_t const cCallsOrg = cCallsLeft;
9691#endif
9692 uint32_t off = 0;
9693 int rc = VINF_SUCCESS;
9694 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9695 {
9696 /*
9697 * Emit prolog code (fixed).
9698 */
9699 off = iemNativeEmitProlog(pReNative, off);
9700
9701 /*
9702 * Convert the calls to native code.
9703 */
9704#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9705 int32_t iGstInstr = -1;
9706#endif
9707#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9708 uint32_t cThreadedCalls = 0;
9709 uint32_t cRecompiledCalls = 0;
9710#endif
9711#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9712 uint32_t idxCurCall = 0;
9713#endif
9714 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9715 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9716 while (cCallsLeft-- > 0)
9717 {
9718 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9719#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9720 pReNative->idxCurCall = idxCurCall;
9721#endif
9722
9723 /*
9724 * Debug info, assembly markup and statistics.
9725 */
9726#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9727 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9728 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9729#endif
9730#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9731 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9732 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9733 {
9734 if (iGstInstr < (int32_t)pTb->cInstructions)
9735 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9736 else
9737 Assert(iGstInstr == pTb->cInstructions);
9738 iGstInstr = pCallEntry->idxInstr;
9739 }
9740 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9741#endif
9742#if defined(VBOX_STRICT)
9743 off = iemNativeEmitMarker(pReNative, off,
9744 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9745#endif
9746#if defined(VBOX_STRICT)
9747 iemNativeRegAssertSanity(pReNative);
9748#endif
9749#ifdef VBOX_WITH_STATISTICS
9750 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9751#endif
9752
9753 /*
9754 * Actual work.
9755 */
9756 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9757 pfnRecom ? "(recompiled)" : "(todo)"));
9758 if (pfnRecom) /** @todo stats on this. */
9759 {
9760 off = pfnRecom(pReNative, off, pCallEntry);
9761 STAM_REL_STATS({cRecompiledCalls++;});
9762 }
9763 else
9764 {
9765 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9766 STAM_REL_STATS({cThreadedCalls++;});
9767 }
9768 Assert(off <= pReNative->cInstrBufAlloc);
9769 Assert(pReNative->cCondDepth == 0);
9770
9771#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9772 if (LogIs2Enabled())
9773 {
9774 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9776 static const char s_achState[] = "CUXI";
9777# else
9778 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9779# endif
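        /* Each guest register's liveness state is logged as a single character
           looked up in s_achState by the raw state value.  The exact letters
           differ between the compact and the extended layout, but in both cases
           the point is to show at a glance which registers the rest of the TB
           reads as input and which it merely clobbers or leaves unused. */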
9780
9781 char szGpr[17];
9782 for (unsigned i = 0; i < 16; i++)
9783 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9784 szGpr[16] = '\0';
9785
9786 char szSegBase[X86_SREG_COUNT + 1];
9787 char szSegLimit[X86_SREG_COUNT + 1];
9788 char szSegAttrib[X86_SREG_COUNT + 1];
9789 char szSegSel[X86_SREG_COUNT + 1];
9790 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9791 {
9792 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9793 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9794 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9795 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9796 }
9797 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9798 = szSegSel[X86_SREG_COUNT] = '\0';
9799
9800 char szEFlags[8];
9801 for (unsigned i = 0; i < 7; i++)
9802 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9803 szEFlags[7] = '\0';
9804
9805 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9806 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9807 }
9808#endif
9809
9810 /*
9811 * Advance.
9812 */
9813 pCallEntry++;
9814#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9815 idxCurCall++;
9816#endif
9817 }
9818
9819 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9820 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9821 if (!cThreadedCalls)
9822 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9823
9824 /*
9825 * Emit the epilog code.
9826 */
9827 uint32_t idxReturnLabel;
9828 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9829
9830 /*
9831 * Generate special jump labels.
9832 */
9833 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9834 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9835 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9836 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
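        /* These exit stubs are only emitted when something in the TB actually
           requested the corresponding label; pReNative->bmLabelTypes is a bitmap
           indexed by IEMNATIVELABELTYPE recording which label types were
           allocated during recompilation. */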
9837
9838 /*
9839 * Generate simple TB tail labels that just call a helper with a pVCpu
9840 * arg and either return or longjmp/throw a non-zero status.
9841 *
9842 * The array entries must be ordered by enmLabel value so we can index
9843 * using fTailLabels bit numbers.
9844 */
9845 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9846 static struct
9847 {
9848 IEMNATIVELABELTYPE enmLabel;
9849 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9850 } const g_aSimpleTailLabels[] =
9851 {
9852 { kIemNativeLabelType_Invalid, NULL },
9853 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9854 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9855 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9856 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9857 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9858 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9859 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9860 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9861 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9862 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9863 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9864 };
9865 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9866 AssertCompile(kIemNativeLabelType_Invalid == 0);
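        /* A note on the mask below: RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U
           sets bits 1 thru LastSimple and leaves bit 0 (the Invalid entry) clear;
           with the twelve table entries above that works out to 0xffe.  In the
           loop further down, ASMBitFirstSetU64 returns a 1-based bit index, hence
           the -1U when converting back to an IEMNATIVELABELTYPE value. */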
9867 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9868 if (fTailLabels)
9869 {
9870 do
9871 {
9872 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9873 fTailLabels &= ~RT_BIT_64(enmLabel);
9874 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9875
9876 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9877 Assert(idxLabel != UINT32_MAX);
9878 if (idxLabel != UINT32_MAX)
9879 {
9880 iemNativeLabelDefine(pReNative, idxLabel, off);
9881
9882 /* int pfnCallback(PVMCPUCC pVCpu) */
9883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9884 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9885
9886 /* jump back to the return sequence. */
9887 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9888 }
9889
9890 } while (fTailLabels);
9891 }
9892 }
9893 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9894 {
9895 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9896 return pTb;
9897 }
9898 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9899 Assert(off <= pReNative->cInstrBufAlloc);
9900
9901 /*
9902 * Make sure all labels have been defined.
9903 */
9904 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9905#ifdef VBOX_STRICT
9906 uint32_t const cLabels = pReNative->cLabels;
9907 for (uint32_t i = 0; i < cLabels; i++)
9908 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9909#endif
9910
9911 /*
9912 * Allocate executable memory, copy over the code we've generated.
9913 */
9914 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9915 if (pTbAllocator->pDelayedFreeHead)
9916 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9917
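    /* Note: off counts IEMNATIVEINSTR units (byte-sized on AMD64/X86, 32-bit
       words on ARM64), so the sizeof() factor converts it into the byte count
       the executable memory allocator expects. */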
9918 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9919 AssertReturn(paFinalInstrBuf, pTb);
9920 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9921
9922 /*
9923 * Apply fixups.
9924 */
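    /* Fixups record code positions referencing a label whose offset was not yet
       known at emission time (forward jumps and the like).  Now that every label
       has been defined, patch each recorded position in the final buffer. */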
9925 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9926 uint32_t const cFixups = pReNative->cFixups;
9927 for (uint32_t i = 0; i < cFixups; i++)
9928 {
9929 Assert(paFixups[i].off < off);
9930 Assert(paFixups[i].idxLabel < cLabels);
9931 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9932 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9933 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9934 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9935 switch (paFixups[i].enmType)
9936 {
9937#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
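            /* The 32-bit displacement is computed relative to the fixup position;
               offAddend presumably compensates for x86/AMD64 rel32 operands being
               relative to the end of the instruction (typically -4). */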
9938 case kIemNativeFixupType_Rel32:
9939 Assert(paFixups[i].off + 4 <= off);
9940 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9941 continue;
9942
9943#elif defined(RT_ARCH_ARM64)
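            /* The ARM64 fixup types name the width and bit position of the branch
               immediate being patched: a 26-bit field at bit 0 (B/BL style), a
               19-bit field at bit 5 (B.cond/CBZ/CBNZ style) and a 14-bit field at
               bit 5 (TBZ/TBNZ style).  The displacements are in 32-bit instruction
               units, that being the native instruction size on ARM64. */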
9944 case kIemNativeFixupType_RelImm26At0:
9945 {
9946 Assert(paFixups[i].off < off);
9947 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9948 Assert(offDisp >= -262144 && offDisp < 262144);
9949 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9950 continue;
9951 }
9952
9953 case kIemNativeFixupType_RelImm19At5:
9954 {
9955 Assert(paFixups[i].off < off);
9956 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9957 Assert(offDisp >= -262144 && offDisp < 262144);
9958 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9959 continue;
9960 }
9961
9962 case kIemNativeFixupType_RelImm14At5:
9963 {
9964 Assert(paFixups[i].off < off);
9965 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9966 Assert(offDisp >= -8192 && offDisp < 8192);
9967 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9968 continue;
9969 }
9970
9971#endif
9972 case kIemNativeFixupType_Invalid:
9973 case kIemNativeFixupType_End:
9974 break;
9975 }
9976 AssertFailed();
9977 }
9978
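    /* This presumably does whatever the host requires before the freshly written
       code may be executed, e.g. instruction cache invalidation on ARM64 and/or
       switching the allocation from writable back to executable. */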
9979 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9980 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9981
9982 /*
9983 * Convert the translation block.
9984 */
9985 RTMemFree(pTb->Thrd.paCalls);
9986 pTb->Native.paInstructions = paFinalInstrBuf;
9987 pTb->Native.cInstructions = off;
9988 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9989#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9990 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9991 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9992#endif
9993
9994 Assert(pTbAllocator->cThreadedTbs > 0);
9995 pTbAllocator->cThreadedTbs -= 1;
9996 pTbAllocator->cNativeTbs += 1;
9997 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9998
9999#ifdef LOG_ENABLED
10000 /*
10001 * Disassemble to the log if enabled.
10002 */
10003 if (LogIs3Enabled())
10004 {
10005 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10006 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10007# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10008 RTLogFlush(NULL);
10009# endif
10010 }
10011#endif
10012 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10013
10014 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10015 return pTb;
10016}
10017