VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101548

Last change on this file since 101548 was 101548, checked in by vboxsync, 19 months ago

VMM/IEM: doxygen fix. bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 201.4 KB
1/* $Id: IEMAllN8veRecompiler.cpp 101548 2023-10-23 01:30:06Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): Register allocator
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94
95#include "IEMInline.h"
96#include "IEMThreadedFunctions.h"
97#include "IEMN8veRecompiler.h"
98#include "IEMNativeFunctions.h"
99
100
101/*
102 * Narrow down configs here to avoid wasting time on unused configs.
103 * Note! Same checks in IEMAllThrdRecompiler.cpp.
104 */
105
106#ifndef IEM_WITH_CODE_TLB
107# error The code TLB must be enabled for the recompiler.
108#endif
109
110#ifndef IEM_WITH_DATA_TLB
111# error The data TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_SETJMP
115# error The setjmp approach must be enabled for the recompiler.
116#endif
117
118
119/*********************************************************************************************************************************
120* Defined Constants And Macros *
121*********************************************************************************************************************************/
122/** Always count instructions for now. */
123#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
124
125
126/*********************************************************************************************************************************
127* Internal Functions *
128*********************************************************************************************************************************/
129static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
130 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT;
131static bool iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT;
132static bool iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData) RT_NOEXCEPT;
133
134
135/*********************************************************************************************************************************
136* Executable Memory Allocator *
137*********************************************************************************************************************************/
138/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
139 * Use an alternative chunk sub-allocator that does not store internal data
140 * in the chunk.
141 *
142 * Using RTHeapSimple is not practical on newer darwin systems where
143 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
144 * memory. We would have to change the protection of the whole chunk for
145 * every call to RTHeapSimple, which would be rather expensive.
146 *
147 * This alternative implementation lets us restrict page protection modifications
148 * to the pages backing the executable memory we just allocated.
149 */
150#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
151/** The chunk sub-allocation unit size in bytes. */
152#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
153/** The chunk sub-allocation unit size as a shift factor. */
154#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
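#if 0
/* Illustrative only, not part of the build: a minimal sketch of how a request size maps to
 * sub-allocation units under the two constants above. The real conversion lives in
 * iemExecMemAllocatorAllocInChunk below; the helper name here is made up for the example. */
DECLINLINE(uint32_t) iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    /* Round up to whole 128 byte units, e.g. a 200 byte request becomes 2 units (256 bytes). */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif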
155
156#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
157# ifdef IEMNATIVE_USE_GDB_JIT
158# define IEMNATIVE_USE_GDB_JIT_ET_DYN
159
160/** GDB JIT: Code entry. */
161typedef struct GDBJITCODEENTRY
162{
163 struct GDBJITCODEENTRY *pNext;
164 struct GDBJITCODEENTRY *pPrev;
165 uint8_t *pbSymFile;
166 uint64_t cbSymFile;
167} GDBJITCODEENTRY;
168
169/** GDB JIT: Actions. */
170typedef enum GDBJITACTIONS : uint32_t
171{
172 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
173} GDBJITACTIONS;
174
175/** GDB JIT: Descriptor. */
176typedef struct GDBJITDESCRIPTOR
177{
178 uint32_t uVersion;
179 GDBJITACTIONS enmAction;
180 GDBJITCODEENTRY *pRelevant;
181 GDBJITCODEENTRY *pHead;
182 /** Our addition: */
183 GDBJITCODEENTRY *pTail;
184} GDBJITDESCRIPTOR;
185
186/** GDB JIT: Our simple symbol file data. */
187typedef struct GDBJITSYMFILE
188{
189 Elf64_Ehdr EHdr;
190# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
191 Elf64_Shdr aShdrs[5];
192# else
193 Elf64_Shdr aShdrs[7];
194 Elf64_Phdr aPhdrs[2];
195# endif
196 /** The dwarf ehframe data for the chunk. */
197 uint8_t abEhFrame[512];
198 char szzStrTab[128];
199 Elf64_Sym aSymbols[3];
200# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
201 Elf64_Sym aDynSyms[2];
202 Elf64_Dyn aDyn[6];
203# endif
204} GDBJITSYMFILE;
205
206extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
207extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
208
209/** Init once for g_IemNativeGdbJitLock. */
210static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
211/** Critical section serializing access to the GDB JIT registration list. */
212static RTCRITSECT g_IemNativeGdbJitLock;
213
214/** GDB reads the info here. */
215GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
216
217/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
218DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
219{
220 ASMNopPause();
221}
222
223/** @callback_method_impl{FNRTONCE} */
224static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
225{
226 RT_NOREF(pvUser);
227 return RTCritSectInit(&g_IemNativeGdbJitLock);
228}
229
230
231# endif /* IEMNATIVE_USE_GDB_JIT */
232
233/**
234 * Per-chunk unwind info for non-windows hosts.
235 */
236typedef struct IEMEXECMEMCHUNKEHFRAME
237{
238# ifdef IEMNATIVE_USE_LIBUNWIND
239 /** The offset of the FDA into abEhFrame. */
240 uintptr_t offFda;
241# else
242 /** 'struct object' storage area. */
243 uint8_t abObject[1024];
244# endif
245# ifdef IEMNATIVE_USE_GDB_JIT
246# if 0
247 /** The GDB JIT 'symbol file' data. */
248 GDBJITSYMFILE GdbJitSymFile;
249# endif
250 /** The GDB JIT list entry. */
251 GDBJITCODEENTRY GdbJitEntry;
252# endif
253 /** The dwarf ehframe data for the chunk. */
254 uint8_t abEhFrame[512];
255} IEMEXECMEMCHUNKEHFRAME;
256/** Pointer to per-chunk unwind info for non-windows hosts. */
257typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
258#endif
259
260
261/**
262 * A chunk of executable memory.
263 */
264typedef struct IEMEXECMEMCHUNK
265{
266#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
267 /** Number of free items in this chunk. */
268 uint32_t cFreeUnits;
269 /** Hint where to start searching for free space in the allocation bitmap. */
270 uint32_t idxFreeHint;
271#else
272 /** The heap handle. */
273 RTHEAPSIMPLE hHeap;
274#endif
275 /** Pointer to the chunk. */
276 void *pvChunk;
277#ifdef IN_RING3
278 /**
279 * Pointer to the unwind information.
280 *
281 * This is used during C++ throw and longjmp (windows and probably most other
282 * platforms). Some debuggers (windbg) make use of it as well.
283 *
284 * Windows: This is allocated from hHeap on windows because (at least for
285 * AMD64) the UNWIND_INFO structure address in the
286 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
287 *
288 * Others: Allocated from the regular heap to avoid unnecessary executable data
289 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
290 void *pvUnwindInfo;
291#elif defined(IN_RING0)
292 /** Allocation handle. */
293 RTR0MEMOBJ hMemObj;
294#endif
295} IEMEXECMEMCHUNK;
296/** Pointer to a memory chunk. */
297typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
298
299
300/**
301 * Executable memory allocator for the native recompiler.
302 */
303typedef struct IEMEXECMEMALLOCATOR
304{
305 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
306 uint32_t uMagic;
307
308 /** The chunk size. */
309 uint32_t cbChunk;
310 /** The maximum number of chunks. */
311 uint32_t cMaxChunks;
312 /** The current number of chunks. */
313 uint32_t cChunks;
314 /** Hint where to start looking for available memory. */
315 uint32_t idxChunkHint;
316 /** Statistics: Current number of allocations. */
317 uint32_t cAllocations;
318
319 /** The total amount of memory available. */
320 uint64_t cbTotal;
321 /** Total amount of free memory. */
322 uint64_t cbFree;
323 /** Total amount of memory allocated. */
324 uint64_t cbAllocated;
325
326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
327 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
328 *
329 * Since the chunk size is a power of two and the minimum chunk size is a lot
330 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
331 * require a whole number of uint64_t elements in the allocation bitmap. So,
332 * for the sake of simplicity/laziness, they are allocated as one continuous
333 * chunk. */
334 uint64_t *pbmAlloc;
335 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
336 uint32_t cUnitsPerChunk;
337 /** Number of bitmap elements per chunk (for quickly locating the bitmap
338 * portion corresponding to a chunk). */
339 uint32_t cBitmapElementsPerChunk;
340#else
341 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
342 * @{ */
343 /** The size of the heap internal block header. This is used to adjust the
344 * requested memory size to make sure there is exactly enough room for a header at
345 * the end of the blocks we allocate before the next 64 byte alignment line. */
346 uint32_t cbHeapBlockHdr;
347 /** The size of the initial heap allocation required to make sure the first
348 * allocation is correctly aligned. */
349 uint32_t cbHeapAlignTweak;
350 /** The alignment tweak allocation address. */
351 void *pvAlignTweak;
352 /** @} */
353#endif
354
355#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
356 /** Pointer to the array of unwind info running parallel to aChunks (same
357 * allocation as this structure, located after the bitmaps).
358 * (For Windows, the structures must reside in 32-bit RVA distance to the
359 * actual chunk, so they are allocated off the chunk.) */
360 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
361#endif
362
363 /** The allocation chunks. */
364 RT_FLEXIBLE_ARRAY_EXTENSION
365 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
366} IEMEXECMEMALLOCATOR;
367/** Pointer to an executable memory allocator. */
368typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
369
370/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
371#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
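#if 0
/* Illustrative arithmetic only, not part of the build: for a 64 MiB chunk the bitmap
 * bookkeeping described above works out to 524288 sub-allocation units per chunk and
 * 8192 uint64_t bitmap elements (64 KiB of bitmap) per chunk. */
AssertCompile((_64M >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)       == 524288);
AssertCompile((_64M >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6)) == 8192);
#endif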
372
373
374static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
375
376
377/**
378 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
379 * the heap statistics.
380 */
381static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
382 uint32_t cbReq, uint32_t idxChunk)
383{
384 pExecMemAllocator->cAllocations += 1;
385 pExecMemAllocator->cbAllocated += cbReq;
386#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
387 pExecMemAllocator->cbFree -= cbReq;
388#else
389 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
390#endif
391 pExecMemAllocator->idxChunkHint = idxChunk;
392
393#ifdef RT_OS_DARWIN
394 /*
395 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
396 * on darwin. So, we mark the pages returned as read+write after alloc and
397 * expect the caller to call iemExecMemAllocatorReadyForUse when done
398 * writing to the allocation.
399 *
400 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
401 * for details.
402 */
403 /** @todo detect if this is necessary... it wasn't required on 10.15 or
404 * whatever older version it was. */
405 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
406 AssertRC(rc);
407#endif
408
409 return pvRet;
410}
411
412
413#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
414static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
415 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
416{
417 /*
418 * Advance the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
419 */
420 Assert(!(cToScan & 63));
421 Assert(!(idxFirst & 63));
422 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
423 pbmAlloc += idxFirst / 64;
424
425 /*
426 * Scan the bitmap for a run of cReqUnits consecutive clear bits.
427 */
428 /** @todo This can probably be done more efficiently for non-x86 systems. */
429 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
430 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
431 {
432 uint32_t idxAddBit = 1;
433 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
434 idxAddBit++;
435 if (idxAddBit >= cReqUnits)
436 {
437 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
438
439 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
440 pChunk->cFreeUnits -= cReqUnits;
441 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
442
443 void * const pvRet = (uint8_t *)pChunk->pvChunk
444 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
445
446 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
447 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
448 }
449
450 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
451 }
452 return NULL;
453}
454#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
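#if 0
/* Standalone illustration, not part of the build: the same first-fit idea as
 * iemExecMemAllocatorAllocInChunkInt above, written without the IPRT bit-scan helpers so the
 * bitmap semantics are explicit (a set bit = unit allocated). The helper name is made up. */
static int iemExecMemExampleFirstFit(uint64_t const *pbmAlloc, uint32_t cUnits, uint32_t cReqUnits)
{
    for (uint32_t i = 0; i + cReqUnits <= cUnits; i++)
    {
        /* Count how many consecutive clear (free) bits start at i. */
        uint32_t c = 0;
        while (c < cReqUnits && !(pbmAlloc[(i + c) / 64] & RT_BIT_64((i + c) % 64)))
            c++;
        if (c == cReqUnits)
            return (int)i;  /* first unit of a suitable free run */
        i += c;             /* skip past the set bit that stopped the run */
    }
    return -1;              /* no free run large enough */
}
#endif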
455
456
457static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
458{
459#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
460 /*
461 * Figure out how much to allocate.
462 */
463 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
464 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
465 {
466 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
467 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
468 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
469 {
470 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
471 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
472 if (pvRet)
473 return pvRet;
474 }
475 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
476 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
477 cReqUnits, idxChunk);
478 }
479#else
480 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
481 if (pvRet)
482 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
483#endif
484 return NULL;
485
486}
487
488
489/**
490 * Allocates @a cbReq bytes of executable memory.
491 *
492 * @returns Pointer to the memory, NULL if out of memory or other problem
493 * encountered.
494 * @param pVCpu The cross context virtual CPU structure of the calling
495 * thread.
496 * @param cbReq How many bytes are required.
497 */
498static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
499{
500 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
501 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
502 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
503
504 /*
505 * Adjust the request size so it'll fit the allocator alignment/whatnot.
506 *
507 * For the RTHeapSimple allocator this means to follow the logic described
508 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
509 * existing chunks if we think we've got sufficient free memory around.
510 *
511 * While for the alternative one we just align it up to a whole unit size.
512 */
513#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
514 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
515#else
516 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
517#endif
518 if (cbReq <= pExecMemAllocator->cbFree)
519 {
520 uint32_t const cChunks = pExecMemAllocator->cChunks;
521 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
522 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
523 {
524 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
525 if (pvRet)
526 return pvRet;
527 }
528 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
529 {
530 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
531 if (pvRet)
532 return pvRet;
533 }
534 }
535
536 /*
537 * Can we grow it with another chunk?
538 */
539 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
540 {
541 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
542 AssertLogRelRCReturn(rc, NULL);
543
544 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
545 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
546 if (pvRet)
547 return pvRet;
548 AssertFailed();
549 }
550
551 /* What now? Prune native translation blocks from the cache? */
552 AssertFailed();
553 return NULL;
554}
555
556
557/** This is a hook that we may need later for changing memory protection back
558 * to readonly+exec */
559static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
560{
561#ifdef RT_OS_DARWIN
562 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
563 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
564 AssertRC(rc); RT_NOREF(pVCpu);
565
566 /*
567 * Flush the instruction cache:
568 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
569 */
570 /* sys_dcache_flush(pv, cb); - not necessary */
571 sys_icache_invalidate(pv, cb);
572#else
573 RT_NOREF(pVCpu, pv, cb);
574#endif
575}
576
577
578/**
579 * Frees executable memory.
580 */
581void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
582{
583 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
584 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
585 Assert(pv);
586#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
587 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
588#else
589 Assert(!((uintptr_t)pv & 63));
590#endif
591
592 /* Align the size as we did when allocating the block. */
593#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
594 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
595#else
596 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
597#endif
598
599 /* Free it / assert sanity. */
600#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
601 uint32_t const cChunks = pExecMemAllocator->cChunks;
602 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
603 bool fFound = false;
604 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
605 {
606 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
607 fFound = offChunk < cbChunk;
608 if (fFound)
609 {
610#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
611 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
612 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
613
614 /* Check that it's valid and free it. */
615 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
616 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
617 for (uint32_t i = 1; i < cReqUnits; i++)
618 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
619 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
620
621 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
622 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
623
624 /* Update the stats. */
625 pExecMemAllocator->cbAllocated -= cb;
626 pExecMemAllocator->cbFree += cb;
627 pExecMemAllocator->cAllocations -= 1;
628 return;
629#else
630 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
631 break;
632#endif
633 }
634 }
635# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
636 AssertFailed();
637# else
638 Assert(fFound);
639# endif
640#endif
641
642#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
643 /* Update stats while cb is freshly calculated.*/
644 pExecMemAllocator->cbAllocated -= cb;
645 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
646 pExecMemAllocator->cAllocations -= 1;
647
648 /* Free it. */
649 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
650#endif
651}
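#if 0
/* Usage sketch, not part of the build: the intended call sequence around the three functions
 * above, emphasizing the darwin W^X dance - memory comes back read+write and must be flipped
 * to read+exec via iemExecMemAllocatorReadyForUse before it is executed. The byte written and
 * the 256 byte size are arbitrary example values. */
static void iemExecMemExampleUsage(PVMCPUCC pVCpu)
{
    uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, 256);
    if (pbCode)
    {
        pbCode[0] = 0xc3;                                   /* x86 RET, purely illustrative */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, 256); /* RW -> RX (+ icache flush on darwin/arm64) */
        /* ... execute the recompiled code ... */
        iemExecMemAllocatorFree(pVCpu, pbCode, 256);
    }
}
#endif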
652
653
654
655#ifdef IN_RING3
656# ifdef RT_OS_WINDOWS
657
658/**
659 * Initializes the unwind info structures for windows hosts.
660 */
661static int
662iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
663 void *pvChunk, uint32_t idxChunk)
664{
665 RT_NOREF(pVCpu);
666
667 /*
668 * The AMD64 unwind opcodes.
669 *
670 * This is a program that starts with RSP after a RET instruction that
671 * ends up in recompiled code, and the operations we describe here will
672 * restore all non-volatile registers and bring RSP back to where our
673 * RET address is. This means it's reverse order from what happens in
674 * the prologue.
675 *
676 * Note! Using a frame register approach here both because we have one
677 * and mainly because the UWOP_ALLOC_LARGE argument values
678 * would be a pain to write initializers for. On the positive
679 * side, we're impervious to changes in the stack variable
680 * area and can deal with dynamic stack allocations if necessary.
681 */
682 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
683 {
684 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
685 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
686 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
687 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
688 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
689 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
690 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
691 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
692 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
693 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
694 };
695 union
696 {
697 IMAGE_UNWIND_INFO Info;
698 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
699 } s_UnwindInfo =
700 {
701 {
702 /* .Version = */ 1,
703 /* .Flags = */ 0,
704 /* .SizeOfProlog = */ 16, /* whatever */
705 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
706 /* .FrameRegister = */ X86_GREG_xBP,
707 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
708 }
709 };
710 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
711 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
712
713 /*
714 * Calc how much space we need and allocate it off the exec heap.
715 */
716 unsigned const cFunctionEntries = 1;
717 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
718 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
719# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
720 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
721 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
722 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
723# else
724 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
725 - pExecMemAllocator->cbHeapBlockHdr;
726 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
727 32 /*cbAlignment*/);
728# endif
729 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
730 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
731
732 /*
733 * Initialize the structures.
734 */
735 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
736
737 paFunctions[0].BeginAddress = 0;
738 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
739 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
740
741 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
742 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
743
744 /*
745 * Register it.
746 */
747 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
748 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
749
750 return VINF_SUCCESS;
751}
752
753
754# else /* !RT_OS_WINDOWS */
755
756/**
757 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
758 */
759DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
760{
761 if (iValue >= 64)
762 {
763 Assert(iValue < 0x2000);
764 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
765 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
766 }
767 else if (iValue >= 0)
768 *Ptr.pb++ = (uint8_t)iValue;
769 else if (iValue > -64)
770 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
771 else
772 {
773 Assert(iValue > -0x2000);
774 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
775 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
776 }
777 return Ptr;
778}
779
780
781/**
782 * Emits an ULEB128 encoded value (up to 64-bit wide).
783 */
784DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
785{
786 while (uValue >= 0x80)
787 {
788 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
789 uValue >>= 7;
790 }
791 *Ptr.pb++ = (uint8_t)uValue;
792 return Ptr;
793}
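#if 0
/* Worked example, not part of the build: 300 (0x12c) is emitted low 7 bits first with the
 * continuation bit set on every byte but the last, i.e. 0xac 0x02. The helper name is made up. */
static void iemDwarfExampleUleb128(void)
{
    uint8_t abBuf[2];
    RTPTRUNION Ptr;
    Ptr.pb = abBuf;
    Ptr = iemDwarfPutUleb128(Ptr, 300);
    Assert(Ptr.pb == &abBuf[2] && abBuf[0] == 0xac && abBuf[1] == 0x02);
}
#endif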
794
795
796/**
797 * Emits a CFA rule as register @a uReg + offset @a off.
798 */
799DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
800{
801 *Ptr.pb++ = DW_CFA_def_cfa;
802 Ptr = iemDwarfPutUleb128(Ptr, uReg);
803 Ptr = iemDwarfPutUleb128(Ptr, off);
804 return Ptr;
805}
806
807
808/**
809 * Emits a register (@a uReg) save location:
810 * CFA + @a off * data_alignment_factor
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
813{
814 if (uReg < 0x40)
815 *Ptr.pb++ = DW_CFA_offset | uReg;
816 else
817 {
818 *Ptr.pb++ = DW_CFA_offset_extended;
819 Ptr = iemDwarfPutUleb128(Ptr, uReg);
820 }
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826# if 0 /* unused */
827/**
828 * Emits a register (@a uReg) save location, using signed offset:
829 * CFA + @a offSigned * data_alignment_factor
830 */
831DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
832{
833 *Ptr.pb++ = DW_CFA_offset_extended_sf;
834 Ptr = iemDwarfPutUleb128(Ptr, uReg);
835 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
836 return Ptr;
837}
838# endif
839
840
841/**
842 * Initializes the unwind info section for non-windows hosts.
843 */
844static int
845iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
846 void *pvChunk, uint32_t idxChunk)
847{
848 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
849 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
850
851 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
852
853 /*
854 * Generate the CIE first.
855 */
856# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
857 uint8_t const iDwarfVer = 3;
858# else
859 uint8_t const iDwarfVer = 4;
860# endif
861 RTPTRUNION const PtrCie = Ptr;
862 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
863 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
864 *Ptr.pb++ = iDwarfVer; /* DWARF version */
865 *Ptr.pb++ = 0; /* Augmentation. */
866 if (iDwarfVer >= 4)
867 {
868 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
869 *Ptr.pb++ = 0; /* Segment selector size. */
870 }
871# ifdef RT_ARCH_AMD64
872 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
873# else
874 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
875# endif
876 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
877# ifdef RT_ARCH_AMD64
878 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
879# elif defined(RT_ARCH_ARM64)
880 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
881# else
882# error "port me"
883# endif
884 /* Initial instructions: */
885# ifdef RT_ARCH_AMD64
886 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
889 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
890 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
892 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
893 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
894# elif defined(RT_ARCH_ARM64)
895# if 1
896 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
897# else
898 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
899# endif
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
912 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
913 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
914# else
915# error "port me"
916# endif
917 while ((Ptr.u - PtrCie.u) & 3)
918 *Ptr.pb++ = DW_CFA_nop;
919 /* Finalize the CIE size. */
920 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
921
922 /*
923 * Generate an FDE for the whole chunk area.
924 */
925# ifdef IEMNATIVE_USE_LIBUNWIND
926 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
927# endif
928 RTPTRUNION const PtrFde = Ptr;
929 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
930 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
931 Ptr.pu32++;
932 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
933 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
934# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
935 *Ptr.pb++ = DW_CFA_nop;
936# endif
937 while ((Ptr.u - PtrFde.u) & 3)
938 *Ptr.pb++ = DW_CFA_nop;
939 /* Finalize the FDE size. */
940 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
941
942 /* Terminator entry. */
943 *Ptr.pu32++ = 0;
944 *Ptr.pu32++ = 0; /* just to be sure... */
945 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
946
947 /*
948 * Register it.
949 */
950# ifdef IEMNATIVE_USE_LIBUNWIND
951 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
952# else
953 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
954 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
955# endif
956
957# ifdef IEMNATIVE_USE_GDB_JIT
958 /*
959 * Now for telling GDB about this (experimental).
960 *
961 * This seems to work best with ET_DYN.
962 */
963 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
964# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
965 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
966 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
967# else
968 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
969 - pExecMemAllocator->cbHeapBlockHdr;
970 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
971# endif
972 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
973 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
974
975 RT_ZERO(*pSymFile);
976
977 /*
978 * The ELF header:
979 */
980 pSymFile->EHdr.e_ident[0] = ELFMAG0;
981 pSymFile->EHdr.e_ident[1] = ELFMAG1;
982 pSymFile->EHdr.e_ident[2] = ELFMAG2;
983 pSymFile->EHdr.e_ident[3] = ELFMAG3;
984 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
985 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
986 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
987 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
988# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
989 pSymFile->EHdr.e_type = ET_DYN;
990# else
991 pSymFile->EHdr.e_type = ET_REL;
992# endif
993# ifdef RT_ARCH_AMD64
994 pSymFile->EHdr.e_machine = EM_AMD64;
995# elif defined(RT_ARCH_ARM64)
996 pSymFile->EHdr.e_machine = EM_AARCH64;
997# else
998# error "port me"
999# endif
1000 pSymFile->EHdr.e_version = 1; /*?*/
1001 pSymFile->EHdr.e_entry = 0;
1002# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1003 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1004# else
1005 pSymFile->EHdr.e_phoff = 0;
1006# endif
1007 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1008 pSymFile->EHdr.e_flags = 0;
1009 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1010# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1011 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1012 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1013# else
1014 pSymFile->EHdr.e_phentsize = 0;
1015 pSymFile->EHdr.e_phnum = 0;
1016# endif
1017 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1018 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1019 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1020
1021 uint32_t offStrTab = 0;
1022#define APPEND_STR(a_szStr) do { \
1023 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1024 offStrTab += sizeof(a_szStr); \
1025 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1026 } while (0)
1027#define APPEND_STR_FMT(a_szStr, ...) do { \
1028 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1029 offStrTab++; \
1030 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1031 } while (0)
1032
1033 /*
1034 * Section headers.
1035 */
1036 /* Section header #0: NULL */
1037 unsigned i = 0;
1038 APPEND_STR("");
1039 RT_ZERO(pSymFile->aShdrs[i]);
1040 i++;
1041
1042 /* Section header: .eh_frame */
1043 pSymFile->aShdrs[i].sh_name = offStrTab;
1044 APPEND_STR(".eh_frame");
1045 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1046 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1047# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1048 pSymFile->aShdrs[i].sh_offset
1049 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1050# else
1051 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1052 pSymFile->aShdrs[i].sh_offset = 0;
1053# endif
1054
1055 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1056 pSymFile->aShdrs[i].sh_link = 0;
1057 pSymFile->aShdrs[i].sh_info = 0;
1058 pSymFile->aShdrs[i].sh_addralign = 1;
1059 pSymFile->aShdrs[i].sh_entsize = 0;
1060 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1061 i++;
1062
1063 /* Section header: .shstrtab */
1064 unsigned const iShStrTab = i;
1065 pSymFile->EHdr.e_shstrndx = iShStrTab;
1066 pSymFile->aShdrs[i].sh_name = offStrTab;
1067 APPEND_STR(".shstrtab");
1068 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1069 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1070# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1071 pSymFile->aShdrs[i].sh_offset
1072 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1073# else
1074 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1075 pSymFile->aShdrs[i].sh_offset = 0;
1076# endif
1077 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1078 pSymFile->aShdrs[i].sh_link = 0;
1079 pSymFile->aShdrs[i].sh_info = 0;
1080 pSymFile->aShdrs[i].sh_addralign = 1;
1081 pSymFile->aShdrs[i].sh_entsize = 0;
1082 i++;
1083
1084 /* Section header: .symtab */
1085 pSymFile->aShdrs[i].sh_name = offStrTab;
1086 APPEND_STR(".symtab");
1087 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1088 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1091 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1092 pSymFile->aShdrs[i].sh_link = iShStrTab;
1093 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1094 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1095 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1096 i++;
1097
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1099 /* Section header: .dynsym */
1100 pSymFile->aShdrs[i].sh_name = offStrTab;
1101 APPEND_STR(".dynsym");
1102 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1103 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1104 pSymFile->aShdrs[i].sh_offset
1105 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1106 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1107 pSymFile->aShdrs[i].sh_link = iShStrTab;
1108 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1109 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1110 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1111 i++;
1112# endif
1113
1114# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1115 /* Section header: .dynamic */
1116 pSymFile->aShdrs[i].sh_name = offStrTab;
1117 APPEND_STR(".dynamic");
1118 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1119 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1120 pSymFile->aShdrs[i].sh_offset
1121 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1122 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1123 pSymFile->aShdrs[i].sh_link = iShStrTab;
1124 pSymFile->aShdrs[i].sh_info = 0;
1125 pSymFile->aShdrs[i].sh_addralign = 1;
1126 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1127 i++;
1128# endif
1129
1130 /* Section header: .text */
1131 unsigned const iShText = i;
1132 pSymFile->aShdrs[i].sh_name = offStrTab;
1133 APPEND_STR(".text");
1134 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1135 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1136# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1137 pSymFile->aShdrs[i].sh_offset
1138 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1139# else
1140 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1141 pSymFile->aShdrs[i].sh_offset = 0;
1142# endif
1143 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1144 pSymFile->aShdrs[i].sh_link = 0;
1145 pSymFile->aShdrs[i].sh_info = 0;
1146 pSymFile->aShdrs[i].sh_addralign = 1;
1147 pSymFile->aShdrs[i].sh_entsize = 0;
1148 i++;
1149
1150 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1151
1152# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1153 /*
1154 * The program headers:
1155 */
1156 /* Everything in a single LOAD segment: */
1157 i = 0;
1158 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1159 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1160 pSymFile->aPhdrs[i].p_offset
1161 = pSymFile->aPhdrs[i].p_vaddr
1162 = pSymFile->aPhdrs[i].p_paddr = 0;
1163 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1164 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1165 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1166 i++;
1167 /* The .dynamic segment. */
1168 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1169 pSymFile->aPhdrs[i].p_flags = PF_R;
1170 pSymFile->aPhdrs[i].p_offset
1171 = pSymFile->aPhdrs[i].p_vaddr
1172 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1173 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1174 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1175 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1179
1180 /*
1181 * The dynamic section:
1182 */
1183 i = 0;
1184 pSymFile->aDyn[i].d_tag = DT_SONAME;
1185 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1186 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1187 i++;
1188 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1189 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1190 i++;
1191 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1192 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1193 i++;
1194 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1195 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1196 i++;
1197 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1198 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1199 i++;
1200 pSymFile->aDyn[i].d_tag = DT_NULL;
1201 i++;
1202 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1203# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1204
1205 /*
1206 * Symbol tables:
1207 */
1208 /** @todo gdb doesn't seem to really like this ... */
1209 i = 0;
1210 pSymFile->aSymbols[i].st_name = 0;
1211 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1212 pSymFile->aSymbols[i].st_value = 0;
1213 pSymFile->aSymbols[i].st_size = 0;
1214 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1215 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1217 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1218# endif
1219 i++;
1220
1221 pSymFile->aSymbols[i].st_name = 0;
1222 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1223 pSymFile->aSymbols[i].st_value = 0;
1224 pSymFile->aSymbols[i].st_size = 0;
1225 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1226 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1227 i++;
1228
1229 pSymFile->aSymbols[i].st_name = offStrTab;
1230 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1231# if 0
1232 pSymFile->aSymbols[i].st_shndx = iShText;
1233 pSymFile->aSymbols[i].st_value = 0;
1234# else
1235 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1236 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1237# endif
1238 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1239 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1240 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1241# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1242 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1243 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1244# endif
1245 i++;
1246
1247 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1248 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1249
1250 /*
1251 * The GDB JIT entry and informing GDB.
1252 */
1253 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1254# if 1
1255 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1256# else
1257 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1258# endif
1259
1260 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1261 RTCritSectEnter(&g_IemNativeGdbJitLock);
1262 pEhFrame->GdbJitEntry.pNext = NULL;
1263 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1264 if (__jit_debug_descriptor.pTail)
1265 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1266 else
1267 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1268 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1269 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1270
1271 /* Notify GDB: */
1272 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1273 __jit_debug_register_code();
1274 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1275 RTCritSectLeave(&g_IemNativeGdbJitLock);
1276
1277# else /* !IEMNATIVE_USE_GDB_JIT */
1278 RT_NOREF(pVCpu);
1279# endif /* !IEMNATIVE_USE_GDB_JIT */
1280
1281 return VINF_SUCCESS;
1282}
1283
1284# endif /* !RT_OS_WINDOWS */
1285#endif /* IN_RING3 */
1286
1287
1288/**
1289 * Adds another chunk to the executable memory allocator.
1290 *
1291 * This is used by the init code for the initial allocation and later by the
1292 * regular allocator function when it's out of memory.
1293 */
1294static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1295{
1296 /* Check that we've room for growth. */
1297 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1298 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1299
1300 /* Allocate a chunk. */
1301#ifdef RT_OS_DARWIN
1302 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1303#else
1304 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1305#endif
1306 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1307
1308#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1309 int rc = VINF_SUCCESS;
1310#else
1311 /* Initialize the heap for the chunk. */
1312 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1313 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1314 AssertRC(rc);
1315 if (RT_SUCCESS(rc))
1316 {
1317 /*
1318 * We want the memory to be aligned on 64 byte, so the first time thru
1319 * here we do some exploratory allocations to see how we can achieve this.
1320 * On subsequent runs we only make an initial adjustment allocation, if
1321 * necessary.
1322 *
1323 * Since we own the heap implementation, we know that the internal block
1324 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1325 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1326 * to the size, align up by 64 bytes, and subtract 32 bytes.
1327 *
1328 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1329 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1330 * allocation to force subsequent allocations to return 64 byte aligned
1331 * user areas.
1332 */
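#if 0
        /* Illustrative arithmetic, not part of the build: with the 32 byte block header, a
         * 256 byte request is adjusted to RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so the
         * header plus user area ends exactly on a 64 byte line and the next allocation's user
         * area starts 64 byte aligned. */
        AssertCompile(RT_ALIGN_32(256 + 32, 64) - 32 == 288);
#endif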
1333 if (!pExecMemAllocator->cbHeapBlockHdr)
1334 {
1335 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1336 pExecMemAllocator->cbHeapAlignTweak = 64;
1337 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1338 32 /*cbAlignment*/);
1339 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1340
1341 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1342 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1343 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1344 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1345 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1346
1347 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1348 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1349 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1350 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1351 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1352
1353 RTHeapSimpleFree(hHeap, pvTest2);
1354 RTHeapSimpleFree(hHeap, pvTest1);
1355 }
1356 else
1357 {
1358 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1359 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1360 }
1361 if (RT_SUCCESS(rc))
1362#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1363 {
1364 /*
1365 * Add the chunk.
1366 *
1367 * This must be done before the unwind init so windows can allocate
1368 * memory from the chunk when using the alternative sub-allocator.
1369 */
1370 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1371#ifdef IN_RING3
1372 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1373#endif
1374#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1375 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1376#else
1377 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1378 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1379 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1380 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1381#endif
1382
1383 pExecMemAllocator->cChunks = idxChunk + 1;
1384 pExecMemAllocator->idxChunkHint = idxChunk;
1385
1386#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1387 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1388 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1389#else
1390 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1391 pExecMemAllocator->cbTotal += cbFree;
1392 pExecMemAllocator->cbFree += cbFree;
1393#endif
1394
1395#ifdef IN_RING3
1396 /*
1397 * Initialize the unwind information (this cannot really fail atm).
1398 * (This sets pvUnwindInfo.)
1399 */
1400 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1401 if (RT_SUCCESS(rc))
1402#endif
1403 {
1404 return VINF_SUCCESS;
1405 }
1406
1407#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1408 /* Just in case the impossible happens, undo the above: */
1409 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1410 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1411 pExecMemAllocator->cChunks = idxChunk;
1412 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1413 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1414 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1415 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1416#endif
1417 }
1418#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1419 }
1420#endif
1421 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1422 RT_NOREF(pVCpu);
1423 return rc;
1424}
1425
1426
1427/**
1428 * Initializes the executable memory allocator for native recompilation on the
1429 * calling EMT.
1430 *
1431 * @returns VBox status code.
1432 * @param pVCpu The cross context virtual CPU structure of the calling
1433 * thread.
1434 * @param cbMax The max size of the allocator.
1435 * @param cbInitial The initial allocator size.
1436 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1437 * dependent).
1438 */
1439int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1440{
1441 /*
1442 * Validate input.
1443 */
1444 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1445 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1446 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1447 || cbChunk == 0
1448 || ( RT_IS_POWER_OF_TWO(cbChunk)
1449 && cbChunk >= _1M
1450 && cbChunk <= _256M
1451 && cbChunk <= cbMax),
1452 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1453 VERR_OUT_OF_RANGE);
1454
1455 /*
1456 * Adjust/figure out the chunk size.
1457 */
1458 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1459 {
1460 if (cbMax >= _256M)
1461 cbChunk = _64M;
1462 else
1463 {
1464 if (cbMax < _16M)
1465 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1466 else
1467 cbChunk = (uint32_t)cbMax / 4;
1468 if (!RT_IS_POWER_OF_TWO(cbChunk))
1469 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1470 }
1471 }
1472
1473 if (cbChunk > cbMax)
1474 cbMax = cbChunk;
1475 else
1476 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1477 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1478 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
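#if 0
    /* Worked example, not part of the build, of the defaulting above assuming cbMax = 40 MiB:
     * cbChunk starts out as cbMax / 4 = 10 MiB, is rounded up to the next power of two (16 MiB),
     * and cbMax is then rounded up to a whole number of chunks: 48 MiB, i.e. cMaxChunks = 3. */
    AssertCompile(RT_BIT_32(24) == _16M);                              /* ASMBitLastSetU32(10 MiB) returns 24. */
    AssertCompile(((_32M + _8M) - 1 + _16M) / _16M * _16M == 3 * _16M);
#endif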
1479
1480 /*
1481 * Allocate and initialize the allocator instance.
1482 */
1483 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1484#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1485 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1486 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1487 cbNeeded += cbBitmap * cMaxChunks;
1488 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1489 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1490#endif
1491#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1492 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1493 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1494#endif
1495 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1496 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1497 VERR_NO_MEMORY);
1498 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1499 pExecMemAllocator->cbChunk = cbChunk;
1500 pExecMemAllocator->cMaxChunks = cMaxChunks;
1501 pExecMemAllocator->cChunks = 0;
1502 pExecMemAllocator->idxChunkHint = 0;
1503 pExecMemAllocator->cAllocations = 0;
1504 pExecMemAllocator->cbTotal = 0;
1505 pExecMemAllocator->cbFree = 0;
1506 pExecMemAllocator->cbAllocated = 0;
1507#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1508 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1509 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1510 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1511 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1512#endif
1513#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1514 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1515#endif
1516 for (uint32_t i = 0; i < cMaxChunks; i++)
1517 {
1518#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1519 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1520 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1521#else
1522 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1523#endif
1524 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1525#ifdef IN_RING0
1526 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1527#else
1528 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1529#endif
1530 }
1531 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1532
1533 /*
1534 * Do the initial allocations.
1535 */
1536    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1537 {
1538 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1539 AssertLogRelRCReturn(rc, rc);
1540 }
1541
1542 pExecMemAllocator->idxChunkHint = 0;
1543
1544 return VINF_SUCCESS;
1545}
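

/*
 * Illustrative sketch (kept out of the build): how the chunk size defaulting in
 * iemExecMemAllocatorInit() plays out for a few request sizes.  The numbers in
 * the comments are derived by hand from the code above; the calls are shown in
 * isolation and are not meant to be executed back to back on the same EMT.
 */
#if 0
static void iemExecMemAllocatorInitExamples(PVMCPU pVCpu)
{
    /* cbMax = 512M: at or above 256M, so cbChunk defaults to 64M, giving at
       most 8 chunks of 64M each. */
    iemExecMemAllocatorInit(pVCpu, _512M, _4M, 0 /*cbChunk=default*/);

    /* cbMax = 100M: cbChunk = 100M/4 = 25M, rounded up to the next power of
       two (32M); cbMax is then rounded up to 128M, i.e. at most 4 chunks. */
    iemExecMemAllocatorInit(pVCpu, 100 * _1M, _4M, UINT32_MAX /*cbChunk=default*/);

    /* cbMax = 2M: below 16M and below 4M, so cbChunk = cbMax and we end up
       with a single 2M chunk. */
    iemExecMemAllocatorInit(pVCpu, _2M, _2M, 0 /*cbChunk=default*/);
}
#endif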
1546
1547
1548/*********************************************************************************************************************************
1549* Native Recompilation *
1550*********************************************************************************************************************************/
1551
1552
1553/**
1554 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1555 */
1556IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1557{
1558 pVCpu->iem.s.cInstructions += idxInstr;
1559 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1560}
1561
1562
1563/**
1564 * Used by TB code when it wants to raise a \#GP(0).
1565 */
1566IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1567{
1568 pVCpu->iem.s.cInstructions += idxInstr;
1569 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1570 return VINF_IEM_RAISED_XCPT; /* not reached */
1571}
1572
1573
1574/**
1575 * Reinitializes the native recompiler state.
1576 *
1577 * Called before starting a new recompile job.
1578 */
1579static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1580{
1581 pReNative->cLabels = 0;
1582 pReNative->bmLabelTypes = 0;
1583 pReNative->cFixups = 0;
1584#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1585 pReNative->pDbgInfo->cEntries = 0;
1586#endif
1587 pReNative->pTbOrg = pTb;
1588
1589 pReNative->bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1590#if IEMNATIVE_HST_GREG_COUNT < 32
1591 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1592#endif
1593 ;
1594 pReNative->bmHstRegsWithGstShadow = 0;
1595 pReNative->bmGstRegShadows = 0;
1596 pReNative->cCondDepth = 0;
1597 pReNative->uCondSeqNo = 0;
1598 pReNative->bmVars = 0;
1599 pReNative->u64ArgVars = UINT64_MAX;
1600
1601 /* Full host register reinit: */
1602 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->aHstRegs); i++)
1603 {
1604 pReNative->aHstRegs[i].fGstRegShadows = 0;
1605 pReNative->aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1606 pReNative->aHstRegs[i].idxVar = UINT8_MAX;
1607 }
1608
1609 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1610 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1611#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1612 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1613#endif
1614#ifdef IEMNATIVE_REG_FIXED_TMP0
1615 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1616#endif
1617 );
1618 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1619 {
1620 fRegs &= ~RT_BIT_32(idxReg);
1621        pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1622 }
1623
1624 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1625#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1626 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1627#endif
1628#ifdef IEMNATIVE_REG_FIXED_TMP0
1629 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1630#endif
1631 return pReNative;
1632}
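

/*
 * Illustrative sketch (kept out of the build): the register mask walking idiom
 * used in iemNativeReInit() above and throughout the register allocator below.
 * ASMBitFirstSetU32() returns the 1-based index of the least significant set
 * bit (0 for an empty mask), so clearing the bit each round visits every set
 * bit exactly once in ascending order.
 */
#if 0
static void iemNativeExampleWalkRegMask(uint32_t fRegs)
{
    while (fRegs != 0)
    {
        unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1; /* 1-based -> 0-based */
        fRegs &= ~RT_BIT_32(idxReg);                          /* clear it so the loop terminates */
        /* ... do something with host register idxReg ... */
    }
}
#endif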
1633
1634
1635/**
1636 * Allocates and initializes the native recompiler state.
1637 *
1638 * This is called the first time an EMT wants to recompile something.
1639 *
1640 * @returns Pointer to the new recompiler state.
1641 * @param pVCpu The cross context virtual CPU structure of the calling
1642 * thread.
1643 * @param pTb The TB that's about to be recompiled.
1644 * @thread EMT(pVCpu)
1645 */
1646static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1647{
1648 VMCPU_ASSERT_EMT(pVCpu);
1649
1650 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1651 AssertReturn(pReNative, NULL);
1652
1653 /*
1654 * Try allocate all the buffers and stuff we need.
1655 */
1656 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1657 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1658 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1659#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1660 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
1661#endif
1662 if (RT_LIKELY( pReNative->pInstrBuf
1663 && pReNative->paLabels
1664 && pReNative->paFixups)
1665#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1666 && pReNative->pDbgInfo
1667#endif
1668 )
1669 {
1670 /*
1671 * Set the buffer & array sizes on success.
1672 */
1673 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1674 pReNative->cLabelsAlloc = _8K;
1675 pReNative->cFixupsAlloc = _16K;
1676#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1677 pReNative->cDbgInfoAlloc = _16K;
1678#endif
1679
1680 /*
1681 * Done, just need to save it and reinit it.
1682 */
1683 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1684 return iemNativeReInit(pReNative, pTb);
1685 }
1686
1687 /*
1688 * Failed. Cleanup and return.
1689 */
1690 AssertFailed();
1691 RTMemFree(pReNative->pInstrBuf);
1692 RTMemFree(pReNative->paLabels);
1693 RTMemFree(pReNative->paFixups);
1694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1695 RTMemFree(pReNative->pDbgInfo);
1696#endif
1697 RTMemFree(pReNative);
1698 return NULL;
1699}
1700
1701
1702/**
1703 * Creates a label.
1704 *
1705 * If the label does not yet have a defined position,
1706 * call iemNativeLabelDefine() later to set it.
1707 *
1708 * @returns Label ID.
1709 * @param pReNative The native recompile state.
1710 * @param enmType The label type.
1711 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1712 * label is not yet defined (default).
1713 * @param   uData       Data associated with the label. Only applicable to
1714 *                      certain types of labels. Default is zero.
1715 */
1716DECLHIDDEN(uint32_t) iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1717 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1718{
1719 /*
1720 * Locate existing label definition.
1721 *
1722 * This is only allowed for forward declarations where offWhere=UINT32_MAX
1723 * and uData is zero.
1724 */
1725 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1726 uint32_t const cLabels = pReNative->cLabels;
1727 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
1728#ifndef VBOX_STRICT
1729 && offWhere == UINT32_MAX
1730 && uData == 0
1731#endif
1732 )
1733 {
1734 /** @todo Since this is only used for labels with uData = 0, just use a
1735 * lookup array? */
1736 for (uint32_t i = 0; i < cLabels; i++)
1737 if ( paLabels[i].enmType == enmType
1738 && paLabels[i].uData == uData)
1739 {
1740#ifdef VBOX_STRICT
1741 AssertReturn(uData == 0, UINT32_MAX);
1742 AssertReturn(offWhere == UINT32_MAX, UINT32_MAX);
1743#endif
1744 AssertReturn(paLabels[i].off == UINT32_MAX, UINT32_MAX);
1745 return i;
1746 }
1747 }
1748
1749 /*
1750 * Make sure we've got room for another label.
1751 */
1752 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1753 { /* likely */ }
1754 else
1755 {
1756 uint32_t cNew = pReNative->cLabelsAlloc;
1757 AssertReturn(cNew, UINT32_MAX);
1758 AssertReturn(cLabels == cNew, UINT32_MAX);
1759 cNew *= 2;
1760        AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1761 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1762 AssertReturn(paLabels, UINT32_MAX);
1763 pReNative->paLabels = paLabels;
1764 pReNative->cLabelsAlloc = cNew;
1765 }
1766
1767 /*
1768 * Define a new label.
1769 */
1770 paLabels[cLabels].off = offWhere;
1771 paLabels[cLabels].enmType = enmType;
1772 paLabels[cLabels].uData = uData;
1773 pReNative->cLabels = cLabels + 1;
1774
1775 Assert(enmType >= 0 && enmType < 64);
1776 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1777
1778 if (offWhere != UINT32_MAX)
1779 {
1780#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1781 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1782 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
1783#endif
1784 }
1785 return cLabels;
1786}
1787
1788
1789/**
1790 * Defines the location of an existing label.
1791 *
1792 * @param pReNative The native recompile state.
1793 * @param idxLabel The label to define.
1794 * @param offWhere The position.
1795 */
1796DECLHIDDEN(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere) RT_NOEXCEPT
1797{
1798 AssertReturnVoid(idxLabel < pReNative->cLabels);
1799 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
1800 AssertReturnVoid(pLabel->off == UINT32_MAX);
1801 pLabel->off = offWhere;
1802#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1803 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
1804 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
1805#endif
1806}
1807
1808
1809/**
1810 * Looks up a label.
1811 *
1812 * @returns Label ID if found, UINT32_MAX if not.
1813 */
1814static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1815 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1816{
1817 Assert(enmType >= 0 && enmType < 64);
1818 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1819 {
1820 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1821 uint32_t const cLabels = pReNative->cLabels;
1822 for (uint32_t i = 0; i < cLabels; i++)
1823 if ( paLabels[i].enmType == enmType
1824 && paLabels[i].uData == uData
1825 && ( paLabels[i].off == offWhere
1826 || offWhere == UINT32_MAX
1827 || paLabels[i].off == UINT32_MAX))
1828 return i;
1829 }
1830 return UINT32_MAX;
1831}
1832
1833
1834
1835/**
1836 * Adds a fixup.
1837 *
1838 * @returns Success indicator.
1839 * @param pReNative The native recompile state.
1840 * @param offWhere The instruction offset of the fixup location.
1841 * @param idxLabel The target label ID for the fixup.
1842 * @param enmType The fixup type.
1843 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1844 */
1845DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1846 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1847{
1848 Assert(idxLabel <= UINT16_MAX);
1849 Assert((unsigned)enmType <= UINT8_MAX);
1850
1851 /*
1852 * Make sure we've room.
1853 */
1854 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1855 uint32_t const cFixups = pReNative->cFixups;
1856 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1857 { /* likely */ }
1858 else
1859 {
1860 uint32_t cNew = pReNative->cFixupsAlloc;
1861 AssertReturn(cNew, false);
1862 AssertReturn(cFixups == cNew, false);
1863 cNew *= 2;
1864 AssertReturn(cNew <= _128K, false);
1865 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1866 AssertReturn(paFixups, false);
1867 pReNative->paFixups = paFixups;
1868 pReNative->cFixupsAlloc = cNew;
1869 }
1870
1871 /*
1872 * Add the fixup.
1873 */
1874 paFixups[cFixups].off = offWhere;
1875 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1876 paFixups[cFixups].enmType = enmType;
1877 paFixups[cFixups].offAddend = offAddend;
1878 pReNative->cFixups = cFixups + 1;
1879 return true;
1880}
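

/*
 * Illustrative sketch (kept out of the build): the typical forward branch
 * pattern tying iemNativeLabelCreate(), iemNativeAddFixup() and
 * iemNativeLabelDefine() together.  The concrete IEMNATIVELABELTYPE and
 * IEMNATIVEFIXUPTYPE enumerators live in the header and are therefore taken as
 * parameters here, the branch emitter is only hinted at, and the final
 * patching of recorded fixups happens elsewhere.
 */
#if 0
static uint32_t iemNativeExampleForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                              IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    /* Forward declare the label; no position yet (offWhere defaults to UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
    AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);

    /* Emit the branch with a dummy displacement and record that the
       instruction at offFixup must later be patched to point at idxLabel. */
    uint32_t const offFixup = off;
    /* off = iemNativeEmitSomeBranch(pReNative, off, ...); -- placeholder */
    AssertReturn(iemNativeAddFixup(pReNative, offFixup, idxLabel, enmFixupType), UINT32_MAX);

    /* ... emit the code between the branch and its target ... */

    /* Pin the label down now that the target position is known. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif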
1881
1882/**
1883 * Slow code path for iemNativeInstrBufEnsure.
1884 */
1885DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1886 uint32_t cInstrReq) RT_NOEXCEPT
1887{
1888 /* Double the buffer size till we meet the request. */
1889 uint32_t cNew = pReNative->cInstrBufAlloc;
1890 AssertReturn(cNew > 0, NULL);
1891 do
1892 cNew *= 2;
1893 while (cNew < off + cInstrReq);
1894
1895 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1896#ifdef RT_ARCH_ARM64
1897 AssertReturn(cbNew <= _1M, NULL); /* Limited by the branch instruction range (18+2 bits). */
1898#else
1899 AssertReturn(cbNew <= _2M, NULL);
1900#endif
1901
1902 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1903 AssertReturn(pvNew, NULL);
1904
1905 pReNative->cInstrBufAlloc = cNew;
1906 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1907}
1908
1909#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1910
1911/**
1912 * Grows the static debug info array used during recompilation.
1913 * @returns Pointer to the new debug info block, NULL on failure.
1914 */
1915DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo) RT_NOEXCEPT
1916{
1917 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
1918 AssertReturn(cNew < _1M && cNew != 0, NULL);
1919 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
1920 AssertReturn(pDbgInfo, NULL);
1921 pReNative->pDbgInfo = pDbgInfo;
1922 pReNative->cDbgInfoAlloc = cNew;
1923 return pDbgInfo;
1924}
1925
1926
1927/**
1928 * Adds a new debug info uninitialized entry, returning the pointer to it.
1929 */
1930DECLINLINE(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
1931{
1932 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
1933 { /* likely */ }
1934 else
1935 {
1936 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
1937 AssertReturn(pDbgInfo, NULL);
1938 }
1939 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
1940}
1941
1942
1943/**
1944 * Debug Info: Adds a native offset record, if necessary.
1945 */
1946static bool iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT
1947{
1948 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
1949
1950 /*
1951 * Search backwards to see if we've got a similar record already.
1952 */
1953 uint32_t idx = pDbgInfo->cEntries;
1954 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
1955 while (idx-- > idxStop)
1956 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
1957 {
1958 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
1959 return true;
1960 AssertReturn(pDbgInfo->aEntries[idx].NativeOffset.offNative < off, false);
1961 break;
1962 }
1963
1964 /*
1965 * Add it.
1966 */
1967 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
1968 AssertReturn(pEntry, false);
1969 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
1970 pEntry->NativeOffset.offNative = off;
1971
1972 return true;
1973}
1974
1975
1976/**
1977 * Debug Info: Record info about a label.
1978 */
1979static bool iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData) RT_NOEXCEPT
1980{
1981 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1982 AssertReturn(pEntry, false);
1983
1984 pEntry->Label.uType = kIemTbDbgEntryType_Label;
1985 pEntry->Label.uUnused = 0;
1986 pEntry->Label.enmLabel = (uint8_t)enmType;
1987 pEntry->Label.uData = uData;
1988
1989 return true;
1990}
1991
1992
1993/**
1994 * Debug Info: Record info about a threaded call.
1995 */
1996static bool iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall) RT_NOEXCEPT
1997{
1998 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
1999 AssertReturn(pEntry, false);
2000
2001 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2002 pEntry->ThreadedCall.uUnused = 0;
2003 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2004
2005 return true;
2006}
2007
2008
2009/**
2010 * Debug Info: Record info about a new guest instruction.
2011 */
2012static bool iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec) RT_NOEXCEPT
2013{
2014 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2015 AssertReturn(pEntry, false);
2016
2017 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2018 pEntry->GuestInstruction.uUnused = 0;
2019 pEntry->GuestInstruction.fExec = fExec;
2020
2021 return true;
2022}
2023
2024
2025/**
2026 * Debug Info: Record info about guest register shadowing.
2027 */
2028static bool iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2029 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX) RT_NOEXCEPT
2030{
2031 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2032 AssertReturn(pEntry, false);
2033
2034 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2035 pEntry->GuestRegShadowing.uUnused = 0;
2036 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2037 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2038 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2039
2040 return true;
2041}
2042
2043#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2044
2045
2046/*********************************************************************************************************************************
2047* Register Allocator *
2048*********************************************************************************************************************************/
2049
2050/**
2051 * Register parameter indexes (indexed by argument number).
2052 */
2053DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2054{
2055 IEMNATIVE_CALL_ARG0_GREG,
2056 IEMNATIVE_CALL_ARG1_GREG,
2057 IEMNATIVE_CALL_ARG2_GREG,
2058 IEMNATIVE_CALL_ARG3_GREG,
2059#if defined(IEMNATIVE_CALL_ARG4_GREG)
2060 IEMNATIVE_CALL_ARG4_GREG,
2061# if defined(IEMNATIVE_CALL_ARG5_GREG)
2062 IEMNATIVE_CALL_ARG5_GREG,
2063# if defined(IEMNATIVE_CALL_ARG6_GREG)
2064 IEMNATIVE_CALL_ARG6_GREG,
2065# if defined(IEMNATIVE_CALL_ARG7_GREG)
2066 IEMNATIVE_CALL_ARG7_GREG,
2067# endif
2068# endif
2069# endif
2070#endif
2071};
2072
2073/**
2074 * Call register masks indexed by argument count.
2075 */
2076DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2077{
2078 0,
2079 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2080 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2081 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2082 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2083 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2084#if defined(IEMNATIVE_CALL_ARG4_GREG)
2085 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2086 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2087# if defined(IEMNATIVE_CALL_ARG5_GREG)
2088 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2089 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2090# if defined(IEMNATIVE_CALL_ARG6_GREG)
2091 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2092 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2093 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2094# if defined(IEMNATIVE_CALL_ARG7_GREG)
2095 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2096 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2097 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2098# endif
2099# endif
2100# endif
2101#endif
2102};
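

/*
 * Illustrative sketch (kept out of the build): the relationship between the
 * two tables above.  The mask for N arguments is simply the OR of the first N
 * entries of g_aidxIemNativeCallRegs, which is what this self-check spells out.
 */
#if 0
static void iemNativeExampleCheckCallRegTables(void)
{
    for (unsigned cArgs = 0; cArgs < RT_ELEMENTS(g_afIemNativeCallRegs); cArgs++)
    {
        uint32_t fExpected = 0;
        for (unsigned i = 0; i < cArgs; i++)
            fExpected |= RT_BIT_32(g_aidxIemNativeCallRegs[i]);
        Assert(g_afIemNativeCallRegs[cArgs] == fExpected);
    }
}
#endif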
2103
2104/**
2105 * Info about shadowed guest register values.
2106 * @see IEMNATIVEGSTREG
2107 */
2108static struct
2109{
2110 /** Offset in VMCPU. */
2111 uint32_t off;
2112 /** The field size. */
2113 uint8_t cb;
2114 /** Name (for logging). */
2115 const char *pszName;
2116} const g_aGstShadowInfo[] =
2117{
2118#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2119 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2120 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2121 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2122 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2123 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2124 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2125 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2126 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2127 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2128 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2129 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2130 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2131 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2132 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2133 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2134 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2135 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2136 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2137 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2138 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2139 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2140 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2141 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2142 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2143 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2144 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2145 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2146 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2147 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2148 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2149 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2150 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2151 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2152 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2153 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2154 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2155 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2156 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2157 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2158 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2159 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2160 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2161#undef CPUMCTX_OFF_AND_SIZE
2162};
2163AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
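

/*
 * Illustrative sketch (kept out of the build): what the off/cb fields in
 * g_aGstShadowInfo describe.  The recompiler emits native loads/stores using
 * these offsets; reading the value directly on the host as done here is only
 * meant to show where the data lives (and assumes a little endian host, which
 * holds for both AMD64 and ARM64).
 */
#if 0
static uint64_t iemNativeExampleReadGstRegFromCtx(PVMCPU pVCpu, IEMNATIVEGSTREG enmGstReg)
{
    Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo) && g_aGstShadowInfo[enmGstReg].cb != 0);
    uint64_t uValue = 0;
    memcpy(&uValue, (uint8_t const *)pVCpu + g_aGstShadowInfo[enmGstReg].off, g_aGstShadowInfo[enmGstReg].cb);
    return uValue; /* e.g. kIemNativeGstReg_Pc yields CPUMCTX::rip */
}
#endif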
2164
2165
2166/** Host CPU general purpose register names. */
2167const char * const g_apszIemNativeHstRegNames[] =
2168{
2169#ifdef RT_ARCH_AMD64
2170 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2171#elif defined(RT_ARCH_ARM64)
2172 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2173 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2174#else
2175# error "port me"
2176#endif
2177};
2178
2179
2180DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
2181 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
2182{
2183 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
2184
2185 pReNative->aHstRegs[idxReg].enmWhat = enmWhat;
2186 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2187 pReNative->aHstRegs[idxReg].idxVar = idxVar;
2188 return (uint8_t)idxReg;
2189}
2190
2191
2192/**
2193 * Locate a register, possibly freeing one up.
2194 *
2195 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2196 * failed.
2197 */
2198static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
2199{
2200 uint32_t fRegMask = fAllowVolatile
2201 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
2202 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
2203
2204 /*
2205     * Try a free register that is only shadowing a guest register.
2206 */
2207 uint32_t fRegs = ~pReNative->bmHstRegs & fRegMask;
2208 if (fRegs)
2209 {
2210 /** @todo pick better here: */
2211 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2212
2213 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2214 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2215 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2216 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2217
2218 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2219 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2220 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2221 return idxReg;
2222 }
2223
2224 /*
2225 * Try free up a variable that's in a register.
2226 *
2227 * We do two rounds here, first evacuating variables that don't need to be
2228 * saved on the stack, then in the second round moving things to the stack.
2229 */
2230 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2231 {
2232 uint32_t fVars = pReNative->bmVars;
2233 while (fVars)
2234 {
2235 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2236 uint8_t const idxReg = pReNative->aVars[idxVar].idxReg;
2237 if ( idxReg < RT_ELEMENTS(pReNative->aHstRegs)
2238 && (RT_BIT_32(idxReg) & fRegMask)
2239 && ( iLoop == 0
2240 ? pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2241 : pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2242 {
2243 Assert(pReNative->bmHstRegs & RT_BIT_32(idxReg));
2244 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxReg].fGstRegShadows)
2245 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2246 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2247 == RT_BOOL(pReNative->aHstRegs[idxReg].fGstRegShadows));
2248
2249 if (pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2250 {
2251 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
2252 uint32_t off = *poff;
2253 *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
2254 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2255 - IEMNATIVE_FP_OFF_STACK_VARS,
2256 idxReg);
2257 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2258 }
2259
2260 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2261 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2262 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2263 pReNative->bmHstRegs &= ~RT_BIT_32(idxReg);
2264 return idxReg;
2265 }
2266 fVars &= ~RT_BIT_32(idxVar);
2267 }
2268 }
2269
2270 AssertFailedReturn(UINT8_MAX);
2271}
2272
2273
2274/**
2275 * Moves a variable to a different register or spills it onto the stack.
2276 *
2277 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2278 * kinds can easily be recreated if needed later.
2279 *
2280 * @returns The new code buffer position, UINT32_MAX on failure.
2281 * @param pReNative The native recompile state.
2282 * @param off The current code buffer position.
2283 * @param idxVar The variable index.
2284 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2285 * call-volatile registers.
2286 */
2287static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2288 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2289{
2290 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2291 Assert(pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2292
2293 uint8_t const idxRegOld = pReNative->aVars[idxVar].idxReg;
2294 Assert(idxRegOld < RT_ELEMENTS(pReNative->aHstRegs));
2295 Assert(pReNative->bmHstRegs & RT_BIT_32(idxRegOld));
2296 Assert(pReNative->aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2297 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegOld].fGstRegShadows)
2298 == pReNative->aHstRegs[idxRegOld].fGstRegShadows);
2299 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2300 == RT_BOOL(pReNative->aHstRegs[idxRegOld].fGstRegShadows));
2301
2302
2303 /** @todo Add statistics on this.*/
2304 /** @todo Implement basic variable liveness analysis (python) so variables
2305 *        can be freed immediately once they are no longer used.  As it stands, we
2306 *        risk wasting registers and stack space on dead variables. */
2307
2308 /*
2309 * First try move it to a different register, as that's cheaper.
2310 */
2311 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2312 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2313 uint32_t fRegs = ~pReNative->bmHstRegs & ~fForbiddenRegs;
2314 if (fRegs)
2315 {
2316 /* Avoid using shadow registers, if possible. */
2317 if (fRegs & ~pReNative->bmHstRegsWithGstShadow)
2318 fRegs &= ~pReNative->bmHstRegsWithGstShadow;
2319 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2320
2321 uint64_t fGstRegShadows = pReNative->aHstRegs[idxRegOld].fGstRegShadows;
2322 pReNative->aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2323 pReNative->aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2324 pReNative->aHstRegs[idxRegNew].idxVar = idxVar;
2325 if (fGstRegShadows)
2326 {
2327 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2328 while (fGstRegShadows)
2329 {
2330 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
2331 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2332
2333 Assert(pReNative->aidxGstRegShadows[idxGstReg] == idxRegOld);
2334 pReNative->aidxGstRegShadows[idxGstReg] = idxRegNew;
2335 }
2336 }
2337
2338 pReNative->aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2339 pReNative->bmHstRegs |= RT_BIT_32(idxRegNew);
2340 }
2341 /*
2342 * Otherwise we must spill the register onto the stack.
2343 */
2344 else
2345 {
2346 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
2347 off = iemNativeEmitStoreGprByBp(pReNative, off,
2348 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t) - IEMNATIVE_FP_OFF_STACK_VARS,
2349 idxRegOld);
2350 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2351
2352 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2353 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxRegOld].fGstRegShadows;
2354 }
2355
2356 pReNative->bmHstRegs &= ~RT_BIT_32(idxRegOld);
2357 pReNative->aHstRegs[idxRegOld].fGstRegShadows = 0;
2358 return off;
2359}
2360
2361
2362/**
2363 * Allocates a temporary host general purpose register.
2364 *
2365 * This may emit code to save register content onto the stack in order to free
2366 * up a register.
2367 *
2368 * @returns The host register number, UINT8_MAX on failure.
2369 * @param pReNative The native recompile state.
2370 * @param poff Pointer to the variable with the code buffer position.
2371 *                          This will be updated if we need to move a variable from
2372 * register to stack in order to satisfy the request.
2373 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2374 * registers (@c true, default) or the other way around
2375 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2376 */
2377DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2378 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2379{
2380 /*
2381 * Try find a completely unused register, preferably a call-volatile one.
2382 */
2383 uint8_t idxReg;
2384 uint32_t fRegs = ~pReNative->bmHstRegs
2385 & ~pReNative->bmHstRegsWithGstShadow
2386 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2387 if (fRegs)
2388 {
2389 if (fPreferVolatile)
2390 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2391 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2392 else
2393 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2394 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2395 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2396 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2397 }
2398 else
2399 {
2400 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2401 AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
2402 }
2403 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2404}
2405
2406
2407/**
2408 * Allocates a temporary register for loading an immediate value into.
2409 *
2410 * This will emit code to load the immediate, unless there happens to be an
2411 * unused register with the value already loaded.
2412 *
2413 * The caller will not modify the returned register, it must be considered
2414 * read-only. Free using iemNativeRegFreeTmpImm.
2415 *
2416 * @returns The host register number, UINT8_MAX on failure.
2417 * @param pReNative The native recompile state.
2418 * @param poff Pointer to the variable with the code buffer position.
2419 * @param uImm The immediate value that the register must hold upon
2420 * return.
2421 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2422 * registers (@c true, default) or the other way around
2423 * (@c false).
2424 *
2425 * @note Reusing immediate values has not been implemented yet.
2426 */
2427DECLHIDDEN(uint8_t) iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm,
2428 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2429{
2430 uint8_t idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2431 if (idxReg < RT_ELEMENTS(pReNative->aHstRegs))
2432 {
2433 uint32_t off = *poff;
2434 *poff = off = iemNativeEmitLoadGprImm64(pReNative, off, idxReg, uImm);
2435 AssertReturnStmt(off != UINT32_MAX, iemNativeRegFreeTmp(pReNative, idxReg), UINT8_MAX);
2436 }
2437 return idxReg;
2438}
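

/*
 * Illustrative sketch (kept out of the build): typical temporary register
 * usage.  A scratch register and an immediate holding register are allocated,
 * used by some emitted code (not shown), and released again; neither release
 * flushes any guest shadow copies.  The calls rely on the default
 * fPreferVolatile argument declared in the header.
 */
#if 0
static uint32_t iemNativeExampleUseTmpRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    AssertReturn(idxRegTmp != UINT8_MAX, UINT32_MAX);

    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfeedface));
    AssertReturnStmt(idxRegImm != UINT8_MAX, iemNativeRegFreeTmp(pReNative, idxRegTmp), UINT32_MAX);

    /* ... emit code using idxRegTmp and the read-only idxRegImm here ... */

    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    return off;
}
#endif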
2439
2440
2441/**
2442 * Marks host register @a idxHstReg as containing a shadow copy of guest
2443 * register @a enmGstReg.
2444 *
2445 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2446 * host register before calling.
2447 */
2448DECL_FORCE_INLINE(void)
2449iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
2450{
2451 Assert(!(pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg)));
2452
2453 pReNative->aidxGstRegShadows[enmGstReg] = idxHstReg;
2454 pReNative->aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2455 pReNative->bmGstRegShadows |= RT_BIT_64(enmGstReg);
2456 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2457#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2458 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2459 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
2460#else
2461 RT_NOREF(off);
2462#endif
2463}
2464
2465
2466/**
2467 * Clear any guest register shadow claims from @a idxHstReg.
2468 *
2469 * The register does not need to be shadowing any guest registers.
2470 */
2471DECL_FORCE_INLINE(void)
2472iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
2473{
2474 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2475 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2476 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2477 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2478
2479#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2480 uint64_t fGstRegs = pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2481 if (fGstRegs)
2482 {
2483 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2484 while (fGstRegs)
2485 {
2486 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2487 fGstRegs &= ~RT_BIT_64(iGstReg);
2488 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
2489 }
2490 }
2491#else
2492 RT_NOREF(off);
2493#endif
2494
2495 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2496 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2497 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2498}
2499
2500
2501/**
2502 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2503 * to @a idxRegTo.
2504 */
2505DECL_FORCE_INLINE(void)
2506iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
2507 IEMNATIVEGSTREG enmGstReg, uint32_t off)
2508{
2509 Assert(pReNative->aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2510 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
2511 == pReNative->aHstRegs[idxRegFrom].fGstRegShadows);
2512 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2513 == RT_BOOL(pReNative->aHstRegs[idxRegFrom].fGstRegShadows));
2514
2515 pReNative->aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2516 pReNative->aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2517 pReNative->aidxGstRegShadows[enmGstReg] = idxRegTo;
2518#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2519 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2520 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
2521#else
2522 RT_NOREF(off);
2523#endif
2524}
2525
2526
2527
2528/**
2529 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
2530 */
2531typedef enum IEMNATIVEGSTREGUSE
2532{
2533 /** The usage is read-only, the register holding the guest register
2534 * shadow copy will not be modified by the caller. */
2535 kIemNativeGstRegUse_ReadOnly = 0,
2536 /** The caller will update the guest register (think: PC += cbInstr).
2537 * The guest shadow copy will follow the returned register. */
2538 kIemNativeGstRegUse_ForUpdate,
2539 /** The caller will use the guest register value as input in a calculation
2540 * and the host register will be modified.
2541 * This means that the returned host register will not be marked as a shadow
2542 * copy of the guest register. */
2543 kIemNativeGstRegUse_Calculation
2544} IEMNATIVEGSTREGUSE;
2545
2546/**
2547 * Allocates a temporary host general purpose register for updating a guest
2548 * register value.
2549 *
2550 * Since we may already have a register holding the guest register value,
2551 * code will only be emitted to load it when that's not the case. Code may also
2552 * be emitted if we have to free up a register to satisfy the request.
2553 *
2554 * @returns The host register number, UINT8_MAX on failure.
2555 * @param pReNative The native recompile state.
2556 * @param poff Pointer to the variable with the code buffer
2557 *                          position. This will be updated if we need to move a
2558 * variable from register to stack in order to satisfy
2559 * the request.
2560 * @param   enmGstReg       The guest register that is to be updated.
2561 * @param enmIntendedUse How the caller will be using the host register.
2562 */
2563DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2564 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
2565{
2566 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2567#ifdef LOG_ENABLED
2568 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2569#endif
2570
2571 /*
2572 * First check if the guest register value is already in a host register.
2573 */
2574 if (pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg))
2575 {
2576 uint8_t idxReg = pReNative->aidxGstRegShadows[enmGstReg];
2577 Assert(idxReg < RT_ELEMENTS(pReNative->aHstRegs));
2578 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2579 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2580
2581 if (!(pReNative->bmHstRegs & RT_BIT_32(idxReg)))
2582 {
2583 /*
2584 * If the register will trash the guest shadow copy, try find a
2585 * completely unused register we can use instead. If that fails,
2586 * we need to disassociate the host reg from the guest reg.
2587 */
2588 /** @todo would be nice to know if preserving the register is in any way helpful. */
2589 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2590 && ( ~pReNative->bmHstRegs
2591 & ~pReNative->bmHstRegsWithGstShadow
2592 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2593 {
2594 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2595 Assert(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs));
2596
2597 uint32_t off = *poff;
2598 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2599 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2600
2601 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2602 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2603 g_apszIemNativeHstRegNames[idxRegNew]));
2604 idxReg = idxRegNew;
2605 }
2606 else
2607 {
2608 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
2609 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2610 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2611 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2612 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2613 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2614 else
2615 {
2616 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
2617 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2618 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2619 }
2620 }
2621 }
2622 else
2623 {
2624 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2625 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2626
2627 /*
2628 * Allocate a new register, copy the value and, if updating, the
2629 * guest shadow copy assignment to the new register.
2630 */
2631 /** @todo share register for readonly access. */
2632 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2633 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2634
2635 uint32_t off = *poff;
2636 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2637 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2638
2639 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2640 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2641 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2642 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2643 else
2644 {
2645 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
2646 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2647 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2648 g_apszIemNativeHstRegNames[idxRegNew]));
2649 }
2650 idxReg = idxRegNew;
2651 }
2652
2653#ifdef VBOX_STRICT
2654 /* Strict builds: Check that the value is correct. */
2655 uint32_t off = *poff;
2656 *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
2657 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2658#endif
2659
2660 return idxReg;
2661 }
2662
2663     * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2664 * Allocate a new register, load it with the guest value and designate it as a copy of the
2665 */
2666 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2667 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2668
2669 uint32_t off = *poff;
2670 *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
2671 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2672
2673 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2674 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, off);
2675 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2676 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2677
2678 return idxRegNew;
2679}
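

/*
 * Illustrative sketch (kept out of the build): using the 'ForUpdate' mode of
 * iemNativeRegAllocTmpForGuestReg() for the PC += cbInstr case mentioned in
 * the IEMNATIVEGSTREGUSE docs.  The actual add and the write-back of RIP are
 * only hinted at; the point is that the guest shadow association follows the
 * returned register, so after the update the shadow copy is the advanced PC.
 */
#if 0
static uint32_t iemNativeExampleAdvancePc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
{
    uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    AssertReturn(idxRegPc != UINT8_MAX, UINT32_MAX);

    /* ... emit 'idxRegPc += cbInstr' and the store back to CPUMCTX::rip here ... */
    RT_NOREF(cbInstr);

    iemNativeRegFreeTmp(pReNative, idxRegPc); /* the shadow association stays intact */
    return off;
}
#endif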
2680
2681
2682DECLHIDDEN(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
2683
2684
2685/**
2686 * Allocates argument registers for a function call.
2687 *
2688 * @returns New code buffer offset on success, UINT32_MAX on failure.
2689 * @param pReNative The native recompile state.
2690 * @param off The current code buffer offset.
2691 * @param cArgs The number of arguments the function call takes.
2692 */
2693DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
2694{
2695    AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, UINT32_MAX);
2696 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2697 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2698
2699 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2700 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2701 else if (cArgs == 0)
2702        return off;
2703
2704 /*
2705     * Do we get lucky and all registers are free and not shadowing anything?
2706 */
2707 if (((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2708 for (uint32_t i = 0; i < cArgs; i++)
2709 {
2710 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2711 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2712 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2713 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2714 }
2715 /*
2716 * Okay, not lucky so we have to free up the registers.
2717 */
2718 else
2719 for (uint32_t i = 0; i < cArgs; i++)
2720 {
2721 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2722 if (pReNative->bmHstRegs & RT_BIT_32(idxReg))
2723 {
2724 switch (pReNative->aHstRegs[idxReg].enmWhat)
2725 {
2726 case kIemNativeWhat_Var:
2727 {
2728 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2729                        AssertReturn(idxVar < RT_ELEMENTS(pReNative->aVars), UINT32_MAX);
2730 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2731 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2732
2733 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2734 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2735 else
2736 {
2737 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2738                            AssertReturn(off != UINT32_MAX, UINT32_MAX);
2739 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2740 }
2741 break;
2742 }
2743
2744 case kIemNativeWhat_Tmp:
2745 case kIemNativeWhat_Arg:
2746 case kIemNativeWhat_rc:
2747                        AssertFailedReturn(UINT32_MAX);
2748                    default:
2749                        AssertFailedReturn(UINT32_MAX);
2750 }
2751
2752 }
2753 if (pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2754 {
2755 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2756 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2757 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2758                pReNative->bmGstRegShadows            &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2759                pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
                pReNative->bmHstRegsWithGstShadow     &= ~RT_BIT_32(idxReg);
2760 }
2761 else
2762 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2763 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2764 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2765 }
2766 pReNative->bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2767    return off;
2768}
2769
2770
2771DECLHIDDEN(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
2772
2773
2774#if 0
2775/**
2776 * Frees a register assignment of any type.
2777 *
2778 * @param pReNative The native recompile state.
2779 * @param idxHstReg The register to free.
2780 *
2781 * @note Does not update variables.
2782 */
2783DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2784{
2785 Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2786 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2787 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2788 Assert( pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2789 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2790 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2791 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2792 Assert( pReNative->aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2793 || pReNative->aVars[pReNative->aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2794 || (pReNative->bmVars & RT_BIT_32(pReNative->aHstRegs[idxHstReg].idxVar)));
2795 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2796 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2797 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2798 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2799
2800 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2801 /* no flushing, right:
2802 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2803 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2804 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2805 */
2806}
2807#endif
2808
2809
2810/**
2811 * Frees a temporary register.
2812 *
2813 * Any shadow copies of guest registers assigned to the host register will not
2814 * be flushed by this operation.
2815 */
2816DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2817{
2818 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2819 Assert(pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2820 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2821 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2822 g_apszIemNativeHstRegNames[idxHstReg], pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2823}
2824
2825
2826/**
2827 * Frees a temporary immediate register.
2828 *
2829 * It is assumed that the caller has not modified the register, so it still holds
2830 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
2831 */
2832DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2833{
2834 iemNativeRegFreeTmp(pReNative, idxHstReg);
2835}
2836
2837
2838/**
2839 * Called right before emitting a call instruction to move anything important
2840 * out of call-volatile registers, free and flush the call-volatile registers,
2841 * optionally freeing argument variables.
2842 *
2843 * @returns New code buffer offset, UINT32_MAX on failure.
2844 * @param pReNative The native recompile state.
2845 * @param off The code buffer offset.
2846 * @param cArgs The number of arguments the function call takes.
2847 *                      It is presumed that the host registers backing these have
2848 *                      already been allocated as argument registers and won't
2849 *                      need moving, just freeing.
2850 * @param fFreeArgVars Whether to free argument variables for the call.
2851 */
2852DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2853 uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
2854{
2855 /*
2856 * Free argument variables first (simplified).
2857 */
2858 AssertReturn(cArgs <= RT_ELEMENTS(pReNative->aidxArgVars), UINT32_MAX);
2859 if (fFreeArgVars && cArgs > 0)
2860 {
2861 for (uint32_t i = 0; i < cArgs; i++)
2862 {
2863 uint8_t idxVar = pReNative->aidxArgVars[i];
2864 if (idxVar < RT_ELEMENTS(pReNative->aVars))
2865 {
2866 pReNative->aidxArgVars[i] = UINT8_MAX;
2867 pReNative->bmVars &= ~RT_BIT_32(idxVar);
2868 Assert( pReNative->aVars[idxVar].idxReg
2869 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2870 }
2871 }
2872 Assert(pReNative->u64ArgVars == UINT64_MAX);
2873 }
2874
2875 /*
2876 * Move anything important out of volatile registers.
2877 */
2878 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2879 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2880 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2881#ifdef IEMNATIVE_REG_FIXED_TMP0
2882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2883#endif
2884 & ~g_afIemNativeCallRegs[cArgs];
2885
2886 fRegsToMove &= pReNative->bmHstRegs;
2887 if (!fRegsToMove)
2888 { /* likely */ }
2889 else
2890 while (fRegsToMove != 0)
2891 {
2892 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2893 fRegsToMove &= ~RT_BIT_32(idxReg);
2894
2895 switch (pReNative->aHstRegs[idxReg].enmWhat)
2896 {
2897 case kIemNativeWhat_Var:
2898 {
2899 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2900 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2901 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2902 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2903 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2904 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2905 else
2906 {
2907 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2908 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2909 }
2910 continue;
2911 }
2912
2913 case kIemNativeWhat_Arg:
2914 AssertMsgFailed(("What?!?: %u\n", idxReg));
2915 continue;
2916
2917 case kIemNativeWhat_rc:
2918 case kIemNativeWhat_Tmp:
2919 AssertMsgFailed(("Missing free: %u\n", idxReg));
2920 continue;
2921
2922 case kIemNativeWhat_FixedTmp:
2923 case kIemNativeWhat_pVCpuFixed:
2924 case kIemNativeWhat_pCtxFixed:
2925 case kIemNativeWhat_FixedReserved:
2926 case kIemNativeWhat_Invalid:
2927 case kIemNativeWhat_End:
2928 AssertFailedReturn(UINT32_MAX);
2929 }
2930 AssertFailedReturn(UINT32_MAX);
2931 }
2932
2933 /*
2934 * Do the actual freeing.
2935 */
2936 pReNative->bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2937
2938 /* If there are guest register shadows in any call-volatile register, we
2939   have to clear the corresponding guest register masks for each register. */
2940 uint32_t fHstRegsWithGstShadow = pReNative->bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2941 if (fHstRegsWithGstShadow)
2942 {
2943 pReNative->bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2944 do
2945 {
2946 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2947            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2948
2949 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2950 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2951 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2952 } while (fHstRegsWithGstShadow != 0);
2953 }
2954
2955 return off;
2956}
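
/* Illustrative usage sketch (mirrors the call emitters further down): right
   before emitting a helper call the call-volatile registers are freed like this:
        off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
   where '4' is the number of register arguments the callee takes. */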
2957
2958
2959/**
2960 * Flushes a set of guest register shadow copies.
2961 *
2962 * This is usually done after calling a threaded function or a C-implementation
2963 * of an instruction.
2964 *
2965 * @param pReNative The native recompile state.
2966 * @param fGstRegs Set of guest registers to flush.
2967 */
2968DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
2969{
2970 /*
2971 * Reduce the mask by what's currently shadowed
2972 */
2973 fGstRegs &= pReNative->bmGstRegShadows;
2974 if (fGstRegs)
2975 {
2976 pReNative->bmGstRegShadows &= ~fGstRegs;
2977 if (pReNative->bmGstRegShadows)
2978 {
2979 /*
2980 * Partial.
2981 */
2982 do
2983 {
2984 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2985 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
2986 Assert(idxHstReg < RT_ELEMENTS(pReNative->aidxGstRegShadows));
2987 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2988 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2989
2990 uint64_t const fInThisHstReg = (pReNative->aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
2991 fGstRegs &= ~fInThisHstReg;
2992 pReNative->aHstRegs[idxHstReg].fGstRegShadows &= fInThisHstReg;
2993 if (!pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2994 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2995 } while (fGstRegs != 0);
2996 }
2997 else
2998 {
2999 /*
3000 * Clear all.
3001 */
3002 do
3003 {
3004 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3005 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
3006 Assert(idxHstReg < RT_ELEMENTS(pReNative->aidxGstRegShadows));
3007 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
3008 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
3009
3010 fGstRegs &= ~(pReNative->aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
3011 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
3012 } while (fGstRegs != 0);
3013 pReNative->bmHstRegsWithGstShadow = 0;
3014 }
3015 }
3016}
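
/* Usage sketch: the call emitters below pass UINT64_MAX to drop every shadow
   copy before calling out:
        iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
   A selective caller could instead pass e.g. RT_BIT_64(kIemNativeGstReg_EFlags)
   to flush only the EFLAGS shadow (illustrative, not taken from an existing
   caller). */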
3017
3018
3019/**
3020 * Flushes any delayed guest register writes.
3021 *
3022 * This must be called prior to calling CImpl functions and any helpers that use
3023 * the guest state (like raising exceptions) and such.
3024 *
3025 * This optimization has not yet been implemented. The first target would be
3026 * RIP updates, since these are the most common ones.
3027 */
3028DECLHIDDEN(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT
3029{
3030 RT_NOREF(pReNative, off);
3031 return off;
3032}
3033
3034
3035/*********************************************************************************************************************************
3036* Code Emitters (larger snippets) *
3037*********************************************************************************************************************************/
3038
3039/**
3040 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
3041 * extending to 64-bit width.
3042 *
3043 * @returns New code buffer offset on success, UINT32_MAX on failure.
3044 * @param   pReNative   The native recompile state.
3045 * @param off The current code buffer position.
3046 * @param idxHstReg The host register to load the guest register value into.
3047 * @param enmGstReg The guest register to load.
3048 *
3049 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
3050 * that is something the caller needs to do if applicable.
3051 */
3052DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3053 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT
3054{
3055 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
3056 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3057
3058 switch (g_aGstShadowInfo[enmGstReg].cb)
3059 {
3060 case sizeof(uint64_t):
3061 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3062 case sizeof(uint32_t):
3063 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3064 case sizeof(uint16_t):
3065 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3066#if 0 /* not present in the table. */
3067 case sizeof(uint8_t):
3068 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3069#endif
3070 default:
3071 AssertFailedReturn(UINT32_MAX);
3072 }
3073}
3074
3075
3076#ifdef VBOX_STRICT
3077/**
3078 * Emits code that checks that the content of register @a idxReg is the same
3079 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
3080 * instruction if that's not the case.
3081 *
3082 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
3083 * Trashes EFLAGS on AMD64.
3084 */
3085static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3086 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT
3087{
3088# ifdef RT_ARCH_AMD64
3089 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3090 AssertReturn(pbCodeBuf, UINT32_MAX);
3091
3092 /* cmp reg, [mem] */
3093 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
3094 {
3095 if (idxReg >= 8)
3096 pbCodeBuf[off++] = X86_OP_REX_R;
3097 pbCodeBuf[off++] = 0x38;
3098 }
3099 else
3100 {
3101 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
3102 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
3103 else
3104 {
3105 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
3106 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3107 else
3108 AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
3109 if (idxReg >= 8)
3110 pbCodeBuf[off++] = X86_OP_REX_R;
3111 }
3112 pbCodeBuf[off++] = 0x39;
3113 }
3114 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
3115
3116 /* je/jz +1 */
3117 pbCodeBuf[off++] = 0x74;
3118 pbCodeBuf[off++] = 0x01;
3119
3120 /* int3 */
3121 pbCodeBuf[off++] = 0xcc;
3122
3123 /* For values smaller than the register size, we must check that the rest
3124 of the register is all zeros. */
3125 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
3126 {
3127 /* test reg64, imm32 */
3128 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3129 pbCodeBuf[off++] = 0xf7;
3130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3131 pbCodeBuf[off++] = 0;
3132 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
3133 pbCodeBuf[off++] = 0xff;
3134 pbCodeBuf[off++] = 0xff;
3135
3136 /* je/jz +1 */
3137 pbCodeBuf[off++] = 0x74;
3138 pbCodeBuf[off++] = 0x01;
3139
3140 /* int3 */
3141 pbCodeBuf[off++] = 0xcc;
3142 }
3143 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
3144 {
3145 /* rol reg64, 32 */
3146 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3147 pbCodeBuf[off++] = 0xc1;
3148 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3149 pbCodeBuf[off++] = 32;
3150
3151 /* test reg32, ffffffffh */
3152 if (idxReg >= 8)
3153 pbCodeBuf[off++] = X86_OP_REX_B;
3154 pbCodeBuf[off++] = 0xf7;
3155 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3156 pbCodeBuf[off++] = 0xff;
3157 pbCodeBuf[off++] = 0xff;
3158 pbCodeBuf[off++] = 0xff;
3159 pbCodeBuf[off++] = 0xff;
3160
3161 /* je/jz +1 */
3162 pbCodeBuf[off++] = 0x74;
3163 pbCodeBuf[off++] = 0x01;
3164
3165 /* int3 */
3166 pbCodeBuf[off++] = 0xcc;
3167
3168 /* rol reg64, 32 */
3169 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
3170 pbCodeBuf[off++] = 0xc1;
3171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
3172 pbCodeBuf[off++] = 32;
3173 }
3174
3175# elif defined(RT_ARCH_ARM64)
3176 /* mov TMP0, [gstreg] */
3177 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
3178
3179 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3180 AssertReturn(pu32CodeBuf, UINT32_MAX);
3181 /* sub tmp0, tmp0, idxReg */
3182 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
3183 /* cbz tmp0, +1 */
3184 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
3185 /* brk #0x1000+enmGstReg */
3186 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
3187
3188# else
3189# error "Port me!"
3190# endif
3191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3192 return off;
3193}
3194#endif /* VBOX_STRICT */
3195
3196
3197
3198/**
3199 * Emits code for checking the return code of a call and rcPassUp, returning
3200 * from the code if either is non-zero.
3201 */
3202DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3203 uint8_t idxInstr) RT_NOEXCEPT
3204{
3205#ifdef RT_ARCH_AMD64
3206 /*
3207 * AMD64: eax = call status code.
3208 */
3209
3210 /* edx = rcPassUp */
3211 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3212# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3213 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
3214# endif
3215
3216 /* edx = eax | rcPassUp */
3217 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3218 AssertReturn(pbCodeBuf, UINT32_MAX);
3219 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
3220 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
3221 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3222
3223 /* Jump to non-zero status return path. */
3224 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3225
3226 /* done. */
3227
3228#elif RT_ARCH_ARM64
3229 /*
3230 * ARM64: w0 = call status code.
3231 */
3232 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3233 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3234
3235 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3236 AssertReturn(pu32CodeBuf, UINT32_MAX);
3237
3238 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3239
3240 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3241 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
3242 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
3243 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
3244
3245#else
3246# error "port me"
3247#endif
3248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3249 return off;
3250}
3251
3252
3253/**
3254 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3255 * raising a \#GP(0) if it isn't.
3256 *
3257 * @returns New code buffer offset, UINT32_MAX on failure.
3258 * @param pReNative The native recompile state.
3259 * @param off The code buffer offset.
3260 * @param idxAddrReg The host register with the address to check.
3261 * @param idxInstr The current instruction.
3262 */
3263DECLHIDDEN(uint32_t) iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3264 uint8_t idxAddrReg, uint8_t idxInstr)
3265{
3266 RT_NOREF(idxInstr);
3267
3268 /*
3269 * Make sure we don't have any outstanding guest register writes as we may
3270     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3271 */
3272 off = iemNativeRegFlushPendingWrites(pReNative, off);
3273
3274#ifdef RT_ARCH_AMD64
3275 /*
3276 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3277 * return raisexcpt();
3278     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3279 */
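    /* Worked example of the check above: for the canonical address
       0x00007fffffffffff the high dword is 0x00007fff, adding 0x8000 gives
       0x0000ffff and shifting right by 16 yields 0, so no exception.  For the
       non-canonical 0x0000800000000000 the high dword is 0x00008000, adding
       0x8000 gives 0x00010000 and the shift yields 1, so we raise #GP(0).
       The 32-bit add wraps high dwords 0xffff8000..0xffffffff back below
       0x10000, so the upper canonical half passes as well. */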
3280 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3281 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3282
3283 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3284 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3285 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3286 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3287
3288# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3289 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3290# else
3291 uint32_t const offFixup = off;
3292 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3293 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3294 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3295 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3296# endif
3297
3298 iemNativeRegFreeTmp(pReNative, iTmpReg);
3299
3300#elif defined(RT_ARCH_ARM64)
3301 /*
3302 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3303 * return raisexcpt();
3304 * ----
3305 * mov x1, 0x800000000000
3306 * add x1, x0, x1
3307 * cmp xzr, x1, lsr 48
3308 * and either:
3309 * b.ne .Lraisexcpt
3310 * or:
3311 * b.eq .Lnoexcept
3312 * movz x1, #instruction-number
3313 * b .Lraisexcpt
3314 * .Lnoexcept:
3315 */
3316 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3317 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3318
3319 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3320 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3321    off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3322
3323# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3324 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3325# else
3326 uint32_t const offFixup = off;
3327 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3328 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3329 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3330 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3331# endif
3332
3333 iemNativeRegFreeTmp(pReNative, iTmpReg);
3334
3335#else
3336# error "Port me"
3337#endif
3338 return off;
3339}
3340
3341
3342/**
3343 * Emits code to check if the content of @a idxAddrReg is within the limit of
3344 * idxSegReg, raising a \#GP(0) if it isn't.
3345 *
3346 * @returns New code buffer offset, UINT32_MAX on failure.
3347 * @param pReNative The native recompile state.
3348 * @param off The code buffer offset.
3349 * @param idxAddrReg The host register (32-bit) with the address to
3350 * check.
3351 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3352 * against.
3353 * @param idxInstr The current instruction.
3354 */
3355DECLHIDDEN(uint32_t) iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3356 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3357{
3358 /*
3359 * Make sure we don't have any outstanding guest register writes as we may
3360     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3361 */
3362 off = iemNativeRegFlushPendingWrites(pReNative, off);
3363
3364 /** @todo implement expand down/whatnot checking */
3365 AssertReturn(idxSegReg == X86_SREG_CS, UINT32_MAX);
3366
3367 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3368 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3369 kIemNativeGstRegUse_ForUpdate);
3370 AssertReturn(iTmpLimReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3371
3372 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3373
3374#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3375 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3376 RT_NOREF(idxInstr);
3377#else
3378 uint32_t const offFixup = off;
3379 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3380 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3381 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3382 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3383#endif
3384
3385 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3386 return off;
3387}
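
/* On AMD64 the check above boils down to something like (sketch only, the
   actual registers are picked by the allocator):
        cmp  <addr32>, <cs.limit>
        ja   .RaiseGp0              ; unsigned above the limit -> #GP(0)
   with the limit fetched through the guest register shadowing machinery. */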
3388
3389
3390/**
3391 * Emits a call to a CImpl function or something similar.
3392 */
3393static uint32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3394 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3395 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3396{
3397 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3398 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3399
3400 /*
3401 * Load the parameters.
3402 */
3403#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3404    /* Special case: the hidden VBOXSTRICTRC pointer. */
3405 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3406 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3407 if (cAddParams > 0)
3408 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3409 if (cAddParams > 1)
3410 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3411 if (cAddParams > 2)
3412 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3413 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3414
3415#else
3416 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3417 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3418 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3419 if (cAddParams > 0)
3420 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3421 if (cAddParams > 1)
3422 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3423 if (cAddParams > 2)
3424# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3426# else
3427 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3428# endif
3429#endif
3430 AssertReturn(off != UINT32_MAX, off);
3431
3432 /*
3433 * Make the call.
3434 */
3435 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3436
3437#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3438 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3439#endif
3440
3441 /*
3442 * Check the status code.
3443 */
3444 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3445}
3446
3447
3448/**
3449 * Emits a call to a threaded worker function.
3450 */
3451static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3452{
3453 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3454 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3455 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3456
3457#ifdef RT_ARCH_AMD64
3458 /* Load the parameters and emit the call. */
3459# ifdef RT_OS_WINDOWS
3460# ifndef VBOXSTRICTRC_STRICT_ENABLED
3461 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3462 if (cParams > 0)
3463 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3464 if (cParams > 1)
3465 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3466 if (cParams > 2)
3467 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3468# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3469 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3470 if (cParams > 0)
3471 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3472 if (cParams > 1)
3473 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3474 if (cParams > 2)
3475 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3476 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3477 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3478# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3479# else
3480 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3481 if (cParams > 0)
3482 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3483 if (cParams > 1)
3484 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3485 if (cParams > 2)
3486 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3487# endif
3488
3489 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3490
3491# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3492 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3493# endif
3494
3495#elif RT_ARCH_ARM64
3496 /*
3497 * ARM64:
3498 */
3499 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3500 if (cParams > 0)
3501 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3502 if (cParams > 1)
3503 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3504 if (cParams > 2)
3505 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3506
3507 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3508
3509#else
3510# error "port me"
3511#endif
3512
3513 /*
3514 * Check the status code.
3515 */
3516 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3517 AssertReturn(off != UINT32_MAX, off);
3518
3519 return off;
3520}
3521
3522
3523/**
3524 * Emits the code at the RaiseGP0 label.
3525 */
3526static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3527{
3528 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
3529 if (idxLabel != UINT32_MAX)
3530 {
3531 iemNativeLabelDefine(pReNative, idxLabel, off);
3532
3533 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3534 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3535#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3536 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3537#endif
3538 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3539
3540 /* jump back to the return sequence. */
3541 off = iemNativeEmitJmpToLabel(pReNative, off, iemNativeLabelFind(pReNative, kIemNativeLabelType_Return));
3542 }
3543 return off;
3544}
3545
3546
3547/**
3548 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3549 */
3550static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3551{
3552 /*
3553 * Generate the rc + rcPassUp fiddling code if needed.
3554 */
3555 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3556 if (idxLabel != UINT32_MAX)
3557 {
3558 iemNativeLabelDefine(pReNative, idxLabel, off);
3559
3560 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3561#ifdef RT_ARCH_AMD64
3562# ifdef RT_OS_WINDOWS
3563# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3565# endif
3566 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3568# else
3569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3570 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3571# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3572 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3573# endif
3574# endif
3575# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3576 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3577# endif
3578
3579#else
3580 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3582 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3583#endif
3584
3585 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3586 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3587 }
3588 return off;
3589}
3590
3591
3592/**
3593 * Emits a standard epilog.
3594 */
3595static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3596{
3597 /*
3598 * Successful return, so clear the return register (eax, w0).
3599 */
3600    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3601 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3602
3603 /*
3604 * Define label for common return point.
3605 */
3606 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
3607 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
3608
3609 /*
3610 * Restore registers and return.
3611 */
3612#ifdef RT_ARCH_AMD64
3613 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3614 AssertReturn(pbCodeBuf, UINT32_MAX);
3615
3616    /* Reposition rsp at the r15 restore point. */
3617 pbCodeBuf[off++] = X86_OP_REX_W;
3618 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3619 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3620 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3621
3622 /* Pop non-volatile registers and return */
3623 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3624 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3625 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3626 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3627 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3628 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3629 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3630 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3631# ifdef RT_OS_WINDOWS
3632 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3633 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3634# endif
3635 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3636 pbCodeBuf[off++] = 0xc9; /* leave */
3637 pbCodeBuf[off++] = 0xc3; /* ret */
3638 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3639
3640#elif RT_ARCH_ARM64
3641 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3642 AssertReturn(pu32CodeBuf, UINT32_MAX);
3643
3644    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3645 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3646 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3647 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3648 IEMNATIVE_FRAME_VAR_SIZE / 8);
3649 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3650 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3651 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3652 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3653 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3654 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3655 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3656 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3657 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3658 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3659 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3660 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3661
3662 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3663 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3664 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3665 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3666
3667 /* retab / ret */
3668# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3669 if (1)
3670 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3671 else
3672# endif
3673 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3674
3675#else
3676# error "port me"
3677#endif
3678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3679
3680 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3681}
3682
3683
3684/**
3685 * Emits a standard prolog.
3686 */
3687static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3688{
3689#ifdef RT_ARCH_AMD64
3690 /*
3691 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3692 * reserving 64 bytes for stack variables plus 4 non-register argument
3693     * slots.  Fixed register assignment: xBX = pVCpu.
3694 *
3695 * Since we always do the same register spilling, we can use the same
3696 * unwind description for all the code.
3697 */
3698 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3699 AssertReturn(pbCodeBuf, UINT32_MAX);
3700 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3701 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3702 pbCodeBuf[off++] = 0x8b;
3703 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3704 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3705 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3706# ifdef RT_OS_WINDOWS
3707 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3708 pbCodeBuf[off++] = 0x8b;
3709 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3710 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3711 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3712# else
3713 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3714 pbCodeBuf[off++] = 0x8b;
3715 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3716# endif
3717 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3718 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3719 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3720 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3721 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3722 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3723 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3724 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3725
3726 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3727 X86_GREG_xSP,
3728 IEMNATIVE_FRAME_ALIGN_SIZE
3729 + IEMNATIVE_FRAME_VAR_SIZE
3730 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3731 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3732 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3733 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3734 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3735
3736#elif RT_ARCH_ARM64
3737 /*
3738 * We set up a stack frame exactly like on x86, only we have to push the
3739     * return address ourselves here.  We save all non-volatile registers.
3740 */
3741 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3742 AssertReturn(pu32CodeBuf, UINT32_MAX);
3743
3744# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as I've been unable
3745                     * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3746                     * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
3747                     * in any way conditional, so just emitting this instruction now and hoping for the best... */
3748 /* pacibsp */
3749 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3750# endif
3751
3752 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3753 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3754 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3755 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3756 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3757 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3758 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3759 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3760 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3761 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3762 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3763 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3764 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3765 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3766 /* Save the BP and LR (ret address) registers at the top of the frame. */
3767 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3768 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3769 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3770 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3771 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3772 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3773
3774 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3775 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3776
3777 /* mov r28, r0 */
3778 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3779 /* mov r27, r1 */
3780 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3781
3782#else
3783# error "port me"
3784#endif
3785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3786 return off;
3787}
3788
3789
3790
3791/*********************************************************************************************************************************
3792* Emitters for IEM_MC_XXXX and the associated IEM_MC_XXXX recompiler definitions *
3793*********************************************************************************************************************************/
3794
3795#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3796 {
3797
3798/** We have to get to the end in recompilation mode, as otherwise we won't
3799 * generate code for all the IEM_MC_IF_XXX branches. */
3800#define IEM_MC_END() \
3801 } return off
3802
3803
3804/*
3805 * Standalone CImpl deferrals.
3806 */
3807
3808#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3809 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3810
3811
3812#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3813 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3814
3815DECLINLINE(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3816 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3817{
3818 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3819}
3820
3821
3822#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3823 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3824
3825DECLINLINE(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3826 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3827{
3828 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3829}
3830
3831
3832#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3833 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3834
3835DECLINLINE(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3836 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1, uint64_t uArg2)
3837{
3838 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3839}
3840
3841
3842/*
3843 * Advancing PC/RIP/EIP/IP.
3844 */
3845
3846#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3847 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3848 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3849
3850/** Same as iemRegAddToRip64AndFinishingNoFlags. */
3851DECLINLINE(uint32_t) iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3852{
3853 /* Allocate a temporary PC register. */
3854 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3855 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3856
3857 /* Perform the addition and store the result. */
3858 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
3859 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3860
3861 /* Free but don't flush the PC register. */
3862 iemNativeRegFreeTmp(pReNative, idxPcReg);
3863
3864 return off;
3865}
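
/* Rough sketch of what the above typically emits on AMD64 when the PC is not
   already shadowed in a host register (illustrative only):
        mov  rax, [rbx + offsetof(VMCPU, cpum.GstCtx.rip)]  ; load guest RIP
        add  rax, a_cbInstr                                 ; advance it
        mov  [rbx + offsetof(VMCPU, cpum.GstCtx.rip)], rax  ; store it back
   where rbx is IEMNATIVE_REG_FIXED_PVMCPU as set up by the prolog above. */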
3866
3867
3868#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3869 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3870 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3871
3872/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3873DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3874{
3875 /* Allocate a temporary PC register. */
3876 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3877 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3878
3879 /* Perform the addition and store the result. */
3880 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3881 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3882
3883 /* Free but don't flush the PC register. */
3884 iemNativeRegFreeTmp(pReNative, idxPcReg);
3885
3886 return off;
3887}
3888
3889
3890#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3891 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3892 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3893
3894/** Same as iemRegAddToIp16AndFinishingNoFlags. */
3895DECLINLINE(uint32_t) iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3896{
3897 /* Allocate a temporary PC register. */
3898 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3899 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3900
3901 /* Perform the addition and store the result. */
3902 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3903 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3904 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3905
3906 /* Free but don't flush the PC register. */
3907 iemNativeRegFreeTmp(pReNative, idxPcReg);
3908
3909 return off;
3910}
3911
3912
3913/*
3914 * Changing PC/RIP/EIP/IP with a relative jump.
3915 */
3916
3917#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
3918 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3919 (a_enmEffOpSize), pCallEntry->idxInstr); \
3920 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3921
3922
3923#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
3924 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3925 IEMMODE_16BIT, pCallEntry->idxInstr); \
3926 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3927
3928#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
3929 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3930 IEMMODE_64BIT, pCallEntry->idxInstr); \
3931 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3932
3933/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
3934 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
3935 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
3936DECLINLINE(uint32_t) iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3937 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3938 uint8_t idxInstr)
3939{
3940 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
3941
3942 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3943 off = iemNativeRegFlushPendingWrites(pReNative, off);
3944
3945 /* Allocate a temporary PC register. */
3946 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3947 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3948
3949 /* Perform the addition. */
3950 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
3951
3952 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
3953 {
3954 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
3955 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
3956 }
3957 else
3958 {
3959 /* Just truncate the result to 16-bit IP. */
3960 Assert(enmEffOpSize == IEMMODE_16BIT);
3961 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3962 }
3963 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3964
3965 /* Free but don't flush the PC register. */
3966 iemNativeRegFreeTmp(pReNative, idxPcReg);
3967
3968 return off;
3969}
3970
3971
3972#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
3973 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3974 (a_enmEffOpSize), pCallEntry->idxInstr); \
3975 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3976
3977#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
3978 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3979 IEMMODE_16BIT, pCallEntry->idxInstr); \
3980 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3981
3982#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
3983 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3984 IEMMODE_32BIT, pCallEntry->idxInstr); \
3985 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3986
3987/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
3988 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
3989 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
3990DECLINLINE(uint32_t) iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3991 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3992 uint8_t idxInstr)
3993{
3994 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
3995
3996 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3997 off = iemNativeRegFlushPendingWrites(pReNative, off);
3998
3999 /* Allocate a temporary PC register. */
4000 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4001 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
4002
4003 /* Perform the addition. */
4004 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4005
4006 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
4007 if (enmEffOpSize == IEMMODE_16BIT)
4008 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4009
4010 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
4011 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4012
4013 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4014
4015 /* Free but don't flush the PC register. */
4016 iemNativeRegFreeTmp(pReNative, idxPcReg);
4017
4018 return off;
4019}
4020
4021
4022#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
4023 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
4024 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4025
4026#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
4027 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
4028 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4029
4030#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
4031 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
4032 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4033
4034/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
4035DECLINLINE(uint32_t) iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4036 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
4037{
4038 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
4039 off = iemNativeRegFlushPendingWrites(pReNative, off);
4040
4041 /* Allocate a temporary PC register. */
4042 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4043 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
4044
4045 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
4046 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
4047 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
4048 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
4049 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4050
4051 /* Free but don't flush the PC register. */
4052 iemNativeRegFreeTmp(pReNative, idxPcReg);
4053
4054 return off;
4055}
4056
4057
4058/*
4059 * Conditionals.
4060 */
4061
4062/**
4063 * Pushes an IEM_MC_IF_XXX onto the condition stack.
4064 *
4065 * @returns Pointer to the condition stack entry on success, NULL on failure
4066 * (too many nestings)
4067 */
4068DECLINLINE(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
4069{
4070 uint32_t const idxStack = pReNative->cCondDepth;
4071 AssertReturn(idxStack < RT_ELEMENTS(pReNative->aCondStack), NULL);
4072
4073 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
4074 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
4075
4076 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
4077 pEntry->fInElse = false;
4078 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
4079 AssertReturn(pEntry->idxLabelElse != UINT32_MAX, NULL);
4080 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
4081 AssertReturn(pEntry->idxLabelEndIf != UINT32_MAX, NULL);
4082
4083 return pEntry;
4084}
4085
4086
4087#define IEM_MC_ELSE() } while (0); \
4088 off = iemNativeEmitElse(pReNative, off); \
4089 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
4090 do {
4091
4092/** Emits code related to IEM_MC_ELSE. */
4093DECLINLINE(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4094{
4095 /* Check sanity and get the conditional stack entry. */
4096 Assert(off != UINT32_MAX);
4097 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4098 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4099 Assert(!pEntry->fInElse);
4100
4101 /* Jump to the endif */
4102 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
4103
4104 /* Define the else label and enter the else part of the condition. */
4105 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4106 pEntry->fInElse = true;
4107
4108 return off;
4109}
4110
4111
4112#define IEM_MC_ENDIF() } while (0); \
4113 off = iemNativeEmitEndIf(pReNative, off); \
4114 AssertReturn(off != UINT32_MAX, UINT32_MAX)
4115
4116/** Emits code related to IEM_MC_ENDIF. */
4117DECLINLINE(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
4118{
4119 /* Check sanity and get the conditional stack entry. */
4120 Assert(off != UINT32_MAX);
4121 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
4122 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
4123
4124 /* Define the endif label and maybe the else one if we're still in the 'if' part. */
4125 if (!pEntry->fInElse)
4126 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
4127 else
4128 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
4129 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
4130
4131    /* Pop the conditional stack. */
4132 pReNative->cCondDepth -= 1;
4133
4134 return off;
4135}
4136
4137
4138#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
4139 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
4140 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
4141 do {
4142
4143/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
4144DECLINLINE(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
4145{
4146 PIEMNATIVECOND pEntry = iemNativeCondPushIf(pReNative);
4147 AssertReturn(pEntry, UINT32_MAX);
4148
4149 /* Get the eflags. */
4150 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4151 kIemNativeGstRegUse_ReadOnly);
4152 AssertReturn(idxEflReg != UINT8_MAX, UINT32_MAX);
4153
4154 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
4155 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
4156
4157 /* Test and jump. */
4158 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
4159
4160 /* Free but don't flush the EFlags register. */
4161 iemNativeRegFreeTmp(pReNative, idxEflReg);
4162
4163 return off;
4164}
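
/* Putting the conditional machinery together, a recompiled MC block would use
   it roughly like this (illustrative sketch, not an actual instruction body):
        IEM_MC_BEGIN(0, 0, 0, 0);
        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
            ...
        } IEM_MC_ELSE() {
            ...
        } IEM_MC_ENDIF();
        IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr);
        IEM_MC_END();
   IEM_MC_IF_EFL_BIT_SET pushes a condition stack entry and emits a test plus
   a jump to the 'else' label, IEM_MC_ELSE emits a jump to the 'endif' label
   and defines the 'else' label, and IEM_MC_ENDIF defines the outstanding
   labels and pops the stack entry again. */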
4165
4166
4167
4168/*********************************************************************************************************************************
4169* Builtin functions *
4170*********************************************************************************************************************************/
4171
4172/**
4173 * Built-in function that calls a C-implementation function taking zero arguments.
4174 */
4175static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
4176{
4177 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
4178 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
4179 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
4180}
4181
4182
4183
4184/*********************************************************************************************************************************
4185* The native code generator functions for each MC block. *
4186*********************************************************************************************************************************/
4187
4188
4189/*
4190 * Include g_apfnIemNativeRecompileFunctions and associated functions.
4191 *
4192 * This should probably live in its own file later, but let's see what the
4193 * compile times turn out to be first.
4194 */
4195#include "IEMNativeFunctions.cpp.h"
4196
4197
4198
4199/*********************************************************************************************************************************
4200* Recompiler Core. *
4201*********************************************************************************************************************************/
4202
4203
4204/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
4205static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
4206{
4207 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
4208 pDis->cbCachedInstr += cbMaxRead;
4209 RT_NOREF(cbMinRead);
4210 return VERR_NO_DATA;
4211}
4212
4213
4214/**
4215 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
4216 * @returns pszBuf.
4217 * @param fFlags The flags.
4218 * @param pszBuf The output buffer.
4219 * @param cbBuf The output buffer size. At least 32 bytes.
4220 */
4221const char *iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf)
4222{
4223 Assert(cbBuf >= 32);
4224 static RTSTRTUPLE const s_aModes[] =
4225 {
4226 /* [00] = */ { RT_STR_TUPLE("16BIT") },
4227 /* [01] = */ { RT_STR_TUPLE("32BIT") },
4228 /* [02] = */ { RT_STR_TUPLE("!2!") },
4229 /* [03] = */ { RT_STR_TUPLE("!3!") },
4230 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
4231 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
4232 /* [06] = */ { RT_STR_TUPLE("!6!") },
4233 /* [07] = */ { RT_STR_TUPLE("!7!") },
4234 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
4235 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
4236 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
4237 /* [0b] = */ { RT_STR_TUPLE("!b!") },
4238 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
4239 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
4240 /* [0e] = */ { RT_STR_TUPLE("!e!") },
4241 /* [0f] = */ { RT_STR_TUPLE("!f!") },
4242 /* [10] = */ { RT_STR_TUPLE("!10!") },
4243 /* [11] = */ { RT_STR_TUPLE("!11!") },
4244 /* [12] = */ { RT_STR_TUPLE("!12!") },
4245 /* [13] = */ { RT_STR_TUPLE("!13!") },
4246 /* [14] = */ { RT_STR_TUPLE("!14!") },
4247 /* [15] = */ { RT_STR_TUPLE("!15!") },
4248 /* [16] = */ { RT_STR_TUPLE("!16!") },
4249 /* [17] = */ { RT_STR_TUPLE("!17!") },
4250 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
4251 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
4252 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
4253 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
4254 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
4255 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
4256 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
4257 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
4258 };
4259 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
4260 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
4261 unsigned off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
4262
4263 pszBuf[off++] = ' ';
4264 pszBuf[off++] = 'C';
4265 pszBuf[off++] = 'P';
4266 pszBuf[off++] = 'L';
4267 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
4268 Assert(off < 32);
4269
4270 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
4271
4272 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
4273 {
4274 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
4275 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
4276 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
4277 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
4278 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
4279 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
4280 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
4281 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
4282 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
4283 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
4284 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
4285 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
4286 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
4287 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
4288 };
4289 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags) && fFlags; i++)
4290 if (s_aFlags[i].fFlag & fFlags)
4291 {
4292 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
4293 pszBuf[off++] = ' ';
4294 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
4295 off += s_aFlags[i].cchName;
4296 fFlags &= ~s_aFlags[i].fFlag;
4297 }
4298 pszBuf[off] = '\0';
4299
4300 return pszBuf;
4301}
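/*
 * Usage sketch (illustrative): callers supply a scratch buffer of at least
 * 32 bytes and print the returned string directly, as iemNativeDisassembleTb
 * does below:
 *
 *     char szBuf[512];
 *     pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags,
 *                     iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 */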
4302
4303
4304void iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp)
4305{
4306 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
4307
4308 char szDisBuf[512];
4309 DISSTATE Dis;
4310 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
4311 uint32_t const cNative = pTb->Native.cInstructions;
4312 uint32_t offNative = 0;
4313#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4314 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
4315#endif
4316 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
4317 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
4318 : DISCPUMODE_64BIT;
4319#ifdef RT_ARCH_AMD64
4320 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
4321#elif defined(RT_ARCH_ARM64)
4322 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
4323#else
4324# error "Port me"
4325#endif
4326
4327 /*
4328 * Print TB info.
4329 */
4330 pHlp->pfnPrintf(pHlp,
4331 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
4332 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
4333 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
4334 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
4335#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4336 if (pDbgInfo && pDbgInfo->cEntries > 1)
4337 {
4338 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
4339
4340 /*
4341 * This disassembly is driven by the debug info which follows the native
4342 * code and indicates where the next guest instruction starts, where the
4343 * labels are placed, and so on.
4344 */
4345 uint32_t idxThreadedCall = 0;
4346 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
4347 uint8_t idxRange = UINT8_MAX;
4348 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
4349 uint32_t offRange = 0;
4350 uint32_t offOpcodes = 0;
4351 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
4352 uint32_t const cDbgEntries = pDbgInfo->cEntries;
4353 uint32_t iDbgEntry = 1;
4354 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
4355
4356 while (offNative < cNative)
4357 {
4358 /* If we're at or have passed the point where the next chunk of debug
4359 info starts, process it. */
4360 if (offDbgNativeNext <= offNative)
4361 {
4362 offDbgNativeNext = UINT32_MAX;
4363 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
4364 {
4365 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
4366 {
4367 case kIemTbDbgEntryType_GuestInstruction:
4368 {
4369 /* Did the exec flag change? */
4370 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
4371 {
4372 pHlp->pfnPrintf(pHlp,
4373 " fExec change %#08x -> %#08x %s\n",
4374 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
4375 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
4376 szDisBuf, sizeof(szDisBuf)));
4377 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
4378 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
4379 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
4380 : DISCPUMODE_64BIT;
4381 }
4382
4383 /* New opcode range? We need to fend off a spurious debug info entry here for cases
4384 where the compilation was aborted before the opcode was recorded and the actual
4385 instruction was translated to a threaded call. This may happen when we run out
4386 of ranges, or when some complicated interrupts/FFs are found to be pending or
4387 similar. So, we just deal with it here rather than in the compiler code as it
4388 is a lot simpler to do up here. */
4389 if ( idxRange == UINT8_MAX
4390 || idxRange >= cRanges
4391 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
4392 {
4393 idxRange += 1;
4394 if (idxRange < cRanges)
4395 offRange = 0;
4396 else
4397 continue;
4398 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes);
4399 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
4400 + (pTb->aRanges[idxRange].idxPhysPage == 0
4401 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
4402 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
4403 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
4404 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
4405 pTb->aRanges[idxRange].idxPhysPage);
4406 }
4407
4408 /* Disassemble the instruction. */
4409 uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
4410 uint32_t cbInstr = 1;
4411 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
4412 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
4413 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
4414 if (RT_SUCCESS(rc))
4415 {
4416 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4417 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4418 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4419 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4420
4421 static unsigned const s_offMarker = 55;
4422 static char const s_szMarker[] = " ; <--- guest";
4423 if (cch < s_offMarker)
4424 {
4425 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
4426 cch = s_offMarker;
4427 }
4428 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
4429 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
4430
4431 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
4432 }
4433 else
4434 {
4435 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
4436 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
4437 cbInstr = 1;
4438 }
4439 GCPhysPc += cbInstr;
4440 offOpcodes += cbInstr;
4441 offRange += cbInstr;
4442 continue;
4443 }
4444
4445 case kIemTbDbgEntryType_ThreadedCall:
4446 pHlp->pfnPrintf(pHlp,
4447 " Call #%u to %s (%u args)\n",
4448 idxThreadedCall,
4449 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
4450 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall]);
4451 idxThreadedCall++;
4452 continue;
4453
4454 case kIemTbDbgEntryType_GuestRegShadowing:
4455 {
4456 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
4457 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
4458 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
4459 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
4460 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
4461 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
4462 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
4463 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
4464 else
4465 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
4466 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
4467 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
4468 continue;
4469 }
4470
4471 case kIemTbDbgEntryType_Label:
4472 {
4473 const char *pszName = "what_the_fudge";
4474 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
4475 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
4476 {
4477 case kIemNativeLabelType_Return: pszName = "Return"; break;
4478 case kIemNativeLabelType_Else: pszName = "Else"; fNumbered = true; break;
4479 case kIemNativeLabelType_Endif: pszName = "Endif"; fNumbered = true; break;
4480 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
4481 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
4482 case kIemNativeLabelType_Invalid: break;
4483 case kIemNativeLabelType_End: break;
4484 }
4485 if (fNumbered)
4486 pHlp->pfnPrintf(pHlp, " %s_%u:\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData);
4487 else
4488 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
4489 continue;
4490 }
4491
4492 case kIemTbDbgEntryType_NativeOffset:
4493 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
4494 Assert(offDbgNativeNext > offNative);
4495 break;
4496
4497 default:
4498 AssertFailed();
4499 }
4500 iDbgEntry++;
4501 break;
4502 }
4503 }
4504
4505 /*
4506 * Disassemble the next native instruction.
4507 */
4508 uint32_t cbInstr = sizeof(paNative[0]);
4509 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
4510 if (RT_SUCCESS(rc))
4511 {
4512# if defined(RT_ARCH_AMD64)
4513 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
4514 {
4515 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
4516 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
4517 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args)\n",
4518 &paNative[offNative], RT_LOWORD(uInfo), g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
4519 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)]);
4520 else
4521 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
4522 }
4523 else
4524# endif
4525 {
4526 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4527 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4528 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4529 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4530 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
4531 }
4532 }
4533 else
4534 {
4535# if defined(RT_ARCH_AMD64)
4536 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
4537 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
4538# elif defined(RT_ARCH_ARM64)
4539 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
4540 &paNative[offNative], paNative[offNative], rc);
4541# else
4542# error "Port me"
4543# endif
4544 cbInstr = sizeof(paNative[0]);
4545 }
4546 offNative += cbInstr / sizeof(paNative[0]);
4547 }
4548 }
4549 else
4550#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
4551 {
4552 /*
4553 * No debug info, just disassemble the x86 code and then the native code.
4554 */
4555 /* The guest code. */
4556 for (unsigned i = 0; i < pTb->cRanges; i++)
4557 {
4558 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
4559 + (pTb->aRanges[i].idxPhysPage == 0
4560 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
4561 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
4562 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
4563 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
4564 unsigned off = pTb->aRanges[i].offOpcodes;
4565 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
4566 while (off < cbOpcodes)
4567 {
4568 uint32_t cbInstr = 1;
4569 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
4570 &pTb->pabOpcodes[off], cbOpcodes - off,
4571 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
4572 if (RT_SUCCESS(rc))
4573 {
4574 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4575 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4576 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4577 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4578 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
4579 GCPhysPc += cbInstr;
4580 off += cbInstr;
4581 }
4582 else
4583 {
4584 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
4585 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
4586 break;
4587 }
4588 }
4589 }
4590
4591 /* The native code: */
4592 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
4593 while (offNative < cNative)
4594 {
4595 uint32_t cbInstr = sizeof(paNative[0]);
4596 int const rc = DISInstr(&paNative[offNative], enmHstCpuMode, &Dis, &cbInstr);
4597 if (RT_SUCCESS(rc))
4598 {
4599# if defined(RT_ARCH_AMD64)
4600 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
4601 {
4602 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
4603 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
4604 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args)\n",
4605 &paNative[offNative], RT_LOWORD(uInfo), g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
4606 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)]);
4607 else
4608 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", &paNative[offNative], uInfo, uInfo);
4609 }
4610 else
4611# endif
4612 {
4613 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4614 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
4615 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
4616 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4617 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paNative[offNative], szDisBuf);
4618 }
4619 }
4620 else
4621 {
4622# if defined(RT_ARCH_AMD64)
4623 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
4624 &paNative[offNative], RT_MIN(cNative - offNative, 16), &paNative[offNative], rc);
4625# elif defined(RT_ARCH_ARM64)
4626 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
4627 &paNative[offNative], paNative[offNative], rc);
4628# else
4629# error "Port me"
4630# endif
4631 cbInstr = sizeof(paNative[0]);
4632 }
4633 offNative += cbInstr / sizeof(paNative[0]);
4634 }
4635 }
4636}
4637
4638
4639/**
4640 * Recompiles the given threaded TB into a native one.
4641 *
4642 * In case of failure the translation block will be returned as-is.
4643 *
4644 * @returns pTb.
4645 * @param pVCpu The cross context virtual CPU structure of the calling
4646 * thread.
4647 * @param pTb The threaded translation to recompile to native.
4648 */
4649PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
4650{
4651 /*
4652 * The first time through we allocate the recompiler state; the other times
4653 * we just need to reset it before using it again.
4654 */
4655 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
4656 if (RT_LIKELY(pReNative))
4657 iemNativeReInit(pReNative, pTb);
4658 else
4659 {
4660 pReNative = iemNativeInit(pVCpu, pTb);
4661 AssertReturn(pReNative, pTb);
4662 }
4663
4664 /*
4665 * Emit prolog code (fixed).
4666 */
4667 uint32_t off = iemNativeEmitProlog(pReNative, 0);
4668 AssertReturn(off != UINT32_MAX, pTb);
4669
4670 /*
4671 * Convert the calls to native code.
4672 */
4673#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4674 int32_t iGstInstr = -1;
4675 uint32_t fExec = pTb->fFlags;
4676#endif
4677 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
4678 uint32_t cCallsLeft = pTb->Thrd.cCalls;
4679#ifdef LOG_ENABLED
4680 uint32_t const cCallsOrg = cCallsLeft;
4681#endif
4682 while (cCallsLeft-- > 0)
4683 {
4684 /*
4685 * Debug info and assembly markup.
4686 */
4687#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4688 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
4689 fExec = pCallEntry->auParams[0];
4690 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4691 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
4692 {
4693 if (iGstInstr < (int32_t)pTb->cInstructions)
4694 iemNativeDbgInfoAddGuestInstruction(pReNative, fExec);
4695 else
4696 Assert(iGstInstr == pTb->cInstructions);
4697 iGstInstr = pCallEntry->idxInstr;
4698 }
4699 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction);
4700#endif
4701
4702#ifdef VBOX_STRICT
4703 off = iemNativeEmitMarker(pReNative, off, RT_MAKE_U32(pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->enmFunction));
4704 AssertReturn(off != UINT32_MAX, pTb);
4705#endif
4706 /*
4707 * Actual work.
4708 */
4709 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
4710 if (pfnRecom) /** @todo stats on this. */
4711 {
4712 //STAM_COUNTER_INC()
4713 off = pfnRecom(pReNative, off, pCallEntry);
4714 }
4715 else
4716 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
4717 AssertReturn(off != UINT32_MAX, pTb);
4718 Assert(pReNative->cCondDepth == 0);
4719
4720 /*
4721 * Advance.
4722 */
4723 pCallEntry++;
4724 }
4725
4726 /*
4727 * Emit the epilog code.
4728 */
4729 off = iemNativeEmitEpilog(pReNative, off);
4730 AssertReturn(off != UINT32_MAX, pTb);
4731
4732 /*
4733 * Generate special jump labels.
4734 */
4735 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
4736 {
4737 off = iemNativeEmitRaiseGp0(pReNative, off);
4738 AssertReturn(off != UINT32_MAX, pTb);
4739 }
4740
4741 /*
4742 * Make sure all labels have been defined.
4743 */
4744 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
4745#ifdef VBOX_STRICT
4746 uint32_t const cLabels = pReNative->cLabels;
4747 for (uint32_t i = 0; i < cLabels; i++)
4748 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
4749#endif
4750
4751 /*
4752 * Allocate executable memory, copy over the code we've generated.
4753 */
4754 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
4755 if (pTbAllocator->pDelayedFreeHead)
4756 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
4757
4758 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
4759 AssertReturn(paFinalInstrBuf, pTb);
4760 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
4761
4762 /*
4763 * Apply fixups.
4764 */
4765 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
4766 uint32_t const cFixups = pReNative->cFixups;
4767 for (uint32_t i = 0; i < cFixups; i++)
4768 {
4769 Assert(paFixups[i].off < off);
4770 Assert(paFixups[i].idxLabel < cLabels);
4771 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
4772 switch (paFixups[i].enmType)
4773 {
4774#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4775 case kIemNativeFixupType_Rel32:
4776 Assert(paFixups[i].off + 4 <= off);
4777 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4778 continue;
4779
4780#elif defined(RT_ARCH_ARM64)
4781 case kIemNativeFixupType_RelImm19At5:
4782 {
4783 Assert(paFixups[i].off < off);
4784 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4785 Assert(offDisp >= -262144 && offDisp < 262144);
4786 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
4787 continue;
4788 }
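 /* Worked example (illustrative): a forward branch fixed up from native
    instruction 0x20 to a label at 0x30 gives offDisp = +0x10 instruction
    units; masked to 19 bits and shifted into bits 23:5 it forms the imm19
    field of the A64 branch, while the 0xff00001f mask preserves the opcode
    bits 31:24 and the condition/register bits 4:0.  The assertion above is
    the imm19 range of +/-262144 instruction units, i.e. +/-1MB. */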
4789#endif
4790 case kIemNativeFixupType_Invalid:
4791 case kIemNativeFixupType_End:
4792 break;
4793 }
4794 AssertFailed();
4795 }
4796
4797 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
4798
4799 /*
4800 * Convert the translation block.
4801 */
4802 //RT_BREAKPOINT();
4803 RTMemFree(pTb->Thrd.paCalls);
4804 pTb->Native.paInstructions = paFinalInstrBuf;
4805 pTb->Native.cInstructions = off;
4806 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
4807#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4808 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
4809 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
4810#endif
4811
4812 Assert(pTbAllocator->cThreadedTbs > 0);
4813 pTbAllocator->cThreadedTbs -= 1;
4814 pTbAllocator->cNativeTbs += 1;
4815 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
4816
4817#ifdef LOG_ENABLED
4818 /*
4819 * Disassemble to the log if enabled.
4820 */
4821 if (LogIs3Enabled())
4822 {
4823 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
4824 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
4825 }
4826#endif
4827
4828 return pTb;
4829}
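/*
 * Usage sketch (illustrative, hypothetical caller): on success the TB comes back
 * converted to a native one, on failure the threaded TB is returned unchanged,
 * so a caller can simply do:
 *
 *     pTb = iemNativeRecompile(pVCpu, pTb);
 */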
4830