VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101541

Last change on this file since 101541 was 101539, checked in by vboxsync, 19 months ago

DIS,VMM,DBGC,IPRT,++: Some disassembler tweaks and TB disassembly work. bugref:10371 bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 174.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 101539 2023-10-22 02:43:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): Register allocator
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94
95#include "IEMInline.h"
96#include "IEMThreadedFunctions.h"
97#include "IEMN8veRecompiler.h"
98#include "IEMNativeFunctions.h"
99
100
101/*
102 * Narrow down configs here to avoid wasting time on unused configs.
103 * Note! Same checks in IEMAllThrdRecompiler.cpp.
104 */
105
106#ifndef IEM_WITH_CODE_TLB
107# error The code TLB must be enabled for the recompiler.
108#endif
109
110#ifndef IEM_WITH_DATA_TLB
111# error The data TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_SETJMP
115# error The setjmp approach must be enabled for the recompiler.
116#endif
117
118
119/*********************************************************************************************************************************
120* Defined Constants And Macros *
121*********************************************************************************************************************************/
122/** Always count instructions for now. */
123#define IEMNATIVE_WITH_INSTRUCTION_COUNTING
124
125
126/*********************************************************************************************************************************
127* Internal Functions *
128*********************************************************************************************************************************/
129static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
130 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
131
132
133/*********************************************************************************************************************************
134* Executable Memory Allocator *
135*********************************************************************************************************************************/
136/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
137 * Use an alternative chunk sub-allocator that does not store internal data
138 * in the chunk.
139 *
140 * Using RTHeapSimple is not practical on newer darwin systems where
141 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
142 * memory. We would have to change the protection of the whole chunk for
143 * every call to RTHeapSimple, which would be rather expensive.
144 *
145 * This alternative implementation lets us restrict page protection modifications
146 * to the pages backing the executable memory we just allocated.
147 */
148#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
149/** The chunk sub-allocation unit size in bytes. */
150#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
151/** The chunk sub-allocation unit size as a shift factor. */
152#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
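/* Illustrative only (kept out of the build): the two values above must stay in
   sync; a compile-time check along these lines, using the regular IPRT
   AssertCompile macro, would catch a mismatch. */
#if 0
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
#endif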
153
154#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
155# ifdef IEMNATIVE_USE_GDB_JIT
156# define IEMNATIVE_USE_GDB_JIT_ET_DYN
157
158/** GDB JIT: Code entry. */
159typedef struct GDBJITCODEENTRY
160{
161 struct GDBJITCODEENTRY *pNext;
162 struct GDBJITCODEENTRY *pPrev;
163 uint8_t *pbSymFile;
164 uint64_t cbSymFile;
165} GDBJITCODEENTRY;
166
167/** GDB JIT: Actions. */
168typedef enum GDBJITACTIONS : uint32_t
169{
170 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
171} GDBJITACTIONS;
172
173/** GDB JIT: Descriptor. */
174typedef struct GDBJITDESCRIPTOR
175{
176 uint32_t uVersion;
177 GDBJITACTIONS enmAction;
178 GDBJITCODEENTRY *pRelevant;
179 GDBJITCODEENTRY *pHead;
180 /** Our addition: */
181 GDBJITCODEENTRY *pTail;
182} GDBJITDESCRIPTOR;
183
184/** GDB JIT: Our simple symbol file data. */
185typedef struct GDBJITSYMFILE
186{
187 Elf64_Ehdr EHdr;
188# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
189 Elf64_Shdr aShdrs[5];
190# else
191 Elf64_Shdr aShdrs[7];
192 Elf64_Phdr aPhdrs[2];
193# endif
194 /** The dwarf ehframe data for the chunk. */
195 uint8_t abEhFrame[512];
196 char szzStrTab[128];
197 Elf64_Sym aSymbols[3];
198# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
199 Elf64_Sym aDynSyms[2];
200 Elf64_Dyn aDyn[6];
201# endif
202} GDBJITSYMFILE;
203
204extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
205extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
206
207/** Init once for g_IemNativeGdbJitLock. */
208static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
209/** Init once for the critical section. */
210static RTCRITSECT g_IemNativeGdbJitLock;
211
212/** GDB reads the info here. */
213GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
214
215/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
216DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
217{
218 ASMNopPause();
219}
220
221/** @callback_method_impl{FNRTONCE} */
222static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
223{
224 RT_NOREF(pvUser);
225 return RTCritSectInit(&g_IemNativeGdbJitLock);
226}
227
228
229# endif /* IEMNATIVE_USE_GDB_JIT */
230
231/**
232 * Per-chunk unwind info for non-windows hosts.
233 */
234typedef struct IEMEXECMEMCHUNKEHFRAME
235{
236# ifdef IEMNATIVE_USE_LIBUNWIND
237 /** The offset of the FDA into abEhFrame. */
238 uintptr_t offFda;
239# else
240 /** 'struct object' storage area. */
241 uint8_t abObject[1024];
242# endif
243# ifdef IEMNATIVE_USE_GDB_JIT
244# if 0
245 /** The GDB JIT 'symbol file' data. */
246 GDBJITSYMFILE GdbJitSymFile;
247# endif
248 /** The GDB JIT list entry. */
249 GDBJITCODEENTRY GdbJitEntry;
250# endif
251 /** The dwarf ehframe data for the chunk. */
252 uint8_t abEhFrame[512];
253} IEMEXECMEMCHUNKEHFRAME;
254/** Pointer to per-chunk unwind info for non-windows hosts. */
255typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
256#endif
257
258
259/**
260 * A chunk of executable memory.
261 */
262typedef struct IEMEXECMEMCHUNK
263{
264#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
265 /** Number of free items in this chunk. */
266 uint32_t cFreeUnits;
267 /** Hint where to start searching for free space in the allocation bitmap. */
268 uint32_t idxFreeHint;
269#else
270 /** The heap handle. */
271 RTHEAPSIMPLE hHeap;
272#endif
273 /** Pointer to the chunk. */
274 void *pvChunk;
275#ifdef IN_RING3
276 /**
277 * Pointer to the unwind information.
278 *
279 * This is used during C++ throw and longjmp (windows and probably most other
280 * platforms). Some debuggers (windbg) make use of it as well.
281 *
282 * Windows: This is allocated from hHeap on windows because (at least for
283 * AMD64) the UNWIND_INFO structure address in the
284 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
285 *
286 * Others: Allocated from the regular heap to avoid unnecessary executable data
287 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
288 void *pvUnwindInfo;
289#elif defined(IN_RING0)
290 /** Allocation handle. */
291 RTR0MEMOBJ hMemObj;
292#endif
293} IEMEXECMEMCHUNK;
294/** Pointer to a memory chunk. */
295typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
296
297
298/**
299 * Executable memory allocator for the native recompiler.
300 */
301typedef struct IEMEXECMEMALLOCATOR
302{
303 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
304 uint32_t uMagic;
305
306 /** The chunk size. */
307 uint32_t cbChunk;
308 /** The maximum number of chunks. */
309 uint32_t cMaxChunks;
310 /** The current number of chunks. */
311 uint32_t cChunks;
312 /** Hint where to start looking for available memory. */
313 uint32_t idxChunkHint;
314 /** Statistics: Current number of allocations. */
315 uint32_t cAllocations;
316
317 /** The total amount of memory available. */
318 uint64_t cbTotal;
319 /** Total amount of free memory. */
320 uint64_t cbFree;
321 /** Total amount of memory allocated. */
322 uint64_t cbAllocated;
323
324#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
325 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
326 *
327 * Since the chunk size is a power of two and the minimum chunk size is a lot
328 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
329 * require a whole number of uint64_t elements in the allocation bitmap. So,
330 * for the sake of simplicity/laziness, they are allocated as one continuous
331 * block. */
332 uint64_t *pbmAlloc;
333 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
334 uint32_t cUnitsPerChunk;
335 /** Number of bitmap elements per chunk (for quickly locating the bitmap
336 * portion corresponding to a chunk). */
337 uint32_t cBitmapElementsPerChunk;
338#else
339 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
340 * @{ */
341 /** The size of the heap internal block header. This is used to adjust the
342 * requested memory size to make sure there is exactly enough room for a header at
343 * the end of the blocks we allocate before the next 64 byte alignment line. */
344 uint32_t cbHeapBlockHdr;
345 /** The size of the initial heap allocation required to make sure the first
346 * allocation is correctly aligned. */
347 uint32_t cbHeapAlignTweak;
348 /** The alignment tweak allocation address. */
349 void *pvAlignTweak;
350 /** @} */
351#endif
352
353#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
354 /** Pointer to the array of unwind info running parallel to aChunks (same
355 * allocation as this structure, located after the bitmaps).
356 * (For Windows, the structures must reside within 32-bit RVA distance of the
357 * actual chunk, so they are allocated off the chunk.) */
358 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
359#endif
360
361 /** The allocation chunks. */
362 RT_FLEXIBLE_ARRAY_EXTENSION
363 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
364} IEMEXECMEMALLOCATOR;
365/** Pointer to an executable memory allocator. */
366typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
367
368/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
369#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
370
371
372static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
373
374
375/**
376 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
377 * the heap statistics.
378 */
379static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
380 uint32_t cbReq, uint32_t idxChunk)
381{
382 pExecMemAllocator->cAllocations += 1;
383 pExecMemAllocator->cbAllocated += cbReq;
384#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
385 pExecMemAllocator->cbFree -= cbReq;
386#else
387 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
388#endif
389 pExecMemAllocator->idxChunkHint = idxChunk;
390
391#ifdef RT_OS_DARWIN
392 /*
393 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
394 * on darwin. So, we mark the pages returned as read+write after alloc and
395 * expect the caller to call iemExecMemAllocatorReadyForUse when done
396 * writing to the allocation.
397 *
398 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
399 * for details.
400 */
401 /** @todo detect if this is necessary... it wasn't required on 10.15 or
402 * whatever older version it was. */
403 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
404 AssertRC(rc);
405#endif
406
407 return pvRet;
408}
409
410
411#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
412static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
413 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
414{
415 /*
416 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
417 */
418 Assert(!(cToScan & 63));
419 Assert(!(idxFirst & 63));
420 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
421 pbmAlloc += idxFirst / 64;
422
423 /*
424 * Scan the bitmap for cReqUnits consecutive clear bits
425 */
426 /** @todo This can probably be done more efficiently for non-x86 systems. */
427 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
428 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
429 {
430 uint32_t idxAddBit = 1;
431 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
432 idxAddBit++;
433 if (idxAddBit >= cReqUnits)
434 {
435 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
436
437 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
438 pChunk->cFreeUnits -= cReqUnits;
439 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
440
441 void * const pvRet = (uint8_t *)pChunk->pvChunk
442 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
443
444 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
445 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
446 }
447
448 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
449 }
450 return NULL;
451}
452#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
453
454
455static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
456{
457#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
458 /*
459 * Figure out how much to allocate.
460 */
461 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
462 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
463 {
464 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
465 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
466 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
467 {
468 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
469 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
470 if (pvRet)
471 return pvRet;
472 }
473 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
474 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
475 cReqUnits, idxChunk);
476 }
477#else
478 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
479 if (pvRet)
480 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
481#endif
482 return NULL;
483
484}
485
486
487/**
488 * Allocates @a cbReq bytes of executable memory.
489 *
490 * @returns Pointer to the memory, NULL if out of memory or other problem
491 * encountered.
492 * @param pVCpu The cross context virtual CPU structure of the calling
493 * thread.
494 * @param cbReq How many bytes are required.
495 */
496static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
497{
498 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
499 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
500 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
501
502 /*
503 * Adjust the request size so it'll fit the allocator alignment/whatnot.
504 *
505 * For the RTHeapSimple allocator this means to follow the logic described
506 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
507 * existing chunks if we think we've got sufficient free memory around.
508 *
509 * While for the alternative one we just align it up to a whole unit size.
510 */
511#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
512 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
513#else
514 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
515#endif
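    /* Worked example (illustrative): with the alternative sub-allocator a 200 byte
       request is rounded up to 256 bytes (two 128 byte units), while for RTHeapSimple
       it becomes RT_ALIGN_32(200 + 32, 64) - 32 = 224 bytes so that the following
       32 byte block header ends exactly on the next 64 byte alignment line. */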
516 if (cbReq <= pExecMemAllocator->cbFree)
517 {
518 uint32_t const cChunks = pExecMemAllocator->cChunks;
519 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
520 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
521 {
522 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
523 if (pvRet)
524 return pvRet;
525 }
526 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
527 {
528 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
529 if (pvRet)
530 return pvRet;
531 }
532 }
533
534 /*
535 * Can we grow it with another chunk?
536 */
537 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
538 {
539 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
540 AssertLogRelRCReturn(rc, NULL);
541
542 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 AssertFailed();
547 }
548
549 /* What now? Prune native translation blocks from the cache? */
550 AssertFailed();
551 return NULL;
552}
553
554
555/** This is a hook that we may need later for changing memory protection back
556 * to readonly+exec */
557static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
558{
559#ifdef RT_OS_DARWIN
560 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
561 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
562 AssertRC(rc); RT_NOREF(pVCpu);
563
564 /*
565 * Flush the instruction cache:
566 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
567 */
568 /* sys_dcache_flush(pv, cb); - not necessary */
569 sys_icache_invalidate(pv, cb);
570#else
571 RT_NOREF(pVCpu, pv, cb);
572#endif
573}
574
575
576/**
577 * Frees executable memory.
578 */
579void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
580{
581 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
582 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
583 Assert(pv);
584#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
585 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
586#else
587 Assert(!((uintptr_t)pv & 63));
588#endif
589
590 /* Align the size as we did when allocating the block. */
591#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
592 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
593#else
594 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
595#endif
596
597 /* Free it / assert sanity. */
598#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
599 uint32_t const cChunks = pExecMemAllocator->cChunks;
600 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
601 bool fFound = false;
602 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
603 {
604 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
605 fFound = offChunk < cbChunk;
606 if (fFound)
607 {
608#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
609 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
610 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
611
612 /* Check that it's valid and free it. */
613 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
614 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
615 for (uint32_t i = 1; i < cReqUnits; i++)
616 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
617 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
618
619 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
620 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
621
622 /* Update the stats. */
623 pExecMemAllocator->cbAllocated -= cb;
624 pExecMemAllocator->cbFree += cb;
625 pExecMemAllocator->cAllocations -= 1;
626 return;
627#else
628 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
629 break;
630#endif
631 }
632 }
633# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
634 AssertFailed();
635# else
636 Assert(fFound);
637# endif
638#endif
639
640#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
641 /* Update stats while cb is freshly calculated. */
642 pExecMemAllocator->cbAllocated -= cb;
643 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
644 pExecMemAllocator->cAllocations -= 1;
645
646 /* Free it. */
647 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
648#endif
649}
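/*
 * Illustrative only, excluded from the build: a minimal sketch of the allocation
 * life cycle implied by the three functions above -- allocate, emit native code
 * while the pages are still writable, flip them to executable via
 * iemExecMemAllocatorReadyForUse, and eventually free.  The function name, byte
 * pattern and size below are made up for the example.
 */
#if 0
static void iemExecMemAllocatorLifeCycleSketch(PVMCPUCC pVCpu)
{
    uint32_t const  cbCode = 256;                                   /* hypothetical TB size */
    uint8_t * const pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbCode)
    {
        memset(pbCode, 0xcc, cbCode);                               /* emit the recompiled code here (pages are RW) */
        iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode);      /* RW -> RX + icache flush where needed */
        /* ... execute the code ... */
        iemExecMemAllocatorFree(pVCpu, pbCode, cbCode);
    }
}
#endif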
650
651
652
653#ifdef IN_RING3
654# ifdef RT_OS_WINDOWS
655
656/**
657 * Initializes the unwind info structures for windows hosts.
658 */
659static int
660iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
661 void *pvChunk, uint32_t idxChunk)
662{
663 RT_NOREF(pVCpu);
664
665 /*
666 * The AMD64 unwind opcodes.
667 *
668 * This is a program that starts with RSP after a RET instruction that
669 * ends up in recompiled code, and the operations we describe here will
670 * restore all non-volatile registers and bring RSP back to where our
671 * RET address is. This means it's reverse order from what happens in
672 * the prologue.
673 *
674 * Note! Using a frame register approach here both because we have one,
675 * but mainly because the UWOP_ALLOC_LARGE argument values
676 * would be a pain to write initializers for. On the positive
677 * side, we're impervious to changes in the stack variable
678 * area and can deal with dynamic stack allocations if necessary.
679 */
680 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
681 {
682 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
683 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
684 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
685 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
686 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
687 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
688 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
689 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
690 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
691 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
692 };
693 union
694 {
695 IMAGE_UNWIND_INFO Info;
696 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
697 } s_UnwindInfo =
698 {
699 {
700 /* .Version = */ 1,
701 /* .Flags = */ 0,
702 /* .SizeOfProlog = */ 16, /* whatever */
703 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
704 /* .FrameRegister = */ X86_GREG_xBP,
705 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
706 }
707 };
708 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
709 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
710
711 /*
712 * Calc how much space we need and allocate it off the exec heap.
713 */
714 unsigned const cFunctionEntries = 1;
715 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
716 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
717# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
718 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
719 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
720 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
721# else
722 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
723 - pExecMemAllocator->cbHeapBlockHdr;
724 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
725 32 /*cbAlignment*/);
726# endif
727 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
728 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
729
730 /*
731 * Initialize the structures.
732 */
733 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
734
735 paFunctions[0].BeginAddress = 0;
736 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
737 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
738
739 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
740 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
741
742 /*
743 * Register it.
744 */
745 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
746 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
747
748 return VINF_SUCCESS;
749}
750
751
752# else /* !RT_OS_WINDOWS */
753
754/**
755 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
756 */
757DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
758{
759 if (iValue >= 64)
760 {
761 Assert(iValue < 0x2000);
762 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
763 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
764 }
765 else if (iValue >= 0)
766 *Ptr.pb++ = (uint8_t)iValue;
767 else if (iValue > -64)
768 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
769 else
770 {
771 Assert(iValue > -0x2000);
772 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
773 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
774 }
775 return Ptr;
776}
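/* Illustrative encodings produced by the routine above: -8 becomes the single
   byte 0x78 (sign bit 0x40 set), while 300 becomes 0xAC 0x02 (low seven bits
   with the continuation flag, then the remaining bits). */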
777
778
779/**
780 * Emits an ULEB128 encoded value (up to 64-bit wide).
781 */
782DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
783{
784 while (uValue >= 0x80)
785 {
786 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
787 uValue >>= 7;
788 }
789 *Ptr.pb++ = (uint8_t)uValue;
790 return Ptr;
791}
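/* Illustrative encodings produced by the routine above: 0x90 becomes the two
   bytes 0x90 0x01, and the classic DWARF example value 624485 (0x98765)
   becomes 0xE5 0x8E 0x26. */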
792
793
794/**
795 * Emits a CFA rule as register @a uReg + offset @a off.
796 */
797DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
798{
799 *Ptr.pb++ = DW_CFA_def_cfa;
800 Ptr = iemDwarfPutUleb128(Ptr, uReg);
801 Ptr = iemDwarfPutUleb128(Ptr, off);
802 return Ptr;
803}
804
805
806/**
807 * Emits a register (@a uReg) save location:
808 * CFA + @a off * data_alignment_factor
809 */
810DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
811{
812 if (uReg < 0x40)
813 *Ptr.pb++ = DW_CFA_offset | uReg;
814 else
815 {
816 *Ptr.pb++ = DW_CFA_offset_extended;
817 Ptr = iemDwarfPutUleb128(Ptr, uReg);
818 }
819 Ptr = iemDwarfPutUleb128(Ptr, off);
820 return Ptr;
821}
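/* Illustrative only, assuming the standard System V AMD64 DWARF register
   numbering behind the DWREG_AMD64_* constants (RBP = 6, return address
   column = 16): the AMD64 initial instructions emitted further down start
   with DW_CFA_def_cfa RBP,16 = 0x0c 0x06 0x10, followed by
   DW_CFA_offset RA,1 = 0x90 0x01. */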
822
823
824# if 0 /* unused */
825/**
826 * Emits a register (@a uReg) save location, using signed offset:
827 * CFA + @a offSigned * data_alignment_factor
828 */
829DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
830{
831 *Ptr.pb++ = DW_CFA_offset_extended_sf;
832 Ptr = iemDwarfPutUleb128(Ptr, uReg);
833 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
834 return Ptr;
835}
836# endif
837
838
839/**
840 * Initializes the unwind info section for non-windows hosts.
841 */
842static int
843iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
844 void *pvChunk, uint32_t idxChunk)
845{
846 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
847 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
848
849 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
850
851 /*
852 * Generate the CIE first.
853 */
854# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
855 uint8_t const iDwarfVer = 3;
856# else
857 uint8_t const iDwarfVer = 4;
858# endif
859 RTPTRUNION const PtrCie = Ptr;
860 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
861 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
862 *Ptr.pb++ = iDwarfVer; /* DWARF version */
863 *Ptr.pb++ = 0; /* Augmentation. */
864 if (iDwarfVer >= 4)
865 {
866 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
867 *Ptr.pb++ = 0; /* Segment selector size. */
868 }
869# ifdef RT_ARCH_AMD64
870 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
871# else
872 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
873# endif
874 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
875# ifdef RT_ARCH_AMD64
876 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
877# elif defined(RT_ARCH_ARM64)
878 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
879# else
880# error "port me"
881# endif
882 /* Initial instructions: */
883# ifdef RT_ARCH_AMD64
884 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
885 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
886 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
889 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
890 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
892# elif defined(RT_ARCH_ARM64)
893# if 1
894 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
895# else
896 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
897# endif
898 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
899 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
910 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
911 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
912# else
913# error "port me"
914# endif
915 while ((Ptr.u - PtrCie.u) & 3)
916 *Ptr.pb++ = DW_CFA_nop;
917 /* Finalize the CIE size. */
918 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
919
920 /*
921 * Generate an FDE for the whole chunk area.
922 */
923# ifdef IEMNATIVE_USE_LIBUNWIND
924 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
925# endif
926 RTPTRUNION const PtrFde = Ptr;
927 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
928 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
929 Ptr.pu32++;
930 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
931 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
932# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
933 *Ptr.pb++ = DW_CFA_nop;
934# endif
935 while ((Ptr.u - PtrFde.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the FDE size. */
938 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
939
940 /* Terminator entry. */
941 *Ptr.pu32++ = 0;
942 *Ptr.pu32++ = 0; /* just to be sure... */
943 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
944
945 /*
946 * Register it.
947 */
948# ifdef IEMNATIVE_USE_LIBUNWIND
949 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
950# else
951 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
952 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
953# endif
954
955# ifdef IEMNATIVE_USE_GDB_JIT
956 /*
957 * Now for telling GDB about this (experimental).
958 *
959 * This seems to work best with ET_DYN.
960 */
961 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
962# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
963 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
964 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
965# else
966 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
967 - pExecMemAllocator->cbHeapBlockHdr;
968 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
969# endif
970 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
971 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
972
973 RT_ZERO(*pSymFile);
974
975 /*
976 * The ELF header:
977 */
978 pSymFile->EHdr.e_ident[0] = ELFMAG0;
979 pSymFile->EHdr.e_ident[1] = ELFMAG1;
980 pSymFile->EHdr.e_ident[2] = ELFMAG2;
981 pSymFile->EHdr.e_ident[3] = ELFMAG3;
982 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
983 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
984 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
985 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
986# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
987 pSymFile->EHdr.e_type = ET_DYN;
988# else
989 pSymFile->EHdr.e_type = ET_REL;
990# endif
991# ifdef RT_ARCH_AMD64
992 pSymFile->EHdr.e_machine = EM_AMD64;
993# elif defined(RT_ARCH_ARM64)
994 pSymFile->EHdr.e_machine = EM_AARCH64;
995# else
996# error "port me"
997# endif
998 pSymFile->EHdr.e_version = 1; /*?*/
999 pSymFile->EHdr.e_entry = 0;
1000# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1001 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1002# else
1003 pSymFile->EHdr.e_phoff = 0;
1004# endif
1005 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1006 pSymFile->EHdr.e_flags = 0;
1007 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1008# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1009 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1010 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1011# else
1012 pSymFile->EHdr.e_phentsize = 0;
1013 pSymFile->EHdr.e_phnum = 0;
1014# endif
1015 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1016 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1017 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1018
1019 uint32_t offStrTab = 0;
1020#define APPEND_STR(a_szStr) do { \
1021 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1022 offStrTab += sizeof(a_szStr); \
1023 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1024 } while (0)
1025#define APPEND_STR_FMT(a_szStr, ...) do { \
1026 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1027 offStrTab++; \
1028 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1029 } while (0)
1030
1031 /*
1032 * Section headers.
1033 */
1034 /* Section header #0: NULL */
1035 unsigned i = 0;
1036 APPEND_STR("");
1037 RT_ZERO(pSymFile->aShdrs[i]);
1038 i++;
1039
1040 /* Section header: .eh_frame */
1041 pSymFile->aShdrs[i].sh_name = offStrTab;
1042 APPEND_STR(".eh_frame");
1043 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1044 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1045# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1046 pSymFile->aShdrs[i].sh_offset
1047 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1048# else
1049 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1050 pSymFile->aShdrs[i].sh_offset = 0;
1051# endif
1052
1053 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1054 pSymFile->aShdrs[i].sh_link = 0;
1055 pSymFile->aShdrs[i].sh_info = 0;
1056 pSymFile->aShdrs[i].sh_addralign = 1;
1057 pSymFile->aShdrs[i].sh_entsize = 0;
1058 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1059 i++;
1060
1061 /* Section header: .shstrtab */
1062 unsigned const iShStrTab = i;
1063 pSymFile->EHdr.e_shstrndx = iShStrTab;
1064 pSymFile->aShdrs[i].sh_name = offStrTab;
1065 APPEND_STR(".shstrtab");
1066 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1067 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1068# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1069 pSymFile->aShdrs[i].sh_offset
1070 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1071# else
1072 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1073 pSymFile->aShdrs[i].sh_offset = 0;
1074# endif
1075 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1076 pSymFile->aShdrs[i].sh_link = 0;
1077 pSymFile->aShdrs[i].sh_info = 0;
1078 pSymFile->aShdrs[i].sh_addralign = 1;
1079 pSymFile->aShdrs[i].sh_entsize = 0;
1080 i++;
1081
1082 /* Section header: .symtab */
1083 pSymFile->aShdrs[i].sh_name = offStrTab;
1084 APPEND_STR(".symtab");
1085 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1086 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1087 pSymFile->aShdrs[i].sh_offset
1088 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1089 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1090 pSymFile->aShdrs[i].sh_link = iShStrTab;
1091 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1092 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1093 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1094 i++;
1095
1096# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1097 /* Section header: .dynsym */
1098 pSymFile->aShdrs[i].sh_name = offStrTab;
1099 APPEND_STR(".dynsym");
1100 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1101 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1102 pSymFile->aShdrs[i].sh_offset
1103 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1104 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1105 pSymFile->aShdrs[i].sh_link = iShStrTab;
1106 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1107 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1108 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1109 i++;
1110# endif
1111
1112# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1113 /* Section header: .dynamic */
1114 pSymFile->aShdrs[i].sh_name = offStrTab;
1115 APPEND_STR(".dynamic");
1116 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1117 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1118 pSymFile->aShdrs[i].sh_offset
1119 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1120 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1121 pSymFile->aShdrs[i].sh_link = iShStrTab;
1122 pSymFile->aShdrs[i].sh_info = 0;
1123 pSymFile->aShdrs[i].sh_addralign = 1;
1124 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1125 i++;
1126# endif
1127
1128 /* Section header: .text */
1129 unsigned const iShText = i;
1130 pSymFile->aShdrs[i].sh_name = offStrTab;
1131 APPEND_STR(".text");
1132 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1133 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1134# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1135 pSymFile->aShdrs[i].sh_offset
1136 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1137# else
1138 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1139 pSymFile->aShdrs[i].sh_offset = 0;
1140# endif
1141 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1142 pSymFile->aShdrs[i].sh_link = 0;
1143 pSymFile->aShdrs[i].sh_info = 0;
1144 pSymFile->aShdrs[i].sh_addralign = 1;
1145 pSymFile->aShdrs[i].sh_entsize = 0;
1146 i++;
1147
1148 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1149
1150# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1151 /*
1152 * The program headers:
1153 */
1154 /* Everything in a single LOAD segment: */
1155 i = 0;
1156 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1157 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1158 pSymFile->aPhdrs[i].p_offset
1159 = pSymFile->aPhdrs[i].p_vaddr
1160 = pSymFile->aPhdrs[i].p_paddr = 0;
1161 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1162 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1163 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1164 i++;
1165 /* The .dynamic segment. */
1166 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1167 pSymFile->aPhdrs[i].p_flags = PF_R;
1168 pSymFile->aPhdrs[i].p_offset
1169 = pSymFile->aPhdrs[i].p_vaddr
1170 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1171 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1172 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1173 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1174 i++;
1175
1176 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1177
1178 /*
1179 * The dynamic section:
1180 */
1181 i = 0;
1182 pSymFile->aDyn[i].d_tag = DT_SONAME;
1183 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1184 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1185 i++;
1186 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1187 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1188 i++;
1189 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1190 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1191 i++;
1192 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1193 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1194 i++;
1195 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1196 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1197 i++;
1198 pSymFile->aDyn[i].d_tag = DT_NULL;
1199 i++;
1200 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1201# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1202
1203 /*
1204 * Symbol tables:
1205 */
1206 /** @todo gdb doesn't seem to really like this ... */
1207 i = 0;
1208 pSymFile->aSymbols[i].st_name = 0;
1209 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1210 pSymFile->aSymbols[i].st_value = 0;
1211 pSymFile->aSymbols[i].st_size = 0;
1212 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1213 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1214# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1215 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1216# endif
1217 i++;
1218
1219 pSymFile->aSymbols[i].st_name = 0;
1220 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1221 pSymFile->aSymbols[i].st_value = 0;
1222 pSymFile->aSymbols[i].st_size = 0;
1223 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1224 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1225 i++;
1226
1227 pSymFile->aSymbols[i].st_name = offStrTab;
1228 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1229# if 0
1230 pSymFile->aSymbols[i].st_shndx = iShText;
1231 pSymFile->aSymbols[i].st_value = 0;
1232# else
1233 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1234 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1235# endif
1236 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1237 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1238 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1239# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1240 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1241 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1242# endif
1243 i++;
1244
1245 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1246 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1247
1248 /*
1249 * The GDB JIT entry and informing GDB.
1250 */
1251 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1252# if 1
1253 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1254# else
1255 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1256# endif
1257
1258 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1259 RTCritSectEnter(&g_IemNativeGdbJitLock);
1260 pEhFrame->GdbJitEntry.pNext = NULL;
1261 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1262 if (__jit_debug_descriptor.pTail)
1263 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1264 else
1265 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1266 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1267 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1268
1269 /* Notify GDB: */
1270 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1271 __jit_debug_register_code();
1272 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1273 RTCritSectLeave(&g_IemNativeGdbJitLock);
1274
1275# else /* !IEMNATIVE_USE_GDB_JIT */
1276 RT_NOREF(pVCpu);
1277# endif /* !IEMNATIVE_USE_GDB_JIT */
1278
1279 return VINF_SUCCESS;
1280}
1281
1282# endif /* !RT_OS_WINDOWS */
1283#endif /* IN_RING3 */
1284
1285
1286/**
1287 * Adds another chunk to the executable memory allocator.
1288 *
1289 * This is used by the init code for the initial allocation and later by the
1290 * regular allocator function when it's out of memory.
1291 */
1292static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1293{
1294 /* Check that we've room for growth. */
1295 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1296 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1297
1298 /* Allocate a chunk. */
1299#ifdef RT_OS_DARWIN
1300 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1301#else
1302 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1303#endif
1304 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1305
1306#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1307 int rc = VINF_SUCCESS;
1308#else
1309 /* Initialize the heap for the chunk. */
1310 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1311 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1312 AssertRC(rc);
1313 if (RT_SUCCESS(rc))
1314 {
1315 /*
1316 * We want the memory to be aligned on 64 byte, so the first time thru
1317 * here we do some exploratory allocations to see how we can achieve this.
1318 * On subsequent runs we only make an initial adjustment allocation, if
1319 * necessary.
1320 *
1321 * Since we own the heap implementation, we know that the internal block
1322 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1323 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1324 * to the size, align up by 64 bytes, and subtract 32 bytes.
1325 *
1326 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1327 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1328 * allocation to force subsequent allocations to return 64 byte aligned
1329 * user areas.
1330 */
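        /*
         * Worked example (illustrative): a 100 byte request becomes
         * RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the 64 byte aligned user
         * area stops 32 bytes short of the next 64 byte line, leaving exactly
         * enough room for the next block header and thus a 64 byte aligned
         * successor allocation.
         */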
1331 if (!pExecMemAllocator->cbHeapBlockHdr)
1332 {
1333 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1334 pExecMemAllocator->cbHeapAlignTweak = 64;
1335 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1336 32 /*cbAlignment*/);
1337 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1338
1339 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1340 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1341 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1342 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1343 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1344
1345 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1346 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1347 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1348 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1349 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1350
1351 RTHeapSimpleFree(hHeap, pvTest2);
1352 RTHeapSimpleFree(hHeap, pvTest1);
1353 }
1354 else
1355 {
1356 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1358 }
1359 if (RT_SUCCESS(rc))
1360#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1361 {
1362 /*
1363 * Add the chunk.
1364 *
1365 * This must be done before the unwind init so windows can allocate
1366 * memory from the chunk when using the alternative sub-allocator.
1367 */
1368 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1369#ifdef IN_RING3
1370 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1371#endif
1372#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1373 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1374#else
1375 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1376 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1377 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1378 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1379#endif
1380
1381 pExecMemAllocator->cChunks = idxChunk + 1;
1382 pExecMemAllocator->idxChunkHint = idxChunk;
1383
1384#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1385 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1386 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1387#else
1388 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1389 pExecMemAllocator->cbTotal += cbFree;
1390 pExecMemAllocator->cbFree += cbFree;
1391#endif
1392
1393#ifdef IN_RING3
1394 /*
1395 * Initialize the unwind information (this cannot really fail atm).
1396 * (This sets pvUnwindInfo.)
1397 */
1398 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1399 if (RT_SUCCESS(rc))
1400#endif
1401 {
1402 return VINF_SUCCESS;
1403 }
1404
1405#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1406 /* Just in case the impossible happens, undo the above: */
1407 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1408 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1409 pExecMemAllocator->cChunks = idxChunk;
1410 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1411 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1412 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1413 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1414#endif
1415 }
1416#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1417 }
1418#endif
1419 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1420 RT_NOREF(pVCpu);
1421 return rc;
1422}
1423
1424
1425/**
1426 * Initializes the executable memory allocator for native recompilation on the
1427 * calling EMT.
1428 *
1429 * @returns VBox status code.
1430 * @param pVCpu The cross context virtual CPU structure of the calling
1431 * thread.
1432 * @param cbMax The max size of the allocator.
1433 * @param cbInitial The initial allocator size.
1434 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1435 * dependent).
1436 */
1437int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1438{
1439 /*
1440 * Validate input.
1441 */
1442 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1443 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1444 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1445 || cbChunk == 0
1446 || ( RT_IS_POWER_OF_TWO(cbChunk)
1447 && cbChunk >= _1M
1448 && cbChunk <= _256M
1449 && cbChunk <= cbMax),
1450 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1451 VERR_OUT_OF_RANGE);
1452
1453 /*
1454 * Adjust/figure out the chunk size.
1455 */
1456 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1457 {
1458 if (cbMax >= _256M)
1459 cbChunk = _64M;
1460 else
1461 {
1462 if (cbMax < _16M)
1463 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1464 else
1465 cbChunk = (uint32_t)cbMax / 4;
1466 if (!RT_IS_POWER_OF_TWO(cbChunk))
1467 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1468 }
1469 }
1470
1471 if (cbChunk > cbMax)
1472 cbMax = cbChunk;
1473 else
1474 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1475 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1476 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1477
1478 /*
1479 * Allocate and initialize the allocator instance.
1480 */
1481 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1482#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1483 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1484 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1485 cbNeeded += cbBitmap * cMaxChunks;
1486 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1487 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1488#endif
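    /* Worked example (illustrative): with a 64 MB chunk (the default for large
       cbMax) and 128 byte units this gives 524288 units per chunk, i.e. a 64 KB
       allocation bitmap consisting of 8192 uint64_t elements per chunk. */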
1489#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1490 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1491 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1492#endif
1493 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1494 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1495 VERR_NO_MEMORY);
1496 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1497 pExecMemAllocator->cbChunk = cbChunk;
1498 pExecMemAllocator->cMaxChunks = cMaxChunks;
1499 pExecMemAllocator->cChunks = 0;
1500 pExecMemAllocator->idxChunkHint = 0;
1501 pExecMemAllocator->cAllocations = 0;
1502 pExecMemAllocator->cbTotal = 0;
1503 pExecMemAllocator->cbFree = 0;
1504 pExecMemAllocator->cbAllocated = 0;
1505#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1506 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1507 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1508 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1509 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1510#endif
1511#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1512 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1513#endif
1514 for (uint32_t i = 0; i < cMaxChunks; i++)
1515 {
1516#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1517 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1518 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1519#else
1520 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1521#endif
1522 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1523#ifdef IN_RING0
1524 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1525#else
1526 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1527#endif
1528 }
1529 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1530
1531 /*
1532 * Do the initial allocations.
1533 */
1534    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1535 {
1536 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1537 AssertLogRelRCReturn(rc, rc);
1538 }
1539
1540 pExecMemAllocator->idxChunkHint = 0;
1541
1542 return VINF_SUCCESS;
1543}
1544
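/*
 * Illustrative usage sketch (kept out of the build): roughly how an EMT init path
 * could call iemExecMemAllocatorInit.  The wrapper name and the sizes below are
 * made-up examples, not the values used by the real init code.
 */
#if 0
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
    /* 256 MB cap, 16 MB pre-allocated, chunk size left for the function to pick. */
    int rc = iemExecMemAllocatorInit(pVCpu, _256M, _16M, UINT32_MAX);
    AssertLogRelRCReturn(rc, rc);
    return VINF_SUCCESS;
}
#endif
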
1545
1546/*********************************************************************************************************************************
1547* Native Recompilation *
1548*********************************************************************************************************************************/
1549
1550
1551/**
1552 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1553 */
1554IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1555{
1556 pVCpu->iem.s.cInstructions += idxInstr;
1557 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1558}
1559
1560
1561/**
1562 * Used by TB code when it wants to raise a \#GP(0).
1563 */
1564IEM_DECL_IMPL_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu, uint8_t idxInstr))
1565{
1566 pVCpu->iem.s.cInstructions += idxInstr;
1567 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1568 return VINF_IEM_RAISED_XCPT; /* not reached */
1569}
1570
1571
1572/**
1573 * Reinitializes the native recompiler state.
1574 *
1575 * Called before starting a new recompile job.
1576 */
1577static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1578{
1579 pReNative->cLabels = 0;
1580 pReNative->bmLabelTypes = 0;
1581 pReNative->cFixups = 0;
1582 pReNative->pTbOrg = pTb;
1583
1584 pReNative->bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1585#if IEMNATIVE_HST_GREG_COUNT < 32
1586 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1587#endif
1588 ;
1589 pReNative->bmHstRegsWithGstShadow = 0;
1590 pReNative->bmGstRegShadows = 0;
1591 pReNative->cCondDepth = 0;
1592 pReNative->uCondSeqNo = 0;
1593 pReNative->bmVars = 0;
1594 pReNative->u64ArgVars = UINT64_MAX;
1595
1596 /* Full host register reinit: */
1597 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->aHstRegs); i++)
1598 {
1599 pReNative->aHstRegs[i].fGstRegShadows = 0;
1600 pReNative->aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1601 pReNative->aHstRegs[i].idxVar = UINT8_MAX;
1602 }
1603
1604 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1605 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1606#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1607 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1608#endif
1609#ifdef IEMNATIVE_REG_FIXED_TMP0
1610 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1611#endif
1612 );
1613 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1614 {
1615 fRegs &= ~RT_BIT_32(idxReg);
1616        pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1617 }
1618
1619 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1620#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1621 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1622#endif
1623#ifdef IEMNATIVE_REG_FIXED_TMP0
1624 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1625#endif
1626 return pReNative;
1627}
1628
1629
1630/**
1631 * Allocates and initializes the native recompiler state.
1632 *
1633 * This is called the first time an EMT wants to recompile something.
1634 *
1635 * @returns Pointer to the new recompiler state.
1636 * @param pVCpu The cross context virtual CPU structure of the calling
1637 * thread.
1638 * @param pTb The TB that's about to be recompiled.
1639 * @thread EMT(pVCpu)
1640 */
1641static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1642{
1643 VMCPU_ASSERT_EMT(pVCpu);
1644
1645 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1646 AssertReturn(pReNative, NULL);
1647
1648 /*
1649 * Try allocate all the buffers and stuff we need.
1650 */
1651 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1652 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1653 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1654 if (RT_LIKELY( pReNative->pInstrBuf
1655 && pReNative->paLabels
1656 && pReNative->paFixups))
1657 {
1658 /*
1659 * Set the buffer & array sizes on success.
1660 */
1661 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1662 pReNative->cLabelsAlloc = _8K;
1663 pReNative->cFixupsAlloc = _16K;
1664
1665 /*
1666 * Done, just need to save it and reinit it.
1667 */
1668 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1669 return iemNativeReInit(pReNative, pTb);
1670 }
1671
1672 /*
1673 * Failed. Cleanup and return.
1674 */
1675 AssertFailed();
1676 RTMemFree(pReNative->pInstrBuf);
1677 RTMemFree(pReNative->paLabels);
1678 RTMemFree(pReNative->paFixups);
1679 RTMemFree(pReNative);
1680 return NULL;
1681}
1682
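/*
 * Illustrative sketch (not built): how a recompile entry point might pick between
 * iemNativeInit and iemNativeReInit.  The wrapper function is hypothetical; only
 * the two calls and pNativeRecompilerStateR3 come from the code above.
 */
#if 0
static PIEMRECOMPILERSTATE iemNativeGetStateExample(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (pReNative)
        return iemNativeReInit(pReNative, pTb);     /* reuse the buffers from the previous job */
    return iemNativeInit(pVCpu, pTb);               /* first recompilation on this EMT */
}
#endif
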
1683
1684/**
1685 * Defines a label.
1686 *
1687 * @returns Label ID.
1688 * @param pReNative The native recompile state.
1689 * @param enmType The label type.
1690 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1691 * label is not yet defined (default).
1692 * @param   uData       Data associated with the label.  Only applicable to
1693 * certain type of labels. Default is zero.
1694 */
1695DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1696 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1697{
1698 /*
1699 * Do we have the label already?
1700 */
1701 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1702 uint32_t const cLabels = pReNative->cLabels;
1703 for (uint32_t i = 0; i < cLabels; i++)
1704 if ( paLabels[i].enmType == enmType
1705 && paLabels[i].uData == uData)
1706 {
1707 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
1708 return i;
1709 if (paLabels[i].off == UINT32_MAX)
1710 {
1711 paLabels[i].off = offWhere;
1712 return i;
1713 }
1714 }
1715
1716 /*
1717 * Make sure we've got room for another label.
1718 */
1719 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1720 { /* likely */ }
1721 else
1722 {
1723 uint32_t cNew = pReNative->cLabelsAlloc;
1724 AssertReturn(cNew, UINT32_MAX);
1725 AssertReturn(cLabels == cNew, UINT32_MAX);
1726 cNew *= 2;
1727        AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1728 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1729 AssertReturn(paLabels, UINT32_MAX);
1730 pReNative->paLabels = paLabels;
1731 pReNative->cLabelsAlloc = cNew;
1732 }
1733
1734 /*
1735 * Define a new label.
1736 */
1737 paLabels[cLabels].off = offWhere;
1738 paLabels[cLabels].enmType = enmType;
1739 paLabels[cLabels].uData = uData;
1740 pReNative->cLabels = cLabels + 1;
1741
1742 Assert(enmType >= 0 && enmType < 64);
1743 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
1744 return cLabels;
1745}
1746
1747
1748/**
1749 * Looks up a label.
1750 *
1751 * @returns Label ID if found, UINT32_MAX if not.
1752 */
1753static uint32_t iemNativeFindLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1754 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1755{
1756 Assert(enmType >= 0 && enmType < 64);
1757 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
1758 {
1759 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1760 uint32_t const cLabels = pReNative->cLabels;
1761 for (uint32_t i = 0; i < cLabels; i++)
1762 if ( paLabels[i].enmType == enmType
1763 && paLabels[i].uData == uData
1764 && ( paLabels[i].off == offWhere
1765 || offWhere == UINT32_MAX
1766 || paLabels[i].off == UINT32_MAX))
1767 return i;
1768 }
1769 return UINT32_MAX;
1770}
1771
1772
1773
1774/**
1775 * Adds a fixup.
1776 *
1777 * @returns Success indicator.
1778 * @param pReNative The native recompile state.
1779 * @param offWhere The instruction offset of the fixup location.
1780 * @param idxLabel The target label ID for the fixup.
1781 * @param enmType The fixup type.
1782 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1783 */
1784DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1785 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1786{
1787 Assert(idxLabel <= UINT16_MAX);
1788 Assert((unsigned)enmType <= UINT8_MAX);
1789
1790 /*
1791     * Make sure we've got room.
1792 */
1793 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1794 uint32_t const cFixups = pReNative->cFixups;
1795 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1796 { /* likely */ }
1797 else
1798 {
1799 uint32_t cNew = pReNative->cFixupsAlloc;
1800 AssertReturn(cNew, false);
1801 AssertReturn(cFixups == cNew, false);
1802 cNew *= 2;
1803 AssertReturn(cNew <= _128K, false);
1804 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1805 AssertReturn(paFixups, false);
1806 pReNative->paFixups = paFixups;
1807 pReNative->cFixupsAlloc = cNew;
1808 }
1809
1810 /*
1811 * Add the fixup.
1812 */
1813 paFixups[cFixups].off = offWhere;
1814 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1815 paFixups[cFixups].enmType = enmType;
1816 paFixups[cFixups].offAddend = offAddend;
1817 pReNative->cFixups = cFixups + 1;
1818 return true;
1819}
1820
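/*
 * Illustrative sketch (not built): the forward-branch pattern served by
 * iemNativeMakeLabel and iemNativeAddFixup, as also used by the ARM64 path of
 * iemNativeEmitCheckCallRetAndPassUp below.  The wrapper name is hypothetical
 * and the branch instruction itself is left out.
 */
#if 0
static uint32_t iemNativeEmitForwardBranchExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* 1. Get (or create) the yet-undefined label for the target. */
    uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);

    /* 2. Record a fixup at the current offset, then emit the branch with a zero displacement. */
    AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
    /* ... emit the branch instruction here ... */

    /* 3. When code generation reaches the target, define the label position (uData = 0). */
    iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp, off);
    return off;
}
#endif
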
1821/**
1822 * Slow code path for iemNativeInstrBufEnsure.
1823 */
1824DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1825 uint32_t cInstrReq) RT_NOEXCEPT
1826{
1827 /* Double the buffer size till we meet the request. */
1828 uint32_t cNew = pReNative->cInstrBufAlloc;
1829 AssertReturn(cNew > 0, NULL);
1830 do
1831 cNew *= 2;
1832 while (cNew < off + cInstrReq);
1833
1834 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1835#ifdef RT_ARCH_ARM64
1836 AssertReturn(cbNew <= _1M, NULL); /* Limited by the branch instruction range (18+2 bits). */
1837#else
1838 AssertReturn(cbNew <= _2M, NULL);
1839#endif
1840
1841 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1842 AssertReturn(pvNew, NULL);
1843
1844 pReNative->cInstrBufAlloc = cNew;
1845 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1846}
1847
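/*
 * Illustrative sketch (not built): the emit pattern that ends up in
 * iemNativeInstrBufEnsureSlow when the buffer runs short.  It mirrors the AMD64
 * usage further down (on ARM64 the buffer is addressed as uint32_t units); the
 * wrapper name and the emitted NOP are examples only.
 */
#if 0
static uint32_t iemNativeEmitNopExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);    /* make room for one more unit */
    AssertReturn(pbCodeBuf, UINT32_MAX);
    pbCodeBuf[off++] = 0x90;                                            /* nop (AMD64) */
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif
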
1848
1849
1850/*********************************************************************************************************************************
1851* Register Allocator *
1852*********************************************************************************************************************************/
1853
1854/**
1855 * Register parameter indexes (indexed by argument number).
1856 */
1857DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
1858{
1859 IEMNATIVE_CALL_ARG0_GREG,
1860 IEMNATIVE_CALL_ARG1_GREG,
1861 IEMNATIVE_CALL_ARG2_GREG,
1862 IEMNATIVE_CALL_ARG3_GREG,
1863#if defined(IEMNATIVE_CALL_ARG4_GREG)
1864 IEMNATIVE_CALL_ARG4_GREG,
1865# if defined(IEMNATIVE_CALL_ARG5_GREG)
1866 IEMNATIVE_CALL_ARG5_GREG,
1867# if defined(IEMNATIVE_CALL_ARG6_GREG)
1868 IEMNATIVE_CALL_ARG6_GREG,
1869# if defined(IEMNATIVE_CALL_ARG7_GREG)
1870 IEMNATIVE_CALL_ARG7_GREG,
1871# endif
1872# endif
1873# endif
1874#endif
1875};
1876
1877/**
1878 * Call register masks indexed by argument count.
1879 */
1880DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
1881{
1882 0,
1883 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
1884 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
1885 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
1886 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1887 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
1888#if defined(IEMNATIVE_CALL_ARG4_GREG)
1889 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1890 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
1891# if defined(IEMNATIVE_CALL_ARG5_GREG)
1892 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1893 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
1894# if defined(IEMNATIVE_CALL_ARG6_GREG)
1895 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1896 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1897 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
1898# if defined(IEMNATIVE_CALL_ARG7_GREG)
1899 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1900 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1901 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
1902# endif
1903# endif
1904# endif
1905#endif
1906};
1907
1908/**
1909 * Info about shadowed guest register values.
1910 * @see IEMNATIVEGSTREG
1911 */
1912static struct
1913{
1914 /** Offset in VMCPU. */
1915 uint32_t off;
1916 /** The field size. */
1917 uint8_t cb;
1918 /** Name (for logging). */
1919 const char *pszName;
1920} const g_aGstShadowInfo[] =
1921{
1922#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
1923 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
1924 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
1925 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
1926 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
1927 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
1928 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
1929 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
1930 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
1931 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
1932 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
1933 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
1934 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
1935 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
1936 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
1937 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
1938 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
1939 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
1940 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
1941 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
1942 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
1943 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
1944 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
1945 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
1946 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
1947 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
1948 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
1949 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
1950 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
1951 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
1952 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
1953 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
1954 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
1955 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
1956 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
1957 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
1958 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
1959 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
1960 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
1961 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
1962 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
1963 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
1964 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
1965#undef CPUMCTX_OFF_AND_SIZE
1966};
1967AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
1968
1969
1970/** Host CPU general purpose register names. */
1971const char * const g_apszIemNativeHstRegNames[] =
1972{
1973#ifdef RT_ARCH_AMD64
1974 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
1975#elif defined(RT_ARCH_ARM64)
1976 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
1977 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
1978#else
1979# error "port me"
1980#endif
1981};
1982
1983
1984DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
1985 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
1986{
1987 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
1988
1989 pReNative->aHstRegs[idxReg].enmWhat = enmWhat;
1990 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
1991 pReNative->aHstRegs[idxReg].idxVar = idxVar;
1992 return (uint8_t)idxReg;
1993}
1994
1995
1996/**
1997 * Locate a register, possibly freeing one up.
1998 *
1999 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2000 * failed.
2001 */
2002static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
2003{
2004 uint32_t fRegMask = fAllowVolatile
2005 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
2006 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
2007
2008 /*
2009     * Try a free register that's shadowing a guest register.
2010 */
2011 uint32_t fRegs = ~pReNative->bmHstRegs & fRegMask;
2012 if (fRegs)
2013 {
2014 /** @todo pick better here: */
2015 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2016
2017 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2018 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2019 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2020 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2021
2022 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2023 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2024 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2025 return idxReg;
2026 }
2027
2028 /*
2029 * Try free up a variable that's in a register.
2030 *
2031     * We do two rounds here: first we evacuate variables that don't need to be
2032     * saved on the stack, then in the second round we move things to the stack.
2033 */
2034 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
2035 {
2036 uint32_t fVars = pReNative->bmVars;
2037 while (fVars)
2038 {
2039 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
2040 uint8_t const idxReg = pReNative->aVars[idxVar].idxReg;
2041 if ( idxReg < RT_ELEMENTS(pReNative->aHstRegs)
2042 && (RT_BIT_32(idxReg) & fRegMask)
2043 && ( iLoop == 0
2044 ? pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack
2045 : pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
2046 {
2047 Assert(pReNative->bmHstRegs & RT_BIT_32(idxReg));
2048 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxReg].fGstRegShadows)
2049 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2050 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2051 == RT_BOOL(pReNative->aHstRegs[idxReg].fGstRegShadows));
2052
2053 if (pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
2054 {
2055 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
2056 uint32_t off = *poff;
2057 *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
2058 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t)
2059 - IEMNATIVE_FP_OFF_STACK_VARS,
2060 idxReg);
2061 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2062 }
2063
2064 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2065 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2066 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2067 pReNative->bmHstRegs &= ~RT_BIT_32(idxReg);
2068 return idxReg;
2069 }
2070 fVars &= ~RT_BIT_32(idxVar);
2071 }
2072 }
2073
2074 AssertFailedReturn(UINT8_MAX);
2075}
2076
2077
2078/**
2079 * Moves a variable to a different register or spills it onto the stack.
2080 *
2081 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
2082 * kinds can easily be recreated if needed later.
2083 *
2084 * @returns The new code buffer position, UINT32_MAX on failure.
2085 * @param pReNative The native recompile state.
2086 * @param off The current code buffer position.
2087 * @param idxVar The variable index.
2088 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
2089 * call-volatile registers.
2090 */
2091static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
2092 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
2093{
2094 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2095 Assert(pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
2096
2097 uint8_t const idxRegOld = pReNative->aVars[idxVar].idxReg;
2098 Assert(idxRegOld < RT_ELEMENTS(pReNative->aHstRegs));
2099 Assert(pReNative->bmHstRegs & RT_BIT_32(idxRegOld));
2100 Assert(pReNative->aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
2101 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegOld].fGstRegShadows)
2102 == pReNative->aHstRegs[idxRegOld].fGstRegShadows);
2103 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
2104 == RT_BOOL(pReNative->aHstRegs[idxRegOld].fGstRegShadows));
2105
2106
2107 /** @todo Add statistics on this.*/
2108 /** @todo Implement basic variable liveness analysis (python) so variables
2109     * can be freed immediately once no longer used.  Without it we risk trashing
2110     * registers and stack slots on variables that are already dead. */
2111
2112 /*
2113 * First try move it to a different register, as that's cheaper.
2114 */
2115 fForbiddenRegs |= RT_BIT_32(idxRegOld);
2116 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
2117 uint32_t fRegs = ~pReNative->bmHstRegs & ~fForbiddenRegs;
2118 if (fRegs)
2119 {
2120 /* Avoid using shadow registers, if possible. */
2121 if (fRegs & ~pReNative->bmHstRegsWithGstShadow)
2122 fRegs &= ~pReNative->bmHstRegsWithGstShadow;
2123 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
2124
2125 uint64_t fGstRegShadows = pReNative->aHstRegs[idxRegOld].fGstRegShadows;
2126 pReNative->aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
2127 pReNative->aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
2128 pReNative->aHstRegs[idxRegNew].idxVar = idxVar;
2129 if (fGstRegShadows)
2130 {
2131 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
2132 while (fGstRegShadows)
2133 {
2134                unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
2135 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
2136
2137 Assert(pReNative->aidxGstRegShadows[idxGstReg] == idxRegOld);
2138 pReNative->aidxGstRegShadows[idxGstReg] = idxRegNew;
2139 }
2140 }
2141
2142 pReNative->aVars[idxVar].idxReg = (uint8_t)idxRegNew;
2143 pReNative->bmHstRegs |= RT_BIT_32(idxRegNew);
2144 }
2145 /*
2146 * Otherwise we must spill the register onto the stack.
2147 */
2148 else
2149 {
2150 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
2151 off = iemNativeEmitStoreGprByBp(pReNative, off,
2152 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t) - IEMNATIVE_FP_OFF_STACK_VARS,
2153 idxRegOld);
2154 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2155
2156 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
2157 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxRegOld].fGstRegShadows;
2158 }
2159
2160 pReNative->bmHstRegs &= ~RT_BIT_32(idxRegOld);
2161 pReNative->aHstRegs[idxRegOld].fGstRegShadows = 0;
2162 return off;
2163}
2164
2165
2166/**
2167 * Allocates a temporary host general purpose register.
2168 *
2169 * This may emit code to save register content onto the stack in order to free
2170 * up a register.
2171 *
2172 * @returns The host register number, UINT8_MAX on failure.
2173 * @param pReNative The native recompile state.
2174 * @param poff Pointer to the variable with the code buffer position.
2175 *                          This will be updated if we need to move a variable from
2176 * register to stack in order to satisfy the request.
2177 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2178 * registers (@c true, default) or the other way around
2179 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2180 */
2181DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2182 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2183{
2184 /*
2185 * Try find a completely unused register, preferably a call-volatile one.
2186 */
2187 uint8_t idxReg;
2188 uint32_t fRegs = ~pReNative->bmHstRegs
2189 & ~pReNative->bmHstRegsWithGstShadow
2190 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2191 if (fRegs)
2192 {
2193 if (fPreferVolatile)
2194 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2195 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2196 else
2197 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2198 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2199 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2200 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2201 }
2202 else
2203 {
2204 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2205 AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
2206 }
2207 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2208}
2209
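/*
 * Illustrative sketch (not built): allocate a scratch register, emit something
 * using it, then hand it back.  The wrapper name and the immediate value are
 * examples; only the allocator/emitter calls come from this file.
 */
#if 0
static uint32_t iemNativeEmitScratchExample(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
    AssertReturn(idxTmpReg != UINT8_MAX, UINT32_MAX);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xdeadbeef));
    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    /* ... use idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif
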
2210
2211/**
2212 * Allocates a temporary register for loading an immediate value into.
2213 *
2214 * This will emit code to load the immediate, unless there happens to be an
2215 * unused register with the value already loaded.
2216 *
2217 * The caller will not modify the returned register; it must be considered
2218 * read-only.  Free using iemNativeRegFreeTmpImm.
2219 *
2220 * @returns The host register number, UINT8_MAX on failure.
2221 * @param pReNative The native recompile state.
2222 * @param poff Pointer to the variable with the code buffer position.
2223 * @param uImm The immediate value that the register must hold upon
2224 * return.
2225 * @param   fPreferVolatile Whether to prefer volatile over non-volatile
2226 * registers (@c true, default) or the other way around
2227 * (@c false).
2228 *
2229 * @note Reusing immediate values has not been implemented yet.
2230 */
2231DECLHIDDEN(uint8_t) iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm,
2232 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2233{
2234 uint8_t idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
2235 if (idxReg < RT_ELEMENTS(pReNative->aHstRegs))
2236 {
2237 uint32_t off = *poff;
2238 *poff = off = iemNativeEmitLoadGprImm64(pReNative, off, idxReg, uImm);
2239 AssertReturnStmt(off != UINT32_MAX, iemNativeRegFreeTmp(pReNative, idxReg), UINT8_MAX);
2240 }
2241 return idxReg;
2242}
2243
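/*
 * Illustrative sketch (not built): the read-only immediate variant pairs with
 * iemNativeRegFreeTmpImm rather than iemNativeRegFreeTmp.  The immediate value
 * is an example only.
 */
#if 0
uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0x1000), true /*fPreferVolatile*/);
AssertReturn(idxImmReg != UINT8_MAX, UINT32_MAX);
/* ... use idxImmReg strictly as a read-only source operand ... */
iemNativeRegFreeTmpImm(pReNative, idxImmReg);
#endif
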
2244
2245/**
2246 * Marks host register @a idxHstReg as containing a shadow copy of guest
2247 * register @a enmGstReg.
2248 *
2249 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2250 * host register before calling.
2251 */
2252DECL_FORCE_INLINE(void)
2253iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
2254{
2255 Assert(!(pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg)));
2256
2257 pReNative->aidxGstRegShadows[enmGstReg] = idxHstReg;
2258 pReNative->aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2259 pReNative->bmGstRegShadows |= RT_BIT_64(enmGstReg);
2260 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2261}
2262
2263
2264/**
2265 * Clears any guest register shadow claims from @a idxHstReg.
2266 *
2267 * The register does not need to be shadowing any guest registers.
2268 */
2269DECL_FORCE_INLINE(void)
2270iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg)
2271{
2272 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2273 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2274 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2275 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2276
2277 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2278 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2279 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2280}
2281
2282
2283/**
2284 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2285 * to @a idxRegTo.
2286 */
2287DECL_FORCE_INLINE(void)
2288iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo, IEMNATIVEGSTREG enmGstReg)
2289{
2290 Assert(pReNative->aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2291 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
2292 == pReNative->aHstRegs[idxRegFrom].fGstRegShadows);
2293 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2294 == RT_BOOL(pReNative->aHstRegs[idxRegFrom].fGstRegShadows));
2295
2296 pReNative->aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2297 pReNative->aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2298 pReNative->aidxGstRegShadows[enmGstReg] = idxRegTo;
2299}
2300
2301
2302
2303/**
2304 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
2305 */
2306typedef enum IEMNATIVEGSTREGUSE
2307{
2308 /** The usage is read-only, the register holding the guest register
2309 * shadow copy will not be modified by the caller. */
2310 kIemNativeGstRegUse_ReadOnly = 0,
2311 /** The caller will update the guest register (think: PC += cbInstr).
2312 * The guest shadow copy will follow the returned register. */
2313 kIemNativeGstRegUse_ForUpdate,
2314 /** The caller will use the guest register value as input in a calculation
2315 * and the host register will be modified.
2316 * This means that the returned host register will not be marked as a shadow
2317 * copy of the guest register. */
2318 kIemNativeGstRegUse_Calculation
2319} IEMNATIVEGSTREGUSE;
2320
2321/**
2322 * Allocates a temporary host general purpose register for updating a guest
2323 * register value.
2324 *
2325 * Since we may already have a register holding the guest register value,
2326 * code is only emitted to do the loading if that is not the case.  Code may
2327 * also be emitted if we have to free up a register to satisfy the request.
2328 *
2329 * @returns The host register number, UINT8_MAX on failure.
2330 * @param pReNative The native recompile state.
2331 * @param poff Pointer to the variable with the code buffer
2332 *                      position.  This will be updated if we need to move a
2333 * variable from register to stack in order to satisfy
2334 * the request.
2335 * @param   enmGstReg       The guest register that is to be updated.
2336 * @param enmIntendedUse How the caller will be using the host register.
2337 */
2338DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2339 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
2340{
2341 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2342#ifdef LOG_ENABLED
2343 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2344#endif
2345
2346 /*
2347 * First check if the guest register value is already in a host register.
2348 */
2349 if (pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg))
2350 {
2351 uint8_t idxReg = pReNative->aidxGstRegShadows[enmGstReg];
2352 Assert(idxReg < RT_ELEMENTS(pReNative->aHstRegs));
2353 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2354 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2355
2356 if (!(pReNative->bmHstRegs & RT_BIT_32(idxReg)))
2357 {
2358 /*
2359             * If the register will trash the guest shadow copy, try to find a
2360             * completely unused register we can use instead.  If that fails,
2361 * we need to disassociate the host reg from the guest reg.
2362 */
2363 /** @todo would be nice to know if preserving the register is in any way helpful. */
2364 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2365 && ( ~pReNative->bmHstRegs
2366 & ~pReNative->bmHstRegsWithGstShadow
2367 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2368 {
2369 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2370 Assert(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs));
2371
2372 uint32_t off = *poff;
2373 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2374 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2375
2376 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2377 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2378 g_apszIemNativeHstRegNames[idxRegNew]));
2379 idxReg = idxRegNew;
2380 }
2381 else
2382 {
2383 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
2384 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2385 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2386 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2387 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2388 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2389 else
2390 {
2391 iemNativeRegClearGstRegShadowing(pReNative, idxReg);
2392 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2393 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2394 }
2395 }
2396 }
2397 else
2398 {
2399 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2400 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2401
2402 /*
2403 * Allocate a new register, copy the value and, if updating, the
2404 * guest shadow copy assignment to the new register.
2405 */
2406 /** @todo share register for readonly access. */
2407 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2408 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2409
2410 uint32_t off = *poff;
2411 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2412 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2413
2414 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2415 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2416 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2417 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2418 else
2419 {
2420 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg);
2421 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2422 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2423 g_apszIemNativeHstRegNames[idxRegNew]));
2424 }
2425 idxReg = idxRegNew;
2426 }
2427
2428#ifdef VBOX_STRICT
2429 /* Strict builds: Check that the value is correct. */
2430 uint32_t off = *poff;
2431 *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
2432 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2433#endif
2434
2435 return idxReg;
2436 }
2437
2438 /*
2439     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
2440 */
2441 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2442 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2443
2444 uint32_t off = *poff;
2445 *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
2446 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2447
2448 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2449 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg);
2450 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2451 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2452
2453 return idxRegNew;
2454}
2455
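/*
 * Illustrative sketch (not built): fetching the guest PC shadow for an update,
 * e.g. when advancing RIP past an instruction.  The wrapper name and cbInstr
 * parameter are hypothetical; the advance and write-back emits are left out.
 */
#if 0
static uint32_t iemNativeEmitAdvancePcExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
    /* ... emit 'add idxPcReg, cbInstr' and the store back to CPUMCTX here ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif
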
2456
2457DECLHIDDEN(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
2458
2459
2460/**
2461 * Allocates argument registers for a function call.
2462 *
2463 * @returns New code buffer offset on success, UINT32_MAX on failure.
2464 * @param pReNative The native recompile state.
2465 * @param off The current code buffer offset.
2466 * @param cArgs The number of arguments the function call takes.
2467 */
2468DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
2469{
2470    AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, UINT32_MAX);
2471 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2472 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2473
2474 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2475 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2476 else if (cArgs == 0)
2477        return off;
2478
2479 /*
2480     * Do we get lucky and all registers are free and not shadowing anything?
2481 */
2482 if (((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2483 for (uint32_t i = 0; i < cArgs; i++)
2484 {
2485 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2486 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2487 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2488 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2489 }
2490 /*
2491 * Okay, not lucky so we have to free up the registers.
2492 */
2493 else
2494 for (uint32_t i = 0; i < cArgs; i++)
2495 {
2496 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2497 if (pReNative->bmHstRegs & RT_BIT_32(idxReg))
2498 {
2499 switch (pReNative->aHstRegs[idxReg].enmWhat)
2500 {
2501 case kIemNativeWhat_Var:
2502 {
2503 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2504                        AssertReturn(idxVar < RT_ELEMENTS(pReNative->aVars), UINT32_MAX);
2505 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2506 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2507
2508 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2509 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2510 else
2511 {
2512 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2513                            AssertReturn(off != UINT32_MAX, UINT32_MAX);
2514 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2515 }
2516 break;
2517 }
2518
2519 case kIemNativeWhat_Tmp:
2520 case kIemNativeWhat_Arg:
2521 case kIemNativeWhat_rc:
2522                        AssertFailedReturn(UINT32_MAX);
2523                    default:
2524                        AssertFailedReturn(UINT32_MAX);
2525 }
2526
2527 }
2528 if (pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2529 {
2530 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2531 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2532 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2533 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2534 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2535 }
2536 else
2537 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2538 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2539 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2540 }
2541 pReNative->bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2542    return off;
2543}
2544
2545
2546DECLHIDDEN(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
2547
2548
2549#if 0
2550/**
2551 * Frees a register assignment of any type.
2552 *
2553 * @param pReNative The native recompile state.
2554 * @param idxHstReg The register to free.
2555 *
2556 * @note Does not update variables.
2557 */
2558DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2559{
2560 Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2561 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2562 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2563 Assert( pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2564 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2565 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2566 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2567 Assert( pReNative->aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2568 || pReNative->aVars[pReNative->aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2569 || (pReNative->bmVars & RT_BIT_32(pReNative->aHstRegs[idxHstReg].idxVar)));
2570 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2571 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2572 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2573 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2574
2575 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2576 /* no flushing, right:
2577 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2578 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2579 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2580 */
2581}
2582#endif
2583
2584
2585/**
2586 * Frees a temporary register.
2587 *
2588 * Any shadow copies of guest registers assigned to the host register will not
2589 * be flushed by this operation.
2590 */
2591DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2592{
2593 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2594 Assert(pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2595 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2596 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2597 g_apszIemNativeHstRegNames[idxHstReg], pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2598}
2599
2600
2601/**
2602 * Frees a temporary immediate register.
2603 *
2604 * It is assumed that the caller has not modified the register, so it still
2605 * holds the same value as when it was allocated via iemNativeRegAllocTmpImm().
2606 */
2607DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2608{
2609 iemNativeRegFreeTmp(pReNative, idxHstReg);
2610}
2611
2612
2613/**
2614 * Called right before emitting a call instruction to move anything important
2615 * out of call-volatile registers, free and flush the call-volatile registers,
2616 * optionally freeing argument variables.
2617 *
2618 * @returns New code buffer offset, UINT32_MAX on failure.
2619 * @param pReNative The native recompile state.
2620 * @param off The code buffer offset.
2621 * @param cArgs The number of arguments the function call takes.
2622 *                      It is presumed that the host register parts of these
2623 *                      have already been allocated as such and won't need
2624 *                      moving, just freeing.
2625 * @param fFreeArgVars Whether to free argument variables for the call.
2626 */
2627DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2628 uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
2629{
2630 /*
2631 * Free argument variables first (simplified).
2632 */
2633 AssertReturn(cArgs <= RT_ELEMENTS(pReNative->aidxArgVars), UINT32_MAX);
2634 if (fFreeArgVars && cArgs > 0)
2635 {
2636 for (uint32_t i = 0; i < cArgs; i++)
2637 {
2638 uint8_t idxVar = pReNative->aidxArgVars[i];
2639 if (idxVar < RT_ELEMENTS(pReNative->aVars))
2640 {
2641 pReNative->aidxArgVars[i] = UINT8_MAX;
2642 pReNative->bmVars &= ~RT_BIT_32(idxVar);
2643 Assert( pReNative->aVars[idxVar].idxReg
2644 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2645 }
2646 }
2647 Assert(pReNative->u64ArgVars == UINT64_MAX);
2648 }
2649
2650 /*
2651 * Move anything important out of volatile registers.
2652 */
2653 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2654 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2655 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2656#ifdef IEMNATIVE_REG_FIXED_TMP0
2657 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2658#endif
2659 & ~g_afIemNativeCallRegs[cArgs];
2660
2661 fRegsToMove &= pReNative->bmHstRegs;
2662 if (!fRegsToMove)
2663 { /* likely */ }
2664 else
2665 while (fRegsToMove != 0)
2666 {
2667 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2668 fRegsToMove &= ~RT_BIT_32(idxReg);
2669
2670 switch (pReNative->aHstRegs[idxReg].enmWhat)
2671 {
2672 case kIemNativeWhat_Var:
2673 {
2674 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2675 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2676 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2677 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2678 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2679 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2680 else
2681 {
2682 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2683 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2684 }
2685 continue;
2686 }
2687
2688 case kIemNativeWhat_Arg:
2689 AssertMsgFailed(("What?!?: %u\n", idxReg));
2690 continue;
2691
2692 case kIemNativeWhat_rc:
2693 case kIemNativeWhat_Tmp:
2694 AssertMsgFailed(("Missing free: %u\n", idxReg));
2695 continue;
2696
2697 case kIemNativeWhat_FixedTmp:
2698 case kIemNativeWhat_pVCpuFixed:
2699 case kIemNativeWhat_pCtxFixed:
2700 case kIemNativeWhat_FixedReserved:
2701 case kIemNativeWhat_Invalid:
2702 case kIemNativeWhat_End:
2703 AssertFailedReturn(UINT32_MAX);
2704 }
2705 AssertFailedReturn(UINT32_MAX);
2706 }
2707
2708 /*
2709 * Do the actual freeing.
2710 */
2711 pReNative->bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2712
2713 /* If there are guest register shadows in any call-volatile register, we
2714       have to clear the corresponding guest register masks for each register. */
2715 uint32_t fHstRegsWithGstShadow = pReNative->bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2716 if (fHstRegsWithGstShadow)
2717 {
2718 pReNative->bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2719 do
2720 {
2721 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2722            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2723
2724 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2725 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2726 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2727 } while (fHstRegsWithGstShadow != 0);
2728 }
2729
2730 return off;
2731}
2732
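/*
 * Illustrative sketch (not built): the rough order of operations around a helper
 * call, combining routines defined above and below.  The wrapper name, argument
 * count and fGstRegsToFlush mask are hypothetical; argument loading and the
 * actual call emission are omitted.
 */
#if 0
static uint32_t iemNativeEmitHelperCallExample(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                               uint8_t idxInstr, uint64_t fGstRegsToFlush)
{
    /* Flush delayed guest writes and evacuate/free the call-volatile registers. */
    off = iemNativeRegFlushPendingWrites(pReNative, off);
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, false /*fFreeArgVars*/);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);

    /* ... load IEMNATIVE_CALL_ARG0_GREG / ARG1_GREG and emit the actual call here ... */

    /* The callee may have changed guest state: drop the affected shadow copies
       and check the returned status code / rcPassUp. */
    iemNativeRegFlushGuestShadows(pReNative, fGstRegsToFlush);
    off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    return off;
}
#endif
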
2733
2734/**
2735 * Flushes a set of guest register shadow copies.
2736 *
2737 * This is usually done after calling a threaded function or a C-implementation
2738 * of an instruction.
2739 *
2740 * @param pReNative The native recompile state.
2741 * @param fGstRegs Set of guest registers to flush.
2742 */
2743DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
2744{
2745 /*
2746 * Reduce the mask by what's currently shadowed
2747 */
2748 fGstRegs &= pReNative->bmGstRegShadows;
2749 if (fGstRegs)
2750 {
2751 pReNative->bmGstRegShadows &= ~fGstRegs;
2752 if (pReNative->bmGstRegShadows)
2753 {
2754 /*
2755 * Partial.
2756 */
2757 do
2758 {
2759 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2760 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
2761            Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2762 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2763 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2764
2765 uint64_t const fInThisHstReg = (pReNative->aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
2766 fGstRegs &= ~fInThisHstReg;
2767 pReNative->aHstRegs[idxHstReg].fGstRegShadows &= fInThisHstReg;
2768 if (!pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2769 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2770 } while (fGstRegs != 0);
2771 }
2772 else
2773 {
2774 /*
2775 * Clear all.
2776 */
2777 do
2778 {
2779 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2780 uint8_t const idxHstReg = pReNative->aidxGstRegShadows[idxGstReg];
2781            Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2782 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
2783 Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
2784
2785 fGstRegs &= ~(pReNative->aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
2786 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2787 } while (fGstRegs != 0);
2788 pReNative->bmHstRegsWithGstShadow = 0;
2789 }
2790 }
2791}
2792
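/*
 * Illustrative sketch (not built): building a caller-side mask for the flush
 * above.  Flushing the RIP and EFLAGS shadows is just an example choice.
 */
#if 0
iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif
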
2793
2794/**
2795 * Flushes any delayed guest register writes.
2796 *
2797 * This must be called prior to calling CImpl functions and any helpers that use
2798 * the guest state (like raising exceptions) and such.
2799 *
2800 * This optimization has not yet been implemented. The first target would be
2801 * RIP updates, since these are the most common ones.
2802 */
2803DECLHIDDEN(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off) RT_NOEXCEPT
2804{
2805 RT_NOREF(pReNative, off);
2806 return off;
2807}
2808
2809
2810/*********************************************************************************************************************************
2811* Code Emitters (larger snippets) *
2812*********************************************************************************************************************************/
2813
2814/**
2815 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
2816 * extending to 64-bit width.
2817 *
2818 * @returns New code buffer offset on success, UINT32_MAX on failure.
2819 * @param   pReNative   The native recompile state.
2820 * @param off The current code buffer position.
2821 * @param idxHstReg The host register to load the guest register value into.
2822 * @param enmGstReg The guest register to load.
2823 *
2824 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
2825 *       that is something the caller needs to do if applicable.
2826 */
2827DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2828 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT
2829{
2830 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
2831 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2832
2833 switch (g_aGstShadowInfo[enmGstReg].cb)
2834 {
2835 case sizeof(uint64_t):
2836 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2837 case sizeof(uint32_t):
2838 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2839 case sizeof(uint16_t):
2840 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2841#if 0 /* not present in the table. */
2842 case sizeof(uint8_t):
2843 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2844#endif
2845 default:
2846 AssertFailedReturn(UINT32_MAX);
2847 }
2848}
2849
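/*
 * Illustrative sketch (not built): the load / mark-as-shadow pairing the @note
 * above refers to, as also done by iemNativeRegAllocTmpForGuestReg.  Assumes the
 * caller already made sure no other host register shadows the guest register.
 */
#if 0
static uint32_t iemNativeEmitLoadEFlagsShadowExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
{
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, kIemNativeGstReg_EFlags);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    iemNativeRegMarkAsGstRegShadow(pReNative, idxHstReg, kIemNativeGstReg_EFlags);
    return off;
}
#endif
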
2850
2851#ifdef VBOX_STRICT
2852/**
2853 * Emits code that checks that the content of register @a idxReg is the same
2854 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
2855 * instruction if that's not the case.
2856 *
2857 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
2858 * Trashes EFLAGS on AMD64.
2859 */
2860static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2861 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
2862{
2863# ifdef RT_ARCH_AMD64
2864 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2865 AssertReturn(pbCodeBuf, UINT32_MAX);
2866
2867 /* cmp reg, [mem] */
2868 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
2869 {
2870 if (idxReg >= 8)
2871 pbCodeBuf[off++] = X86_OP_REX_R;
2872 pbCodeBuf[off++] = 0x38;
2873 }
2874 else
2875 {
2876 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
2877 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
2878 else
2879 {
2880 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
2881 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2882 else
2883 AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
2884 if (idxReg >= 8)
2885 pbCodeBuf[off++] = X86_OP_REX_R;
2886 }
2887 pbCodeBuf[off++] = 0x39;
2888 }
2889 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
2890
2891 /* je/jz +1 */
2892 pbCodeBuf[off++] = 0x74;
2893 pbCodeBuf[off++] = 0x01;
2894
2895 /* int3 */
2896 pbCodeBuf[off++] = 0xcc;
2897
2898 /* For values smaller than the register size, we must check that the rest
2899 of the register is all zeros. */
2900 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
2901 {
2902 /* test reg64, imm32 */
2903 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2904 pbCodeBuf[off++] = 0xf7;
2905 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2906 pbCodeBuf[off++] = 0;
2907 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
2908 pbCodeBuf[off++] = 0xff;
2909 pbCodeBuf[off++] = 0xff;
2910
2911 /* je/jz +1 */
2912 pbCodeBuf[off++] = 0x74;
2913 pbCodeBuf[off++] = 0x01;
2914
2915 /* int3 */
2916 pbCodeBuf[off++] = 0xcc;
2917 }
2918 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
2919 {
2920 /* rol reg64, 32 */
2921 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2922 pbCodeBuf[off++] = 0xc1;
2923 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2924 pbCodeBuf[off++] = 32;
2925
2926 /* test reg32, ffffffffh */
2927 if (idxReg >= 8)
2928 pbCodeBuf[off++] = X86_OP_REX_B;
2929 pbCodeBuf[off++] = 0xf7;
2930 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2931 pbCodeBuf[off++] = 0xff;
2932 pbCodeBuf[off++] = 0xff;
2933 pbCodeBuf[off++] = 0xff;
2934 pbCodeBuf[off++] = 0xff;
2935
2936 /* je/jz +1 */
2937 pbCodeBuf[off++] = 0x74;
2938 pbCodeBuf[off++] = 0x01;
2939
2940 /* int3 */
2941 pbCodeBuf[off++] = 0xcc;
2942
2943 /* rol reg64, 32 */
2944 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2945 pbCodeBuf[off++] = 0xc1;
2946 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2947 pbCodeBuf[off++] = 32;
2948 }
2949
2950# elif defined(RT_ARCH_ARM64)
2951 /* mov TMP0, [gstreg] */
2952 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
2953
2954 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2955 AssertReturn(pu32CodeBuf, UINT32_MAX);
2956 /* sub tmp0, tmp0, idxReg */
2957 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
2958 /* cbz tmp0, +1 */
2959 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
2960 /* brk #0x1000+enmGstReg */
2961 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
2962
2963# else
2964# error "Port me!"
2965# endif
2966 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2967 return off;
2968}
2969#endif /* VBOX_STRICT */
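/*
 * For illustration only: for a 64-bit guest register, the strict-mode check
 * above boils down to roughly the following AMD64 sequence (the exact
 * encodings and offsets are what the emitter produces; this is just a sketch):
 *
 *      cmp     [rbx + g_aGstShadowInfo[enmGstReg].off], <idxReg>   ; rbx = pVCpu
 *      je      +1                                                  ; skip the int3
 *      int3                                                        ; shadow value mismatch
 *
 * Smaller registers additionally verify that the unused upper bits of the
 * host register are zero, as seen in the test/rol sequences above.
 */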
2970
2971
2972
2973/**
2974 * Emits code for checking the return code of a call and rcPassUp, returning
2975 * from the code if either is non-zero.
2976 */
2977DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2978 uint8_t idxInstr) RT_NOEXCEPT
2979{
2980#ifdef RT_ARCH_AMD64
2981 /*
2982 * AMD64: eax = call status code.
2983 */
2984
2985 /* edx = rcPassUp */
2986 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
2987# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2988 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
2989# endif
2990
2991 /* edx = eax | rcPassUp */
2992 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2993 AssertReturn(pbCodeBuf, UINT32_MAX);
2994 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
2995 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
2996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2997
2998 /* Jump to non-zero status return path. */
2999 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
3000
3001 /* done. */
3002
3003#elif RT_ARCH_ARM64
3004 /*
3005 * ARM64: w0 = call status code.
3006 */
3007 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
3008 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
3009
3010 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3011 AssertReturn(pu32CodeBuf, UINT32_MAX);
3012
3013 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
3014
3015 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3016 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
3017 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
3018 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0 /*imm19, filled in by the fixup*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
3019
3020#else
3021# error "port me"
3022#endif
3023 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3024 return off;
3025}
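/*
 * For illustration only: on AMD64 the check above is roughly
 *
 *      mov     edx, [rbx + RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp)]  ; rbx = pVCpu
 *      or      edx, eax                                            ; eax = call status code
 *      jnz     .NonZeroRetOrPassUp                                 ; target filled in by a fixup
 *
 * Both values are VINF_SUCCESS (zero) in the common case, so the branch is
 * normally not taken and execution falls straight through.
 */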
3026
3027
3028/**
3029 * Emits code to check if the content of @a idxAddrReg is a canonical address,
3030 * raising a \#GP(0) if it isn't.
3031 *
3032 * @returns New code buffer offset, UINT32_MAX on failure.
3033 * @param pReNative The native recompile state.
3034 * @param off The code buffer offset.
3035 * @param idxAddrReg The host register with the address to check.
3036 * @param idxInstr The current instruction.
3037 */
3038DECLHIDDEN(uint32_t) iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3039 uint8_t idxAddrReg, uint8_t idxInstr)
3040{
3041 RT_NOREF(idxInstr);
3042
3043 /*
3044 * Make sure we don't have any outstanding guest register writes as we may
3045 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3046 */
3047 off = iemNativeRegFlushPendingWrites(pReNative, off);
3048
3049#ifdef RT_ARCH_AMD64
3050 /*
3051 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
3052 * return raisexcpt();
3053 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
3054 */
3055 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3056 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3057
3058 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
3059 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
3060 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
3061 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
3062
3063# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3064 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3065# else
3066 uint32_t const offFixup = off;
3067 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3068 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3069 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3070 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3071# endif
3072
3073 iemNativeRegFreeTmp(pReNative, iTmpReg);
3074
3075#elif defined(RT_ARCH_ARM64)
3076 /*
3077 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
3078 * return raisexcpt();
3079 * ----
3080 * mov x1, 0x800000000000
3081 * add x1, x0, x1
3082 * cmp xzr, x1, lsr 48
3083 * and either:
3084 * b.ne .Lraisexcpt
3085 * or:
3086 * b.eq .Lnoexcept
3087 * movz x1, #instruction-number
3088 * b .Lraisexcpt
3089 * .Lnoexcept:
3090 */
3091 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3092 AssertReturn(iTmpReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3093
3094 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
3095 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
3096 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
3097
3098# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3099 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3100# else
3101 uint32_t const offFixup = off;
3102 off = iemNativeEmitJzToFixed(pReNative, off, 0);
3103 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3104 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3105 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3106# endif
3107
3108 iemNativeRegFreeTmp(pReNative, iTmpReg);
3109
3110#else
3111# error "Port me"
3112#endif
3113 return off;
3114}
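/*
 * For reference, the arithmetic used by both variants above can be expressed
 * in plain C as below.  This is a hedged sketch for clarity only (the helper
 * name is made up and the block is not compiled): adding 2^47 maps the two
 * canonical halves onto [0, 2^48 - 1], so the result has its top 16 bits
 * clear exactly when the address is canonical.
 */
#if 0
DECLINLINE(bool) iemNativeSketchIsCanonical(uint64_t uAddr)
{
    /* Canonical 48-bit addresses have bits 63:47 all equal. */
    return ((uAddr + UINT64_C(0x0000800000000000)) >> 48) == 0;
}
#endif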
3115
3116
3117/**
3118 * Emits code to check if the content of @a idxAddrReg is within the limit of
3119 * idxSegReg, raising a \#GP(0) if it isn't.
3120 *
3121 * @returns New code buffer offset, UINT32_MAX on failure.
3122 * @param pReNative The native recompile state.
3123 * @param off The code buffer offset.
3124 * @param idxAddrReg The host register (32-bit) with the address to
3125 * check.
3126 * @param idxSegReg The segment register (X86_SREG_XXX) to check
3127 * against.
3128 * @param idxInstr The current instruction.
3129 */
3130DECLHIDDEN(uint32_t) iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3131 uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
3132{
3133 /*
3134 * Make sure we don't have any outstanding guest register writes as we may
3135 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
3136 */
3137 off = iemNativeRegFlushPendingWrites(pReNative, off);
3138
3139 /** @todo implement expand down/whatnot checking */
3140 AssertReturn(idxSegReg == X86_SREG_CS, UINT32_MAX);
3141
3142 uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
3143 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
3144 kIemNativeGstRegUse_ForUpdate);
3145 AssertReturn(iTmpLimReg < RT_ELEMENTS(pReNative->aHstRegs), UINT32_MAX);
3146
3147 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
3148
3149#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3150 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3151 RT_NOREF(idxInstr);
3152#else
3153 uint32_t const offFixup = off;
3154 off = iemNativeEmitJbeToFixed(pReNative, off, 0);
3155 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxInstr);
3156 off = iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
3157 iemNativeFixupFixedJump(pReNative, offFixup, off /*offTarget*/);
3158#endif
3159
3160 iemNativeRegFreeTmp(pReNative, iTmpLimReg);
3161 return off;
3162}
3163
3164
3165/**
3166 * Emits a call to a CImpl function or something similar.
3167 */
3168static uint32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3169 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
3170 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
3171{
3172 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3173 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3174
3175 /*
3176 * Load the parameters.
3177 */
3178#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
3179 /* Special-case the hidden VBOXSTRICTRC return pointer. */
3180 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3181 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3182 if (cAddParams > 0)
3183 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
3184 if (cAddParams > 1)
3185 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
3186 if (cAddParams > 2)
3187 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
3188 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3189
3190#else
3191 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
3192 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3193 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
3194 if (cAddParams > 0)
3195 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
3196 if (cAddParams > 1)
3197 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
3198 if (cAddParams > 2)
3199# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
3200 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
3201# else
3202 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
3203# endif
3204#endif
3205 AssertReturn(off != UINT32_MAX, off);
3206
3207 /*
3208 * Make the call.
3209 */
3210 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
3211
3212#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3213 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3214#endif
3215
3216 /*
3217 * Check the status code.
3218 */
3219 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3220}
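/*
 * For illustration only: on non-Windows AMD64 (SysV calling convention) a
 * CImpl call with two additional parameters comes out roughly as
 *
 *      mov     rdi, rbx            ; arg0 = pVCpu
 *      mov     esi, cbInstr        ; arg1 = instruction length
 *      mov     rdx, uParam0        ; arg2
 *      mov     rcx, uParam1        ; arg3
 *      call    pfnCImpl
 *      ; ...followed by the status code / rcPassUp check emitted by
 *      ; iemNativeEmitCheckCallRetAndPassUp().
 *
 * The Windows strict-rc variant shifts everything up by one register to make
 * room for the hidden VBOXSTRICTRC return buffer pointer.
 */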
3221
3222
3223/**
3224 * Emits a call to a threaded worker function.
3225 */
3226static uint32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
3227{
3228 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
3229 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
3230 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
3231
3232#ifdef RT_ARCH_AMD64
3233 /* Load the parameters and emit the call. */
3234# ifdef RT_OS_WINDOWS
3235# ifndef VBOXSTRICTRC_STRICT_ENABLED
3236 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3237 if (cParams > 0)
3238 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
3239 if (cParams > 1)
3240 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
3241 if (cParams > 2)
3242 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
3243# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
3244 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
3245 if (cParams > 0)
3246 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
3247 if (cParams > 1)
3248 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
3249 if (cParams > 2)
3250 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
3251 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
3252 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3253# endif /* VBOXSTRICTRC_STRICT_ENABLED */
3254# else
3255 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3256 if (cParams > 0)
3257 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
3258 if (cParams > 1)
3259 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
3260 if (cParams > 2)
3261 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
3262# endif
3263
3264 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3265
3266# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
3267 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3268# endif
3269
3270#elif RT_ARCH_ARM64
3271 /*
3272 * ARM64:
3273 */
3274 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3275 if (cParams > 0)
3276 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
3277 if (cParams > 1)
3278 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
3279 if (cParams > 2)
3280 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
3281
3282 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
3283
3284#else
3285# error "port me"
3286#endif
3287
3288 /*
3289 * Check the status code.
3290 */
3291 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
3292 AssertReturn(off != UINT32_MAX, off);
3293
3294 return off;
3295}
3296
3297
3298/**
3299 * Emits the code at the RaiseGP0 label.
3300 */
3301static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3302{
3303 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_RaiseGp0);
3304 if (idxLabel != UINT32_MAX)
3305 {
3306 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
3307 pReNative->paLabels[idxLabel].off = off;
3308
3309 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu, uint8_t idxInstr) */
3310 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3311#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3312 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, 0);
3313#endif
3314 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
3315
3316 /* jump back to the return sequence. */
3317 off = iemNativeEmitJmpToLabel(pReNative, off, iemNativeFindLabel(pReNative, kIemNativeLabelType_Return));
3318 }
3319 return off;
3320}
3321
3322
3323/**
3324 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
3325 */
3326static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
3327{
3328 /*
3329 * Generate the rc + rcPassUp fiddling code if needed.
3330 */
3331 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
3332 if (idxLabel != UINT32_MAX)
3333 {
3334 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
3335 pReNative->paLabels[idxLabel].off = off;
3336
3337 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
3338#ifdef RT_ARCH_AMD64
3339# ifdef RT_OS_WINDOWS
3340# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3341 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
3342# endif
3343 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
3344 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
3345# else
3346 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3348# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3350# endif
3351# endif
3352# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3353 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
3354# endif
3355
3356#else
3357 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3358 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3359 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3360#endif
3361
3362 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3363 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
3364 }
3365 return off;
3366}
3367
3368
3369/**
3370 * Emits a standard epilog.
3371 */
3372static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3373{
3374 /*
3375 * Successful return, so clear the return register (eax, w0).
3376 */
3377 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
3378 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3379
3380 /*
3381 * Define label for common return point.
3382 */
3383 uint32_t const idxReturn = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Return, off);
3384 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
3385
3386 /*
3387 * Restore registers and return.
3388 */
3389#ifdef RT_ARCH_AMD64
3390 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3391 AssertReturn(pbCodeBuf, UINT32_MAX);
3392
3393 /* Reposition esp at the r15 restore point. */
3394 pbCodeBuf[off++] = X86_OP_REX_W;
3395 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3397 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3398
3399 /* Pop non-volatile registers and return */
3400 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3401 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3402 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3403 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3404 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3405 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3406 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3407 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3408# ifdef RT_OS_WINDOWS
3409 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3410 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3411# endif
3412 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3413 pbCodeBuf[off++] = 0xc9; /* leave */
3414 pbCodeBuf[off++] = 0xc3; /* ret */
3415 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3416
3417#elif RT_ARCH_ARM64
3418 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3419 AssertReturn(pu32CodeBuf, UINT32_MAX);
3420
3421 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3422 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3423 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3424 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3425 IEMNATIVE_FRAME_VAR_SIZE / 8);
3426 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3427 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3428 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3429 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3430 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3431 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3432 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3433 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3434 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3435 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3436 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3437 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3438
3439 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3440 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3441 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3442 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3443
3444 /* retab / ret */
3445# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3446 if (1)
3447 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3448 else
3449# endif
3450 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3451
3452#else
3453# error "port me"
3454#endif
3455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3456
3457 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3458}
3459
3460
3461/**
3462 * Emits a standard prolog.
3463 */
3464static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3465{
3466#ifdef RT_ARCH_AMD64
3467 /*
3468 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3469 * reserving 64 bytes for stack variables plus 4 non-register argument
3470 * slots. Fixed register assignment: xBX = pVCpu;
3471 *
3472 * Since we always do the same register spilling, we can use the same
3473 * unwind description for all the code.
3474 */
3475 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3476 AssertReturn(pbCodeBuf, UINT32_MAX);
3477 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3478 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3479 pbCodeBuf[off++] = 0x8b;
3480 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3481 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3482 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3483# ifdef RT_OS_WINDOWS
3484 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3485 pbCodeBuf[off++] = 0x8b;
3486 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3487 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3488 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3489# else
3490 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3491 pbCodeBuf[off++] = 0x8b;
3492 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3493# endif
3494 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3495 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3496 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3497 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3498 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3499 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3500 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3501 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3502
3503 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3504 X86_GREG_xSP,
3505 IEMNATIVE_FRAME_ALIGN_SIZE
3506 + IEMNATIVE_FRAME_VAR_SIZE
3507 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3508 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3509 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3510 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3511 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3512
3513#elif RT_ARCH_ARM64
3514 /*
3515 * We set up a stack frame exactly like on x86, only we have to push the
3516 * return address ourselves here. We save all non-volatile registers.
3517 */
3518 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3519 AssertReturn(pu32CodeBuf, UINT32_MAX);
3520
3521# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
3522 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
3523 * It's definitely the dwarf stepping code, but until that is found it's very tedious to figure out
3524 * whether it's in any way conditional, so we just emit this instruction now and hope for the best... */
3525 /* pacibsp */
3526 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3527# endif
3528
3529 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3530 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3531 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
3532 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3533 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3534 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3535 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3536 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3537 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3538 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3539 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3540 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3541 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3542 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3543 /* Save the BP and LR (ret address) registers at the top of the frame. */
3544 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
3545 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3546 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3547 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3548 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3549 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3550
3551 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3552 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3553
3554 /* mov r28, r0 */
3555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3556 /* mov r27, r1 */
3557 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3558
3559#else
3560# error "port me"
3561#endif
3562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3563 return off;
3564}
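/*
 * For illustration only: the AMD64 prolog generated above corresponds roughly
 * to the following (linux/SysV shown; the Windows variant additionally saves
 * rsi/rdi and takes pVCpu in rcx instead of rdi):
 *
 *      push    rbp
 *      mov     rbp, rsp
 *      push    rbx
 *      mov     rbx, rdi            ; rbx = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU)
 *      push    r12
 *      push    r13
 *      push    r14
 *      push    r15
 *      sub     rsp, <alignment + variable area + stack & shadow argument slots>
 */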
3565
3566
3567
3568/*********************************************************************************************************************************
3569* Emitters for IEM_MC_XXXX and the associated IEM_MC_XXXX recompiler definitions *
3570*********************************************************************************************************************************/
3571
3572#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3573 {
3574
3575/** We have to get to the end in recompilation mode, as otherwise we won't
3576 * generate code for all the IEM_MC_IF_XXX branches. */
3577#define IEM_MC_END() \
3578 } return off
3579
3580
3581/*
3582 * Standalone CImpl deferrals.
3583 */
3584
3585#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3586 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3587
3588
3589#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3590 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3591
3592DECLINLINE(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3593 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3594{
3595 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3596}
3597
3598
3599#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3600 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3601
3602DECLINLINE(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3603 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3604{
3605 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3606}
3607
3608
3609#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3610 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3611
3612DECLINLINE(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3613 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1, uint64_t uArg2)
3614{
3615 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3616}
3617
3618
3619/*
3620 * Advancing PC/RIP/EIP/IP.
3621 */
3622
3623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3624 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3625 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3626
3627/** Same as iemRegAddToRip64AndFinishingNoFlags. */
3628DECLINLINE(uint32_t) iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3629{
3630 /* Allocate a temporary PC register. */
3631 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3632 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3633
3634 /* Perform the addition and store the result. */
3635 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
3636 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3637
3638 /* Free but don't flush the PC register. */
3639 iemNativeRegFreeTmp(pReNative, idxPcReg);
3640
3641 return off;
3642}
3643
3644
3645#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3646 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3647 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3648
3649/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3650DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3651{
3652 /* Allocate a temporary PC register. */
3653 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3654 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3655
3656 /* Perform the addition and store the result. */
3657 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3658 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3659
3660 /* Free but don't flush the PC register. */
3661 iemNativeRegFreeTmp(pReNative, idxPcReg);
3662
3663 return off;
3664}
3665
3666
3667#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3668 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
3669 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3670
3671/** Same as iemRegAddToIp16AndFinishingNoFlags. */
3672DECLINLINE(uint32_t) iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3673{
3674 /* Allocate a temporary PC register. */
3675 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3676 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3677
3678 /* Perform the addition and store the result. */
3679 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3680 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3681 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3682
3683 /* Free but don't flush the PC register. */
3684 iemNativeRegFreeTmp(pReNative, idxPcReg);
3685
3686 return off;
3687}
3688
3689
3690/*
3691 * Changing PC/RIP/EIP/IP with a relative jump.
3692 */
3693
3694#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
3695 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3696 (a_enmEffOpSize), pCallEntry->idxInstr); \
3697 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3698
3699
3700#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
3701 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3702 IEMMODE_16BIT, pCallEntry->idxInstr); \
3703 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3704
3705#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
3706 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3707 IEMMODE_64BIT, pCallEntry->idxInstr); \
3708 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3709
3710/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
3711 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
3712 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
3713DECLINLINE(uint32_t) iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3714 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3715 uint8_t idxInstr)
3716{
3717 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
3718
3719 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3720 off = iemNativeRegFlushPendingWrites(pReNative, off);
3721
3722 /* Allocate a temporary PC register. */
3723 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3724 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3725
3726 /* Perform the addition. */
3727 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
3728
3729 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
3730 {
3731 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
3732 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
3733 }
3734 else
3735 {
3736 /* Just truncate the result to 16-bit IP. */
3737 Assert(enmEffOpSize == IEMMODE_16BIT);
3738 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3739 }
3740 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3741
3742 /* Free but don't flush the PC register. */
3743 iemNativeRegFreeTmp(pReNative, idxPcReg);
3744
3745 return off;
3746}
3747
3748
3749#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
3750 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
3751 (a_enmEffOpSize), pCallEntry->idxInstr); \
3752 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3753
3754#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
3755 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
3756 IEMMODE_16BIT, pCallEntry->idxInstr); \
3757 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3758
3759#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
3760 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
3761 IEMMODE_32BIT, pCallEntry->idxInstr); \
3762 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3763
3764/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
3765 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
3766 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
3767DECLINLINE(uint32_t) iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3768 uint8_t cbInstr, int32_t offDisp, IEMMODE enmEffOpSize,
3769 uint8_t idxInstr)
3770{
3771 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
3772
3773 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3774 off = iemNativeRegFlushPendingWrites(pReNative, off);
3775
3776 /* Allocate a temporary PC register. */
3777 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3778 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3779
3780 /* Perform the addition. */
3781 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
3782
3783 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
3784 if (enmEffOpSize == IEMMODE_16BIT)
3785 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3786
3787 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
3788 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
3789
3790 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3791
3792 /* Free but don't flush the PC register. */
3793 iemNativeRegFreeTmp(pReNative, idxPcReg);
3794
3795 return off;
3796}
3797
3798
3799#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
3800 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
3801 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3802
3803#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
3804 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
3805 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3806
3807#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
3808 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
3809 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3810
3811/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
3812DECLINLINE(uint32_t) iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3813 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
3814{
3815 /* We speculatively modify PC and may raise #GP(0), so make sure the right value is in CPUMCTX. */
3816 off = iemNativeRegFlushPendingWrites(pReNative, off);
3817
3818 /* Allocate a temporary PC register. */
3819 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3820 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3821
3822 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
3823 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
3824 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
3825 off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
3826 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3827
3828 /* Free but don't flush the PC register. */
3829 iemNativeRegFreeTmp(pReNative, idxPcReg);
3830
3831 return off;
3832}
3833
3834
3835/*
3836 * Conditionals.
3837 */
3838
3839/**
3840 * Pushes an IEM_MC_IF_XXX onto the condition stack.
3841 *
3842 * @returns Pointer to the condition stack entry on success, NULL on failure
3843 * (too many nestings)
3844 */
3845DECLINLINE(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3846{
3847 uint32_t const idxStack = pReNative->cCondDepth;
3848 AssertReturn(idxStack < RT_ELEMENTS(pReNative->aCondStack), NULL);
3849
3850 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3851 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3852
3853 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3854 pEntry->fInElse = false;
3855 pEntry->idxLabelElse = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3856 AssertReturn(pEntry->idxLabelElse != UINT32_MAX, NULL);
3857 pEntry->idxLabelEndIf = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3858 AssertReturn(pEntry->idxLabelEndIf != UINT32_MAX, NULL);
3859
3860 return pEntry;
3861}
3862
3863
3864#define IEM_MC_ELSE() } while (0); \
3865 off = iemNativeEmitElse(pReNative, off); \
3866 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
3867 do {
3868
3869/** Emits code related to IEM_MC_ELSE. */
3870DECLINLINE(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3871{
3872 /* Check sanity and get the conditional stack entry. */
3873 Assert(off != UINT32_MAX);
3874 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3875 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3876 Assert(!pEntry->fInElse);
3877
3878 /* Jump to the endif */
3879 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3880
3881 /* Define the else label and enter the else part of the condition. */
3882 pReNative->paLabels[pEntry->idxLabelElse].off = off;
3883 pEntry->fInElse = true;
3884
3885 return off;
3886}
3887
3888
3889#define IEM_MC_ENDIF() } while (0); \
3890 off = iemNativeEmitEndIf(pReNative, off); \
3891 AssertReturn(off != UINT32_MAX, UINT32_MAX)
3892
3893/** Emits code related to IEM_MC_ENDIF. */
3894DECLINLINE(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3895{
3896 /* Check sanity and get the conditional stack entry. */
3897 Assert(off != UINT32_MAX);
3898 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3899 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3900
3901 /* Define the endif label and maybe the else one if we're still in the 'if' part. */
3902 if (!pEntry->fInElse)
3903 pReNative->paLabels[pEntry->idxLabelElse].off = off;
3904 else
3905 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3906 pReNative->paLabels[pEntry->idxLabelEndIf].off = off;
3907
3908 /* Pop the conditional stack.*/
3909 pReNative->cCondDepth -= 1;
3910
3911 return off;
3912}
3913
3914
3915#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3916 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3917 AssertReturn(off != UINT32_MAX, UINT32_MAX); \
3918 do {
3919
3920/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3921DECLINLINE(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3922{
3923 PIEMNATIVECOND pEntry = iemNativeCondPushIf(pReNative);
3924 AssertReturn(pEntry, UINT32_MAX);
3925
3926 /* Get the eflags. */
3927 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3928 kIemNativeGstRegUse_ReadOnly);
3929 AssertReturn(idxEflReg != UINT8_MAX, UINT32_MAX);
3930
3931 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3932 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3933
3934 /* Test and jump to the else-branch if the flag bit is clear. */
3935 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3936
3937 /* Free but don't flush the EFlags register. */
3938 iemNativeRegFreeTmp(pReNative, idxEflReg);
3939
3940 return off;
3941}
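/*
 * For illustration only: a hypothetical MC block using the pieces above, such as
 *
 *      IEM_MC_BEGIN(0, 0, 0, 0);
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF)
 *          ... then code ...
 *      IEM_MC_ELSE()
 *          ... else code ...
 *      IEM_MC_ENDIF();
 *      IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(1);
 *      IEM_MC_END();
 *
 * recompiles to: a bit test on the shadowed EFLAGS register that jumps to the
 * 'else' label when the flag is clear, the 'then' code, a jump to the 'endif'
 * label, the 'else' label and code, and finally the 'endif' label.  The label
 * offsets are resolved through the fixup table when the final code is copied
 * into executable memory.
 */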
3942
3943
3944
3945/*********************************************************************************************************************************
3946* Builtin functions *
3947*********************************************************************************************************************************/
3948
3949/**
3950 * Built-in function that calls a C-implementation function taking zero arguments.
3951 */
3952static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
3953{
3954 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
3955 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
3956 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
3957}
3958
3959
3960
3961/*********************************************************************************************************************************
3962* The native code generator functions for each MC block. *
3963*********************************************************************************************************************************/
3964
3965
3966/*
3967 * Include g_apfnIemNativeRecompileFunctions and associated functions.
3968 *
3969 * This should probably live in its own file later, but let's see what the
3970 * compile times turn out to be first.
3971 */
3972#include "IEMNativeFunctions.cpp.h"
3973
3974
3975
3976/*********************************************************************************************************************************
3977* Recompiler Core. *
3978*********************************************************************************************************************************/
3979
3980
3981/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
3982static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
3983{
3984 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
3985 pDis->cbCachedInstr += cbMaxRead;
3986 RT_NOREF(cbMinRead);
3987 return VERR_NO_DATA;
3988}
3989
3990
3991
3992void iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp)
3993{
3994 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
3995
3996 char szDisBuf[512];
3997 DISSTATE Dis;
3998 PCIEMNATIVEINSTR const paInstrs = pTb->Native.paInstructions;
3999 uint32_t const cInstrs = pTb->Native.cInstructions;
4000 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
4001 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
4002 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
4003 : DISCPUMODE_64BIT;
4004#ifdef RT_ARCH_AMD64
4005 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
4006#elif defined(RT_ARCH_ARM64)
4007 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
4008#else
4009# error "Port me"
4010#endif
4011
4012 pHlp->pfnPrintf(pHlp,
4013 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
4014 "pTb=%p: fFlags=%#010x cUsed=%u msLastUsed=%u\n",
4015 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
4016 pTb, pTb->fFlags, pTb->cUsed, pTb->msLastUsed);
4017 if (pDbgInfo)
4018 {
4019
4020
4021 }
4022 else
4023 {
4024 /*
4025 * No debug info, just disassemble the x86 code and then the native code.
4026 */
4027 /* The guest code. */
4028 for (unsigned i = 0; i < pTb->cRanges; i++)
4029 {
4030 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
4031 + (pTb->aRanges[i].idxPhysPage == 0
4032 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
4033 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
4034 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
4035 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
4036 unsigned off = pTb->aRanges[i].offOpcodes;
4037 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
4038 while (off < cbOpcodes)
4039 {
4040 uint32_t cbInstr = 1;
4041 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
4042 &pTb->pabOpcodes[off], cbOpcodes - off,
4043 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
4044 if (RT_SUCCESS(rc))
4045 {
4046 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4047 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH,
4048 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4049 pHlp->pfnPrintf(pHlp, " %s\n", szDisBuf);
4050 off += cbInstr;
4051 }
4052 else
4053 {
4054 pHlp->pfnPrintf(pHlp, " %.*Rhxs - disassembly failure %Rrc\n",
4055 cbOpcodes - off, &pTb->pabOpcodes[off], rc);
4056 break;
4057 }
4058 }
4059 }
4060
4061 /* The native code: */
4062 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paInstrs, cInstrs);
4063 for (uint32_t offNative = 0; offNative < cInstrs; )
4064 {
4065 uint32_t cbInstr = sizeof(paInstrs[0]);
4066 int const rc = DISInstr(&paInstrs[offNative], enmHstCpuMode, &Dis, &cbInstr);
4067 if (RT_SUCCESS(rc))
4068 {
4069# if defined(RT_ARCH_AMD64) && 0
4070 if (Dis.pCurInstr->uOpcode == )
4071 {
4072 }
4073 else
4074# endif
4075 {
4076 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
4077 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH,
4078 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
4079 pHlp->pfnPrintf(pHlp, " %p: %s\n", &paInstrs[offNative], szDisBuf);
4080 }
4081 }
4082 else
4083 {
4084# if defined(RT_ARCH_AMD64)
4085 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
4086 &paInstrs[offNative], RT_MIN(cInstrs - offNative, 16), &paInstrs[offNative], rc);
4087# elif defined(RT_ARCH_ARM64)
4088 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n",
4089 &paInstrs[offNative], paInstrs[offNative], rc);
4090# else
4091# error "Port me"
4092#endif
4093 cbInstr = sizeof(paInstrs[0]);
4094 }
4095 offNative += cbInstr / sizeof(paInstrs[0]);
4096 }
4097 }
4098}
4099
4100
4101/**
4102 * Recompiles the given threaded TB into a native one.
4103 *
4104 * In case of failure the translation block will be returned as-is.
4105 *
4106 * @returns pTb.
4107 * @param pVCpu The cross context virtual CPU structure of the calling
4108 * thread.
4109 * @param pTb The threaded translation block to recompile to native.
4110 */
4111PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
4112{
4113 /*
4114 * The first time through, we allocate the recompiler state; on subsequent
4115 * runs we just reset it before using it again.
4116 */
4117 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
4118 if (RT_LIKELY(pReNative))
4119 iemNativeReInit(pReNative, pTb);
4120 else
4121 {
4122 pReNative = iemNativeInit(pVCpu, pTb);
4123 AssertReturn(pReNative, pTb);
4124 }
4125
4126 /*
4127 * Emit prolog code (fixed).
4128 */
4129 uint32_t off = iemNativeEmitProlog(pReNative, 0);
4130 AssertReturn(off != UINT32_MAX, pTb);
4131
4132 /*
4133 * Convert the calls to native code.
4134 */
4135 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
4136 uint32_t cCallsLeft = pTb->Thrd.cCalls;
4137 while (cCallsLeft-- > 0)
4138 {
4139#ifdef VBOX_STRICT
4140 off = iemNativeEmitMarker(pReNative, off, RT_MAKE_U32(pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->enmFunction));
4141 AssertReturn(off != UINT32_MAX, pTb);
4142#endif
4143 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
4144 if (pfnRecom) /** @todo stats on this. */
4145 {
4146 //STAM_COUNTER_INC()
4147 off = pfnRecom(pReNative, off, pCallEntry);
4148 }
4149 else
4150 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
4151 AssertReturn(off != UINT32_MAX, pTb);
4152 Assert(pReNative->cCondDepth == 0);
4153
4154 pCallEntry++;
4155 }
4156
4157 /*
4158 * Emit the epilog code.
4159 */
4160 off = iemNativeEmitEpilog(pReNative, off);
4161 AssertReturn(off != UINT32_MAX, pTb);
4162
4163 /*
4164 * Generate special jump labels.
4165 */
4166 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
4167 {
4168 off = iemNativeEmitRaiseGp0(pReNative, off);
4169 AssertReturn(off != UINT32_MAX, pTb);
4170 }
4171
4172 /*
4173 * Make sure all labels have been defined.
4174 */
4175 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
4176#ifdef VBOX_STRICT
4177 uint32_t const cLabels = pReNative->cLabels;
4178 for (uint32_t i = 0; i < cLabels; i++)
4179 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
4180#endif
4181
4182 /*
4183 * Allocate executable memory, copy over the code we've generated.
4184 */
4185 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
4186 if (pTbAllocator->pDelayedFreeHead)
4187 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
4188
4189 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
4190 AssertReturn(paFinalInstrBuf, pTb);
4191 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
4192
4193 /*
4194 * Apply fixups.
4195 */
4196 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
4197 uint32_t const cFixups = pReNative->cFixups;
4198 for (uint32_t i = 0; i < cFixups; i++)
4199 {
4200 Assert(paFixups[i].off < off);
4201 Assert(paFixups[i].idxLabel < cLabels);
4202 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
4203 switch (paFixups[i].enmType)
4204 {
4205#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4206 case kIemNativeFixupType_Rel32:
4207 Assert(paFixups[i].off + 4 <= off);
4208 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4209 continue;
4210
4211#elif defined(RT_ARCH_ARM64)
4212 case kIemNativeFixupType_RelImm19At5:
4213 {
4214 Assert(paFixups[i].off < off);
4215 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
4216 Assert(offDisp >= -262144 && offDisp < 262144);
4217 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (offDisp << 5);
4218 continue;
4219 }
4220#endif
4221 case kIemNativeFixupType_Invalid:
4222 case kIemNativeFixupType_End:
4223 break;
4224 }
4225 AssertFailed();
4226 }
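    /*
     * For illustration only: with a Rel32 fixup the stored value is the label
     * offset minus the fixup offset plus the addend, all in instruction buffer
     * units (bytes on AMD64).  So if a jcc's 32-bit displacement field sits at
     * byte offset 0x100, the label ends up at 0x120 and the emitter registered
     * the fixup with an addend of -4, the patched displacement becomes
     * 0x120 - 0x100 - 4 = 0x1c, i.e. relative to the end of the instruction as
     * the CPU expects.  (The -4 addend is an assumption about how the jump
     * emitters register their fixups; the formula above is what's authoritative.)
     */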
4227
4228 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
4229
4230 /*
4231 * Convert the translation block.
4232 */
4233 //RT_BREAKPOINT();
4234 RTMemFree(pTb->Thrd.paCalls);
4235 pTb->Native.paInstructions = paFinalInstrBuf;
4236 pTb->Native.cInstructions = off;
4237 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
4238
4239 Assert(pTbAllocator->cThreadedTbs > 0);
4240 pTbAllocator->cThreadedTbs -= 1;
4241 pTbAllocator->cNativeTbs += 1;
4242 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
4243
4244#ifdef LOG_ENABLED
4245 /*
4246 * Disassemble to the log if enabled.
4247 */
4248 if (LogIs3Enabled())
4249 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
4250#endif
4251
4252 return pTb;
4253}
4254