VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp

Last change on this file was 104146, checked in by vboxsync, 6 weeks ago

VMM/IEM: Increased IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE from 128 to 256, given that the smallest TB on arm64 seems to be 144 bytes and the average is around 8K booting win11. Reduces the bitmap traversal and updating effort. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 61.9 KB
1/* $Id: IEMAllN8veExecMem.cpp 104146 2024-04-04 01:13:21Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# define IEMNATIVE_USE_LIBUNWIND
75extern "C" void __register_frame(const void *pvFde);
76extern "C" void __deregister_frame(const void *pvFde);
77# else
78# ifdef DEBUG_bird /** @todo not thread safe yet */
79# define IEMNATIVE_USE_GDB_JIT
80# endif
81# ifdef IEMNATIVE_USE_GDB_JIT
82# include <iprt/critsect.h>
83# include <iprt/once.h>
84# include <iprt/formats/elf64.h>
85# endif
86extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
87extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
88# endif
89#endif
90
91#include "IEMN8veRecompiler.h"
92
93
94/*********************************************************************************************************************************
95* Executable Memory Allocator *
96*********************************************************************************************************************************/
97/** The chunk sub-allocation unit size in bytes. */
98#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
99/** The chunk sub-allocation unit size as a shift factor. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
101/** Enables adding a header to the sub-allocator allocations.
102 * This is useful for freeing up executable memory among other things. */
103#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
104/** Use alternative pruning. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
106
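/* Editor's illustration (not part of the original source): with the 256 byte
   unit size above, a request is rounded up to whole units and the shift
   factor turns the division into a cheap '>> 8'.  A minimal sketch: */
#if 0 /* example only */
static uint32_t iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    /* Same rounding iemExecMemAllocatorAllocInChunk performs further down. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
/* iemExecMemExampleReqToUnits(1000) -> 4 units, i.e. 1024 bytes. */
#endif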
107
108#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
109# ifdef IEMNATIVE_USE_GDB_JIT
110# define IEMNATIVE_USE_GDB_JIT_ET_DYN
111
112/** GDB JIT: Code entry. */
113typedef struct GDBJITCODEENTRY
114{
115 struct GDBJITCODEENTRY *pNext;
116 struct GDBJITCODEENTRY *pPrev;
117 uint8_t *pbSymFile;
118 uint64_t cbSymFile;
119} GDBJITCODEENTRY;
120
121/** GDB JIT: Actions. */
122typedef enum GDBJITACTIONS : uint32_t
123{
124 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
125} GDBJITACTIONS;
126
127/** GDB JIT: Descriptor. */
128typedef struct GDBJITDESCRIPTOR
129{
130 uint32_t uVersion;
131 GDBJITACTIONS enmAction;
132 GDBJITCODEENTRY *pRelevant;
133 GDBJITCODEENTRY *pHead;
134 /** Our addition: */
135 GDBJITCODEENTRY *pTail;
136} GDBJITDESCRIPTOR;
137
138/** GDB JIT: Our simple symbol file data. */
139typedef struct GDBJITSYMFILE
140{
141 Elf64_Ehdr EHdr;
142# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
143 Elf64_Shdr aShdrs[5];
144# else
145 Elf64_Shdr aShdrs[7];
146 Elf64_Phdr aPhdrs[2];
147# endif
148 /** The dwarf ehframe data for the chunk. */
149 uint8_t abEhFrame[512];
150 char szzStrTab[128];
151 Elf64_Sym aSymbols[3];
152# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
153 Elf64_Sym aDynSyms[2];
154 Elf64_Dyn aDyn[6];
155# endif
156} GDBJITSYMFILE;
157
158extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
159extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
160
161/** Init once for g_IemNativeGdbJitLock. */
162static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
163/** Init once for the critical section. */
164static RTCRITSECT g_IemNativeGdbJitLock;
165
166/** GDB reads the info here. */
167GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
168
169/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
170DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
171{
172 ASMNopPause();
173}
174
175/** @callback_method_impl{FNRTONCE} */
176static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
177{
178 RT_NOREF(pvUser);
179 return RTCritSectInit(&g_IemNativeGdbJitLock);
180}
181
182
183# endif /* IEMNATIVE_USE_GDB_JIT */
184
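/* Editor's note: the above implements the standard GDB JIT interface.  GDB
   sets a breakpoint on __jit_debug_register_code(); the JIT links an entry
   into __jit_debug_descriptor and calls that function so GDB wakes up and
   reads the in-memory ELF image.  A minimal registration sketch, mirroring
   what iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk does further
   down (minus the locking): */
#  if 0 /* example only */
static void iemExecMemExampleGdbJitRegister(GDBJITCODEENTRY *pEntry)
{
    pEntry->pNext = NULL;
    pEntry->pPrev = __jit_debug_descriptor.pTail;
    if (__jit_debug_descriptor.pTail)
        __jit_debug_descriptor.pTail->pNext = pEntry;
    else
        __jit_debug_descriptor.pHead = pEntry;
    __jit_debug_descriptor.pTail     = pEntry;
    __jit_debug_descriptor.pRelevant = pEntry;

    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    __jit_debug_register_code();            /* GDB's breakpoint lands here. */
    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
}
#  endif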
185/**
186 * Per-chunk unwind info for non-windows hosts.
187 */
188typedef struct IEMEXECMEMCHUNKEHFRAME
189{
190# ifdef IEMNATIVE_USE_LIBUNWIND
191 /** The offset of the FDA into abEhFrame. */
192 uintptr_t offFda;
193# else
194 /** 'struct object' storage area. */
195 uint8_t abObject[1024];
196# endif
197# ifdef IEMNATIVE_USE_GDB_JIT
198# if 0
199 /** The GDB JIT 'symbol file' data. */
200 GDBJITSYMFILE GdbJitSymFile;
201# endif
202 /** The GDB JIT list entry. */
203 GDBJITCODEENTRY GdbJitEntry;
204# endif
205 /** The dwarf ehframe data for the chunk. */
206 uint8_t abEhFrame[512];
207} IEMEXECMEMCHUNKEHFRAME;
208/** Pointer to per-chunk unwind info for non-windows hosts. */
209typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
210#endif
211
212
213/**
214 * A chunk of executable memory.
215 */
216typedef struct IEMEXECMEMCHUNK
217{
218 /** Number of free items in this chunk. */
219 uint32_t cFreeUnits;
220 /** Hint where to start searching for free space in the allocation bitmap. */
221 uint32_t idxFreeHint;
222 /** Pointer to the chunk. */
223 void *pvChunk;
224#ifdef IN_RING3
225 /**
226 * Pointer to the unwind information.
227 *
228 * This is used during C++ throw and longjmp (windows and probably most other
229 * platforms). Some debuggers (windbg) make use of it as well.
230 *
231 * Windows: This is allocated from hHeap on windows because (at least for
232 * AMD64) the UNWIND_INFO structure address in the
233 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
234 *
235 * Others: Allocated from the regular heap to avoid unnecessary executable data
236 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
237 void *pvUnwindInfo;
238#elif defined(IN_RING0)
239 /** Allocation handle. */
240 RTR0MEMOBJ hMemObj;
241#endif
242} IEMEXECMEMCHUNK;
243/** Pointer to a memory chunk. */
244typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
245
246
247/**
248 * Executable memory allocator for the native recompiler.
249 */
250typedef struct IEMEXECMEMALLOCATOR
251{
252 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
253 uint32_t uMagic;
254
255 /** The chunk size. */
256 uint32_t cbChunk;
257 /** The maximum number of chunks. */
258 uint32_t cMaxChunks;
259 /** The current number of chunks. */
260 uint32_t cChunks;
261 /** Hint where to start looking for available memory. */
262 uint32_t idxChunkHint;
263 /** Statistics: Current number of allocations. */
264 uint32_t cAllocations;
265
266 /** The total amount of memory available. */
267 uint64_t cbTotal;
268 /** Total amount of free memory. */
269 uint64_t cbFree;
270 /** Total amount of memory allocated. */
271 uint64_t cbAllocated;
272
273 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
274 *
275 * Since the chunk size is a power of two and the minimum chunk size is a lot
276 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
277 * require a whole number of uint64_t elements in the allocation bitmap. So,
278 * for the sake of simplicity, they are allocated as one continuous
279 * block. */
280 uint64_t *pbmAlloc;
281 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
282 uint32_t cUnitsPerChunk;
283 /** Number of bitmap elements per chunk (for quickly locating the bitmap
284 * portion corresponding to a chunk). */
285 uint32_t cBitmapElementsPerChunk;
286
287#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
288 /** The next chunk to prune in. */
289 uint32_t idxChunkPrune;
290 /** Where in chunk offset to start pruning at. */
291 uint32_t offChunkPrune;
292 /** Profiling the pruning code. */
293 STAMPROFILE StatPruneProf;
294 /** Number of bytes recovered by the pruning. */
295 STAMPROFILE StatPruneRecovered;
296#endif
297
298#ifdef VBOX_WITH_STATISTICS
299 STAMPROFILE StatAlloc;
300#endif
301
302
303#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
304 /** Pointer to the array of unwind info running parallel to aChunks (same
305 * allocation as this structure, located after the bitmaps).
306 * (For Windows, the structures must reside in 32-bit RVA distance to the
307 * actual chunk, so they are allocated off the chunk.) */
308 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
309#endif
310
311 /** The allocation chunks. */
312 RT_FLEXIBLE_ARRAY_EXTENSION
313 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
314} IEMEXECMEMALLOCATOR;
315/** Pointer to an executable memory allocator. */
316typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
317
318/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
319#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
320
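/* Editor's illustration (not part of the original source): each chunk owns
   cBitmapElementsPerChunk consecutive uint64_t words in pbmAlloc, one bit
   per 256 byte unit.  Locating the bit for a byte offset within a chunk: */
#if 0 /* example only */
static bool iemExecMemExampleIsUnitAllocated(PIEMEXECMEMALLOCATOR pAllocator,
                                             uint32_t idxChunk, uint32_t offChunk)
{
    uint64_t const *pbmChunk = &pAllocator->pbmAlloc[pAllocator->cBitmapElementsPerChunk * idxChunk];
    uint32_t const  idxUnit  = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    return RT_BOOL(pbmChunk[idxUnit / 64] & RT_BIT_64(idxUnit % 64));
}
#endif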
321
322#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
323/**
324 * Allocation header.
325 */
326typedef struct IEMEXECMEMALLOCHDR
327{
328 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
329 uint32_t uMagic;
330 /** The allocation chunk (for speeding up freeing). */
331 uint32_t idxChunk;
332 /** Pointer to the translation block the allocation belongs to.
333 * This is the whole point of the header. */
334 PIEMTB pTb;
335} IEMEXECMEMALLOCHDR;
336/** Pointer to an allocation header. */
337typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
338/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
339# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
340#endif
341
342
343static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
344
345#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
346/**
347 * Frees up executable memory when we're out of space.
348 *
349 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
350 * space in a more linear fashion from the allocator's point of view. It may
351 * also defragment if implemented & enabled.
352 */
353static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
354{
355# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
356# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
357# endif
358 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
359
360 /*
361 * Before we can start, we must process delayed frees.
362 */
363 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
364
365 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
366
367 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
368 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
369 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
370
371 uint32_t const cChunks = pExecMemAllocator->cChunks;
372 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
373 AssertReturnVoid(cChunks >= 1);
374
375 Assert(!pVCpu->iem.s.pCurTbR3);
376
377 /*
378 * Decide how much to prune. The chunk size is a power of two, so we'll be
379 * scanning a power-of-two sized area here as well.
380 */
381 uint32_t cbToPrune = cbChunk;
382
383 /* Never more than 25%. */
384 if (cChunks < 4)
385 cbToPrune /= cChunks == 1 ? 4 : 2;
386
387 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
388 if (cbToPrune > _4M)
389 cbToPrune = _4M;
390
391 /*
392 * Adjust the pruning chunk and offset accordingly.
393 */
394 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
395 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
396 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
397 if (offChunk >= cbChunk)
398 {
399 offChunk = 0;
400 idxChunk += 1;
401 }
402 if (idxChunk >= cChunks)
403 {
404 offChunk = 0;
405 idxChunk = 0;
406 }
407
408 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
409
410 /*
411 * Do the pruning. The current approach is the severe kind.
412 */
413 uint64_t cbPruned = 0;
414 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunk;
415 while (offChunk < offPruneEnd)
416 {
417 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
418
419 /* Is this the start of an allocation block for a TB? (We typically have
420 one allocation at the start of each chunk for the unwind info where
421 pTb is NULL.) */
422 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
423 && pHdr->pTb != NULL
424 && pHdr->idxChunk == idxChunk)
425 {
426 PIEMTB const pTb = pHdr->pTb;
427 AssertPtr(pTb);
428
429 /* We now have to check that this isn't an old freed header, given
430 that we don't invalidate the header upon free because of darwin
431 restrictions on executable memory (iemExecMemAllocatorFree).
432 This relies upon iemTbAllocatorFreeInner resetting TB members. */
433 if ( pTb->Native.paInstructions == (PIEMNATIVEINSTR)(pHdr + 1)
434 && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
435 {
436 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
437 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
438 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
439
440 iemTbAllocatorFree(pVCpu, pTb);
441
442 cbPruned += cbBlock;
443 offChunk += cbBlock;
444 }
445 else
446 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
447 }
448 else
449 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
450 }
451 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
452
453 /*
454 * Save the current pruning point.
455 */
456 pExecMemAllocator->offChunkPrune = offChunk;
457 pExecMemAllocator->idxChunkPrune = idxChunk;
458
459 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
460}
461#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
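/* Editor's illustration: the pruning state (idxChunkPrune/offChunkPrune)
   walks the chunks as one linear, wrapping address space, at most 4 MiB per
   call.  E.g. with four 64 MiB chunks the window visits offsets 0, 4M, ...,
   60M of chunk 0, then chunk 1, and wraps back to chunk 0 after 64 calls.
   A simplified sketch of the window advance (the real code saves the offset
   actually reached by the scan): */
#if 0 /* example only */
static void iemExecMemExampleAdvancePruneWindow(PIEMEXECMEMALLOCATOR pAllocator)
{
    uint32_t offChunk = pAllocator->offChunkPrune + _4M;
    uint32_t idxChunk = pAllocator->idxChunkPrune;
    if (offChunk >= pAllocator->cbChunk)
    {
        offChunk = 0;
        idxChunk = (idxChunk + 1) % pAllocator->cChunks;
    }
    pAllocator->offChunkPrune = offChunk;
    pAllocator->idxChunkPrune = idxChunk;
}
#endif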
462
463
464/**
465 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
466 */
467static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
468 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
469{
470 /*
471 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
472 */
473 Assert(!(cToScan & 63));
474 Assert(!(idxFirst & 63));
475 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
476 pbmAlloc += idxFirst / 64;
477
478 /*
479 * Scan the bitmap for cReqUnits of consecutive clear bits
480 */
481 /** @todo This can probably be done more efficiently for non-x86 systems. */
482 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
483 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
484 {
485 uint32_t idxAddBit = 1;
486 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
487 idxAddBit++;
488 if (idxAddBit >= cReqUnits)
489 {
490 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
491
492 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
493 pChunk->cFreeUnits -= cReqUnits;
494 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
495
496 pExecMemAllocator->cAllocations += 1;
497 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
498 pExecMemAllocator->cbAllocated += cbReq;
499 pExecMemAllocator->cbFree -= cbReq;
500 pExecMemAllocator->idxChunkHint = idxChunk;
501
502 void * const pvMem = (uint8_t *)pChunk->pvChunk
503 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
504#ifdef RT_OS_DARWIN
505 /*
506 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
507 * on darwin. So, we mark the pages returned as read+write after alloc and
508 * expect the caller to call iemExecMemAllocatorReadyForUse when done
509 * writing to the allocation.
510 *
511 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
512 * for details.
513 */
514 /** @todo detect if this is necessary... it wasn't required on 10.15 or
515 * whatever older version it was. */
516 int rc = RTMemProtect(pvMem, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
517 AssertRC(rc);
518#endif
519
520 /*
521 * Initialize the header and return.
522 */
523# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
524 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMem;
525 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
526 pHdr->idxChunk = idxChunk;
527 pHdr->pTb = pTb;
528 return pHdr + 1;
529#else
530 RT_NOREF(pTb);
531 return pvMem;
532#endif
533 }
534
535 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
536 }
537 return NULL;
538}
539
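/* Editor's illustration: the scan above is a plain first-fit search over a
   bit array.  Stripped of the allocator bookkeeping it boils down to this
   (cBits must be a multiple of 64, per the asserts above): */
#if 0 /* example only */
static int32_t iemExecMemExampleFirstFit(uint64_t const *pbm, uint32_t cBits, uint32_t cReq)
{
    int iBit = ASMBitFirstClear(pbm, cBits);
    while (iBit >= 0 && (uint32_t)iBit <= cBits - cReq)
    {
        uint32_t cClear = 1;
        while (cClear < cReq && !ASMBitTest(pbm, (uint32_t)iBit + cClear))
            cClear++;
        if (cClear >= cReq)
            return iBit;            /* found a run of cReq clear bits */
        iBit = ASMBitNextClear(pbm, cBits, iBit + cClear - 1);
    }
    return -1;
}
#endif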
540
541static void *
542iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
543{
544 /*
545 * Figure out how much to allocate.
546 */
547#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
548 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
549#else
550 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
551#endif
552 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
553 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
554 {
555 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
556 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
557 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
558 {
559 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
560 pExecMemAllocator->cUnitsPerChunk - idxHint,
561 cReqUnits, idxChunk, pTb);
562 if (pvRet)
563 return pvRet;
564 }
565 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
566 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
567 cReqUnits, idxChunk, pTb);
568 }
569 return NULL;
570}
571
572
573/**
574 * Allocates @a cbReq bytes of executable memory.
575 *
576 * @returns Pointer to the memory, NULL if out of memory or other problem
577 * encountered.
578 * @param pVCpu The cross context virtual CPU structure of the calling
579 * thread.
580 * @param cbReq How many bytes are required.
581 * @param pTb The translation block that will be using the allocation.
582 */
583DECLHIDDEN(void *) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb) RT_NOEXCEPT
584{
585 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
586 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
587 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
588 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
589
590 for (unsigned iIteration = 0;; iIteration++)
591 {
592 if (cbReq <= pExecMemAllocator->cbFree)
593 {
594 uint32_t const cChunks = pExecMemAllocator->cChunks;
595 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
596 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
597 {
598 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
599 if (pvRet)
600 {
601 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
602 return pvRet;
603 }
604 }
605 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
606 {
607 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
608 if (pvRet)
609 {
610 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
611 return pvRet;
612 }
613 }
614 }
615
616 /*
617 * Can we grow it with another chunk?
618 */
619 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
620 {
621 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
622 AssertLogRelRCReturn(rc, NULL);
623
624 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
625 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
626 if (pvRet)
627 {
628 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
629 return pvRet;
630 }
631 AssertFailed();
632 }
633
634 /*
635 * Try prune native TBs once.
636 */
637 if (iIteration == 0)
638 {
639#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
640 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
641#else
642 /* No header included in the instruction count here. */
643 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
644 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
645#endif
646 }
647 else
648 {
649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
650 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
651 return NULL;
652 }
653 }
654}
655
656
657/** This is a hook that we may need later for changing memory protection back
658 * to readonly+exec */
659DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
660{
661#ifdef RT_OS_DARWIN
662 /* See iemExecMemAllocatorAllocInChunkInt for the explanation. */
663 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
664 AssertRC(rc); RT_NOREF(pVCpu);
665
666 /*
667 * Flush the instruction cache:
668 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
669 */
670 /* sys_dcache_flush(pv, cb); - not necessary */
671 sys_icache_invalidate(pv, cb);
672#else
673 RT_NOREF(pVCpu, pv, cb);
674#endif
675}
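/* Editor's sketch of the caller pattern this implies on darwin: allocate
   (memory comes back RW), emit the code, then flip to RX exactly once.
   pabCode/cbCode are hypothetical stand-ins for the recompiler output: */
#if 0 /* example only */
void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb);
if (pv)
{
    memcpy(pv, pabCode, cbCode);                       /* emit/copy native code */
    iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode); /* RW -> RX + icache flush */
}
#endif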
676
677
678/**
679 * Frees executable memory.
680 */
681DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
682{
683 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
684 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
685 AssertPtr(pv);
686#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
687 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
688
689 /* Align the size as we did when allocating the block. */
690 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
691
692#else
693 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
694 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
695 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
696 uint32_t const idxChunk = pHdr->idxChunk;
697 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
698 pv = pHdr;
699
700 /* Adjust and align the size to cover the whole allocation area. */
701 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
702#endif
703
704 /* Free it / assert sanity. */
705 bool fFound = false;
706 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
707#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
708 uint32_t const cChunks = pExecMemAllocator->cChunks;
709 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
710#endif
711 {
712 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
713 fFound = offChunk < cbChunk;
714 if (fFound)
715 {
716 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
717 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
718
719 /* Check that it's valid and free it. */
720 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
721 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
722 for (uint32_t i = 1; i < cReqUnits; i++)
723 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
724 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
725
726#if 0 /*def IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER - not necessary, we'll validate the header in the pruning code. */
727# ifdef RT_OS_DARWIN
728 int rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_WRITE | RTMEM_PROT_READ);
729 AssertRC(rc); RT_NOREF(pVCpu);
730# endif
731 pHdr->uMagic = 0;
732 pHdr->idxChunk = 0;
733 pHdr->pTb = NULL;
734# ifdef RT_OS_DARWIN
735 rc = RTMemProtect(pHdr, sizeof(*pHdr), RTMEM_PROT_EXEC | RTMEM_PROT_READ);
736 AssertRC(rc); RT_NOREF(pVCpu);
737# endif
738#endif
739 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
740 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
741
742 /* Update the stats. */
743 pExecMemAllocator->cbAllocated -= cb;
744 pExecMemAllocator->cbFree += cb;
745 pExecMemAllocator->cAllocations -= 1;
746 return;
747 }
748 }
749 AssertFailed();
750}
751
752
753
754#ifdef IN_RING3
755# ifdef RT_OS_WINDOWS
756
757/**
758 * Initializes the unwind info structures for windows hosts.
759 */
760static int
761iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
762 void *pvChunk, uint32_t idxChunk)
763{
764 RT_NOREF(pVCpu);
765
766 /*
767 * The AMD64 unwind opcodes.
768 *
769 * This is a program that starts with RSP after a RET instruction that
770 * ends up in recompiled code, and the operations we describe here will
771 * restore all non-volatile registers and bring RSP back to where our
772 * RET address is. This means it's reverse order from what happens in
773 * the prologue.
774 *
775 * Note! Using a frame register approach here both because we have one
776 * and mainly because the UWOP_ALLOC_LARGE argument values
777 * would be a pain to write initializers for. On the positive
778 * side, we're impervious to changes in the stack variable
779 * area and can deal with dynamic stack allocations if necessary.
780 */
781 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
782 {
783 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
784 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
785 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
786 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
787 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
788 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
789 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
790 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
791 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
792 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
793 };
794 union
795 {
796 IMAGE_UNWIND_INFO Info;
797 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
798 } s_UnwindInfo =
799 {
800 {
801 /* .Version = */ 1,
802 /* .Flags = */ 0,
803 /* .SizeOfProlog = */ 16, /* whatever */
804 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
805 /* .FrameRegister = */ X86_GREG_xBP,
806 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
807 }
808 };
809 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
810 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
811
812 /*
813 * Calc how much space we need and allocate it off the exec heap.
814 */
815 unsigned const cFunctionEntries = 1;
816 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
817 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
818 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
819 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
820 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
821 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
822
823 /*
824 * Initialize the structures.
825 */
826 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
827
828 paFunctions[0].BeginAddress = 0;
829 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
830 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
831
832 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
833 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
834
835 /*
836 * Register it.
837 */
838 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
839 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
840
841 return VINF_SUCCESS;
842}
843
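/* Editor's note: the single RUNTIME_FUNCTION entry registered above covers
   the whole chunk, so any RIP inside recompiled code resolves to the same
   unwind info.  A hypothetical sanity check (assuming the windows.h
   declarations, which this file deliberately avoids including): */
#  if 0 /* example only */
DWORD64           uImageBase = 0;
PRUNTIME_FUNCTION pFunction  = RtlLookupFunctionEntry((DWORD64)(uintptr_t)pvSomewhereInChunk, &uImageBase, NULL);
Assert(pFunction && uImageBase == (uintptr_t)pvChunk);
#  endif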
844
845# else /* !RT_OS_WINDOWS */
846
847/**
848 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
849 */
850DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
851{
852 if (iValue >= 64)
853 {
854 Assert(iValue < 0x2000);
855 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
856 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
857 }
858 else if (iValue >= 0)
859 *Ptr.pb++ = (uint8_t)iValue;
860 else if (iValue > -64)
861 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
862 else
863 {
864 Assert(iValue > -0x2000);
865 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
866 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
867 }
868 return Ptr;
869}
870
871
872/**
873 * Emits an ULEB128 encoded value (up to 64-bit wide).
874 */
875DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
876{
877 while (uValue >= 0x80)
878 {
879 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
880 uValue >>= 7;
881 }
882 *Ptr.pb++ = (uint8_t)uValue;
883 return Ptr;
884}
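/* Worked encodings (editor's illustration) matching the two helpers above:
 *   iemDwarfPutLeb128(.., -8)   ->  0x78        (one byte, sign bit 0x40 set)
 *   iemDwarfPutLeb128(.., 100)  ->  0xe4 0x00   (bit 6 set, so two bytes)
 *   iemDwarfPutUleb128(.., 300) ->  0xac 0x02   (0x12c split at 7 bits)
 * The -8 case is exactly the data alignment factor emitted into the CIE below. */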
885
886
887/**
888 * Emits a CFA rule as register @a uReg + offset @a off.
889 */
890DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
891{
892 *Ptr.pb++ = DW_CFA_def_cfa;
893 Ptr = iemDwarfPutUleb128(Ptr, uReg);
894 Ptr = iemDwarfPutUleb128(Ptr, off);
895 return Ptr;
896}
897
898
899/**
900 * Emits a register (@a uReg) save location:
901 * CFA + @a off * data_alignment_factor
902 */
903DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
904{
905 if (uReg < 0x40)
906 *Ptr.pb++ = DW_CFA_offset | uReg;
907 else
908 {
909 *Ptr.pb++ = DW_CFA_offset_extended;
910 Ptr = iemDwarfPutUleb128(Ptr, uReg);
911 }
912 Ptr = iemDwarfPutUleb128(Ptr, off);
913 return Ptr;
914}
915
916
917# if 0 /* unused */
918/**
919 * Emits a register (@a uReg) save location, using signed offset:
920 * CFA + @a offSigned * data_alignment_factor
921 */
922DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
923{
924 *Ptr.pb++ = DW_CFA_offset_extended_sf;
925 Ptr = iemDwarfPutUleb128(Ptr, uReg);
926 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
927 return Ptr;
928}
929# endif
930
931
932/**
933 * Initializes the unwind info section for non-windows hosts.
934 */
935static int
936iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
937 void *pvChunk, uint32_t idxChunk)
938{
939 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
940 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
941
942 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
943
944 /*
945 * Generate the CIE first.
946 */
947# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
948 uint8_t const iDwarfVer = 3;
949# else
950 uint8_t const iDwarfVer = 4;
951# endif
952 RTPTRUNION const PtrCie = Ptr;
953 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
954 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
955 *Ptr.pb++ = iDwarfVer; /* DWARF version */
956 *Ptr.pb++ = 0; /* Augmentation. */
957 if (iDwarfVer >= 4)
958 {
959 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
960 *Ptr.pb++ = 0; /* Segment selector size. */
961 }
962# ifdef RT_ARCH_AMD64
963 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
964# else
965 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
966# endif
967 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
968# ifdef RT_ARCH_AMD64
969 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
970# elif defined(RT_ARCH_ARM64)
971 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
972# else
973# error "port me"
974# endif
975 /* Initial instructions: */
976# ifdef RT_ARCH_AMD64
977 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
978 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
979 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
980 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
981 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
982 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
983 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
984 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
985# elif defined(RT_ARCH_ARM64)
986# if 1
987 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
988# else
989 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
990# endif
991 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
992 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
993 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
994 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
995 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
996 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
997 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
998 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
999 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1000 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1001 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1002 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1003 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1004 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1005# else
1006# error "port me"
1007# endif
1008 while ((Ptr.u - PtrCie.u) & 3)
1009 *Ptr.pb++ = DW_CFA_nop;
1010 /* Finalize the CIE size. */
1011 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1012
1013 /*
1014 * Generate an FDE for the whole chunk area.
1015 */
1016# ifdef IEMNATIVE_USE_LIBUNWIND
1017 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1018# endif
1019 RTPTRUNION const PtrFde = Ptr;
1020 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1021 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1022 Ptr.pu32++;
1023 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1024 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1025# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
1026 *Ptr.pb++ = DW_CFA_nop;
1027# endif
1028 while ((Ptr.u - PtrFde.u) & 3)
1029 *Ptr.pb++ = DW_CFA_nop;
1030 /* Finalize the FDE size. */
1031 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1032
1033 /* Terminator entry. */
1034 *Ptr.pu32++ = 0;
1035 *Ptr.pu32++ = 0; /* just to be sure... */
1036 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1037
1038 /*
1039 * Register it.
1040 */
1041# ifdef IEMNATIVE_USE_LIBUNWIND
1042 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1043# else
1044 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1045 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1046# endif
1047
1048# ifdef IEMNATIVE_USE_GDB_JIT
1049 /*
1050 * Now for telling GDB about this (experimental).
1051 *
1052 * This seems to work best with ET_DYN.
1053 */
1054 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1055 sizeof(GDBJITSYMFILE), NULL);
1056 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1057 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1058
1059 RT_ZERO(*pSymFile);
1060
1061 /*
1062 * The ELF header:
1063 */
1064 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1065 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1066 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1067 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1068 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1069 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1070 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1071 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1072# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1073 pSymFile->EHdr.e_type = ET_DYN;
1074# else
1075 pSymFile->EHdr.e_type = ET_REL;
1076# endif
1077# ifdef RT_ARCH_AMD64
1078 pSymFile->EHdr.e_machine = EM_AMD64;
1079# elif defined(RT_ARCH_ARM64)
1080 pSymFile->EHdr.e_machine = EM_AARCH64;
1081# else
1082# error "port me"
1083# endif
1084 pSymFile->EHdr.e_version = 1; /*?*/
1085 pSymFile->EHdr.e_entry = 0;
1086# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1087 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1088# else
1089 pSymFile->EHdr.e_phoff = 0;
1090# endif
1091 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1092 pSymFile->EHdr.e_flags = 0;
1093 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1094# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1095 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1096 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1097# else
1098 pSymFile->EHdr.e_phentsize = 0;
1099 pSymFile->EHdr.e_phnum = 0;
1100# endif
1101 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1102 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1103 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1104
1105 uint32_t offStrTab = 0;
1106#define APPEND_STR(a_szStr) do { \
1107 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1108 offStrTab += sizeof(a_szStr); \
1109 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1110 } while (0)
1111#define APPEND_STR_FMT(a_szStr, ...) do { \
1112 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1113 offStrTab++; \
1114 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1115 } while (0)
1116
1117 /*
1118 * Section headers.
1119 */
1120 /* Section header #0: NULL */
1121 unsigned i = 0;
1122 APPEND_STR("");
1123 RT_ZERO(pSymFile->aShdrs[i]);
1124 i++;
1125
1126 /* Section header: .eh_frame */
1127 pSymFile->aShdrs[i].sh_name = offStrTab;
1128 APPEND_STR(".eh_frame");
1129 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1130 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1131# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1134# else
1135 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1136 pSymFile->aShdrs[i].sh_offset = 0;
1137# endif
1138
1139 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1140 pSymFile->aShdrs[i].sh_link = 0;
1141 pSymFile->aShdrs[i].sh_info = 0;
1142 pSymFile->aShdrs[i].sh_addralign = 1;
1143 pSymFile->aShdrs[i].sh_entsize = 0;
1144 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1145 i++;
1146
1147 /* Section header: .shstrtab */
1148 unsigned const iShStrTab = i;
1149 pSymFile->EHdr.e_shstrndx = iShStrTab;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".shstrtab");
1152 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 /* Section header: .symtab */
1169 pSymFile->aShdrs[i].sh_name = offStrTab;
1170 APPEND_STR(".symtab");
1171 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1172 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1173 pSymFile->aShdrs[i].sh_offset
1174 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1175 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1176 pSymFile->aShdrs[i].sh_link = iShStrTab;
1177 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1178 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1179 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1180 i++;
1181
1182# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1183 /* Section header: .dynsym */
1184 pSymFile->aShdrs[i].sh_name = offStrTab;
1185 APPEND_STR(".dynsym");
1186 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1187 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1188 pSymFile->aShdrs[i].sh_offset
1189 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1190 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1191 pSymFile->aShdrs[i].sh_link = iShStrTab;
1192 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1193 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1194 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1195 i++;
1196# endif
1197
1198# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1199 /* Section header: .dynamic */
1200 pSymFile->aShdrs[i].sh_name = offStrTab;
1201 APPEND_STR(".dynamic");
1202 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1203 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1204 pSymFile->aShdrs[i].sh_offset
1205 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1206 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1207 pSymFile->aShdrs[i].sh_link = iShStrTab;
1208 pSymFile->aShdrs[i].sh_info = 0;
1209 pSymFile->aShdrs[i].sh_addralign = 1;
1210 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1211 i++;
1212# endif
1213
1214 /* Section header: .text */
1215 unsigned const iShText = i;
1216 pSymFile->aShdrs[i].sh_name = offStrTab;
1217 APPEND_STR(".text");
1218 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1219 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1220# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1221 pSymFile->aShdrs[i].sh_offset
1222 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1223# else
1224 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1225 pSymFile->aShdrs[i].sh_offset = 0;
1226# endif
1227 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1228 pSymFile->aShdrs[i].sh_link = 0;
1229 pSymFile->aShdrs[i].sh_info = 0;
1230 pSymFile->aShdrs[i].sh_addralign = 1;
1231 pSymFile->aShdrs[i].sh_entsize = 0;
1232 i++;
1233
1234 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1235
1236# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1237 /*
1238 * The program headers:
1239 */
1240 /* Everything in a single LOAD segment: */
1241 i = 0;
1242 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1243 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1244 pSymFile->aPhdrs[i].p_offset
1245 = pSymFile->aPhdrs[i].p_vaddr
1246 = pSymFile->aPhdrs[i].p_paddr = 0;
1247 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1248 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1249 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1250 i++;
1251 /* The .dynamic segment. */
1252 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1253 pSymFile->aPhdrs[i].p_flags = PF_R;
1254 pSymFile->aPhdrs[i].p_offset
1255 = pSymFile->aPhdrs[i].p_vaddr
1256 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1257 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1258 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1259 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1260 i++;
1261
1262 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1263
1264 /*
1265 * The dynamic section:
1266 */
1267 i = 0;
1268 pSymFile->aDyn[i].d_tag = DT_SONAME;
1269 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1270 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1271 i++;
1272 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1273 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1274 i++;
1275 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1276 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1277 i++;
1278 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1279 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1280 i++;
1281 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1282 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1283 i++;
1284 pSymFile->aDyn[i].d_tag = DT_NULL;
1285 i++;
1286 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1287# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1288
1289 /*
1290 * Symbol tables:
1291 */
1292 /** @todo gdb doesn't seem to really like this ... */
1293 i = 0;
1294 pSymFile->aSymbols[i].st_name = 0;
1295 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1296 pSymFile->aSymbols[i].st_value = 0;
1297 pSymFile->aSymbols[i].st_size = 0;
1298 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1299 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1300# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1301 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1302# endif
1303 i++;
1304
1305 pSymFile->aSymbols[i].st_name = 0;
1306 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1307 pSymFile->aSymbols[i].st_value = 0;
1308 pSymFile->aSymbols[i].st_size = 0;
1309 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1310 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1311 i++;
1312
1313 pSymFile->aSymbols[i].st_name = offStrTab;
1314 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1315# if 0
1316 pSymFile->aSymbols[i].st_shndx = iShText;
1317 pSymFile->aSymbols[i].st_value = 0;
1318# else
1319 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1320 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1321# endif
1322 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1323 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1324 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1325# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1326 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1327 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1328# endif
1329 i++;
1330
1331 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1332 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1333
1334 /*
1335 * The GDB JIT entry and informing GDB.
1336 */
1337 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1338# if 1
1339 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1340# else
1341 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1342# endif
1343
1344 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1345 RTCritSectEnter(&g_IemNativeGdbJitLock);
1346 pEhFrame->GdbJitEntry.pNext = NULL;
1347 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1348 if (__jit_debug_descriptor.pTail)
1349 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1350 else
1351 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1352 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1353 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1354
1355 /* Notify GDB: */
1356 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1357 __jit_debug_register_code();
1358 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1359 RTCritSectLeave(&g_IemNativeGdbJitLock);
1360
1361# else /* !IEMNATIVE_USE_GDB_JIT */
1362 RT_NOREF(pVCpu);
1363# endif /* !IEMNATIVE_USE_GDB_JIT */
1364
1365 return VINF_SUCCESS;
1366}
1367
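/* Editor's summary of the abEhFrame layout produced above:
 *
 *    +0x00 CIE: length | id (0) | version | augmentation | (addr/seg sizes,
 *               v4 only) | code & data alignment factors | RA column |
 *               initial CFI instructions | NOP padding to a 4 byte multiple
 *    +off  FDE: length | back-pointer to the CIE | absolute chunk start PC |
 *               chunk size | NOP padding
 *    +end  terminator: zero length entry
 *
 * libunwind is handed the FDE directly (via offFda), while libgcc's
 * __register_frame_info() consumes the whole blob starting at the CIE. */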
1368# endif /* !RT_OS_WINDOWS */
1369#endif /* IN_RING3 */
1370
1371
1372/**
1373 * Adds another chunk to the executable memory allocator.
1374 *
1375 * This is used by the init code for the initial allocation and later by the
1376 * regular allocator function when it's out of memory.
1377 */
1378static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1379{
1380 /* Check that we've room for growth. */
1381 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1382 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1383
1384 /* Allocate a chunk. */
1385#ifdef RT_OS_DARWIN
1386 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1387#else
1388 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1389#endif
1390 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1391
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1403 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1404 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1405 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1406
1407 pExecMemAllocator->cChunks = idxChunk + 1;
1408 pExecMemAllocator->idxChunkHint = idxChunk;
1409
1410 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1411 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420 { /* likely */ }
1421 else
1422 {
1423 /* Just in case the impossible happens, undo the above: */
1424 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1425 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1426 pExecMemAllocator->cChunks = idxChunk;
1427 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1428 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1429 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1430 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1431
1432 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1433 return rc;
1434 }
1435#endif
1436 return VINF_SUCCESS;
1437}
1438
1439
1440/**
1441 * Initializes the executable memory allocator for native recompilation on the
1442 * calling EMT.
1443 *
1444 * @returns VBox status code.
1445 * @param pVCpu The cross context virtual CPU structure of the calling
1446 * thread.
1447 * @param cbMax The max size of the allocator.
1448 * @param cbInitial The initial allocator size.
1449 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1450 * dependent).
1451 */
1452int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1453{
1454 /*
1455 * Validate input.
1456 */
1457 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1458 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1459 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1460 || cbChunk == 0
1461 || ( RT_IS_POWER_OF_TWO(cbChunk)
1462 && cbChunk >= _1M
1463 && cbChunk <= _256M
1464 && cbChunk <= cbMax),
1465 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1466 VERR_OUT_OF_RANGE);
1467
1468 /*
1469 * Adjust/figure out the chunk size.
1470 */
1471 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1472 {
1473 if (cbMax >= _256M)
1474 cbChunk = _64M;
1475 else
1476 {
1477 if (cbMax < _16M)
1478 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1479 else
1480 cbChunk = (uint32_t)cbMax / 4;
1481 if (!RT_IS_POWER_OF_TWO(cbChunk))
1482 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1483 }
1484 }
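    /* Worked examples of the defaulting above (editor's illustration):
         cbMax = 512 MiB -> cbChunk = 64 MiB, 8 chunks;
         cbMax =  40 MiB -> 40/4 = 10 MiB, rounded up to the next power of
                            two = 16 MiB, cbMax then rounded up to 48 MiB;
         cbMax =   8 MiB -> cbChunk = 4 MiB, 2 chunks. */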
1485
1486 if (cbChunk > cbMax)
1487 cbMax = cbChunk;
1488 else
1489 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1490 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1491 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1492
1493 /*
1494 * Allocate and initialize the allocator instance.
1495 */
1496 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1497 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1498 size_t cbNeeded = offBitmaps + cbBitmaps;
1499 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1500 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1501#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1502 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1503 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1504#endif
1505 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1506 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1507 VERR_NO_MEMORY);
1508 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1509 pExecMemAllocator->cbChunk = cbChunk;
1510 pExecMemAllocator->cMaxChunks = cMaxChunks;
1511 pExecMemAllocator->cChunks = 0;
1512 pExecMemAllocator->idxChunkHint = 0;
1513 pExecMemAllocator->cAllocations = 0;
1514 pExecMemAllocator->cbTotal = 0;
1515 pExecMemAllocator->cbFree = 0;
1516 pExecMemAllocator->cbAllocated = 0;
1517 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1518 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1519 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1520 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1521#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1522 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1523#endif
1524 for (uint32_t i = 0; i < cMaxChunks; i++)
1525 {
1526 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1527 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1528 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1529#ifdef IN_RING0
1530 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1531#else
1532 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1533#endif
1534 }
1535 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1536
1537 /*
1538 * Do the initial allocations.
1539 */
1540 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1541 {
1542 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1543 AssertLogRelRCReturn(rc, rc);
1544 }
1545
1546 pExecMemAllocator->idxChunkHint = 0;
1547
1548 /*
1549 * Register statistics.
1550 */
1551 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1552 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1553 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1554 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1555 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1556 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1557 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1558 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1559 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1560 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1561 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1562 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1563 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1564 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1565 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1566#ifdef VBOX_WITH_STATISTICS
1567 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1568 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1569#endif
1570#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1571 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1572 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1573 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1574 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1575#endif
1576
1577 return VINF_SUCCESS;
1578}
1579
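/* Editor's sketch of how a recompiler client drives this allocator over a
   full lifecycle.  cbCode/pabCode/pTb are hypothetical stand-ins and the
   sizes are illustration values only: */
#if 0 /* example only */
int rc = iemExecMemAllocatorInit(pVCpu, _64M /*cbMax*/, _16M /*cbInitial*/, 0 /*cbChunk: default*/);
AssertRCReturn(rc, rc);

void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb);
if (pv)
{
    memcpy(pv, pabCode, cbCode);                       /* emit the recompiled code */
    iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode); /* make it executable (darwin) */
    /* ... execute the TB ... */
    iemExecMemAllocatorFree(pVCpu, pv, cbCode);        /* return the units to the bitmap */
}
#endif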