VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMRZ/PGMRZDynMap.cpp@ 74795

Last change on this file since 74795 was 73097, checked in by vboxsync, 6 years ago

*: Made RT_UOFFSETOF, RT_OFFSETOF, RT_UOFFSETOF_ADD and RT_OFFSETOF_ADD work like builtin_offsetof() and require compile time resolvable requests, adding RT_UOFFSETOF_DYN for the dynamic questions that can only be answered at runtime.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 98.0 KB
Line 
1/* $Id: PGMRZDynMap.cpp 73097 2018-07-12 21:06:33Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, dynamic mapping cache.
4 */
5
6/*
7 * Copyright (C) 2008-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PGM_DYNMAP
23#include <VBox/vmm/pgm.h>
24#include "PGMInternal.h"
25#include <VBox/vmm/vm.h>
26#include "PGMInline.h"
27#include <VBox/err.h>
28#include <VBox/param.h>
29#include <VBox/sup.h>
30#include <iprt/asm.h>
31#include <iprt/asm-amd64-x86.h>
32#include <iprt/assert.h>
33#ifndef IN_RC
34# include <iprt/cpuset.h>
35# include <iprt/mem.h>
36# include <iprt/memobj.h>
37# include <iprt/mp.h>
38# include <iprt/semaphore.h>
39# include <iprt/spinlock.h>
40#endif
41#include <iprt/string.h>
42
43
44/*********************************************************************************************************************************
45* Defined Constants And Macros *
46*********************************************************************************************************************************/
#ifdef IN_RING0
/** Upper limit for the mapping cache size, expressed in pages. */
# define PGMR0DYNMAP_MAX_PAGES              ((16*_1M) >> PAGE_SHIFT)
/** Page count of the small segments we fall back on when a single big
 * segment cannot be allocated (out-of-memory). */
# define PGMR0DYNMAP_SMALL_SEG_PAGES        128
/** Pages reserved per CPU (preferred amount). */
# define PGMR0DYNMAP_PAGES_PER_CPU          256
/** Pages reserved per CPU (bare minimum).
 * This must be equal or larger than the autoset size. */
# define PGMR0DYNMAP_PAGES_PER_CPU_MIN      64
/** Computes the overload threshold (safety margin) for a cache of @a cPages
 * pages.  Currently half the page count, i.e. 50%. */
# define PGMR0DYNMAP_CALC_OVERLOAD(cPages)  ((cPages) / 2)
/** Number of guard pages: one in strict builds, none otherwise.
 * @remarks Never do tuning of the hashing or whatnot with a strict build! */
# ifdef VBOX_STRICT
#  define PGMR0DYNMAP_GUARD_PAGES           1
# else
#  define PGMR0DYNMAP_GUARD_PAGES           0
# endif
#endif /* IN_RING0 */
/** Dummy physical address recorded for guard pages. */
#define PGMR0DYNMAP_GUARD_PAGE_HCPHYS       UINT32_C(0x7777feed)
/** Dummy reference count recorded for guard pages. (Must be non-zero.) */
#define PGMR0DYNMAP_GUARD_PAGE_REF_COUNT    INT32_C(0x7777feed)
#if 0
/** Define this to only clear the present bit on guard pages.
 * The alternative is replacing the whole PTE with a bad, not-present PTE.
 * Either way, XNU will screw us. :-/ */
# define PGMR0DYNMAP_GUARD_NP
#endif
/** Dummy PTE value written for a guard page, 32-bit legacy layout. */
#define PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE   X86_PTE_PG_MASK
/** Dummy PTE value written for a guard page, PAE/AMD64 layout. */
#define PGMR0DYNMAP_GUARD_PAGE_PAE_PTE      UINT64_MAX /*X86_PTE_PAE_PG_MASK*/
82
#ifdef IN_RING0 /* Note! Assertions cause panics when preemption is disabled;
                 *       disable this branch to work around that. */
/**
 * Acquires the cache spinlock.
 * Expands to a single RTSpinlockAcquire call on (pThis)->hSpinlock.
 */
# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis) \
    RTSpinlockAcquire((pThis)->hSpinlock)

/**
 * Releases the cache spinlock.
 */
# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis) \
    RTSpinlockRelease((pThis)->hSpinlock)

/**
 * Re-acquires the cache spinlock (same expansion as the acquire macro).
 */
# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis) \
    RTSpinlockAcquire((pThis)->hSpinlock)
#else
/* Outside ring-0 the three spinlock macros expand to empty statements. */
# define PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis)    do { } while (0)
# define PGMRZDYNMAP_SPINLOCK_RELEASE(pThis)    do { } while (0)
# define PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis)  do { } while (0)
#endif
108
109
/** Converts a pointer to a VMCPU's pgm.s.AutoSet member into the owning PVMCPU. */
#define PGMRZDYNMAP_SET_2_VMCPU(pSet)       (RT_FROM_MEMBER(pSet, VMCPU, pgm.s.AutoSet))

/** Converts a pointer to a VMCPU's pgm.s.AutoSet member into the PVM of that
 * VMCPU (current context pointer). */
#define PGMRZDYNMAP_SET_2_VM(pSet)          (PGMRZDYNMAP_SET_2_VMCPU(pSet)->CTX_SUFF(pVM))

/** Converts a pointer to a VMCPU's pgm.s.AutoSet member into the dynamic
 * mapping cache pointer for the current context.
 * In raw-mode context this is the VM's pgm.s.pRCDynMap, otherwise it is the
 * global g_pPGMR0DynMap (the argument is not used in that case). */
#ifdef IN_RC
# define PGMRZDYNMAP_SET_2_DYNMAP(pSet)     (PGMRZDYNMAP_SET_2_VM(pSet)->pgm.s.pRCDynMap)
#else
# define PGMRZDYNMAP_SET_2_DYNMAP(pSet)     (g_pPGMR0DynMap)
#endif

/**
 * Gets the set index of the current CPU.
 *
 * In raw-mode context this is always 0 since there is only ever one EMT in
 * that context (at least presently); elsewhere RTMpCurSetIndex() is used.
 */
#ifdef IN_RC
# define PGMRZDYNMAP_CUR_CPU()              (0)
#else
# define PGMRZDYNMAP_CUR_CPU()              RTMpCurSetIndex()
#endif
134
/** Value of PGMRZDYNMAP::u32Magic. (Jens Christian Bugge Wesseltoft) */
#define PGMRZDYNMAP_MAGIC       UINT32_C(0x19640201)


/** Zaps a set entry: marks the page index as unused (UINT16_MAX) and clears
 * the three reference counters. */
#define PGMRZDYNMAP_ZAP_ENTRY(pEntry) \
    do \
    { \
        (pEntry)->iPage        = UINT16_MAX; \
        (pEntry)->cRefs        = 0; \
        (pEntry)->cInlinedRefs = 0; \
        (pEntry)->cUnrefs      = 0; \
    } while (0)


/** @def PGMRZDYNMAP_STRICT_RELEASE
 * Define this to force pages to be released and made non-present ASAP after
 * use.  Should not normally be enabled since it is a bit expensive. */
#if 0 || defined(DOXYGEN_RUNNING)
# define PGMRZDYNMAP_STRICT_RELEASE
#endif
156
157
158/*********************************************************************************************************************************
159* Structures and Typedefs *
160*********************************************************************************************************************************/
161#ifdef IN_RING0
162/**
163 * Ring-0 dynamic mapping cache segment.
164 *
165 * The dynamic mapping cache can be extended with additional segments if the
166 * load is found to be too high. This done the next time a VM is created, under
167 * the protection of the init mutex. The arrays is reallocated and the new
168 * segment is added to the end of these. Nothing is rehashed of course, as the
169 * indexes / addresses must remain unchanged.
170 *
171 * This structure is only modified while owning the init mutex or during module
172 * init / term.
173 */
174typedef struct PGMR0DYNMAPSEG
175{
176 /** Pointer to the next segment. */
177 struct PGMR0DYNMAPSEG *pNext;
178 /** The memory object for the virtual address range that we're abusing. */
179 RTR0MEMOBJ hMemObj;
180 /** The start page in the cache. (I.e. index into the arrays.) */
181 uint16_t iPage;
182 /** The number of pages this segment contributes. */
183 uint16_t cPages;
184 /** The number of page tables. */
185 uint16_t cPTs;
186 /** The memory objects for the page tables. */
187 RTR0MEMOBJ ahMemObjPTs[1];
188} PGMR0DYNMAPSEG;
189/** Pointer to a ring-0 dynamic mapping cache segment. */
190typedef PGMR0DYNMAPSEG *PPGMR0DYNMAPSEG;
191
192
193/**
194 * Ring-0 dynamic mapping cache entry.
195 *
196 * @sa PGMRZDYNMAPENTRY, PGMRCDYNMAPENTRY.
197 */
198typedef struct PGMR0DYNMAPENTRY
199{
200 /** The physical address of the currently mapped page.
201 * This is duplicate for three reasons: cache locality, cache policy of the PT
202 * mappings and sanity checks. */
203 RTHCPHYS HCPhys;
204 /** Pointer to the page. */
205 void *pvPage;
206 /** The number of references. */
207 int32_t volatile cRefs;
208 /** PTE pointer union. */
209 union PGMR0DYNMAPENTRY_PPTE
210 {
211 /** PTE pointer, 32-bit legacy version. */
212 PX86PTE pLegacy;
213 /** PTE pointer, PAE version. */
214 PX86PTEPAE pPae;
215 /** PTE pointer, the void version. */
216 void *pv;
217 } uPte;
218 /** CPUs that haven't invalidated this entry after it's last update. */
219 RTCPUSET PendingSet;
220} PGMR0DYNMAPENTRY;
221/** Pointer a mapping cache entry for the ring-0.
222 * @sa PPGMRZDYNMAPENTRY, PPGMRCDYNMAPENTRY, */
223typedef PGMR0DYNMAPENTRY *PPGMR0DYNMAPENTRY;
224
225
226/**
227 * Dynamic mapping cache for ring-0.
228 *
229 * This is initialized during VMMR0 module init but no segments are allocated
230 * at that time. Segments will be added when the first VM is started and
231 * removed again when the last VM shuts down, thus avoid consuming memory while
232 * dormant. At module termination, the remaining bits will be freed up.
233 *
234 * @sa PPGMRZDYNMAP, PGMRCDYNMAP.
235 */
236typedef struct PGMR0DYNMAP
237{
238 /** The usual magic number / eye catcher (PGMRZDYNMAP_MAGIC). */
239 uint32_t u32Magic;
240 /** Spinlock serializing the normal operation of the cache. */
241 RTSPINLOCK hSpinlock;
242 /** Array for tracking and managing the pages. */
243 PPGMR0DYNMAPENTRY paPages;
244 /** The cache size given as a number of pages. */
245 uint32_t cPages;
246 /** Whether it's 32-bit legacy or PAE/AMD64 paging mode. */
247 bool fLegacyMode;
248 /** The current load.
249 * This does not include guard pages. */
250 uint32_t cLoad;
251 /** The max load ever.
252 * This is maintained to trigger the adding of more mapping space. */
253 uint32_t cMaxLoad;
254 /** Initialization / termination lock. */
255 RTSEMFASTMUTEX hInitLock;
256 /** The number of guard pages. */
257 uint32_t cGuardPages;
258 /** The number of users (protected by hInitLock). */
259 uint32_t cUsers;
260 /** Array containing a copy of the original page tables.
261 * The entries are either X86PTE or X86PTEPAE according to fLegacyMode. */
262 void *pvSavedPTEs;
263 /** List of segments. */
264 PPGMR0DYNMAPSEG pSegHead;
265 /** The paging mode. */
266 SUPPAGINGMODE enmPgMode;
267} PGMR0DYNMAP;
268
269
270/**
271 * Paging level data.
272 */
273typedef struct PGMR0DYNMAPPGLVL
274{
275 uint32_t cLevels; /**< The number of levels. */
276 struct
277 {
278 RTHCPHYS HCPhys; /**< The address of the page for the current level,
279 * i.e. what hMemObj/hMapObj is currently mapping. */
280 RTHCPHYS fPhysMask; /**< Mask for extracting HCPhys from uEntry. */
281 RTR0MEMOBJ hMemObj; /**< Memory object for HCPhys, PAGE_SIZE. */
282 RTR0MEMOBJ hMapObj; /**< Mapping object for hMemObj. */
283 uint32_t fPtrShift; /**< The pointer shift count. */
284 uint64_t fPtrMask; /**< The mask to apply to the shifted pointer to get the table index. */
285 uint64_t fAndMask; /**< And mask to check entry flags. */
286 uint64_t fResMask; /**< The result from applying fAndMask. */
287 union
288 {
289 void *pv; /**< hMapObj address. */
290 PX86PGUINT paLegacy; /**< Legacy table view. */
291 PX86PGPAEUINT paPae; /**< PAE/AMD64 table view. */
292 } u;
293 } a[4];
294} PGMR0DYNMAPPGLVL;
295/** Pointer to paging level data. */
296typedef PGMR0DYNMAPPGLVL *PPGMR0DYNMAPPGLVL;
297#endif
298
299/** Mapping cache entry for the current context.
300 * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
301typedef CTX_MID(PGM,DYNMAPENTRY) PGMRZDYNMAPENTRY;
302/** Pointer a mapping cache entry for the current context.
303 * @sa PGMR0DYNMAPENTRY, PGMRCDYNMAPENTRY */
304typedef PGMRZDYNMAPENTRY *PPGMRZDYNMAPENTRY;
305
306/** Pointer to the mapping cache instance for the current context.
307 * @sa PGMR0DYNMAP, PGMRCDYNMAP */
308typedef CTX_MID(PGM,DYNMAP) *PPGMRZDYNMAP;
309
310
311
312/*********************************************************************************************************************************
313* Global Variables *
314*********************************************************************************************************************************/
#ifdef IN_RING0
/** The one and only ring-0 dynamic mapping cache instance (set by
 * PGMR0DynMapInit, cleared by PGMR0DynMapTerm). */
static PGMR0DYNMAP *g_pPGMR0DynMap;
#endif
/** Flag used for overflow testing. */
static bool         g_fPGMR0DynMapTestRunning = false;
321
322
323/*********************************************************************************************************************************
324* Internal Functions *
325*********************************************************************************************************************************/
326static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs);
327#ifdef IN_RING0
328static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis);
329static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis);
330static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis);
331#endif
332#if 0 /*def DEBUG*/
333static int pgmR0DynMapTest(PVM pVM);
334#endif
335
336
337/**
338 * Initializes the auto mapping sets for a VM.
339 *
340 * @returns VINF_SUCCESS on success, VERR_PGM_DYNMAP_IPE on failure.
341 * @param pVM The cross context VM structure.
342 */
343static int pgmRZDynMapInitAutoSetsForVM(PVM pVM)
344{
345 VMCPUID idCpu = pVM->cCpus;
346 AssertReturn(idCpu > 0 && idCpu <= VMM_MAX_CPU_COUNT, VERR_PGM_DYNMAP_IPE);
347 while (idCpu-- > 0)
348 {
349 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
350 uint32_t j = RT_ELEMENTS(pSet->aEntries);
351 while (j-- > 0)
352 {
353 pSet->aEntries[j].pvPage = NULL;
354 pSet->aEntries[j].HCPhys = NIL_RTHCPHYS;
355 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
356 }
357 pSet->cEntries = PGMMAPSET_CLOSED;
358 pSet->iSubset = UINT32_MAX;
359 pSet->iCpu = -1;
360 memset(&pSet->aiHashTable[0], 0xff, sizeof(pSet->aiHashTable));
361 }
362
363 return VINF_SUCCESS;
364}
365
366
367#ifdef IN_RING0
368
369/**
370 * Initializes the ring-0 dynamic mapping cache.
371 *
372 * @returns VBox status code.
373 */
374VMMR0DECL(int) PGMR0DynMapInit(void)
375{
376 Assert(!g_pPGMR0DynMap);
377
378 /*
379 * Create and initialize the cache instance.
380 */
381 PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)RTMemAllocZ(sizeof(*pThis));
382 AssertLogRelReturn(pThis, VERR_NO_MEMORY);
383 int rc = VINF_SUCCESS;
384 pThis->enmPgMode = SUPR0GetPagingMode();
385 switch (pThis->enmPgMode)
386 {
387 case SUPPAGINGMODE_32_BIT:
388 case SUPPAGINGMODE_32_BIT_GLOBAL:
389 pThis->fLegacyMode = false;
390 break;
391 case SUPPAGINGMODE_PAE:
392 case SUPPAGINGMODE_PAE_GLOBAL:
393 case SUPPAGINGMODE_PAE_NX:
394 case SUPPAGINGMODE_PAE_GLOBAL_NX:
395 case SUPPAGINGMODE_AMD64:
396 case SUPPAGINGMODE_AMD64_GLOBAL:
397 case SUPPAGINGMODE_AMD64_NX:
398 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
399 pThis->fLegacyMode = false;
400 break;
401 default:
402 rc = VERR_PGM_DYNMAP_IPE;
403 break;
404 }
405 if (RT_SUCCESS(rc))
406 {
407 rc = RTSemFastMutexCreate(&pThis->hInitLock);
408 if (RT_SUCCESS(rc))
409 {
410 rc = RTSpinlockCreate(&pThis->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "PGMR0DynMap");
411 if (RT_SUCCESS(rc))
412 {
413 pThis->u32Magic = PGMRZDYNMAP_MAGIC;
414 g_pPGMR0DynMap = pThis;
415 return VINF_SUCCESS;
416 }
417 RTSemFastMutexDestroy(pThis->hInitLock);
418 }
419 }
420 RTMemFree(pThis);
421 return rc;
422}
423
424
425/**
426 * Terminates the ring-0 dynamic mapping cache.
427 */
428VMMR0DECL(void) PGMR0DynMapTerm(void)
429{
430 /*
431 * Destroy the cache.
432 *
433 * There is not supposed to be any races here, the loader should
434 * make sure about that. So, don't bother locking anything.
435 *
436 * The VM objects should all be destroyed by now, so there is no
437 * dangling users or anything like that to clean up. This routine
438 * is just a mirror image of PGMR0DynMapInit.
439 */
440 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
441 if (pThis)
442 {
443 AssertPtr(pThis);
444 g_pPGMR0DynMap = NULL;
445
446 /* This should *never* happen, but in case it does try not to leak memory. */
447 AssertLogRelMsg(!pThis->cUsers && !pThis->paPages && !pThis->pvSavedPTEs && !pThis->cPages,
448 ("cUsers=%d paPages=%p pvSavedPTEs=%p cPages=%#x\n",
449 pThis->cUsers, pThis->paPages, pThis->pvSavedPTEs, pThis->cPages));
450 if (pThis->paPages)
451 pgmR0DynMapTearDown(pThis);
452
453 /* Free the associated resources. */
454 RTSemFastMutexDestroy(pThis->hInitLock);
455 pThis->hInitLock = NIL_RTSEMFASTMUTEX;
456 RTSpinlockDestroy(pThis->hSpinlock);
457 pThis->hSpinlock = NIL_RTSPINLOCK;
458 pThis->u32Magic = UINT32_MAX;
459 RTMemFree(pThis);
460 }
461}
462
463
464/**
465 * Initializes the dynamic mapping cache for a new VM.
466 *
467 * @returns VBox status code.
468 * @param pVM The cross context VM structure.
469 */
470VMMR0DECL(int) PGMR0DynMapInitVM(PVM pVM)
471{
472 AssertMsgReturn(!pVM->pgm.s.pvR0DynMapUsed, ("%p (pThis=%p)\n", pVM->pgm.s.pvR0DynMapUsed, g_pPGMR0DynMap), VERR_WRONG_ORDER);
473
474 /*
475 * Initialize the auto sets.
476 */
477 int rc = pgmRZDynMapInitAutoSetsForVM(pVM);
478 if (RT_FAILURE(rc))
479 return rc;
480
481 /*
482 * Do we need the cache? Skip the last bit if we don't.
483 */
484 if (VM_IS_RAW_MODE_ENABLED(pVM))
485 return VINF_SUCCESS;
486
487 /*
488 * Reference and if necessary setup or expand the cache.
489 */
490 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
491 AssertPtrReturn(pThis, VERR_PGM_DYNMAP_IPE);
492 rc = RTSemFastMutexRequest(pThis->hInitLock);
493 AssertLogRelRCReturn(rc, rc);
494
495 pThis->cUsers++;
496 if (pThis->cUsers == 1)
497 {
498 rc = pgmR0DynMapSetup(pThis);
499#if 0 /*def DEBUG*/
500 if (RT_SUCCESS(rc))
501 {
502 rc = pgmR0DynMapTest(pVM);
503 if (RT_FAILURE(rc))
504 pgmR0DynMapTearDown(pThis);
505 }
506#endif
507 }
508 else if (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(pThis->cPages - pThis->cGuardPages))
509 rc = pgmR0DynMapExpand(pThis);
510 if (RT_SUCCESS(rc))
511 pVM->pgm.s.pvR0DynMapUsed = pThis;
512 else
513 pThis->cUsers--;
514
515 RTSemFastMutexRelease(pThis->hInitLock);
516 return rc;
517}
518
519
520/**
521 * Terminates the dynamic mapping cache usage for a VM.
522 *
523 * @param pVM The cross context VM structure.
524 */
525VMMR0DECL(void) PGMR0DynMapTermVM(PVM pVM)
526{
527 /*
528 * Return immediately if we're not using the cache.
529 */
530 if (!pVM->pgm.s.pvR0DynMapUsed)
531 return;
532
533 PPGMRZDYNMAP pThis = g_pPGMR0DynMap;
534 AssertPtrReturnVoid(pThis);
535
536 int rc = RTSemFastMutexRequest(pThis->hInitLock);
537 AssertLogRelRCReturnVoid(rc);
538
539 if (pVM->pgm.s.pvR0DynMapUsed == pThis)
540 {
541 pVM->pgm.s.pvR0DynMapUsed = NULL;
542
543#ifdef VBOX_STRICT
544 PGMR0DynMapAssertIntegrity();
545#endif
546
547 /*
548 * Clean up and check the auto sets.
549 */
550 VMCPUID idCpu = pVM->cCpus;
551 while (idCpu-- > 0)
552 {
553 PPGMMAPSET pSet = &pVM->aCpus[idCpu].pgm.s.AutoSet;
554 uint32_t j = pSet->cEntries;
555 if (j <= RT_ELEMENTS(pSet->aEntries))
556 {
557 /*
558 * The set is open, close it.
559 */
560 while (j-- > 0)
561 {
562 int32_t cRefs = pSet->aEntries[j].cRefs;
563 uint32_t iPage = pSet->aEntries[j].iPage;
564 LogRel(("PGMR0DynMapTermVM: %d dangling refs to %#x\n", cRefs, iPage));
565 if (iPage < pThis->cPages && cRefs > 0)
566 pgmRZDynMapReleasePage(pThis, iPage, cRefs);
567 else
568 AssertLogRelMsgFailed(("cRefs=%d iPage=%#x cPages=%u\n", cRefs, iPage, pThis->cPages));
569
570 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
571 }
572 pSet->cEntries = PGMMAPSET_CLOSED;
573 pSet->iSubset = UINT32_MAX;
574 pSet->iCpu = -1;
575 }
576 else
577 AssertMsg(j == PGMMAPSET_CLOSED, ("cEntries=%#x\n", j));
578
579 j = RT_ELEMENTS(pSet->aEntries);
580 while (j-- > 0)
581 {
582 Assert(pSet->aEntries[j].iPage == UINT16_MAX);
583 Assert(!pSet->aEntries[j].cRefs);
584 }
585 }
586
587 /*
588 * Release our reference to the mapping cache.
589 */
590 Assert(pThis->cUsers > 0);
591 pThis->cUsers--;
592 if (!pThis->cUsers)
593 pgmR0DynMapTearDown(pThis);
594 }
595 else
596 AssertLogRelMsgFailed(("pvR0DynMapUsed=%p pThis=%p\n", pVM->pgm.s.pvR0DynMapUsed, pThis));
597
598 RTSemFastMutexRelease(pThis->hInitLock);
599}
600
601
602/**
603 * Shoots down the TLBs for all the cache pages, pgmR0DynMapTearDown helper.
604 *
605 * @param idCpu The current CPU.
606 * @param pvUser1 The dynamic mapping cache instance.
607 * @param pvUser2 Unused, NULL.
608 */
609static DECLCALLBACK(void) pgmR0DynMapShootDownTlbs(RTCPUID idCpu, void *pvUser1, void *pvUser2)
610{
611 Assert(!pvUser2);
612 PPGMRZDYNMAP pThis = (PPGMRZDYNMAP)pvUser1;
613 Assert(pThis == g_pPGMR0DynMap);
614 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
615 uint32_t iPage = pThis->cPages;
616 while (iPage-- > 0)
617 ASMInvalidatePage((uintptr_t)paPages[iPage].pvPage);
618}
619
620
621/**
622 * Shoot down the TLBs for every single cache entry on all CPUs.
623 *
624 * @returns IPRT status code (RTMpOnAll).
625 * @param pThis The dynamic mapping cache instance.
626 */
627static int pgmR0DynMapTlbShootDown(PPGMRZDYNMAP pThis)
628{
629 int rc = RTMpOnAll(pgmR0DynMapShootDownTlbs, pThis, NULL);
630 AssertRC(rc);
631 if (RT_FAILURE(rc))
632 {
633 uint32_t iPage = pThis->cPages;
634 while (iPage-- > 0)
635 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
636 }
637 return rc;
638}
639
640
641/**
642 * Calculate the new cache size based on cMaxLoad statistics.
643 *
644 * @returns Number of pages.
645 * @param pThis The dynamic mapping cache instance.
646 * @param pcMinPages The minimal size in pages.
647 */
648static uint32_t pgmR0DynMapCalcNewSize(PPGMRZDYNMAP pThis, uint32_t *pcMinPages)
649{
650 Assert(pThis->cPages <= PGMR0DYNMAP_MAX_PAGES);
651
652 /* cCpus * PGMR0DYNMAP_PAGES_PER_CPU(_MIN). */
653 RTCPUID cCpus = RTMpGetCount();
654 AssertReturn(cCpus > 0 && cCpus <= RTCPUSET_MAX_CPUS, 0);
655 uint32_t cPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU;
656 uint32_t cMinPages = cCpus * PGMR0DYNMAP_PAGES_PER_CPU_MIN;
657
658 /* adjust against cMaxLoad. */
659 AssertMsg(pThis->cMaxLoad <= PGMR0DYNMAP_MAX_PAGES, ("%#x\n", pThis->cMaxLoad));
660 if (pThis->cMaxLoad > PGMR0DYNMAP_MAX_PAGES)
661 pThis->cMaxLoad = 0;
662
663 while (pThis->cMaxLoad > PGMR0DYNMAP_CALC_OVERLOAD(cPages))
664 cPages += PGMR0DYNMAP_PAGES_PER_CPU;
665
666 if (pThis->cMaxLoad > cMinPages)
667 cMinPages = pThis->cMaxLoad;
668
669 /* adjust against max and current size. */
670 if (cPages < pThis->cPages)
671 cPages = pThis->cPages;
672 cPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
673 if (cPages > PGMR0DYNMAP_MAX_PAGES)
674 cPages = PGMR0DYNMAP_MAX_PAGES;
675
676 if (cMinPages < pThis->cPages)
677 cMinPages = pThis->cPages;
678 cMinPages *= PGMR0DYNMAP_GUARD_PAGES + 1;
679 if (cMinPages > PGMR0DYNMAP_MAX_PAGES)
680 cMinPages = PGMR0DYNMAP_MAX_PAGES;
681
682 Assert(cMinPages);
683 *pcMinPages = cMinPages;
684 return cPages;
685}
686
687
688/**
689 * Initializes the paging level data.
690 *
691 * @param pThis The dynamic mapping cache instance.
692 * @param pPgLvl The paging level data.
693 */
694void pgmR0DynMapPagingArrayInit(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl)
695{
696 RTCCUINTREG cr4 = ASMGetCR4();
697 switch (pThis->enmPgMode)
698 {
699 case SUPPAGINGMODE_32_BIT:
700 case SUPPAGINGMODE_32_BIT_GLOBAL:
701 pPgLvl->cLevels = 2;
702 pPgLvl->a[0].fPhysMask = X86_CR3_PAGE_MASK;
703 pPgLvl->a[0].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
704 pPgLvl->a[0].fResMask = X86_PDE_P | X86_PDE_RW;
705 pPgLvl->a[0].fPtrMask = X86_PD_MASK;
706 pPgLvl->a[0].fPtrShift = X86_PD_SHIFT;
707
708 pPgLvl->a[1].fPhysMask = X86_PDE_PG_MASK;
709 pPgLvl->a[1].fAndMask = X86_PTE_P | X86_PTE_RW;
710 pPgLvl->a[1].fResMask = X86_PTE_P | X86_PTE_RW;
711 pPgLvl->a[1].fPtrMask = X86_PT_MASK;
712 pPgLvl->a[1].fPtrShift = X86_PT_SHIFT;
713 break;
714
715 case SUPPAGINGMODE_PAE:
716 case SUPPAGINGMODE_PAE_GLOBAL:
717 case SUPPAGINGMODE_PAE_NX:
718 case SUPPAGINGMODE_PAE_GLOBAL_NX:
719 pPgLvl->cLevels = 3;
720 pPgLvl->a[0].fPhysMask = X86_CR3_PAE_PAGE_MASK;
721 pPgLvl->a[0].fPtrMask = X86_PDPT_MASK_PAE;
722 pPgLvl->a[0].fPtrShift = X86_PDPT_SHIFT;
723 pPgLvl->a[0].fAndMask = X86_PDPE_P;
724 pPgLvl->a[0].fResMask = X86_PDPE_P;
725
726 pPgLvl->a[1].fPhysMask = X86_PDPE_PG_MASK;
727 pPgLvl->a[1].fPtrMask = X86_PD_PAE_MASK;
728 pPgLvl->a[1].fPtrShift = X86_PD_PAE_SHIFT;
729 pPgLvl->a[1].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
730 pPgLvl->a[1].fResMask = X86_PDE_P | X86_PDE_RW;
731
732 pPgLvl->a[2].fPhysMask = X86_PDE_PAE_PG_MASK;
733 pPgLvl->a[2].fPtrMask = X86_PT_PAE_MASK;
734 pPgLvl->a[2].fPtrShift = X86_PT_PAE_SHIFT;
735 pPgLvl->a[2].fAndMask = X86_PTE_P | X86_PTE_RW;
736 pPgLvl->a[2].fResMask = X86_PTE_P | X86_PTE_RW;
737 break;
738
739 case SUPPAGINGMODE_AMD64:
740 case SUPPAGINGMODE_AMD64_GLOBAL:
741 case SUPPAGINGMODE_AMD64_NX:
742 case SUPPAGINGMODE_AMD64_GLOBAL_NX:
743 pPgLvl->cLevels = 4;
744 pPgLvl->a[0].fPhysMask = X86_CR3_AMD64_PAGE_MASK;
745 pPgLvl->a[0].fPtrShift = X86_PML4_SHIFT;
746 pPgLvl->a[0].fPtrMask = X86_PML4_MASK;
747 pPgLvl->a[0].fAndMask = X86_PML4E_P | X86_PML4E_RW;
748 pPgLvl->a[0].fResMask = X86_PML4E_P | X86_PML4E_RW;
749
750 pPgLvl->a[1].fPhysMask = X86_PML4E_PG_MASK;
751 pPgLvl->a[1].fPtrShift = X86_PDPT_SHIFT;
752 pPgLvl->a[1].fPtrMask = X86_PDPT_MASK_AMD64;
753 pPgLvl->a[1].fAndMask = X86_PDPE_P | X86_PDPE_RW /** @todo check for X86_PDPT_PS support. */;
754 pPgLvl->a[1].fResMask = X86_PDPE_P | X86_PDPE_RW;
755
756 pPgLvl->a[2].fPhysMask = X86_PDPE_PG_MASK;
757 pPgLvl->a[2].fPtrShift = X86_PD_PAE_SHIFT;
758 pPgLvl->a[2].fPtrMask = X86_PD_PAE_MASK;
759 pPgLvl->a[2].fAndMask = X86_PDE_P | X86_PDE_RW | (cr4 & X86_CR4_PSE ? X86_PDE_PS : 0);
760 pPgLvl->a[2].fResMask = X86_PDE_P | X86_PDE_RW;
761
762 pPgLvl->a[3].fPhysMask = X86_PDE_PAE_PG_MASK;
763 pPgLvl->a[3].fPtrShift = X86_PT_PAE_SHIFT;
764 pPgLvl->a[3].fPtrMask = X86_PT_PAE_MASK;
765 pPgLvl->a[3].fAndMask = X86_PTE_P | X86_PTE_RW;
766 pPgLvl->a[3].fResMask = X86_PTE_P | X86_PTE_RW;
767 break;
768
769 default:
770 AssertFailed();
771 pPgLvl->cLevels = 0;
772 break;
773 }
774
775 for (uint32_t i = 0; i < 4; i++) /* ASSUMING array size. */
776 {
777 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
778 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
779 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
780 pPgLvl->a[i].u.pv = NULL;
781 }
782}
783
784
785/**
786 * Maps a PTE.
787 *
788 * This will update the segment structure when new PTs are mapped.
789 *
790 * It also assumes that we (for paranoid reasons) wish to establish a mapping
791 * chain from CR3 to the PT that all corresponds to the processor we're
792 * currently running on, and go about this by running with interrupts disabled
793 * and restarting from CR3 for every change.
794 *
795 * @returns VBox status code, VINF_TRY_AGAIN if we changed any mappings and had
796 * to re-enable interrupts.
797 * @param pThis The dynamic mapping cache instance.
798 * @param pPgLvl The paging level structure.
799 * @param pvPage The page.
800 * @param pSeg The segment.
801 * @param cMaxPTs The max number of PTs expected in the segment.
802 * @param ppvPTE Where to store the PTE address.
803 */
804static int pgmR0DynMapPagingArrayMapPte(PPGMRZDYNMAP pThis, PPGMR0DYNMAPPGLVL pPgLvl, void *pvPage,
805 PPGMR0DYNMAPSEG pSeg, uint32_t cMaxPTs, void **ppvPTE)
806{
807 Assert(!(ASMGetFlags() & X86_EFL_IF));
808 void *pvEntry = NULL;
809 X86PGPAEUINT uEntry = ASMGetCR3();
810 for (uint32_t i = 0; i < pPgLvl->cLevels; i++)
811 {
812 RTHCPHYS HCPhys = uEntry & pPgLvl->a[i].fPhysMask;
813 if (pPgLvl->a[i].HCPhys != HCPhys)
814 {
815 /*
816 * Need to remap this level.
817 * The final level, the PT, will not be freed since that is what it's all about.
818 */
819 ASMIntEnable();
820 if (i + 1 == pPgLvl->cLevels)
821 AssertReturn(pSeg->cPTs < cMaxPTs, VERR_PGM_DYNMAP_IPE);
822 else
823 {
824 int rc2 = RTR0MemObjFree(pPgLvl->a[i].hMemObj, true /* fFreeMappings */); AssertRC(rc2);
825 pPgLvl->a[i].hMemObj = pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
826 }
827
828 int rc = RTR0MemObjEnterPhys(&pPgLvl->a[i].hMemObj, HCPhys, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
829 if (RT_SUCCESS(rc))
830 {
831 rc = RTR0MemObjMapKernel(&pPgLvl->a[i].hMapObj, pPgLvl->a[i].hMemObj,
832 (void *)-1 /* pvFixed */, 0 /* cbAlignment */,
833 RTMEM_PROT_WRITE | RTMEM_PROT_READ);
834 if (RT_SUCCESS(rc))
835 {
836 pPgLvl->a[i].u.pv = RTR0MemObjAddress(pPgLvl->a[i].hMapObj);
837 AssertMsg(((uintptr_t)pPgLvl->a[i].u.pv & ~(uintptr_t)PAGE_OFFSET_MASK), ("%p\n", pPgLvl->a[i].u.pv));
838 pPgLvl->a[i].HCPhys = HCPhys;
839 if (i + 1 == pPgLvl->cLevels)
840 pSeg->ahMemObjPTs[pSeg->cPTs++] = pPgLvl->a[i].hMemObj;
841 ASMIntDisable();
842 return VINF_TRY_AGAIN;
843 }
844
845 pPgLvl->a[i].hMapObj = NIL_RTR0MEMOBJ;
846 }
847 else
848 pPgLvl->a[i].hMemObj = NIL_RTR0MEMOBJ;
849 pPgLvl->a[i].HCPhys = NIL_RTHCPHYS;
850 return rc;
851 }
852
853 /*
854 * The next level.
855 */
856 uint32_t iEntry = ((uint64_t)(uintptr_t)pvPage >> pPgLvl->a[i].fPtrShift) & pPgLvl->a[i].fPtrMask;
857 if (pThis->fLegacyMode)
858 {
859 pvEntry = &pPgLvl->a[i].u.paLegacy[iEntry];
860 uEntry = pPgLvl->a[i].u.paLegacy[iEntry];
861 }
862 else
863 {
864 pvEntry = &pPgLvl->a[i].u.paPae[iEntry];
865 uEntry = pPgLvl->a[i].u.paPae[iEntry];
866 }
867
868 if ((uEntry & pPgLvl->a[i].fAndMask) != pPgLvl->a[i].fResMask)
869 {
870 LogRel(("PGMR0DynMap: internal error - iPgLvl=%u cLevels=%u uEntry=%#llx fAnd=%#llx fRes=%#llx got=%#llx\n"
871 "PGMR0DynMap: pv=%p pvPage=%p iEntry=%#x fLegacyMode=%RTbool\n",
872 i, pPgLvl->cLevels, uEntry, pPgLvl->a[i].fAndMask, pPgLvl->a[i].fResMask, uEntry & pPgLvl->a[i].fAndMask,
873 pPgLvl->a[i].u.pv, pvPage, iEntry, pThis->fLegacyMode));
874 return VERR_PGM_DYNMAP_IPE;
875 }
876 /*Log(("#%d: iEntry=%4d uEntry=%#llx pvEntry=%p HCPhys=%RHp \n", i, iEntry, uEntry, pvEntry, pPgLvl->a[i].HCPhys));*/
877 }
878
879 /* made it thru without needing to remap anything. */
880 *ppvPTE = pvEntry;
881 return VINF_SUCCESS;
882}
883
884
885/**
886 * Sets up a guard page.
887 *
888 * @param pThis The dynamic mapping cache instance.
889 * @param pPage The page.
890 */
891DECLINLINE(void) pgmR0DynMapSetupGuardPage(PPGMRZDYNMAP pThis, PPGMRZDYNMAPENTRY pPage)
892{
893 memset(pPage->pvPage, 0xfd, PAGE_SIZE);
894 pPage->cRefs = PGMR0DYNMAP_GUARD_PAGE_REF_COUNT;
895 pPage->HCPhys = PGMR0DYNMAP_GUARD_PAGE_HCPHYS;
896#ifdef PGMR0DYNMAP_GUARD_NP
897 ASMAtomicBitClear(pPage->uPte.pv, X86_PTE_BIT_P);
898#else
899 if (pThis->fLegacyMode)
900 ASMAtomicWriteU32(&pPage->uPte.pLegacy->u, PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE);
901 else
902 ASMAtomicWriteU64(&pPage->uPte.pPae->u, PGMR0DYNMAP_GUARD_PAGE_PAE_PTE);
903#endif
904 pThis->cGuardPages++;
905}
906
907
908/**
909 * Adds a new segment of the specified size.
910 *
911 * @returns VBox status code.
912 * @param pThis The dynamic mapping cache instance.
913 * @param cPages The size of the new segment, give as a page count.
914 */
915static int pgmR0DynMapAddSeg(PPGMRZDYNMAP pThis, uint32_t cPages)
916{
917 int rc2;
918 AssertReturn(ASMGetFlags() & X86_EFL_IF, VERR_PREEMPT_DISABLED);
919
920 /*
921 * Do the array reallocations first.
922 * (The pages array has to be replaced behind the spinlock of course.)
923 */
924 void *pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * (pThis->cPages + cPages));
925 if (!pvSavedPTEs)
926 return VERR_NO_MEMORY;
927 pThis->pvSavedPTEs = pvSavedPTEs;
928
929 void *pvPages = RTMemAllocZ(sizeof(pThis->paPages[0]) * (pThis->cPages + cPages));
930 if (!pvPages)
931 {
932 pvSavedPTEs = RTMemRealloc(pThis->pvSavedPTEs, (pThis->fLegacyMode ? sizeof(X86PGUINT) : sizeof(X86PGPAEUINT)) * pThis->cPages);
933 if (pvSavedPTEs)
934 pThis->pvSavedPTEs = pvSavedPTEs;
935 return VERR_NO_MEMORY;
936 }
937
938 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
939
940 memcpy(pvPages, pThis->paPages, sizeof(pThis->paPages[0]) * pThis->cPages);
941 void *pvToFree = pThis->paPages;
942 pThis->paPages = (PPGMRZDYNMAPENTRY)pvPages;
943
944 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
945 RTMemFree(pvToFree);
946
947 /*
948 * Allocate the segment structure and pages of memory, then touch all the pages (paranoia).
949 */
950 uint32_t cMaxPTs = cPages / (pThis->fLegacyMode ? X86_PG_ENTRIES : X86_PG_PAE_ENTRIES) + 2;
951 PPGMR0DYNMAPSEG pSeg = (PPGMR0DYNMAPSEG)RTMemAllocZ(RT_UOFFSETOF_DYN(PGMR0DYNMAPSEG, ahMemObjPTs[cMaxPTs]));
952 if (!pSeg)
953 return VERR_NO_MEMORY;
954 pSeg->pNext = NULL;
955 pSeg->cPages = cPages;
956 pSeg->iPage = pThis->cPages;
957 pSeg->cPTs = 0;
958 int rc = RTR0MemObjAllocPage(&pSeg->hMemObj, cPages << PAGE_SHIFT, false);
959 if (RT_SUCCESS(rc))
960 {
961 uint8_t *pbPage = (uint8_t *)RTR0MemObjAddress(pSeg->hMemObj);
962 AssertMsg(VALID_PTR(pbPage) && !((uintptr_t)pbPage & PAGE_OFFSET_MASK), ("%p\n", pbPage));
963 memset(pbPage, 0xfe, cPages << PAGE_SHIFT);
964
965 /*
966 * Walk thru the pages and set them up with a mapping of their PTE and everything.
967 */
968 ASMIntDisable();
969 PGMR0DYNMAPPGLVL PgLvl;
970 pgmR0DynMapPagingArrayInit(pThis, &PgLvl);
971 uint32_t const iEndPage = pSeg->iPage + cPages;
972 for (uint32_t iPage = pSeg->iPage;
973 iPage < iEndPage;
974 iPage++, pbPage += PAGE_SIZE)
975 {
976 /* Initialize the page data. */
977 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
978 pThis->paPages[iPage].pvPage = pbPage;
979 pThis->paPages[iPage].cRefs = 0;
980 pThis->paPages[iPage].uPte.pPae = 0;
981#ifndef IN_RC
982 RTCpuSetFill(&pThis->paPages[iPage].PendingSet);
983#endif
984
985 /* Map its page table, retry until we've got a clean run (paranoia). */
986 do
987 rc = pgmR0DynMapPagingArrayMapPte(pThis, &PgLvl, pbPage, pSeg, cMaxPTs,
988 &pThis->paPages[iPage].uPte.pv);
989 while (rc == VINF_TRY_AGAIN);
990 if (RT_FAILURE(rc))
991 break;
992
993 /* Save the PTE. */
994 if (pThis->fLegacyMode)
995 ((PX86PGUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pLegacy->u;
996 else
997 ((PX86PGPAEUINT)pThis->pvSavedPTEs)[iPage] = pThis->paPages[iPage].uPte.pPae->u;
998
999#ifdef VBOX_STRICT
1000 /* Check that we've got the right entry. */
1001 RTHCPHYS HCPhysPage = RTR0MemObjGetPagePhysAddr(pSeg->hMemObj, iPage - pSeg->iPage);
1002 RTHCPHYS HCPhysPte = pThis->fLegacyMode
1003 ? pThis->paPages[iPage].uPte.pLegacy->u & X86_PTE_PG_MASK
1004 : pThis->paPages[iPage].uPte.pPae->u & X86_PTE_PAE_PG_MASK;
1005 if (HCPhysPage != HCPhysPte)
1006 {
1007 LogRel(("pgmR0DynMapAddSeg: internal error - page #%u HCPhysPage=%RHp HCPhysPte=%RHp pbPage=%p pvPte=%p\n",
1008 iPage - pSeg->iPage, HCPhysPage, HCPhysPte, pbPage, pThis->paPages[iPage].uPte.pv));
1009 rc = VERR_PGM_DYNMAP_IPE;
1010 break;
1011 }
1012#endif
1013 } /* for each page */
1014 ASMIntEnable();
1015
1016 /* cleanup non-PT mappings */
1017 for (uint32_t i = 0; i < PgLvl.cLevels - 1; i++)
1018 RTR0MemObjFree(PgLvl.a[i].hMemObj, true /* fFreeMappings */);
1019
1020 if (RT_SUCCESS(rc))
1021 {
1022#if PGMR0DYNMAP_GUARD_PAGES > 0
1023 /*
1024 * Setup guard pages.
1025 * (Note: TLBs will be shot down later on.)
1026 */
1027 uint32_t iPage = pSeg->iPage;
1028 while (iPage < iEndPage)
1029 {
1030 for (uint32_t iGPg = 0; iGPg < PGMR0DYNMAP_GUARD_PAGES && iPage < iEndPage; iGPg++, iPage++)
1031 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
1032 iPage++; /* the guarded page */
1033 }
1034
1035 /* Make sure the very last page is a guard page too. */
1036 iPage = iEndPage - 1;
1037 if (pThis->paPages[iPage].cRefs != PGMR0DYNMAP_GUARD_PAGE_REF_COUNT)
1038 pgmR0DynMapSetupGuardPage(pThis, &pThis->paPages[iPage]);
1039#endif /* PGMR0DYNMAP_GUARD_PAGES > 0 */
1040
1041 /*
1042 * Commit it by adding the segment to the list and updating the page count.
1043 */
1044 pSeg->pNext = pThis->pSegHead;
1045 pThis->pSegHead = pSeg;
1046 pThis->cPages += cPages;
1047 return VINF_SUCCESS;
1048 }
1049
1050 /*
1051 * Bail out.
1052 */
1053 while (pSeg->cPTs-- > 0)
1054 {
1055 rc2 = RTR0MemObjFree(pSeg->ahMemObjPTs[pSeg->cPTs], true /* fFreeMappings */);
1056 AssertRC(rc2);
1057 pSeg->ahMemObjPTs[pSeg->cPTs] = NIL_RTR0MEMOBJ;
1058 }
1059
1060 rc2 = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */);
1061 AssertRC(rc2);
1062 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1063 }
1064 else if (rc == VERR_NO_PAGE_MEMORY || rc == VERR_NO_PHYS_MEMORY)
1065 rc = VERR_NO_MEMORY;
1066 RTMemFree(pSeg);
1067
1068 /* Don't bother resizing the arrays, but free them if we're the only user. */
1069 if (!pThis->cPages)
1070 {
1071 RTMemFree(pThis->paPages);
1072 pThis->paPages = NULL;
1073 RTMemFree(pThis->pvSavedPTEs);
1074 pThis->pvSavedPTEs = NULL;
1075 }
1076 return rc;
1077}
1078
1079
1080/**
1081 * Called by PGMR0DynMapInitVM under the init lock.
1082 *
1083 * @returns VBox status code.
1084 * @param pThis The dynamic mapping cache instance.
1085 */
1086static int pgmR0DynMapSetup(PPGMRZDYNMAP pThis)
1087{
1088 /*
1089 * Calc the size and add a segment of that size.
1090 */
1091 uint32_t cMinPages;
1092 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1093 AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
1094 int rc = pgmR0DynMapAddSeg(pThis, cPages);
1095 if (rc == VERR_NO_MEMORY)
1096 {
1097 /*
1098 * Try adding smaller segments.
1099 */
1100 do
1101 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1102 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1103 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1104 rc = VINF_SUCCESS;
1105 if (rc == VERR_NO_MEMORY)
1106 {
1107 if (pThis->cPages)
1108 pgmR0DynMapTearDown(pThis);
1109 rc = VERR_PGM_DYNMAP_SETUP_ERROR;
1110 }
1111 }
1112 Assert(ASMGetFlags() & X86_EFL_IF);
1113
1114#if PGMR0DYNMAP_GUARD_PAGES > 0
1115 /* paranoia */
1116 if (RT_SUCCESS(rc))
1117 pgmR0DynMapTlbShootDown(pThis);
1118#endif
1119 return rc;
1120}
1121
1122
1123/**
1124 * Called by PGMR0DynMapInitVM under the init lock.
1125 *
1126 * @returns VBox status code.
1127 * @param pThis The dynamic mapping cache instance.
1128 */
1129static int pgmR0DynMapExpand(PPGMRZDYNMAP pThis)
1130{
1131 /*
1132 * Calc the new target size and add a segment of the appropriate size.
1133 */
1134 uint32_t cMinPages;
1135 uint32_t cPages = pgmR0DynMapCalcNewSize(pThis, &cMinPages);
1136 AssertReturn(cPages, VERR_PGM_DYNMAP_IPE);
1137 if (pThis->cPages >= cPages)
1138 return VINF_SUCCESS;
1139
1140 uint32_t cAdd = cPages - pThis->cPages;
1141 int rc = pgmR0DynMapAddSeg(pThis, cAdd);
1142 if (rc == VERR_NO_MEMORY)
1143 {
1144 /*
1145 * Try adding smaller segments.
1146 */
1147 do
1148 rc = pgmR0DynMapAddSeg(pThis, PGMR0DYNMAP_SMALL_SEG_PAGES);
1149 while (RT_SUCCESS(rc) && pThis->cPages < cPages);
1150 if (rc == VERR_NO_MEMORY && pThis->cPages >= cMinPages)
1151 rc = VINF_SUCCESS;
1152 if (rc == VERR_NO_MEMORY)
1153 rc = VERR_PGM_DYNMAP_EXPAND_ERROR;
1154 }
1155 Assert(ASMGetFlags() & X86_EFL_IF);
1156
1157#if PGMR0DYNMAP_GUARD_PAGES > 0
1158 /* paranoia */
1159 if (RT_SUCCESS(rc))
1160 pgmR0DynMapTlbShootDown(pThis);
1161#endif
1162 return rc;
1163}
1164
1165
1166/**
1167 * Called by PGMR0DynMapTermVM under the init lock.
1168 *
1169 * @returns VBox status code.
1170 * @param pThis The dynamic mapping cache instance.
1171 */
1172static void pgmR0DynMapTearDown(PPGMRZDYNMAP pThis)
1173{
1174 /*
1175 * Restore the original page table entries
1176 */
1177 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1178 uint32_t iPage = pThis->cPages;
1179 if (pThis->fLegacyMode)
1180 {
1181 X86PGUINT const *paSavedPTEs = (X86PGUINT const *)pThis->pvSavedPTEs;
1182 while (iPage-- > 0)
1183 {
1184 X86PGUINT uOld = paPages[iPage].uPte.pLegacy->u;
1185 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1186 X86PGUINT uNew = paSavedPTEs[iPage];
1187 while (!ASMAtomicCmpXchgExU32(&paPages[iPage].uPte.pLegacy->u, uNew, uOld, &uOld))
1188 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1189 Assert(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage]);
1190 }
1191 }
1192 else
1193 {
1194 X86PGPAEUINT const *paSavedPTEs = (X86PGPAEUINT const *)pThis->pvSavedPTEs;
1195 while (iPage-- > 0)
1196 {
1197 X86PGPAEUINT uOld = paPages[iPage].uPte.pPae->u;
1198 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1199 X86PGPAEUINT uNew = paSavedPTEs[iPage];
1200 while (!ASMAtomicCmpXchgExU64(&paPages[iPage].uPte.pPae->u, uNew, uOld, &uOld))
1201 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1202 Assert(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage]);
1203 }
1204 }
1205
1206 /*
1207 * Shoot down the TLBs on all CPUs before freeing them.
1208 */
1209 pgmR0DynMapTlbShootDown(pThis);
1210
1211 /*
1212 * Free the segments.
1213 */
1214 while (pThis->pSegHead)
1215 {
1216 int rc;
1217 PPGMR0DYNMAPSEG pSeg = pThis->pSegHead;
1218 pThis->pSegHead = pSeg->pNext;
1219
1220 uint32_t iPT = pSeg->cPTs;
1221 while (iPT-- > 0)
1222 {
1223 rc = RTR0MemObjFree(pSeg->ahMemObjPTs[iPT], true /* fFreeMappings */); AssertRC(rc);
1224 pSeg->ahMemObjPTs[iPT] = NIL_RTR0MEMOBJ;
1225 }
1226 rc = RTR0MemObjFree(pSeg->hMemObj, true /* fFreeMappings */); AssertRC(rc);
1227 pSeg->hMemObj = NIL_RTR0MEMOBJ;
1228 pSeg->pNext = NULL;
1229 pSeg->iPage = UINT16_MAX;
1230 pSeg->cPages = 0;
1231 pSeg->cPTs = 0;
1232 RTMemFree(pSeg);
1233 }
1234
1235 /*
1236 * Free the arrays and restore the initial state.
1237 * The cLoadMax value is left behind for the next setup.
1238 */
1239 RTMemFree(pThis->paPages);
1240 pThis->paPages = NULL;
1241 RTMemFree(pThis->pvSavedPTEs);
1242 pThis->pvSavedPTEs = NULL;
1243 pThis->cPages = 0;
1244 pThis->cLoad = 0;
1245 pThis->cGuardPages = 0;
1246}
1247
1248#endif /* IN_RING0 */
1249#ifdef IN_RC
1250
1251/**
1252 * Initializes the dynamic mapping cache in raw-mode context.
1253 *
1254 * @returns VBox status code.
1255 * @param pVM The cross context VM structure.
1256 */
1257VMMRCDECL(int) PGMRCDynMapInit(PVM pVM)
1258{
1259 /*
1260 * Allocate and initialize the instance data and page array.
1261 */
1262 PPGMRZDYNMAP pThis;
1263 size_t const cPages = MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE;
1264 size_t const cb = RT_ALIGN_Z(sizeof(*pThis), 32)
1265 + sizeof(PGMRZDYNMAPENTRY) * cPages;
1266 int rc = MMHyperAlloc(pVM, cb, 32, MM_TAG_PGM, (void **)&pThis);
1267 if (RT_FAILURE(rc))
1268 return rc;
1269
1270 pThis->u32Magic = PGMRZDYNMAP_MAGIC;
1271 pThis->paPages = RT_ALIGN_PT(pThis + 1, 32, PPGMRZDYNMAPENTRY);
1272 pThis->cPages = cPages;
1273 pThis->cLoad = 0;
1274 pThis->cMaxLoad = 0;
1275 pThis->cGuardPages = 0;
1276 pThis->cUsers = 1;
1277
1278 for (size_t iPage = 0; iPage < cPages; iPage++)
1279 {
1280 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
1281 pThis->paPages[iPage].pvPage = pVM->pgm.s.pbDynPageMapBaseGC + iPage * PAGE_SIZE;
1282 pThis->paPages[iPage].cRefs = 0;
1283 pThis->paPages[iPage].uPte.pLegacy = &pVM->pgm.s.paDynPageMap32BitPTEsGC[iPage];
1284 pThis->paPages[iPage].uPte.pPae = (PX86PTEPAE)&pVM->pgm.s.paDynPageMapPaePTEsGC[iPage];
1285 }
1286
1287 pVM->pgm.s.pRCDynMap = pThis;
1288
1289 /*
1290 * Initialize the autosets the VM.
1291 */
1292 rc = pgmRZDynMapInitAutoSetsForVM(pVM);
1293 if (RT_FAILURE(rc))
1294 return rc;
1295
1296 return VINF_SUCCESS;
1297}
1298
1299#endif /* IN_RC */
1300
1301/**
1302 * Release references to a page, caller owns the spin lock.
1303 *
1304 * @param pThis The dynamic mapping cache instance.
1305 * @param iPage The page.
1306 * @param cRefs The number of references to release.
1307 */
1308DECLINLINE(void) pgmRZDynMapReleasePageLocked(PPGMRZDYNMAP pThis, uint32_t iPage, int32_t cRefs)
1309{
1310 cRefs = ASMAtomicSubS32(&pThis->paPages[iPage].cRefs, cRefs) - cRefs;
1311 AssertMsg(cRefs >= 0, ("%d\n", cRefs));
1312 if (!cRefs)
1313 {
1314 pThis->cLoad--;
1315#ifdef PGMRZDYNMAP_STRICT_RELEASE
1316 pThis->paPages[iPage].HCPhys = NIL_RTHCPHYS;
1317 ASMAtomicBitClear(pThis->paPages[iPage].uPte.pv, X86_PTE_BIT_P);
1318 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
1319#endif
1320 }
1321}
1322
1323
1324/**
1325 * Release references to a page, caller does not own the spin lock.
1326 *
1327 * @param pThis The dynamic mapping cache instance.
1328 * @param iPage The page.
1329 * @param cRefs The number of references to release.
1330 */
1331static void pgmRZDynMapReleasePage(PPGMRZDYNMAP pThis, uint32_t iPage, uint32_t cRefs)
1332{
1333 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1334 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
1335 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1336}
1337
1338
1339/**
1340 * pgmR0DynMapPage worker that deals with the tedious bits.
1341 *
1342 * @returns The page index on success, UINT32_MAX on failure.
1343 * @param pThis The dynamic mapping cache instance.
1344 * @param HCPhys The address of the page to be mapped.
1345 * @param iPage The page index pgmR0DynMapPage hashed HCPhys to.
1346 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1347 * For statistics.
1348 * @param pfNew Set to @c true if a new entry was made and @c false if
1349 * an old entry was found and reused.
1350 */
1351static uint32_t pgmR0DynMapPageSlow(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, uint32_t iPage, PVMCPU pVCpu, bool *pfNew)
1352{
1353 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlow); RT_NOREF_PV(pVCpu);
1354
1355 /*
1356 * Check if any of the first 3 pages are unreferenced since the caller
1357 * already has made sure they aren't matching.
1358 */
1359#ifdef VBOX_WITH_STATISTICS
1360 bool fLooped = false;
1361#endif
1362 uint32_t const cPages = pThis->cPages;
1363 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1364 uint32_t iFreePage;
1365 if (!paPages[iPage].cRefs)
1366 iFreePage = iPage;
1367 else if (!paPages[(iPage + 1) % cPages].cRefs)
1368 iFreePage = (iPage + 1) % cPages;
1369 else if (!paPages[(iPage + 2) % cPages].cRefs)
1370 iFreePage = (iPage + 2) % cPages;
1371 else
1372 {
1373 /*
1374 * Search for an unused or matching entry.
1375 */
1376 iFreePage = (iPage + 3) % cPages;
1377 for (;;)
1378 {
1379 if (paPages[iFreePage].HCPhys == HCPhys)
1380 {
1381 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopHits);
1382 *pfNew = false;
1383 return iFreePage;
1384 }
1385 if (!paPages[iFreePage].cRefs)
1386 break;
1387
1388 /* advance */
1389 iFreePage = (iFreePage + 1) % cPages;
1390 if (RT_UNLIKELY(iFreePage == iPage))
1391 return UINT32_MAX;
1392 }
1393 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageSlowLoopMisses);
1394#ifdef VBOX_WITH_STATISTICS
1395 fLooped = true;
1396#endif
1397 }
1398 Assert(iFreePage < cPages);
1399
1400#if 0 //def VBOX_WITH_STATISTICS
1401 /* Check for lost hits. */
1402 if (!fLooped)
1403 for (uint32_t iPage2 = (iPage + 3) % cPages; iPage2 != iPage; iPage2 = (iPage2 + 1) % cPages)
1404 if (paPages[iPage2].HCPhys == HCPhys)
1405 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZDynMapPageSlowLostHits);
1406#endif
1407
1408 /*
1409 * Setup the new entry.
1410 */
1411 *pfNew = true;
1412 /*Log6(("pgmR0DynMapPageSlow: old - %RHp %#x %#llx\n", paPages[iFreePage].HCPhys, paPages[iFreePage].cRefs, paPages[iFreePage].uPte.pPae->u));*/
1413 paPages[iFreePage].HCPhys = HCPhys;
1414#ifndef IN_RC
1415 RTCpuSetFill(&paPages[iFreePage].PendingSet);
1416
1417 if (pThis->fLegacyMode)
1418#endif
1419 {
1420 X86PGUINT uOld = paPages[iFreePage].uPte.pLegacy->u;
1421 X86PGUINT uOld2 = uOld; NOREF(uOld2);
1422 X86PGUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1423 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1424 | (HCPhys & X86_PTE_PG_MASK);
1425 while (!ASMAtomicCmpXchgExU32(&paPages[iFreePage].uPte.pLegacy->u, uNew, uOld, &uOld))
1426 AssertMsgFailed(("uOld=%#x uOld2=%#x uNew=%#x\n", uOld, uOld2, uNew));
1427 Assert(paPages[iFreePage].uPte.pLegacy->u == uNew);
1428 }
1429#ifndef IN_RC
1430 else
1431#endif
1432 {
1433 X86PGPAEUINT uOld = paPages[iFreePage].uPte.pPae->u;
1434 X86PGPAEUINT uOld2 = uOld; NOREF(uOld2);
1435 X86PGPAEUINT uNew = (uOld & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1436 | X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1437 | (HCPhys & X86_PTE_PAE_PG_MASK);
1438 while (!ASMAtomicCmpXchgExU64(&paPages[iFreePage].uPte.pPae->u, uNew, uOld, &uOld))
1439 AssertMsgFailed(("uOld=%#llx uOld2=%#llx uNew=%#llx\n", uOld, uOld2, uNew));
1440 Assert(paPages[iFreePage].uPte.pPae->u == uNew);
1441 /*Log6(("pgmR0DynMapPageSlow: #%x - %RHp %p %#llx\n", iFreePage, HCPhys, paPages[iFreePage].pvPage, uNew));*/
1442 }
1443 return iFreePage;
1444}
1445
1446
1447/**
1448 * Maps a page into the pool.
1449 *
1450 * @returns Page index on success, UINT32_MAX on failure.
1451 * @param pThis The dynamic mapping cache instance.
1452 * @param HCPhys The address of the page to be mapped.
1453 * @param iRealCpu The real cpu set index. (optimization)
1454 * @param pVCpu The cross context virtual CPU structure of the calling
1455 * EMT. For statistics.
1456 * @param ppvPage Where to the page address.
1457 */
1458DECLINLINE(uint32_t) pgmR0DynMapPage(PPGMRZDYNMAP pThis, RTHCPHYS HCPhys, int32_t iRealCpu, PVMCPU pVCpu, void **ppvPage)
1459{
1460 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1461 AssertMsg(!(HCPhys & PAGE_OFFSET_MASK), ("HCPhys=%RHp\n", HCPhys));
1462 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPage);
1463
1464 /*
1465 * Find an entry, if possible a matching one. The HCPhys address is hashed
1466 * down to a page index, collisions are handled by linear searching.
1467 * Optimized for a hit in the first 3 pages.
1468 *
1469 * Field easy hits here and defer the tedious searching and inserting
1470 * to pgmR0DynMapPageSlow().
1471 */
1472 bool fNew = false;
1473 uint32_t const cPages = pThis->cPages;
1474 uint32_t iPage = (HCPhys >> PAGE_SHIFT) % cPages;
1475 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1476 if (RT_LIKELY(paPages[iPage].HCPhys == HCPhys))
1477 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits0);
1478 else
1479 {
1480 uint32_t iPage2 = (iPage + 1) % cPages;
1481 if (RT_LIKELY(paPages[iPage2].HCPhys == HCPhys))
1482 {
1483 iPage = iPage2;
1484 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits1);
1485 }
1486 else
1487 {
1488 iPage2 = (iPage + 2) % cPages;
1489 if (paPages[iPage2].HCPhys == HCPhys)
1490 {
1491 iPage = iPage2;
1492 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageHits2);
1493 }
1494 else
1495 {
1496 iPage = pgmR0DynMapPageSlow(pThis, HCPhys, iPage, pVCpu, &fNew);
1497 if (RT_UNLIKELY(iPage == UINT32_MAX))
1498 {
1499 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1500 *ppvPage = NULL;
1501 return iPage;
1502 }
1503 }
1504 }
1505 }
1506
1507 /*
1508 * Reference it, update statistics and get the return address.
1509 */
1510 int32_t cRefs = ASMAtomicIncS32(&paPages[iPage].cRefs);
1511 if (cRefs == 1)
1512 {
1513 pThis->cLoad++;
1514 if (pThis->cLoad > pThis->cMaxLoad)
1515 pThis->cMaxLoad = pThis->cLoad;
1516 AssertMsg(pThis->cLoad <= pThis->cPages - pThis->cGuardPages, ("%d/%d\n", pThis->cLoad, pThis->cPages - pThis->cGuardPages));
1517 }
1518 else if (RT_UNLIKELY(cRefs <= 0))
1519 {
1520 ASMAtomicDecS32(&paPages[iPage].cRefs);
1521 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1522 *ppvPage = NULL;
1523 AssertLogRelMsgFailedReturn(("cRefs=%d iPage=%u HCPhys=%RHp\n", cRefs, iPage, HCPhys), UINT32_MAX);
1524 }
1525 void *pvPage = paPages[iPage].pvPage;
1526
1527#ifndef IN_RC
1528 /*
1529 * Invalidate the entry?
1530 */
1531 bool fInvalidateIt = RTCpuSetIsMemberByIndex(&paPages[iPage].PendingSet, iRealCpu);
1532 if (RT_UNLIKELY(fInvalidateIt))
1533 RTCpuSetDelByIndex(&paPages[iPage].PendingSet, iRealCpu);
1534#else
1535 NOREF(iRealCpu);
1536#endif
1537
1538 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1539
1540 /*
1541 * Do the actual invalidation outside the spinlock.
1542 */
1543#ifdef IN_RC
1544 if (RT_UNLIKELY(fNew))
1545#else
1546 if (RT_UNLIKELY(fInvalidateIt))
1547#endif
1548 {
1549 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapPageInvlPg);
1550 ASMInvalidatePage((uintptr_t)pvPage);
1551 }
1552
1553 *ppvPage = pvPage;
1554 return iPage;
1555}
1556
1557
1558/**
1559 * Assert the integrity of the pool.
1560 *
1561 * @returns VBox status code.
1562 */
1563static int pgmRZDynMapAssertIntegrity(PPGMRZDYNMAP pThis)
1564{
1565 /*
1566 * Basic pool stuff that doesn't require any lock, just assumes we're a user.
1567 */
1568 if (!pThis)
1569 return VINF_SUCCESS;
1570 AssertPtrReturn(pThis, VERR_INVALID_POINTER);
1571 AssertReturn(pThis->u32Magic == PGMRZDYNMAP_MAGIC, VERR_INVALID_MAGIC);
1572 if (!pThis->cUsers)
1573 return VERR_INVALID_PARAMETER;
1574
1575 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1576
1577#define CHECK_RET(expr, a) \
1578 do { \
1579 if (RT_UNLIKELY(!(expr))) \
1580 { \
1581 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis); \
1582 RTAssertMsg1Weak(#expr, __LINE__, __FILE__, __PRETTY_FUNCTION__); \
1583 RTAssertMsg2Weak a; \
1584 return VERR_PGM_DYNMAP_IPE; \
1585 } \
1586 } while (0)
1587
1588 /*
1589 * Check that the PTEs are correct.
1590 */
1591 uint32_t cGuard = 0;
1592 uint32_t cLoad = 0;
1593 PPGMRZDYNMAPENTRY paPages = pThis->paPages;
1594
1595#ifndef IN_RC
1596 if (pThis->fLegacyMode)
1597#endif
1598 {
1599#ifdef IN_RING0
1600 PCX86PGUINT paSavedPTEs = (PCX86PGUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1601#endif
1602 uint32_t iPage = pThis->cPages;
1603 while (iPage-- > 0)
1604 {
1605 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1606 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1607 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1608 {
1609#ifdef PGMR0DYNMAP_GUARD_NP
1610 CHECK_RET(paPages[iPage].uPte.pLegacy->u == (paSavedPTEs[iPage] & ~(X86PGUINT)X86_PTE_P),
1611 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1612#else
1613 CHECK_RET(paPages[iPage].uPte.pLegacy->u == PGMR0DYNMAP_GUARD_PAGE_LEGACY_PTE,
1614 ("#%u: %#x", iPage, paPages[iPage].uPte.pLegacy->u));
1615#endif
1616 cGuard++;
1617 }
1618 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1619 {
1620 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1621 X86PGUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1622#ifdef IN_RING0
1623 | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1624#endif
1625 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1626 CHECK_RET(paPages[iPage].uPte.pLegacy->u == uPte,
1627 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1628 if (paPages[iPage].cRefs)
1629 cLoad++;
1630 }
1631#if defined(IN_RING0) && !defined(PGMRZDYNMAP_STRICT_RELEASE)
1632 else
1633 CHECK_RET(paPages[iPage].uPte.pLegacy->u == paSavedPTEs[iPage],
1634 ("#%u: %#x %#x", iPage, paPages[iPage].uPte.pLegacy->u, paSavedPTEs[iPage]));
1635#endif
1636 }
1637 }
1638#ifndef IN_RC
1639 else
1640#endif
1641 {
1642#ifdef IN_RING0
1643 PCX86PGPAEUINT paSavedPTEs = (PCX86PGPAEUINT)pThis->pvSavedPTEs; NOREF(paSavedPTEs);
1644#endif
1645 uint32_t iPage = pThis->cPages;
1646 while (iPage-- > 0)
1647 {
1648 CHECK_RET(!((uintptr_t)paPages[iPage].pvPage & PAGE_OFFSET_MASK), ("#%u: %p\n", iPage, paPages[iPage].pvPage));
1649 if ( paPages[iPage].cRefs == PGMR0DYNMAP_GUARD_PAGE_REF_COUNT
1650 && paPages[iPage].HCPhys == PGMR0DYNMAP_GUARD_PAGE_HCPHYS)
1651 {
1652#ifdef PGMR0DYNMAP_GUARD_NP
1653 CHECK_RET(paPages[iPage].uPte.pPae->u == (paSavedPTEs[iPage] & ~(X86PGPAEUINT)X86_PTE_P),
1654 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1655#else
1656 CHECK_RET(paPages[iPage].uPte.pPae->u == PGMR0DYNMAP_GUARD_PAGE_PAE_PTE,
1657 ("#%u: %#llx", iPage, paPages[iPage].uPte.pPae->u));
1658#endif
1659 cGuard++;
1660 }
1661 else if (paPages[iPage].HCPhys != NIL_RTHCPHYS)
1662 {
1663 CHECK_RET(!(paPages[iPage].HCPhys & PAGE_OFFSET_MASK), ("#%u: %RHp\n", iPage, paPages[iPage].HCPhys));
1664 X86PGPAEUINT uPte = X86_PTE_P | X86_PTE_RW | X86_PTE_A | X86_PTE_D
1665#ifdef IN_RING0
1666 | (paSavedPTEs[iPage] & (X86_PTE_G | X86_PTE_PAT | X86_PTE_PCD | X86_PTE_PWT))
1667#endif
1668 | (paPages[iPage].HCPhys & X86_PTE_PAE_PG_MASK);
1669 CHECK_RET(paPages[iPage].uPte.pPae->u == uPte,
1670 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pLegacy->u, uPte));
1671 if (paPages[iPage].cRefs)
1672 cLoad++;
1673 }
1674#ifdef IN_RING0
1675 else
1676 CHECK_RET(paPages[iPage].uPte.pPae->u == paSavedPTEs[iPage],
1677 ("#%u: %#llx %#llx", iPage, paPages[iPage].uPte.pPae->u, paSavedPTEs[iPage]));
1678#endif
1679 }
1680 }
1681
1682 CHECK_RET(cLoad == pThis->cLoad, ("%u %u\n", cLoad, pThis->cLoad));
1683 CHECK_RET(cGuard == pThis->cGuardPages, ("%u %u\n", cGuard, pThis->cGuardPages));
1684
1685#undef CHECK_RET
1686 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1687 return VINF_SUCCESS;
1688}
1689
#ifdef IN_RING0
/**
 * Assert the integrity of the ring-0 pool (g_pPGMR0DynMap).
 *
 * @returns VBox status code from pgmRZDynMapAssertIntegrity.
 */
VMMR0DECL(int) PGMR0DynMapAssertIntegrity(void)
{
    PPGMRZDYNMAP const pThis = g_pPGMR0DynMap;
    return pgmRZDynMapAssertIntegrity(pThis);
}
#endif /* IN_RING0 */
1701
#ifdef IN_RC
/**
 * Assert the integrity of the raw-mode context pool of the given VM.
 *
 * @returns VBox status code from pgmRZDynMapAssertIntegrity.
 * @param   pVM         The cross context VM structure.
 */
VMMRCDECL(int) PGMRCDynMapAssertIntegrity(PVM pVM)
{
    PPGMRZDYNMAP const pThis = (PPGMRZDYNMAP)pVM->pgm.s.pRCDynMap;
    return pgmRZDynMapAssertIntegrity(pThis);
}
#endif /* IN_RC */
1713
1714
1715/**
1716 * As a final resort for a (somewhat) full auto set or full cache, try merge
1717 * duplicate entries and flush the ones we can.
1718 *
1719 * @param pSet The set.
1720 */
1721static void pgmDynMapOptimizeAutoSet(PPGMMAPSET pSet)
1722{
1723 LogFlow(("pgmDynMapOptimizeAutoSet\n"));
1724
1725 for (uint32_t i = 0 ; i < pSet->cEntries; i++)
1726 {
1727 /*
1728 * Try merge entries.
1729 */
1730 uint16_t const iPage = pSet->aEntries[i].iPage;
1731 uint32_t j = i + 1;
1732 while ( j < pSet->cEntries
1733 && ( pSet->iSubset == UINT32_MAX
1734 || pSet->iSubset < pSet->cEntries) )
1735 {
1736 if (pSet->aEntries[j].iPage != iPage)
1737 j++;
1738 else
1739 {
1740 uint32_t const cHardRefs = (uint32_t)pSet->aEntries[i].cRefs
1741 + (uint32_t)pSet->aEntries[j].cRefs;
1742 uint32_t cInlinedRefs = (uint32_t)pSet->aEntries[i].cInlinedRefs
1743 + (uint32_t)pSet->aEntries[j].cInlinedRefs;
1744 uint32_t cUnrefs = (uint32_t)pSet->aEntries[i].cUnrefs
1745 + (uint32_t)pSet->aEntries[j].cUnrefs;
1746 uint32_t cSub = RT_MIN(cUnrefs, cInlinedRefs);
1747 cInlinedRefs -= cSub;
1748 cUnrefs -= cSub;
1749
1750 if ( cHardRefs < UINT16_MAX
1751 && cInlinedRefs < UINT16_MAX
1752 && cUnrefs < UINT16_MAX)
1753 {
1754 /* merge j into i removing j. */
1755 Log2(("pgmDynMapOptimizeAutoSet: Merging #%u into #%u\n", j, i));
1756 pSet->aEntries[i].cRefs = cHardRefs;
1757 pSet->aEntries[i].cInlinedRefs = cInlinedRefs;
1758 pSet->aEntries[i].cUnrefs = cUnrefs;
1759 pSet->cEntries--;
1760 if (j < pSet->cEntries)
1761 {
1762 pSet->aEntries[j] = pSet->aEntries[pSet->cEntries];
1763 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
1764 }
1765 else
1766 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[j]);
1767 }
1768#if 0 /* too complicated, skip it. */
1769 else
1770 {
1771 /* migrate the max number of refs from j into i and quit the inner loop. */
1772 uint32_t cMigrate = UINT16_MAX - 1 - pSet->aEntries[i].cRefs;
1773 Assert(pSet->aEntries[j].cRefs > cMigrate);
1774 pSet->aEntries[j].cRefs -= cMigrate;
1775 pSet->aEntries[i].cRefs = UINT16_MAX - 1;
1776 break;
1777 }
1778#endif
1779 }
1780 }
1781
1782 /*
1783 * Try make use of the unused hinting (cUnrefs) to evict entries
1784 * from both the set as well as the mapping cache.
1785 */
1786
1787 uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[i].cRefs + pSet->aEntries[i].cInlinedRefs;
1788 Log2(("pgmDynMapOptimizeAutoSet: #%u/%u/%u pvPage=%p iPage=%u cRefs=%u cInlinedRefs=%u cUnrefs=%u cTotalRefs=%u\n",
1789 i,
1790 pSet->iSubset,
1791 pSet->cEntries,
1792 pSet->aEntries[i].pvPage,
1793 pSet->aEntries[i].iPage,
1794 pSet->aEntries[i].cRefs,
1795 pSet->aEntries[i].cInlinedRefs,
1796 pSet->aEntries[i].cUnrefs,
1797 cTotalRefs));
1798 Assert(cTotalRefs >= pSet->aEntries[i].cUnrefs);
1799
1800 if ( cTotalRefs == pSet->aEntries[i].cUnrefs
1801 && ( pSet->iSubset == UINT32_MAX
1802 || pSet->iSubset < pSet->cEntries)
1803 )
1804 {
1805 Log2(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
1806 //LogFlow(("pgmDynMapOptimizeAutoSet: Releasing iPage=%d/%p\n", pSet->aEntries[i].iPage, pSet->aEntries[i].pvPage));
1807 pgmRZDynMapReleasePage(PGMRZDYNMAP_SET_2_DYNMAP(pSet),
1808 pSet->aEntries[i].iPage,
1809 pSet->aEntries[i].cRefs);
1810 pSet->cEntries--;
1811 if (i < pSet->cEntries)
1812 {
1813 pSet->aEntries[i] = pSet->aEntries[pSet->cEntries];
1814 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[pSet->cEntries]);
1815 }
1816
1817 i--;
1818 }
1819 }
1820}
1821
1822
1823
1824
1825/**
1826 * Signals the start of a new set of mappings.
1827 *
1828 * Mostly for strictness. PGMDynMapHCPage won't work unless this
1829 * API is called.
1830 *
1831 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1832 */
1833VMMDECL(void) PGMRZDynMapStartAutoSet(PVMCPU pVCpu)
1834{
1835 LogFlow(("PGMRZDynMapStartAutoSet:\n"));
1836 Assert(pVCpu->pgm.s.AutoSet.cEntries == PGMMAPSET_CLOSED);
1837 Assert(pVCpu->pgm.s.AutoSet.iSubset == UINT32_MAX);
1838 pVCpu->pgm.s.AutoSet.cEntries = 0;
1839 pVCpu->pgm.s.AutoSet.iCpu = PGMRZDYNMAP_CUR_CPU();
1840}
1841
1842
#ifdef IN_RING0
/**
 * Starts or migrates the autoset of a virtual CPU.
 *
 * A closed set is started, an open one is migrated.  Per the original
 * comment this is used by HMR0Enter: after a longjmp out of the HM execution
 * loop with the set open it gets migrated on re-entry, while under normal
 * circumstances it is started so VMXR0LoadGuestState can access guest memory.
 *
 * @returns @c true if started, @c false if migrated.
 * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
 * @thread  EMT
 */
VMMR0DECL(bool) PGMR0DynMapStartOrMigrateAutoSet(PVMCPU pVCpu)
{
    if (pVCpu->pgm.s.AutoSet.cEntries != PGMMAPSET_CLOSED)
    {
        /* Already open: move it to the current CPU. */
        PGMR0DynMapMigrateAutoSet(pVCpu);
        return false;
    }

    PGMRZDynMapStartAutoSet(pVCpu);
    return true;
}
#endif /* IN_RING0 */
1866
1867
1868/**
1869 * Checks if the set has high load.
1870 *
1871 * @returns true on high load, otherwise false.
1872 * @param pSet The set.
1873 */
1874DECLINLINE(bool) pgmRZDynMapHasHighLoad(PPGMMAPSET pSet)
1875{
1876#ifdef IN_RC
1877 if (pSet->cEntries < MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE / 2)
1878 return false;
1879#endif
1880
1881 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
1882 uint32_t cUnusedPages = pThis->cPages - pThis->cLoad;
1883#ifdef IN_RC
1884 return cUnusedPages <= MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE * 36 / 100;
1885#else
1886 return cUnusedPages <= PGMR0DYNMAP_PAGES_PER_CPU_MIN;
1887#endif
1888}
1889
1890
1891/**
1892 * Worker that performs the actual flushing of the set.
1893 *
1894 * @param pSet The set to flush.
1895 * @param cEntries The number of entries.
1896 */
1897DECLINLINE(void) pgmDynMapFlushAutoSetWorker(PPGMMAPSET pSet, uint32_t cEntries)
1898{
1899 /*
1900 * Release any pages it's referencing.
1901 */
1902 if ( cEntries != 0
1903 && RT_LIKELY(cEntries <= RT_ELEMENTS(pSet->aEntries)))
1904 {
1905 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
1906 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
1907
1908 uint32_t i = cEntries;
1909 while (i-- > 0)
1910 {
1911 uint32_t iPage = pSet->aEntries[i].iPage;
1912 Assert(iPage < pThis->cPages);
1913 int32_t cRefs = pSet->aEntries[i].cRefs;
1914 Assert(cRefs > 0);
1915 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
1916
1917 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
1918 }
1919
1920 Assert(pThis->cLoad <= pThis->cPages - pThis->cGuardPages);
1921 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
1922 }
1923}
1924
1925
1926/**
1927 * Releases the dynamic memory mappings made by PGMDynMapHCPage and associates
1928 * since the PGMDynMapStartAutoSet call.
1929 *
1930 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1931 */
1932VMMDECL(void) PGMRZDynMapReleaseAutoSet(PVMCPU pVCpu)
1933{
1934 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1935
1936 /*
1937 * Close and flush the set.
1938 */
1939 uint32_t cEntries = pSet->cEntries;
1940 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1941 pSet->cEntries = PGMMAPSET_CLOSED;
1942 pSet->iSubset = UINT32_MAX;
1943 pSet->iCpu = -1;
1944
1945#ifdef IN_RC
1946 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
1947 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
1948 else
1949#endif
1950 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1951 if (cEntries > RT_ELEMENTS(pSet->aEntries) * 50 / 100)
1952 Log(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
1953 else
1954 LogFlow(("PGMRZDynMapReleaseAutoSet: cEntries=%d\n", cEntries));
1955
1956 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1957}
1958
1959
1960/**
1961 * Flushes the set if it's above a certain threshold.
1962 *
1963 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1964 */
1965VMMDECL(void) PGMRZDynMapFlushAutoSet(PVMCPU pVCpu)
1966{
1967 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
1968 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
1969
1970 /*
1971 * Only flush it if it's 45% full.
1972 */
1973 uint32_t cEntries = pSet->cEntries;
1974 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
1975 Assert(pSet->iSubset == UINT32_MAX);
1976#ifdef IN_RC
1977 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
1978 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
1979 else
1980#endif
1981 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
1982 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 45 / 100
1983 || pgmRZDynMapHasHighLoad(pSet))
1984 {
1985 pSet->cEntries = 0;
1986 Log(("PGMDynMapFlushAutoSet: cEntries=%d\n", pSet->cEntries));
1987
1988 pgmDynMapFlushAutoSetWorker(pSet, cEntries);
1989 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
1990 }
1991}
1992
1993
1994#ifndef IN_RC
1995/**
1996 * Migrates the automatic mapping set of the current vCPU if it's active and
1997 * necessary.
1998 *
1999 * This is called when re-entering the hardware assisted execution mode after a
2000 * nip down to ring-3. We run the risk that the CPU might have change and we
2001 * will therefore make sure all the cache entries currently in the auto set will
2002 * be valid on the new CPU. If the cpu didn't change nothing will happen as all
2003 * the entries will have been flagged as invalidated.
2004 *
2005 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2006 * @thread EMT
2007 */
2008VMMR0DECL(void) PGMR0DynMapMigrateAutoSet(PVMCPU pVCpu)
2009{
2010 LogFlow(("PGMR0DynMapMigrateAutoSet\n"));
2011 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2012 int32_t iRealCpu = PGMRZDYNMAP_CUR_CPU();
2013 if (pSet->iCpu != iRealCpu)
2014 {
2015 uint32_t i = pSet->cEntries;
2016 if (i != PGMMAPSET_CLOSED)
2017 {
2018 AssertMsg(i <= RT_ELEMENTS(pSet->aEntries), ("%#x (%u)\n", i, i));
2019 if (i != 0 && RT_LIKELY(i <= RT_ELEMENTS(pSet->aEntries)))
2020 {
2021 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2022 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
2023
2024 while (i-- > 0)
2025 {
2026 Assert(pSet->aEntries[i].cRefs > 0);
2027 uint32_t iPage = pSet->aEntries[i].iPage;
2028 Assert(iPage < pThis->cPages);
2029 if (RTCpuSetIsMemberByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu))
2030 {
2031 RTCpuSetDelByIndex(&pThis->paPages[iPage].PendingSet, iRealCpu);
2032 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2033
2034 ASMInvalidatePage((uintptr_t)pThis->paPages[iPage].pvPage);
2035 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapMigrateInvlPg);
2036
2037 PGMRZDYNMAP_SPINLOCK_REACQUIRE(pThis);
2038 }
2039 }
2040
2041 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2042 }
2043 }
2044 pSet->iCpu = iRealCpu;
2045 }
2046}
2047#endif /* !IN_RC */
2048
2049
2050/**
2051 * Worker function that flushes the current subset.
2052 *
2053 * This is called when the set is popped or when the set
2054 * hash a too high load. As also pointed out elsewhere, the
2055 * whole subset thing is a hack for working around code that
2056 * accesses too many pages. Like PGMPool.
2057 *
2058 * @param pSet The set which subset to flush.
2059 */
2060static void pgmDynMapFlushSubset(PPGMMAPSET pSet)
2061{
2062 uint32_t iSubset = pSet->iSubset;
2063 uint32_t i = pSet->cEntries;
2064 Assert(i <= RT_ELEMENTS(pSet->aEntries));
2065 if ( i > iSubset
2066 && i <= RT_ELEMENTS(pSet->aEntries))
2067 {
2068 Log(("pgmDynMapFlushSubset: cEntries=%d iSubset=%d\n", pSet->cEntries, iSubset));
2069 pSet->cEntries = iSubset;
2070
2071 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2072 PGMRZDYNMAP_SPINLOCK_ACQUIRE(pThis);
2073
2074 while (i-- > iSubset)
2075 {
2076 uint32_t iPage = pSet->aEntries[i].iPage;
2077 Assert(iPage < pThis->cPages);
2078 int32_t cRefs = pSet->aEntries[i].cRefs;
2079 Assert(cRefs > 0);
2080 pgmRZDynMapReleasePageLocked(pThis, iPage, cRefs);
2081
2082 PGMRZDYNMAP_ZAP_ENTRY(&pSet->aEntries[i]);
2083 }
2084
2085 PGMRZDYNMAP_SPINLOCK_RELEASE(pThis);
2086 }
2087}
2088
2089
2090/**
2091 * Creates a subset.
2092 *
2093 * A subset is a hack to avoid having to rewrite code that touches a lot of
2094 * pages. It prevents the mapping set from being overflowed by automatically
2095 * flushing previous mappings when a certain threshold is reached.
2096 *
2097 * Pages mapped after calling this function are only valid until the next page
2098 * is mapped.
2099 *
2100 * @returns The index of the previous subset. Pass this to
2101 * PGMDynMapPopAutoSubset when popping it.
2102 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2103 */
2104VMMDECL(uint32_t) PGMRZDynMapPushAutoSubset(PVMCPU pVCpu)
2105{
2106 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2107 AssertReturn(pSet->cEntries != PGMMAPSET_CLOSED, UINT32_MAX);
2108 uint32_t iPrevSubset = pSet->iSubset;
2109 LogFlow(("PGMRZDynMapPushAutoSubset: pVCpu=%p iPrevSubset=%u\n", pVCpu, iPrevSubset));
2110
2111 /*
2112 * If it looks like we're approaching the max set size or mapping space
2113 * optimize the set to drop off unused pages.
2114 */
2115 if ( pSet->cEntries > RT_ELEMENTS(pSet->aEntries) * 60 / 100
2116 || pgmRZDynMapHasHighLoad(pSet))
2117 {
2118 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2119 pgmDynMapOptimizeAutoSet(pSet);
2120 }
2121
2122 pSet->iSubset = pSet->cEntries;
2123 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSubsets);
2124
2125 AssertMsg(iPrevSubset <= pSet->iSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%#x iSubset=%#x\n", iPrevSubset, pSet->iSubset));
2126 return iPrevSubset;
2127}
2128
2129
2130/**
2131 * Pops a subset created by a previous call to PGMDynMapPushAutoSubset.
2132 *
2133 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2134 * @param iPrevSubset What PGMDynMapPushAutoSubset returned.
2135 */
2136VMMDECL(void) PGMRZDynMapPopAutoSubset(PVMCPU pVCpu, uint32_t iPrevSubset)
2137{
2138 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2139 uint32_t cEntries = pSet->cEntries;
2140 LogFlow(("PGMRZDynMapPopAutoSubset: pVCpu=%p iPrevSubset=%u iSubset=%u cEntries=%u\n", pVCpu, iPrevSubset, pSet->iSubset, cEntries));
2141 AssertReturnVoid(cEntries != PGMMAPSET_CLOSED);
2142 AssertMsgReturnVoid(pSet->iSubset >= iPrevSubset || iPrevSubset == UINT32_MAX, ("iPrevSubset=%u iSubset=%u cEntries=%u\n", iPrevSubset, pSet->iSubset, cEntries));
2143#ifdef IN_RC
2144 if (RT_ELEMENTS(pSet->aEntries) > MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)
2145 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / (MM_HYPER_DYNAMIC_SIZE / PAGE_SIZE)) % 11]);
2146 else
2147#endif
2148 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
2149 if ( cEntries >= RT_ELEMENTS(pSet->aEntries) * 40 / 100
2150 && cEntries != pSet->iSubset)
2151 {
2152 pgmDynMapFlushSubset(pSet);
2153 Assert(pSet->cEntries >= iPrevSubset || iPrevSubset == UINT32_MAX);
2154 }
2155 pSet->iSubset = iPrevSubset;
2156}
2157
2158
2159/**
2160 * Indicates that the given page is unused and its mapping can be re-used.
2161 *
2162 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
2163 * @param pvHint The page that is now unused. This does not have to
2164 * point at the start of the page. NULL is ignored.
2165 */
2166#ifdef LOG_ENABLED
2167void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint, RT_SRC_POS_DECL)
2168#else
2169void pgmRZDynMapUnusedHint(PVMCPU pVCpu, void *pvHint)
2170#endif
2171{
2172 /*
2173 * Ignore NULL pointers and mask off the page offset bits.
2174 */
2175 if (pvHint == NULL)
2176 return;
2177 pvHint = (void *)((uintptr_t)pvHint & ~(uintptr_t)PAGE_OFFSET_MASK);
2178
2179 PPGMMAPSET pSet = &pVCpu->pgm.s.AutoSet;
2180 uint32_t iEntry = pSet->cEntries;
2181 AssertReturnVoid(iEntry > 0);
2182
2183 /*
2184 * Find the entry in the usual unrolled fashion.
2185 */
2186 /** @todo add a hint to the set which entry was used last since it's not
2187 * always the last entry? */
2188#define IS_MATCHING_ENTRY(pSet, iEntry, pvHint) \
2189 ( (pSet)->aEntries[(iEntry)].pvPage == (pvHint) \
2190 && (uint32_t)(pSet)->aEntries[(iEntry)].cRefs + (pSet)->aEntries[(iEntry)].cInlinedRefs \
2191 > (pSet)->aEntries[(iEntry)].cUnrefs )
2192 if ( iEntry >= 1 && IS_MATCHING_ENTRY(pSet, iEntry - 1, pvHint))
2193 iEntry = iEntry - 1;
2194 else if (iEntry >= 2 && IS_MATCHING_ENTRY(pSet, iEntry - 2, pvHint))
2195 iEntry = iEntry - 2;
2196 else if (iEntry >= 3 && IS_MATCHING_ENTRY(pSet, iEntry - 3, pvHint))
2197 iEntry = iEntry - 3;
2198 else if (iEntry >= 4 && IS_MATCHING_ENTRY(pSet, iEntry - 4, pvHint))
2199 iEntry = iEntry - 4;
2200 else if (iEntry >= 5 && IS_MATCHING_ENTRY(pSet, iEntry - 5, pvHint))
2201 iEntry = iEntry - 5;
2202 else if (iEntry >= 6 && IS_MATCHING_ENTRY(pSet, iEntry - 6, pvHint))
2203 iEntry = iEntry - 6;
2204 else if (iEntry >= 7 && IS_MATCHING_ENTRY(pSet, iEntry - 7, pvHint))
2205 iEntry = iEntry - 7;
2206 else
2207 {
2208 /*
2209 * Loop till we find it.
2210 */
2211 bool fFound = false;
2212 if (iEntry > 7)
2213 {
2214 iEntry -= 7;
2215 while (iEntry-- > 0)
2216 if (IS_MATCHING_ENTRY(pSet, iEntry, pvHint))
2217 {
2218 fFound = true;
2219 break;
2220 }
2221 }
2222 AssertMsgReturnVoid(fFound,
2223 ("pvHint=%p cEntries=%#x iSubset=%#x\n"
2224 "aEntries[0] = {%#x, %#x, %#x, %#x, %p}\n"
2225 "aEntries[1] = {%#x, %#x, %#x, %#x, %p}\n"
2226 "aEntries[2] = {%#x, %#x, %#x, %#x, %p}\n"
2227 "aEntries[3] = {%#x, %#x, %#x, %#x, %p}\n"
2228 "aEntries[4] = {%#x, %#x, %#x, %#x, %p}\n"
2229 "aEntries[5] = {%#x, %#x, %#x, %#x, %p}\n"
2230 ,
2231 pvHint, pSet->cEntries, pSet->iSubset,
2232 pSet->aEntries[0].iPage, pSet->aEntries[0].cRefs, pSet->aEntries[0].cInlinedRefs, pSet->aEntries[0].cUnrefs, pSet->aEntries[0].pvPage,
2233 pSet->aEntries[1].iPage, pSet->aEntries[1].cRefs, pSet->aEntries[1].cInlinedRefs, pSet->aEntries[1].cUnrefs, pSet->aEntries[1].pvPage,
2234 pSet->aEntries[2].iPage, pSet->aEntries[2].cRefs, pSet->aEntries[2].cInlinedRefs, pSet->aEntries[2].cUnrefs, pSet->aEntries[2].pvPage,
2235 pSet->aEntries[3].iPage, pSet->aEntries[3].cRefs, pSet->aEntries[3].cInlinedRefs, pSet->aEntries[3].cUnrefs, pSet->aEntries[3].pvPage,
2236 pSet->aEntries[4].iPage, pSet->aEntries[4].cRefs, pSet->aEntries[4].cInlinedRefs, pSet->aEntries[4].cUnrefs, pSet->aEntries[4].pvPage,
2237 pSet->aEntries[5].iPage, pSet->aEntries[5].cRefs, pSet->aEntries[5].cInlinedRefs, pSet->aEntries[5].cUnrefs, pSet->aEntries[5].pvPage));
2238 }
2239#undef IS_MATCHING_ENTRY
2240
2241 /*
2242 * Update it.
2243 */
2244 uint32_t const cTotalRefs = (uint32_t)pSet->aEntries[iEntry].cRefs + pSet->aEntries[iEntry].cInlinedRefs;
2245 uint32_t const cUnrefs = pSet->aEntries[iEntry].cUnrefs;
2246 LogFlow(("pgmRZDynMapUnusedHint: pvHint=%p #%u cRefs=%d cInlinedRefs=%d cUnrefs=%d (+1) cTotalRefs=%d %s(%d) %s\n",
2247 pvHint, iEntry, pSet->aEntries[iEntry].cRefs, pSet->aEntries[iEntry].cInlinedRefs, cUnrefs, cTotalRefs, pszFile, iLine, pszFunction));
2248 AssertReturnVoid(cTotalRefs > cUnrefs);
2249
2250 if (RT_LIKELY(cUnrefs < UINT16_MAX - 1))
2251 pSet->aEntries[iEntry].cUnrefs++;
2252 else if (pSet->aEntries[iEntry].cInlinedRefs)
2253 {
2254 uint32_t cSub = RT_MIN(pSet->aEntries[iEntry].cInlinedRefs, pSet->aEntries[iEntry].cUnrefs);
2255 pSet->aEntries[iEntry].cInlinedRefs -= cSub;
2256 pSet->aEntries[iEntry].cUnrefs -= cSub;
2257 pSet->aEntries[iEntry].cUnrefs++;
2258 }
2259 else
2260 Log(("pgmRZDynMapUnusedHint: pvHint=%p ignored because of overflow! %s(%d) %s\n", pvHint, pszFile, iLine, pszFunction));
2261
2262#ifdef PGMRZDYNMAP_STRICT_RELEASE
2263 /*
2264 * Optimize the set to trigger the unmapping and invalidation of the page.
2265 */
2266 if (cUnrefs + 1 == cTotalRefs)
2267 pgmDynMapOptimizeAutoSet(pSet);
2268#endif
2269}
2270
2271
2272/**
2273 * Common worker code for pgmRZDynMapHCPageInlined, pgmRZDynMapHCPageV2Inlined
2274 * and pgmR0DynMapGCPageOffInlined.
2275 *
2276 * @returns VINF_SUCCESS, bails out to ring-3 on failure.
2277 * @param pSet The set.
2278 * @param HCPhys The physical address of the page.
2279 * @param ppv Where to store the address of the mapping on success.
2280 *
2281 * @remarks This is a very hot path.
2282 */
2283int pgmRZDynMapHCPageCommon(PPGMMAPSET pSet, RTHCPHYS HCPhys, void **ppv RTLOG_COMMA_SRC_POS_DECL)
2284{
2285 AssertMsg(pSet->iCpu == PGMRZDYNMAP_CUR_CPU(), ("%d %d efl=%#x\n", pSet->iCpu, PGMRZDYNMAP_CUR_CPU(), ASMGetFlags()));
2286 PVMCPU pVCpu = PGMRZDYNMAP_SET_2_VMCPU(pSet);
2287 STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2288
2289 /*
2290 * Map it.
2291 */
2292 void *pvPage;
2293 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2294 uint32_t iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
2295 if (RT_UNLIKELY(iPage == UINT32_MAX))
2296 {
2297 /*
2298 * We're out of mapping space, optimize our set to try remedy the
2299 * situation. (Only works if there are unreference hints.)
2300 */
2301 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2302 pgmDynMapOptimizeAutoSet(pSet);
2303
2304 iPage = pgmR0DynMapPage(pThis, HCPhys, pSet->iCpu, pVCpu, &pvPage);
2305 if (RT_UNLIKELY(iPage == UINT32_MAX))
2306 {
2307 RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: cLoad=%u/%u cPages=%u cGuardPages=%u\n",
2308 pThis->cLoad, pThis->cMaxLoad, pThis->cPages, pThis->cGuardPages);
2309 if (!g_fPGMR0DynMapTestRunning)
2310 VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
2311 *ppv = NULL;
2312 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2313 return VERR_PGM_DYNMAP_FAILED;
2314 }
2315 }
2316
2317 /*
2318 * Add the page to the auto reference set.
2319 *
2320 * The typical usage pattern means that the same pages will be mapped
2321 * several times in the same set. We can catch most of these
2322 * remappings by looking a few pages back into the set. (The searching
2323 * and set optimizing path will hardly ever be used when doing this.)
2324 */
2325 AssertCompile(RT_ELEMENTS(pSet->aEntries) >= 8);
2326 int32_t i = pSet->cEntries;
2327 if (i-- < 5)
2328 {
2329 unsigned iEntry = pSet->cEntries++;
2330 pSet->aEntries[iEntry].cRefs = 1;
2331 pSet->aEntries[iEntry].cUnrefs = 0;
2332 pSet->aEntries[iEntry].cInlinedRefs = 0;
2333 pSet->aEntries[iEntry].iPage = iPage;
2334 pSet->aEntries[iEntry].pvPage = pvPage;
2335 pSet->aEntries[iEntry].HCPhys = HCPhys;
2336 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2337 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/0/0 iPage=%#x [a] %s(%d) %s\n",
2338 pSet, HCPhys, iEntry, iEntry + 1, pvPage, 1, iPage, pszFile, iLine, pszFunction));
2339 }
2340 /* Any of the last 5 pages? */
2341 else if ( pSet->aEntries[i - 0].iPage == iPage
2342 && pSet->aEntries[i - 0].cRefs < UINT16_MAX - 1)
2343 {
2344 pSet->aEntries[i - 0].cRefs++;
2345 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [0] %s(%d) %s\n", pSet, HCPhys, i - 0, pSet->cEntries, pvPage, pSet->aEntries[i - 0].cRefs, pSet->aEntries[i - 0].cInlinedRefs, pSet->aEntries[i - 0].cUnrefs, iPage, pszFile, iLine, pszFunction));
2346 }
2347 else if ( pSet->aEntries[i - 1].iPage == iPage
2348 && pSet->aEntries[i - 1].cRefs < UINT16_MAX - 1)
2349 {
2350 pSet->aEntries[i - 1].cRefs++;
2351 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [1] %s(%d) %s\n", pSet, HCPhys, i - 1, pSet->cEntries, pvPage, pSet->aEntries[i - 1].cRefs, pSet->aEntries[i - 1].cInlinedRefs, pSet->aEntries[i - 1].cUnrefs, iPage, pszFile, iLine, pszFunction));
2352 }
2353 else if ( pSet->aEntries[i - 2].iPage == iPage
2354 && pSet->aEntries[i - 2].cRefs < UINT16_MAX - 1)
2355 {
2356 pSet->aEntries[i - 2].cRefs++;
2357 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [2] %s(%d) %s\n", pSet, HCPhys, i - 2, pSet->cEntries, pvPage, pSet->aEntries[i - 2].cRefs, pSet->aEntries[i - 2].cInlinedRefs, pSet->aEntries[i - 2].cUnrefs, iPage, pszFile, iLine, pszFunction));
2358 }
2359 else if ( pSet->aEntries[i - 3].iPage == iPage
2360 && pSet->aEntries[i - 3].cRefs < UINT16_MAX - 1)
2361 {
2362 pSet->aEntries[i - 3].cRefs++;
2363 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 3, pSet->cEntries, pvPage, pSet->aEntries[i - 3].cRefs, pSet->aEntries[i - 3].cInlinedRefs, pSet->aEntries[i - 3].cUnrefs, iPage, pszFile, iLine, pszFunction));
2364 }
2365 else if ( pSet->aEntries[i - 4].iPage == iPage
2366 && pSet->aEntries[i - 4].cRefs < UINT16_MAX - 1)
2367 {
2368 pSet->aEntries[i - 4].cRefs++;
2369 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [4] %s(%d) %s\n", pSet, HCPhys, i - 4, pSet->cEntries, pvPage, pSet->aEntries[i - 4].cRefs, pSet->aEntries[i - 4].cInlinedRefs, pSet->aEntries[i - 4].cUnrefs, iPage, pszFile, iLine, pszFunction));
2370 }
2371 /* Don't bother searching unless we're above a 60% load. */
2372 else if (RT_LIKELY(i <= (int32_t)RT_ELEMENTS(pSet->aEntries) * 60 / 100))
2373 {
2374 unsigned iEntry = pSet->cEntries++;
2375 pSet->aEntries[iEntry].cRefs = 1;
2376 pSet->aEntries[iEntry].cUnrefs = 0;
2377 pSet->aEntries[iEntry].cInlinedRefs = 0;
2378 pSet->aEntries[iEntry].iPage = iPage;
2379 pSet->aEntries[iEntry].pvPage = pvPage;
2380 pSet->aEntries[iEntry].HCPhys = HCPhys;
2381 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2382 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [b] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
2383 }
2384 else
2385 {
2386 /* Search the rest of the set. */
2387 Assert(pSet->cEntries <= RT_ELEMENTS(pSet->aEntries));
2388 i -= 4;
2389 while (i-- > 0)
2390 if ( pSet->aEntries[i].iPage == iPage
2391 && pSet->aEntries[i].cRefs < UINT16_MAX - 1)
2392 {
2393 pSet->aEntries[i].cRefs++;
2394 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchHits);
2395 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=%u/%u/%u iPage=%#x [c] %s(%d) %s\n", pSet, HCPhys, i, pSet->cEntries, pvPage, pSet->aEntries[i].cRefs, pSet->aEntries[i].cInlinedRefs, pSet->aEntries[i].cUnrefs, iPage, pszFile, iLine, pszFunction));
2396 break;
2397 }
2398 if (i < 0)
2399 {
2400 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchMisses);
2401#if 0 /* this is very bogus */
2402 if (pSet->iSubset < pSet->cEntries)
2403 {
2404 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetSearchFlushes);
2405 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->aStatRZDynMapSetFilledPct[(pSet->cEntries * 10 / RT_ELEMENTS(pSet->aEntries)) % 11]);
2406 pgmDynMapFlushSubset(pSet);
2407 }
2408#endif
2409
2410 if (RT_UNLIKELY(pSet->cEntries >= RT_ELEMENTS(pSet->aEntries)))
2411 {
2412 STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapSetOptimize);
2413 pgmDynMapOptimizeAutoSet(pSet);
2414 }
2415
2416 if (RT_LIKELY(pSet->cEntries < RT_ELEMENTS(pSet->aEntries)))
2417 {
2418 unsigned iEntry = pSet->cEntries++;
2419 pSet->aEntries[iEntry].cRefs = 1;
2420 pSet->aEntries[iEntry].cUnrefs = 0;
2421 pSet->aEntries[iEntry].cInlinedRefs = 0;
2422 pSet->aEntries[iEntry].iPage = iPage;
2423 pSet->aEntries[iEntry].pvPage = pvPage;
2424 pSet->aEntries[iEntry].HCPhys = HCPhys;
2425 pSet->aiHashTable[PGMMAPSET_HASH(HCPhys)] = iEntry;
2426 LogFlow(("pgmRZDynMapHCPageCommon: pSet=%p HCPhys=%RHp #%u/%u/%p cRefs=1/0/0 iPage=%#x [d] %s(%d) %s\n", pSet, HCPhys, iEntry, pSet->cEntries, pvPage, iPage, pszFile, iLine, pszFunction));
2427 }
2428 else
2429 {
2430 /* We're screwed. */
2431 pgmRZDynMapReleasePage(pThis, iPage, 1);
2432
2433 RTAssertMsg2Weak("pgmRZDynMapHCPageCommon: set is full!\n");
2434 if (!g_fPGMR0DynMapTestRunning)
2435 VMMRZCallRing3NoCpu(PGMRZDYNMAP_SET_2_VM(pSet), VMMCALLRING3_VM_R0_ASSERTION, 0);
2436 *ppv = NULL;
2437 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2438 return VERR_PGM_DYNMAP_FULL_SET;
2439 }
2440 }
2441 }
2442
2443 *ppv = pvPage;
2444 STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZDynMapHCPage, a);
2445 return VINF_SUCCESS;
2446}
2447
2448
2449#if 0 /*def DEBUG*/
/**
 * For pgmR0DynMapTest3PerCpu.
 *
 * Carries the expectation to the per-CPU callback and collects the number of
 * CPUs on which the check failed.
 */
typedef struct PGMR0DYNMAPTEST
{
    /** The value each CPU is expected to read through pu32. */
    uint32_t            u32Expect;
    /** The address to read the value from. */
    uint32_t           *pu32;
    /** Number of mismatches; incremented atomically by the callback. */
    uint32_t volatile   cFailures;
} PGMR0DYNMAPTEST;
/** Pointer to a PGMR0DYNMAPTEST structure. */
typedef PGMR0DYNMAPTEST *PPGMR0DYNMAPTEST;
2458
2459/**
2460 * Checks that the content of the page is the same on all CPUs, i.e. that there
2461 * are no CPU specific PTs or similar nasty stuff involved.
2462 *
2463 * @param idCpu The current CPU.
2464 * @param pvUser1 Pointer a PGMR0DYNMAPTEST structure.
2465 * @param pvUser2 Unused, ignored.
2466 */
2467static DECLCALLBACK(void) pgmR0DynMapTest3PerCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
2468{
2469 PPGMR0DYNMAPTEST pTest = (PPGMR0DYNMAPTEST)pvUser1;
2470 ASMInvalidatePage(pTest->pu32);
2471 if (*pTest->pu32 != pTest->u32Expect)
2472 ASMAtomicIncU32(&pTest->cFailures);
2473 NOREF(pvUser2); NOREF(idCpu);
2474}
2475
2476
2477/**
2478 * Performs some basic tests in debug builds.
2479 */
2480static int pgmR0DynMapTest(PVM pVM)
2481{
2482 LogRel(("pgmR0DynMapTest: ****** START ******\n"));
2483 PPGMMAPSET pSet = &pVM->aCpus[0].pgm.s.AutoSet;
2484 PPGMRZDYNMAP pThis = PGMRZDYNMAP_SET_2_DYNMAP(pSet);
2485 uint32_t i;
2486
2487 /*
2488 * Assert internal integrity first.
2489 */
2490 LogRel(("Test #0\n"));
2491 int rc = PGMR0DynMapAssertIntegrity();
2492 if (RT_FAILURE(rc))
2493 return rc;
2494
2495 void *pvR0DynMapUsedSaved = pVM->pgm.s.pvR0DynMapUsed;
2496 pVM->pgm.s.pvR0DynMapUsed = pThis;
2497 g_fPGMR0DynMapTestRunning = true;
2498
2499 /*
2500 * Simple test, map CR3 twice and check that we're getting the
2501 * same mapping address back.
2502 */
2503 LogRel(("Test #1\n"));
2504 ASMIntDisable();
2505 PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
2506
2507 uint64_t cr3 = ASMGetCR3() & ~(uint64_t)PAGE_OFFSET_MASK;
2508 void *pv = (void *)(intptr_t)-1;
2509 void *pv2 = (void *)(intptr_t)-2;
2510 rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv RTLOG_COMMA_SRC_POS);
2511 int rc2 = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
2512 ASMIntEnable();
2513 if ( RT_SUCCESS(rc2)
2514 && RT_SUCCESS(rc)
2515 && pv == pv2)
2516 {
2517 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2518 rc = PGMR0DynMapAssertIntegrity();
2519
2520 /*
2521 * Check that the simple set overflow code works by filling it
2522 * with more CR3 mappings.
2523 */
2524 LogRel(("Test #2\n"));
2525 ASMIntDisable();
2526 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2527 for (i = 0 ; i < UINT16_MAX*2 - 1 && RT_SUCCESS(rc) && pv2 == pv; i++)
2528 {
2529 pv2 = (void *)(intptr_t)-4;
2530 rc = pgmRZDynMapHCPageCommon(pVM, cr3, &pv2 RTLOG_COMMA_SRC_POS);
2531 }
2532 ASMIntEnable();
2533 if (RT_FAILURE(rc) || pv != pv2)
2534 {
2535 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%p\n", __LINE__, rc, pv, pv2, i));
2536 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2537 }
2538 else if (pSet->cEntries != 5)
2539 {
2540 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries) / 2));
2541 rc = VERR_PGM_DYNMAP_IPE;
2542 }
2543 else if ( pSet->aEntries[4].cRefs != UINT16_MAX - 1
2544 || pSet->aEntries[3].cRefs != UINT16_MAX - 1
2545 || pSet->aEntries[2].cRefs != 1
2546 || pSet->aEntries[1].cRefs != 1
2547 || pSet->aEntries[0].cRefs != 1)
2548 {
2549 LogRel(("failed(%d): bad set dist: ", __LINE__));
2550 for (i = 0; i < pSet->cEntries; i++)
2551 LogRel(("[%d]=%d, ", i, pSet->aEntries[i].cRefs));
2552 LogRel(("\n"));
2553 rc = VERR_PGM_DYNMAP_IPE;
2554 }
2555 if (RT_SUCCESS(rc))
2556 rc = PGMR0DynMapAssertIntegrity();
2557 if (RT_SUCCESS(rc))
2558 {
2559 /*
2560 * Trigger an set optimization run (exactly).
2561 */
2562 LogRel(("Test #3\n"));
2563 ASMIntDisable();
2564 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2565 pv2 = NULL;
2566 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) - 5 && RT_SUCCESS(rc) && pv2 != pv; i++)
2567 {
2568 pv2 = (void *)(intptr_t)(-5 - i);
2569 rc = pgmRZDynMapHCPageCommon(pVM, cr3 + PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
2570 }
2571 ASMIntEnable();
2572 if (RT_FAILURE(rc) || pv == pv2)
2573 {
2574 LogRel(("failed(%d): rc=%Rrc; pv=%p pv2=%p i=%d\n", __LINE__, rc, pv, pv2, i));
2575 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2576 }
2577 else if (pSet->cEntries != RT_ELEMENTS(pSet->aEntries))
2578 {
2579 LogRel(("failed(%d): cEntries=%d expected %d\n", __LINE__, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2580 rc = VERR_PGM_DYNMAP_IPE;
2581 }
2582 LogRel(("Load=%u/%u/%u Set=%u/%u\n", pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2583 if (RT_SUCCESS(rc))
2584 rc = PGMR0DynMapAssertIntegrity();
2585 if (RT_SUCCESS(rc))
2586 {
2587 /*
2588 * Trigger an overflow error.
2589 */
2590 LogRel(("Test #4\n"));
2591 ASMIntDisable();
2592 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2593 for (i = 0 ; i < RT_ELEMENTS(pSet->aEntries) + 2; i++)
2594 {
2595 rc = pgmRZDynMapHCPageCommon(pVM, cr3 - PAGE_SIZE * (i + 5), &pv2 RTLOG_COMMA_SRC_POS);
2596 if (RT_SUCCESS(rc))
2597 rc = PGMR0DynMapAssertIntegrity();
2598 if (RT_FAILURE(rc))
2599 break;
2600 }
2601 ASMIntEnable();
2602 if (rc == VERR_PGM_DYNMAP_FULL_SET)
2603 {
2604 /* flush the set. */
2605 LogRel(("Test #5\n"));
2606 ASMIntDisable();
2607 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2608 PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
2609 PGMRZDynMapStartAutoSet(&pVM->aCpus[0]);
2610 ASMIntEnable();
2611
2612 rc = PGMR0DynMapAssertIntegrity();
2613 }
2614 else
2615 {
2616 LogRel(("failed(%d): rc=%Rrc, wanted %d ; pv2=%p Set=%u/%u; i=%d\n", __LINE__,
2617 rc, VERR_PGM_DYNMAP_FULL_SET, pv2, pSet->cEntries, RT_ELEMENTS(pSet->aEntries), i));
2618 if (RT_SUCCESS(rc)) rc = VERR_PGM_DYNMAP_IPE;
2619 }
2620 }
2621 }
2622 }
2623 else
2624 {
2625 LogRel(("failed(%d): rc=%Rrc rc2=%Rrc; pv=%p pv2=%p\n", __LINE__, rc, rc2, pv, pv2));
2626 if (RT_SUCCESS(rc))
2627 rc = rc2;
2628 }
2629
2630 /*
2631 * Check that everyone sees the same stuff.
2632 */
2633 if (RT_SUCCESS(rc))
2634 {
2635 LogRel(("Test #5\n"));
2636 ASMIntDisable();
2637 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2638 RTHCPHYS HCPhysPT = RTR0MemObjGetPagePhysAddr(pThis->pSegHead->ahMemObjPTs[0], 0);
2639 rc = pgmRZDynMapHCPageCommon(pVM, HCPhysPT, &pv RTLOG_COMMA_SRC_POS);
2640 if (RT_SUCCESS(rc))
2641 {
2642 PGMR0DYNMAPTEST Test;
2643 uint32_t *pu32Real = &pThis->paPages[pThis->pSegHead->iPage].uPte.pLegacy->u;
2644 Test.pu32 = (uint32_t *)((uintptr_t)pv | ((uintptr_t)pu32Real & PAGE_OFFSET_MASK));
2645 Test.u32Expect = *pu32Real;
2646 ASMAtomicWriteU32(&Test.cFailures, 0);
2647 ASMIntEnable();
2648
2649 rc = RTMpOnAll(pgmR0DynMapTest3PerCpu, &Test, NULL);
2650 if (RT_FAILURE(rc))
2651 LogRel(("failed(%d): RTMpOnAll rc=%Rrc\n", __LINE__, rc));
2652 else if (Test.cFailures)
2653 {
2654 LogRel(("failed(%d): cFailures=%d pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n", __LINE__,
2655 Test.cFailures, pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2656 rc = VERR_PGM_DYNMAP_IPE;
2657 }
2658 else
2659 LogRel(("pu32Real=%p pu32=%p u32Expect=%#x *pu32=%#x\n",
2660 pu32Real, Test.pu32, Test.u32Expect, *Test.pu32));
2661 }
2662 else
2663 {
2664 ASMIntEnable();
2665 LogRel(("failed(%d): rc=%Rrc\n", rc));
2666 }
2667 }
2668
2669 /*
2670 * Clean up.
2671 */
2672 LogRel(("Cleanup.\n"));
2673 ASMIntDisable();
2674 PGMR0DynMapMigrateAutoSet(&pVM->aCpus[0]);
2675 PGMRZDynMapFlushAutoSet(&pVM->aCpus[0]);
2676 PGMRZDynMapReleaseAutoSet(&pVM->aCpus[0]);
2677 ASMIntEnable();
2678
2679 if (RT_SUCCESS(rc))
2680 rc = PGMR0DynMapAssertIntegrity();
2681 else
2682 PGMR0DynMapAssertIntegrity();
2683
2684 g_fPGMR0DynMapTestRunning = false;
2685 LogRel(("Result: rc=%Rrc Load=%u/%u/%u Set=%#x/%u\n", rc,
2686 pThis->cLoad, pThis->cMaxLoad, pThis->cPages - pThis->cPages, pSet->cEntries, RT_ELEMENTS(pSet->aEntries)));
2687 pVM->pgm.s.pvR0DynMapUsed = pvR0DynMapUsedSaved;
2688 LogRel(("pgmR0DynMapTest: ****** END ******\n"));
2689 return rc;
2690}
2691#endif /* DEBUG */
2692
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use