VirtualBox
source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ r4917

Last change r4917 (2007-09-20, vboxsync): Don't trigger breakpoints in ring 0. (AssertRelease & co.)

/* $Id: PGMAllPool.cpp 4917 2007-09-20 10:06:48Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2007 innotek GmbH
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License as published by the Free Software Foundation,
 * in version 2 as it comes in the "COPYING" file of the VirtualBox OSE
 * distribution. VirtualBox OSE is distributed in the hope that it will
 * be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*******************************************************************************
*   Header Files                                                               *
*******************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#include <VBox/pgm.h>
#include <VBox/mm.h>
#include <VBox/em.h>
#include <VBox/cpum.h>
#ifdef IN_GC
# include <VBox/patm.h>
#endif
#include "PGMInternal.h"
#include <VBox/vm.h>
#include <VBox/disopcode.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>


/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
__BEGIN_DECLS
static void pgmPoolFlushAllInt(PPGMPOOL pPool);
#ifdef PGMPOOL_WITH_USER_TRACKING
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PVM pVM, PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PVM pVM, PGMPOOLKIND enmKind);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint);
#endif
#ifdef PGMPOOL_WITH_CACHE
static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable);
#endif
#ifdef PGMPOOL_WITH_MONITORING
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#endif
#ifndef IN_RING3
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser);
#endif
__END_DECLS


/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}


#ifdef IN_GC
/**
 * Maps a pool page into the current context.
 *
 * @returns Pointer to the mapping.
 * @param   pVM     The VM handle.
 * @param   pPage   The page to map.
 */
void *pgmGCPoolMapPage(PVM pVM, PPGMPOOLPAGE pPage)
{
    /* general pages. */
    if (pPage->idx >= PGMPOOL_IDX_FIRST)
    {
        Assert(pPage->idx < pVM->pgm.s.pPoolGC->cCurPages);
        void *pv;
        int rc = PGMGCDynMapHCPage(pVM, pPage->Core.Key, &pv);
        AssertReleaseRC(rc);
        return pv;
    }

    /* special pages. */
    switch (pPage->idx)
    {
        case PGMPOOL_IDX_PD:
            return pVM->pgm.s.pGC32BitPD;
        case PGMPOOL_IDX_PAE_PD:
            return pVM->pgm.s.apGCPaePDs[0];
        case PGMPOOL_IDX_PDPTR:
            return pVM->pgm.s.pGCPaePDPTR;
        case PGMPOOL_IDX_PML4:
            return pVM->pgm.s.pGCPaePML4;
        default:
            AssertReleaseMsgFailed(("Invalid index %d\n", pPage->idx));
            return NULL;
    }
}
#endif /* IN_GC */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Determines the size of a write instruction.
 * @returns number of bytes written.
 * @param   pDis    The disassembler state.
 */
static unsigned pgmPoolDisasWriteSize(PDISCPUSTATE pDis)
{
    /*
     * This is very crude and possibly wrong for some opcodes,
     * but since it's not really supposed to be called we can
     * probably live with that.
     */
    return DISGetParamSize(pDis, &pDis->param1);
}


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pPool   The pool.
 * @param   pPage   A page in the chain.
 */
int pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    int rc = VINF_SUCCESS;
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            if (rc2 == VERR_PGM_POOL_CLEARED && rc == VINF_SUCCESS)
                rc = VINF_PGM_SYNC_CR3;
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
    return rc;
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns Pointer to the current context mapping of the entry.
 * @param   pPool       The pool.
 * @param   pvFault     The fault virtual address.
 * @param   GCPhysFault The fault physical address.
 * @param   cbEntry     The entry size.
 */
#ifdef IN_RING3
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTHCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#else
DECLINLINE(const void *) pgmPoolMonitorGCPtr2CCPtr(PPGMPOOL pPool, RTGCPTR pvFault, RTGCPHYS GCPhysFault, const unsigned cbEntry)
#endif
{
#ifdef IN_GC
    return (RTGCPTR)((RTGCUINTPTR)pvFault & ~(RTGCUINTPTR)(cbEntry - 1));

#elif defined(IN_RING0)
    PVM pVM = pPool->CTXSUFF(pVM);
    void *pvRet;

    int rc = pgmRamGCPhys2HCPtr(&pPool->pVMHC->pgm.s, GCPhysFault & ~(RTGCPHYS)(cbEntry - 1), &pvRet);
    AssertFatalRCSuccess(rc);
    return pvRet;

#elif defined(IN_RING3)
    return (RTHCPTR)((uintptr_t)pvFault & ~(RTHCUINTPTR)(cbEntry - 1));
#else
# error "huh?"
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   In R0 and GC this is the guest context fault address (flat).
 *                      In R3 this is the host context 'fault' address.
 * @param   pCpu        The disassembler state for figuring out the write size.
 *                      This need not be specified if the caller knows we won't do cross entry accesses.
 */
#ifdef IN_RING3
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTHCPTR pvAddress, PDISCPUSTATE pCpu)
#else
void pgmPoolMonitorChainChanging(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault, RTGCPTR pvAddress, PDISCPUSTATE pCpu)
#endif
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
    const unsigned off = GCPhysFault & PAGE_OFFSET_MASK;
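    /* Note: 'off' is the byte offset into the guest page being written. All
       pages in a monitor chain shadow the same guest physical page, so the
       same offset applies to each shadow page visited below. */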
    for (;;)
    {
        union
        {
            void *pv;
            PX86PT pPT;
            PX86PTPAE pPTPae;
            PX86PD pPD;
            PX86PDPAE pPDPae;
        } uShw;
        uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);

        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                const unsigned iShw = off / sizeof(X86PTE);
                if (uShw.pPT->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %VHp GCPhys %VGp\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PG_MASK);
# endif
                    uShw.pPT->a[iShw].u = 0;
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
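                /* A PAE shadow PT has 512 entries and thus covers only half of
                   a 32-bit guest PT (1024 entries); bit 11 of pPage->GCPhys
                   records which half this shadow tracks, so only writes to the
                   matching half-page are of interest here. */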
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    if (uShw.pPTPae->a[iShw].n.u1Present)
                    {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                        PCX86PTE pGstPte = (PCX86PTE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                                   pGstPte->u & X86_PTE_PG_MASK);
# endif
                        uShw.pPTPae->a[iShw].u = 0;
                    }
                }
                break;

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                const unsigned iShw = off / sizeof(X86PTEPAE);
                if (uShw.pPTPae->a[iShw].n.u1Present)
                {
# ifdef PGMPOOL_WITH_GCPHYS_TRACKING
                    PCX86PTEPAE pGstPte = (PCX86PTEPAE)pgmPoolMonitorGCPtr2CCPtr(pPool, pvAddress, GCPhysFault, sizeof(*pGstPte));
                    Log4(("pgmPoolMonitorChainChanging pae_pae: deref %VHp GCPhys %VGp\n", uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK, pGstPte->u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               uShw.pPTPae->a[iShw].u & X86_PTE_PAE_PG_MASK,
                                               pGstPte->u & X86_PTE_PAE_PG_MASK);
# endif
                    uShw.pPTPae->a[iShw].u = 0;
                }
                break;
            }

            case PGMPOOLKIND_ROOT_32BIT_PD:
            {
                const unsigned iShw = off / sizeof(X86PTE);    // ASSUMING 32-bit guest paging!
                if (uShw.pPD->a[iShw].u & PGM_PDFLAGS_MAPPING)
                {
                    Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                    VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                    LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                }
                /* paranoia / a bit assumptive. */
                else if (   pCpu
                         && (off & 4)
                         && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
                {
                    const unsigned iShw2 = (off + pgmPoolDisasWriteSize(pCpu) - 1) / sizeof(X86PTE);
                    if (    iShw2 != iShw
                        &&  iShw2 < ELEMENTS(uShw.pPD->a)
                        &&  uShw.pPD->a[iShw2].u & PGM_PDFLAGS_MAPPING)
                    {
                        Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                    ASMProbeReadByte(pvAddress);
# endif
                    pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    uShw.pPD->a[iShw].u = 0;
                }
#endif
                break;
            }

            case PGMPOOLKIND_ROOT_PAE_PD:
            {
                unsigned iShw = (off / sizeof(X86PTE)) * 2;    // ASSUMING 32-bit guest paging!
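                /* One 32-bit guest PDE (4MB reach) is shadowed by two
                   consecutive PAE PDEs (2MB reach each), hence the factor of
                   two and the two-iteration loop below. */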
                for (unsigned i = 0; i < 2; i++, iShw++)
                {
                    if ((uShw.pPDPae->a[iShw].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                    {
                        Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                        VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                        LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw=%#x!\n", iShw));
                    }
                    /* paranoia / a bit assumptive. */
                    else if (   pCpu
                             && (off & 4)
                             && (off & 4) + pgmPoolDisasWriteSize(pCpu) > 4)
                    {
                        const unsigned iShw2 = iShw + 2;
                        if (    iShw2 < ELEMENTS(uShw.pPDPae->a)
                            &&  (uShw.pPDPae->a[iShw2].u & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == (PGM_PDFLAGS_MAPPING | X86_PDE_P))
                        {
                            Assert(pgmMapAreMappingsEnabled(&pVM->pgm.s));
                            VM_FF_SET(pVM, VM_FF_PGM_SYNC_CR3);
                            LogFlow(("pgmPoolMonitorChainChanging: Detected conflict at iShw2=%#x!\n", iShw2));
                        }
                    }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). */
                    if (    uShw.pPDPae->a[iShw].n.u1Present
                        &&  !VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3))
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPae->a[iShw].u));
# ifdef IN_GC /* TLB load - we're pushing things a bit... */
                        ASMProbeReadByte(pvAddress);
# endif
                        pgmPoolFree(pVM, uShw.pPDPae->a[iShw].u & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                        uShw.pPDPae->a[iShw].u = 0;
                    }
#endif
                }
                break;
            }

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}


# ifndef IN_RING3
/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning, that the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pCpu        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pCpu, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
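    /* The (offFault & 4) test below is presumably aimed at PAE guests: a BTR
       with a dword operand at an offset whose bit 2 is clear hits the low half
       of an 8-byte PTE, which is where the R/W bit (bit 1) lives. */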
    if (    pCpu->pCurInstr->opcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
       )
    {
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,Fork));
        return true;
    }
    return false;
}


/**
 * Determines whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pPage       The page in question.
 * @param   pCpu        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmPoolMonitorIsReused(PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu, RTGCPTR pvFault)
{
    switch (pCpu->pCurInstr->opcode)
    {
        case OP_PUSH:
            Log4(("pgmPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmPoolMonitorIsReused: FXSAVE\n"));
            return true;
    }
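    /* Any write whose destination is addressed through ESP is a stack access;
       a page table is never sensibly used as a stack, so consider it reused. */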
    if (    (pCpu->param1.flags & USE_REG_GEN32)
        &&  (pCpu->param1.base.reg_gen32 == USE_REG_ESP))
    {
        Log4(("pgmPoolMonitorIsReused: ESP\n"));
        return true;
    }

    //if (pPage->fCR3Mix)
    //    return false;
    return false;
}


/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
static int pgmPoolAccessHandlerFlush(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                     PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * First, do the flushing.
     */
    int rc = pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     */
    uint32_t cbWritten;
    int rc2 = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cbWritten);
    if (VBOX_SUCCESS(rc2))
        pRegFrame->eip += pCpu->opsize;
    else if (rc2 == VERR_EM_INTERPRETER)
    {
#ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTGCPTR)pRegFrame->eip))
        {
            LogFlow(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv, ignoring.\n",
                     pRegFrame->cs, (RTGCPTR)pRegFrame->eip));
            rc = VINF_SUCCESS;
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch2);
        }
        else
#endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }
    else
        rc = rc2;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerPT: returns %Vrc (flushed)\n", rc));
    return rc;
}


/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSTOSD(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                          PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
#ifdef IN_GC
    uint32_t *pu32 = (uint32_t *)pvFault;
#else
    RTGCPTR pu32 = pvFault;
#endif
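    /* Replay the REP STOSD manually: for each dword, first notify the monitor
       chain so the shadow entries get invalidated, then perform the guest
       write - directly through the fault address in GC, or via
       PGMPhysWriteGCPhys in the other contexts. */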
    while (pRegFrame->ecx)
    {
        pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pu32, NULL);
#ifdef IN_GC
        *pu32++ = pRegFrame->eax;
#else
        PGMPhysWriteGCPhys(pVM, GCPhysFault, &pRegFrame->eax, 4);
        pu32 += 4;
#endif
        GCPhysFault += 4;
        pRegFrame->edi += 4;
        pRegFrame->ecx--;
    }
    pRegFrame->eip += pCpu->opsize;

    /* See use in pgmPoolAccessHandlerSimple(). */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The VM handle.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pCpu        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmPoolAccessHandlerSimple(PVM pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pCpu,
                                           PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    pgmPoolMonitorChainChanging(pPool, pPage, GCPhysFault, pvFault, pCpu);

    /*
     * Interpret the instruction.
     */
    uint32_t cb;
    int rc = EMInterpretInstructionCPU(pVM, pCpu, pRegFrame, pvFault, &cb);
    if (VBOX_SUCCESS(rc))
        pRegFrame->eip += pCpu->opsize;
    else if (rc == VERR_EM_INTERPRETER)
    {
# ifdef IN_GC
        if (PATMIsPatchGCAddr(pVM, (RTGCPTR)(RTGCUINTPTR)pCpu->opaddr))
        {
            /* We're not able to handle this in ring-3, so fix the interpreter! */
            /** @note Should be fine. There's no need to flush the whole thing. */
#ifndef DEBUG_sandervl
            AssertMsgFailed(("pgmPoolAccessHandlerPTWorker: Interpretation failed for patch code %04x:%RGv - opcode=%d\n",
                             pRegFrame->cs, (RTGCPTR)pRegFrame->eip, pCpu->pCurInstr->opcode));
#endif
            STAM_COUNTER_INC(&pPool->StatMonitorGCIntrFailPatch1);
            rc = pgmPoolMonitorChainFlush(pPool, pPage);
        }
        else
# endif
        {
            rc = VINF_EM_RAW_EMULATE_INSTR;
            STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,EmulateInstr));
        }
    }

    /*
     * Quick hack, with logging enabled we're getting stale
     * code TLBs but no data TLB for EIP and crash in EMInterpretDisasOne.
     * Flushing here is BAD and expensive, I think EMInterpretDisasOne will
     * have to be fixed to support this. But that'll have to wait till next week.
     *
     * An alternative is to keep track of the changed PTEs together with the
     * GCPhys from the guest PT. This may prove expensive though.
     *
     * At the moment, it's VITAL that it's done AFTER the instruction interpreting
     * because we need the stale TLBs in some cases (XP boot). This MUST be fixed properly!
     */
    PGM_INVL_GUEST_TLBS();

    LogFlow(("pgmPoolAccessHandlerSimple: returns %Vrc cb=%d\n", rc, cb));
    return rc;
}


/**
 * \#PF Handler callback for PT write accesses.
 *
 * @returns VBox status code (appropriate for GC return).
 * @param   pVM         VM Handle.
 * @param   uErrorCode  CPU Error code.
 * @param   pRegFrame   Trap register frame.
 *                      NULL on DMA and other non CPU access.
 * @param   pvFault     The fault address (cr2).
 * @param   GCPhysFault The GC physical address corresponding to pvFault.
 * @param   pvUser      User argument.
 */
DECLEXPORT(int) pgmPoolAccessHandler(PVM pVM, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, RTGCPHYS GCPhysFault, void *pvUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), a);
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)pvUser;
    LogFlow(("pgmPoolAccessHandler: pvFault=%p pPage=%p:{.idx=%d} GCPhysFault=%VGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

    /*
     * Disassemble the faulting instruction.
     */
    DISCPUSTATE Cpu;
    int rc = EMInterpretDisasOne(pVM, pRegFrame, &Cpu, NULL);
    AssertRCReturn(rc, rc);

    /*
     * Check if it's worth dealing with.
     */
    if (    (   pPage->cModifications < 48 /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pPage->fCR3Mix)
        &&  !pgmPoolMonitorIsReused(pPage, &Cpu, pvFault)
        &&  !pgmPoolMonitorIsForking(pPool, &Cpu, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(Cpu.prefix & (PREFIX_REP | PREFIX_REPNE)))
        {
            rc = pgmPoolAccessHandlerSimple(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,Handled), a);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
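        /* Only take this path for the exact pattern we can replay safely: a
           flat 32-bit ring-0 REP STOSD, forward direction, dword aligned,
           short (at most 0x20 dwords) and confined to a single page. */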
        if (    Cpu.pCurInstr->opcode == OP_STOSWD
            &&  CPUMGetGuestCPL(pVM, pRegFrame) == 0
            &&  pRegFrame->ecx <= 0x20
            &&  pRegFrame->ecx * 4 <= PAGE_SIZE - ((uintptr_t)pvFault & PAGE_OFFSET_MASK)
            &&  !((uintptr_t)pvFault & 3)
            &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
            &&  Cpu.mode == CPUMODE_32BIT
            &&  Cpu.opmode == CPUMODE_32BIT
            &&  Cpu.addrmode == CPUMODE_32BIT
            &&  Cpu.prefix == PREFIX_REP
            &&  !pRegFrame->eflags.Bits.u1DF
           )
        {
            rc = pgmPoolAccessHandlerSTOSD(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,RepStosd), a);
            return rc;
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->CTXMID(StatMonitor,RepPrefix));
        Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x eip=%#x opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, pRegFrame->eip, Cpu.pCurInstr->opcode, Cpu.prefix));
    }

    /*
     * Not worth it, so flush it.
     */
    rc = pgmPoolAccessHandlerFlush(pVM, pPool, pPage, &Cpu, pRegFrame, GCPhysFault, pvFault);
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTXSUFF(pPool)->CTXSUFF(StatMonitor), &pPool->CTXMID(StatMonitor,FlushPage), a);
    return rc;
}

# endif /* !IN_RING3 */
#endif /* PGMPOOL_WITH_MONITORING */


#ifdef PGMPOOL_WITH_CACHE
/**
 * Inserts a page into the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    pPage->iNext = pPool->aiHash[iHash];
    pPool->aiHash[iHash] = pPage->idx;
}


/**
 * Removes a page from the GCPhys hash table.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
    if (pPool->aiHash[iHash] == pPage->idx)
        pPool->aiHash[iHash] = pPage->iNext;
    else
    {
        uint16_t iPrev = pPool->aiHash[iHash];
        for (;;)
        {
            const int16_t i = pPool->aPages[iPrev].iNext;
            if (i == pPage->idx)
            {
                pPool->aPages[iPrev].iNext = pPage->iNext;
                break;
            }
            if (i == NIL_PGMPOOL_IDX)
            {
                AssertReleaseMsgFailed(("GCPhys=%VGp idx=%#x\n", pPage->GCPhys, pPage->idx));
                break;
            }
            iPrev = i;
        }
    }
    pPage->iNext = NIL_PGMPOOL_IDX;
}


/**
 * Frees up one cache page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of a physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
    STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);

    /*
     * Select one page from the tail of the age list.
     */
    uint16_t iToFree = pPool->iAgeTail;
    if (iToFree == iUser)
        iToFree = pPool->aPages[iToFree].iAgePrev;
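    /* Never pick the page the caller is allocating a user entry for (iUser);
       flushing it would invalidate the very user table being worked on. */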
/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
    if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
    {
        uint16_t i = pPool->aPages[iToFree].iAgePrev;
        for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
        {
            if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
                continue;
            iToFree = i;
            break;
        }
    }
*/
    Assert(iToFree != iUser);
    AssertRelease(iToFree != NIL_PGMPOOL_IDX);

    int rc = pgmPoolFlushPage(pPool, &pPool->aPages[iToFree]);
    if (rc == VINF_SUCCESS)
        PGM_INVL_GUEST_TLBS(); /* see PT handler. */
    return rc;
}


/**
 * Checks if a kind mismatch is really a page being reused
 * or if it's just normal remappings.
 *
 * @returns true if reused and the cached page (enmKind1) should be flushed
 * @returns false if not reused.
 * @param   pVM         The VM handle.
 * @param   enmKind1    The kind of the cached page.
 * @param   enmKind2    The kind of the requested page.
 */
static bool pgmPoolCacheReusedByKind(PVM pVM, PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
{
    switch (enmKind1)
    {
        /*
         * Never reuse them. There is no remapping in non-paging mode.
         */
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            return true;

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
         */
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            switch (enmKind2)
            {
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                    return true;
                default:
                    return false;
            }

        /*
         * These cannot be flushed, and it's common to reuse the PDs as PTs.
         */
        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
            return false;

        default:
            AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
    }
}


/**
 * Attempts to satisfy a pgmPoolAlloc request from the cache.
 *
 * @returns VBox status code.
 * @retval  VINF_PGM_CACHED_PAGE on success.
 * @retval  VERR_FILE_NOT_FOUND if not found.
 * @param   pPool       The pool.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   enmKind     The kind of mapping.
 * @param   iUser       The shadow page pool index of the user table.
 * @param   iUserTable  The index into the user table (shadowed).
 * @param   ppPage      Where to store the pointer to the page.
 */
static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    /*
     * Look up the GCPhys in the hash.
     */
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i != NIL_PGMPOOL_IDX)
    {
        do
        {
            PPGMPOOLPAGE pPage = &pPool->aPages[i];
            if (pPage->GCPhys == GCPhys)
            {
                if ((PGMPOOLKIND)pPage->enmKind == enmKind)
                {
                    int rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
                    if (VBOX_SUCCESS(rc))
                    {
                        *ppPage = pPage;
                        STAM_COUNTER_INC(&pPool->StatCacheHits);
                        return VINF_PGM_CACHED_PAGE;
                    }
                    return rc;
                }

                /*
                 * The kind is different. In some cases we should now flush the page
                 * as it has been reused, but in most cases this is normal remapping
                 * of PDs as PT or big pages using the GCPhys field in a slightly
                 * different way than the other kinds.
                 */
                if (pgmPoolCacheReusedByKind(pVM, (PGMPOOLKIND)pPage->enmKind, enmKind))
                {
                    STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
                    pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED will be returned by pgmPoolTrackInsert. */
                    PGM_INVL_GUEST_TLBS(); /* see PT handler. */
                    break;
                }
            }

            /* next */
            i = pPage->iNext;
        } while (i != NIL_PGMPOOL_IDX);
    }

    Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%d\n", GCPhys, enmKind));
    STAM_COUNTER_INC(&pPool->StatCacheMisses);
    return VERR_FILE_NOT_FOUND;
}


/**
 * Inserts a page into the cache.
 *
 * @param   pPool           The pool.
 * @param   pPage           The cached page.
 * @param   fCanBeCached    Set if the page is fit for caching from the caller's point of view.
 */
static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
{
    /*
     * Insert into the GCPhys hash if the page is fit for that.
     */
    Assert(!pPage->fCached);
    if (fCanBeCached)
    {
        pPage->fCached = true;
        pgmPoolHashInsert(pPool, pPage);
        Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheCacheable);
    }
    else
    {
        Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%d, GCPhys=%RGp}\n",
              pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
        STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
    }

    /*
     * Insert at the head of the age list.
     */
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
    pPage->iAgeNext = pPool->iAgeHead;
    if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
    else
        pPool->iAgeTail = pPage->idx;
    pPool->iAgeHead = pPage->idx;
}


/**
 * Flushes a cached page.
 *
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    /*
     * Remove the page from the hash.
     */
    if (pPage->fCached)
    {
        pPage->fCached = false;
        pgmPoolHashRemove(pPool, pPage);
    }
    else
        Assert(pPage->iNext == NIL_PGMPOOL_IDX);

    /*
     * Remove it from the age list.
     */
    if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
    else
        pPool->iAgeTail = pPage->iAgePrev;
    if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
        pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
    else
        pPool->iAgeHead = pPage->iAgeNext;
    pPage->iAgeNext = NIL_PGMPOOL_IDX;
    pPage->iAgePrev = NIL_PGMPOOL_IDX;
}
#endif /* PGMPOOL_WITH_CACHE */


#ifdef PGMPOOL_WITH_MONITORING
/**
 * Looks for pages sharing the monitor.
 *
 * @returns Pointer to the head page.
 * @returns NULL if not found.
 * @param   pPool       The Pool.
 * @param   pNewPage    The page which is going to be monitored.
 */
static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
{
#ifdef PGMPOOL_WITH_CACHE
    const PVM pVM = pPool->CTXSUFF(pVM);

    /*
     * Look up the GCPhys in the hash.
     */
    RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
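    /* Compare page-aligned addresses: some kinds (e.g. PAE PTs for 32-bit PTs)
       keep a sub-page offset in GCPhys, which is why the loop below uses an
       unsigned 'pPage->GCPhys - GCPhys < PAGE_SIZE' range check instead of a
       straight equality test. */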
    unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
    if (i == NIL_PGMPOOL_IDX)
        return NULL;
    do
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[i];
        if (    pPage->GCPhys - GCPhys < PAGE_SIZE
            &&  pPage != pNewPage)
        {
            switch (pPage->enmKind)
            {
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
                case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
                case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
                case PGMPOOLKIND_ROOT_32BIT_PD:
                case PGMPOOLKIND_ROOT_PAE_PD:
                case PGMPOOLKIND_ROOT_PDPTR:
                case PGMPOOLKIND_ROOT_PML4:
                {
                    /* find the head */
                    while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
                    {
                        Assert(pPage->iMonitoredPrev != pPage->idx);
                        pPage = &pPool->aPages[pPage->iMonitoredPrev];
                    }
                    return pPage;
                }

                /* ignore, no monitoring. */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                    break;
                default:
                    AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
            }
        }

        /* next */
        i = pPage->iNext;
    } while (i != NIL_PGMPOOL_IDX);
#endif
    return NULL;
}

/**
 * Enables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the registration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Install handler.
     */
    int rc;
    PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
    if (pPageHead)
    {
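        /* Another shadow page already monitors this guest page and only one
           physical handler can cover it, so link this page into the existing
           monitor chain right after the head instead of registering a second
           handler. */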
        Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
        Assert(pPageHead->iMonitoredPrev != pPage->idx);
        pPage->iMonitoredPrev = pPageHead->idx;
        pPage->iMonitoredNext = pPageHead->iMonitoredNext;
        if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
            pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
        pPageHead->iMonitoredNext = pPage->idx;
        rc = VINF_SUCCESS;
    }
    else
    {
        Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
        const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1);
        rc = PGMHandlerPhysicalRegisterEx(pVM, PGMPHYSHANDLERTYPE_PHYSICAL_WRITE,
                                          GCPhysPage, GCPhysPage + (PAGE_SIZE - 1),
                                          pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
                                          pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
                                          pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
                                          pPool->pszAccessHandler);
        /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
         * the heap size should suffice. */
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = true;
    return rc;
}


/**
 * Disables write monitoring of a guest page.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS on success.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool   The pool.
 * @param   pPage   The cached page.
 */
static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    /*
     * Filter out the relevant kinds.
     */
    switch (pPage->enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            break;

        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_PAE_PT_FOR_PHYS:
            /* Nothing to monitor here. */
            return VINF_SUCCESS;

        case PGMPOOLKIND_ROOT_32BIT_PD:
        case PGMPOOLKIND_ROOT_PAE_PD:
#ifdef PGMPOOL_WITH_MIXED_PT_CR3
            break;
#endif
        case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
        case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
        case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
        case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
        case PGMPOOLKIND_ROOT_PDPTR:
        case PGMPOOLKIND_ROOT_PML4:
        default:
            AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
    }

    /*
     * Remove the page from the monitored list or uninstall it if last.
     */
    int rc;
    if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
        ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
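            /* This page is the chain head: promote the next page to head and
               repoint the physical handler's user argument at it, as the
               handler always expects to be given the head page. */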
            PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
            pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
            pNewHead->fCR3Mix = pPage->fCR3Mix;
            rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
                                                   pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pNewHead),
                                                   pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pNewHead),
                                                   pPool->pszAccessHandler);
            AssertFatalRCSuccess(rc);
            pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
        }
        else
        {
            pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
            if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
            {
                pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
                pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
            }
            pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1));
        AssertFatalRC(rc);
        if (pVM->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
            rc = VERR_PGM_POOL_CLEARED;
    }
    pPage->fMonitored = false;

    /*
     * Remove it from the list of modified pages (if in it).
     */
    pgmPoolMonitorModifiedRemove(pPool, pPage);

    return rc;
}


#ifdef PGMPOOL_WITH_MIXED_PT_CR3
/**
 * Set or clear the fCR3Mix attribute in a chain of monitored pages.
 *
 * @param   pPool   The Pool.
 * @param   pPage   A page in the chain.
 * @param   fCR3Mix The new fCR3Mix value.
 */
static void pgmPoolMonitorChainChangeCR3Mix(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCR3Mix)
{
    /* current */
    pPage->fCR3Mix = fCR3Mix;

    /* before */
    int16_t idx = pPage->iMonitoredPrev;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredPrev;
    }

    /* after */
    idx = pPage->iMonitoredNext;
    while (idx != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[idx].fCR3Mix = fCR3Mix;
        idx = pPool->aPages[idx].iMonitoredNext;
    }
}


/**
 * Installs or modifies monitoring of a CR3 page (special).
 *
 * We're pretending the CR3 page is shadowed by the pool so we can use the
 * generic mechanisms in detecting chained monitoring. (This also gives us a
 * taste of what code changes are required to really pool CR3 shadow pages.)
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 * @param   GCPhysCR3   The (new) CR3 value.
 */
int pgmPoolMonitorMonitorCR3(PPGMPOOL pPool, uint16_t idxRoot, RTGCPHYS GCPhysCR3)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorMonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d} GCPhysCR3=%VGp\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored, GCPhysCR3));

    /*
     * The unlikely case where it already matches.
     */
    if (pPage->GCPhys == GCPhysCR3)
    {
        Assert(pPage->fMonitored);
        return VINF_SUCCESS;
    }

    /*
     * Flush the current monitoring and remove it from the hash.
     */
    int rc = VINF_SUCCESS;
    if (pPage->fMonitored)
    {
        pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
        rc = pgmPoolMonitorFlush(pPool, pPage);
        if (rc == VERR_PGM_POOL_CLEARED)
            rc = VINF_SUCCESS;
        else
            AssertFatalRC(rc);
        pgmPoolHashRemove(pPool, pPage);
    }

    /*
     * Monitor the page at the new location and insert it into the hash.
     */
    pPage->GCPhys = GCPhysCR3;
    int rc2 = pgmPoolMonitorInsert(pPool, pPage);
    if (rc2 != VERR_PGM_POOL_CLEARED)
    {
        AssertFatalRC(rc2);
        if (rc2 != VINF_SUCCESS && rc == VINF_SUCCESS)
            rc = rc2;
    }
    pgmPoolHashInsert(pPool, pPage);
    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, true);
    return rc;
}


/**
 * Removes the monitoring of a CR3 page (special).
 *
 * @returns VBox status code.
 * @param   pPool       The pool.
 * @param   idxRoot     The CR3 (root) page index.
 */
int pgmPoolMonitorUnmonitorCR3(PPGMPOOL pPool, uint16_t idxRoot)
{
    const PVM pVM = pPool->CTXSUFF(pVM);

    Assert(idxRoot != NIL_PGMPOOL_IDX && idxRoot < PGMPOOL_IDX_FIRST);
    PPGMPOOLPAGE pPage = &pPool->aPages[idxRoot];
    LogFlow(("pgmPoolMonitorUnmonitorCR3: idxRoot=%d pPage=%p:{.GCPhys=%VGp, .fMonitored=%d}\n",
             idxRoot, pPage, pPage->GCPhys, pPage->fMonitored));

    if (!pPage->fMonitored)
        return VINF_SUCCESS;

    pgmPoolMonitorChainChangeCR3Mix(pPool, pPage, false);
    int rc = pgmPoolMonitorFlush(pPool, pPage);
    if (rc != VERR_PGM_POOL_CLEARED)
        AssertFatalRC(rc);
    else
        rc = VINF_SUCCESS;
    pgmPoolHashRemove(pPool, pPage);
    Assert(!pPage->fMonitored);
    pPage->GCPhys = NIL_RTGCPHYS;
    return rc;
}
#endif /* PGMPOOL_WITH_MIXED_PT_CR3 */


/**
 * Inserts the page into the list of modified pages.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page.
 */
void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
    AssertMsg(    pPage->iModifiedNext == NIL_PGMPOOL_IDX
              &&  pPage->iModifiedPrev == NIL_PGMPOOL_IDX
              &&  pPool->iModifiedHead != pPage->idx,
              ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
               pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
               pPool->iModifiedHead, pPool->cModifiedPages));

    pPage->iModifiedNext = pPool->iModifiedHead;
    if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
        pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
    pPool->iModifiedHead = pPage->idx;
    pPool->cModifiedPages++;
#ifdef VBOX_WITH_STATISTICS
    if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
        pPool->cModifiedPagesHigh = pPool->cModifiedPages;
#endif
}


/**
 * Removes the page from the list of modified pages and resets the
 * modification counter.
 *
 * @param   pPool   The pool.
 * @param   pPage   The page which is believed to be in the list of modified pages.
 */
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
    if (pPool->iModifiedHead == pPage->idx)
    {
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
        pPool->iModifiedHead = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPool->cModifiedPages--;
    }
    else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
    {
        pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
        if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
        {
            pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        }
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPool->cModifiedPages--;
    }
    else
        Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
    pPage->cModifications = 0;
}


/**
 * Zaps the list of modified pages, resetting their modification counters in the process.
 *
 * @param   pVM     The VM handle.
 */
void pgmPoolMonitorModifiedClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));

    unsigned cPages = 0; NOREF(cPages);
    uint16_t idx = pPool->iModifiedHead;
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
    while (idx != NIL_PGMPOOL_IDX)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[idx];
        idx = pPage->iModifiedNext;
        pPage->iModifiedNext = NIL_PGMPOOL_IDX;
        pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
        pPage->cModifications = 0;
        Assert(++cPages);
    }
    AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
    pPool->cModifiedPages = 0;
}


/**
 * Clear all shadow pages and clear all modification counters.
 *
 * @param   pVM     The VM handle.
 * @remark  Should only be used when monitoring is available, thus placed in
 *          the PGMPOOL_WITH_MONITORING #ifdef.
 */
void pgmPoolClearAll(PVM pVM)
{
    PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
    STAM_PROFILE_START(&pPool->StatClearAll, c);
    LogFlow(("pgmPoolClearAll: cUsedPages=%d\n", pPool->cUsedPages));

    /*
     * Iterate all the pages until we've encountered all that are in use.
     * This is a simple but not quite optimal solution.
     */
    unsigned cModifiedPages = 0; NOREF(cModifiedPages);
    unsigned cLeft = pPool->cUsedPages;
    unsigned iPage = pPool->cCurPages;
    while (--iPage >= PGMPOOL_IDX_FIRST)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            switch (pPage->enmKind)
            {
                /*
                 * We only care about shadow page tables.
                 */
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
                case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
                case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
                case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
                case PGMPOOLKIND_PAE_PT_FOR_PHYS:
                {
#ifdef PGMPOOL_WITH_USER_TRACKING
                    if (pPage->cPresent)
#endif
                    {
                        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
                        STAM_PROFILE_START(&pPool->StatZeroPage, z);
                        ASMMemZeroPage(pvShw);
                        STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
#ifdef PGMPOOL_WITH_USER_TRACKING
                        pPage->cPresent = 0;
                        pPage->iFirstPresent = ~0;
#endif
                    }
                }
                /* fall thru */

                default:
                    Assert(!pPage->cModifications || ++cModifiedPages);
                    Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
                    Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
                    pPage->iModifiedNext = NIL_PGMPOOL_IDX;
                    pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
                    pPage->cModifications = 0;
                    break;

            }
            if (!--cLeft)
                break;
        }
    }

    /* sweep the special pages too. */
    for (iPage = PGMPOOL_IDX_FIRST_SPECIAL; iPage < PGMPOOL_IDX_FIRST; iPage++)
    {
        PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
        if (pPage->GCPhys != NIL_RTGCPHYS)
        {
            Assert(!pPage->cModifications || ++cModifiedPages);
            Assert(pPage->iModifiedNext == NIL_PGMPOOL_IDX || pPage->cModifications);
            Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX || pPage->cModifications);
            pPage->iModifiedNext = NIL_PGMPOOL_IDX;
            pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
            pPage->cModifications = 0;
        }
    }

    AssertMsg(cModifiedPages == pPool->cModifiedPages, ("%d != %d\n", cModifiedPages, pPool->cModifiedPages));
    pPool->iModifiedHead = NIL_PGMPOOL_IDX;
    pPool->cModifiedPages = 0;

#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
    /*
     * Clear all the GCPhys links and rebuild the phys ext free list.
     */
    for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
         pRam;
         pRam = pRam->CTXSUFF(pNext))
    {
        unsigned iPage = pRam->cb >> PAGE_SHIFT;
        while (iPage-- > 0)
            pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
    }

    pPool->iPhysExtFreeHead = 0;
    PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
    const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
    for (unsigned i = 0; i < cMaxPhysExts; i++)
    {
        paPhysExts[i].iNext = i + 1;
        paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
        paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
    }
    paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
#endif

    pPool->cPresent = 0;
    STAM_PROFILE_STOP(&pPool->StatClearAll, c);
}
#endif /* PGMPOOL_WITH_MONITORING */


#ifdef PGMPOOL_WITH_USER_TRACKING
/**
 * Frees up at least one user entry.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if successfully added.
 * @retval  VERR_PGM_POOL_FLUSHED if the pool was flushed.
 * @param   pPool   The pool.
 * @param   iUser   The user index.
 */
static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
{
    STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
#ifdef PGMPOOL_WITH_CACHE
    /*
     * Just free cached pages in a braindead fashion.
     */
    /** @todo walk the age list backwards and free the first with usage. */
    int rc = VINF_SUCCESS;
    do
    {
        int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
        if (VBOX_FAILURE(rc2) && rc == VINF_SUCCESS)
            rc = rc2;
    } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
    return rc;
#else
    /*
     * Lazy approach.
     */
    pgmPoolFlushAllInt(pPool);
    return VERR_PGM_POOL_FLUSHED;
#endif
}


/**
 * Inserts a page into the cache.
 *
 * This will create a user node for the page, insert it into the GCPhys
 * hash, and insert it into the age list.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if successfully added.
 * @retval  VERR_PGM_POOL_FLUSHED if the pool was flushed.
 * @retval  VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
 * @param   pPool       The pool.
 * @param   pPage       The cached page.
 * @param   GCPhys      The GC physical address of the page we're gonna shadow.
 * @param   iUser       The user index.
 * @param   iUserTable  The user table index.
 */
DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint16_t iUserTable)
{
    int rc = VINF_SUCCESS;
    PPGMPOOLUSER pUser = pPool->CTXSUFF(paUsers);

    /*
     * Find a free user node.
     */
    uint16_t i = pPool->iUserFreeHead;
    if (i == NIL_PGMPOOL_USER_INDEX)
    {
        int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
        if (VBOX_FAILURE(rc))
            return rc;
        i = pPool->iUserFreeHead;
    }

    /*
     * Unlink the user node from the free list,
     * initialize and insert it into the user list.
     */
    pPool->iUserFreeHead = pUser[i].iNext;
    pUser[i].iNext = NIL_PGMPOOL_USER_INDEX;
    pUser[i].iUser = iUser;
    pUser[i].iUserTable = iUserTable;
    pPage->iUserHead = i;

    /*
     * Insert into cache and enable monitoring of the guest page if enabled.
     *
     * Until we implement caching of all levels, including the CR3 one, we'll
     * have to make sure we don't try monitor & cache any recursive reuse of
     * a monitored CR3 page. Because all windows versions are doing this we'll
     * have to be able to do combined access monitoring, CR3 + PT and
     * PD + PT (guest PAE).
     *
     * Update:
     * We're now cooperating with the CR3 monitor if an uncachable page is found.
     */
#if defined(PGMPOOL_WITH_MONITORING) || defined(PGMPOOL_WITH_CACHE)
# ifdef PGMPOOL_WITH_MIXED_PT_CR3
    const bool fCanBeMonitored = true;
# else
    bool fCanBeMonitored = pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored == NIL_RTGCPHYS
                        || (GCPhys & X86_PTE_PAE_PG_MASK) != (pPool->CTXSUFF(pVM)->pgm.s.GCPhysGstCR3Monitored & X86_PTE_PAE_PG_MASK)
                        || pgmPoolIsBigPage((PGMPOOLKIND)pPage->enmKind);
# endif
1766# ifdef PGMPOOL_WITH_CACHE
1767 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
1768# endif
1769 if (fCanBeMonitored)
1770 {
1771# ifdef PGMPOOL_WITH_MONITORING
1772 rc = pgmPoolMonitorInsert(pPool, pPage);
1773 if (rc == VERR_PGM_POOL_CLEARED)
1774 {
1775 /* 'Failed' - free the usage, and keep it in the cache (if enabled). */
1776# ifndef PGMPOOL_WITH_CACHE
1777 pgmPoolMonitorFlush(pPool, pPage);
1778 rc = VERR_PGM_POOL_FLUSHED;
1779# endif
1780 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
1781 pUser[i].iNext = pPool->iUserFreeHead;
1782 pUser[i].iUser = NIL_PGMPOOL_IDX;
1783 pPool->iUserFreeHead = i;
1784 }
1785# endif
1786 }
1787#endif /* PGMPOOL_WITH_MONITORING || PGMPOOL_WITH_CACHE */
1788 return rc;
1789}
1790
1791
1792# ifdef PGMPOOL_WITH_CACHE /* (only used when the cache is enabled.) */
1793/**
1794 * Adds a user reference to a page.
1795 *
1796 * This will add a user node for the page and tell the cache that the
1797 * page has been used, moving it to the head of the age list.
1798 *
1799 * @returns VBox status code.
1800 * @retval VINF_SUCCESS if successfully added.
1801 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
1802 * @param pPool The pool.
1803 * @param pPage The cached page.
1804 * @param iUser The user index.
1805 * @param iUserTable The user table.
1806 */
1807static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1808{
1809 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1810
1811# ifdef VBOX_STRICT
1812 /*
1813 * Check that the entry doesn't already exist.
1814 */
1815 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
1816 {
1817 uint16_t i = pPage->iUserHead;
1818 do
1819 {
1820 Assert(i < pPool->cMaxUsers);
1821 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%d %d\n", iUser, iUserTable));
1822 i = paUsers[i].iNext;
1823 } while (i != NIL_PGMPOOL_USER_INDEX);
1824 }
1825# endif
1826
1827 /*
1828 * Allocate a user node.
1829 */
1830 uint16_t i = pPool->iUserFreeHead;
1831 if (i == NIL_PGMPOOL_USER_INDEX)
1832 {
1833 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
1834 if (VBOX_FAILURE(rc))
1835 return rc;
1836 i = pPool->iUserFreeHead;
1837 }
1838 pPool->iUserFreeHead = paUsers[i].iNext;
1839
1840 /*
1841 * Initialize the user node and insert it.
1842 */
1843 paUsers[i].iNext = pPage->iUserHead;
1844 paUsers[i].iUser = iUser;
1845 paUsers[i].iUserTable = iUserTable;
1846 pPage->iUserHead = i;
1847
1848# ifdef PGMPOOL_WITH_CACHE
1849 /*
1850 * Tell the cache to update its replacement stats for this page.
1851 */
1852 pgmPoolCacheUsed(pPool, pPage);
1853# endif
1854 return VINF_SUCCESS;
1855}
1856# endif /* PGMPOOL_WITH_CACHE */
1857
1858
1859/**
1860 * Frees a user record associated with a page.
1861 *
1862 * This does not clear the entry in the user table, it simply returns the
1863 * user record to the chain of free records.
1864 *
1865 * @param pPool The pool.
1866 * @param pPage The shadow page.
1867 * @param iUser The shadow page pool index of the user table.
1868 * @param iUserTable The index into the user table (shadowed).
1869 */
1870static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
1871{
1872 const PVM pVM = pPool->CTXSUFF(pVM);
1873
1874 /*
1875 * Unlink and free the specified user entry.
1876 */
1877 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
1878
1879 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
1880 uint16_t i = pPage->iUserHead;
1881 if ( i != NIL_PGMPOOL_USER_INDEX
1882 && paUsers[i].iUser == iUser
1883 && paUsers[i].iUserTable == iUserTable)
1884 {
1885 pPage->iUserHead = paUsers[i].iNext;
1886
1887 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1888 paUsers[i].iNext = pPool->iUserFreeHead;
1889 pPool->iUserFreeHead = i;
1890 return;
1891 }
1892
1893 /* General: Linear search. */
1894 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
1895 while (i != NIL_PGMPOOL_USER_INDEX)
1896 {
1897 if ( paUsers[i].iUser == iUser
1898 && paUsers[i].iUserTable == iUserTable)
1899 {
1900 if (iPrev != NIL_PGMPOOL_USER_INDEX)
1901 paUsers[iPrev].iNext = paUsers[i].iNext;
1902 else
1903 pPage->iUserHead = paUsers[i].iNext;
1904
1905 paUsers[i].iUser = NIL_PGMPOOL_IDX;
1906 paUsers[i].iNext = pPool->iUserFreeHead;
1907 pPool->iUserFreeHead = i;
1908 return;
1909 }
1910 iPrev = i;
1911 i = paUsers[i].iNext;
1912 }
1913
1914 /* Fatal: didn't find it */
1915 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%#x iUserTable=%#x GCPhys=%VGp\n",
1916 iUser, iUserTable, pPage->GCPhys));
1917}
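
/*
 * A minimal aside sketch (hypothetical helper, not used by anything): the
 * user records form a per-page singly linked list through iNext starting at
 * pPage->iUserHead, which is why the unlinking above is a plain chain walk.
 * Counting a page's users follows the same pattern:
 */
#if 0
static unsigned pgmPoolCountPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
    unsigned     cUsers  = 0;
    for (uint16_t i = pPage->iUserHead; i != NIL_PGMPOOL_USER_INDEX; i = paUsers[i].iNext)
        cUsers++;
    return cUsers;
}
#endif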
1918
1919
1920/**
1921 * Gets the entry size of a shadow table.
1922 *
1923 * @param pVM The VM handle.
1924 * @param enmKind The kind of page.
1925 *
1926 * @returns The size of the entry in bytes. That is, 4 or 8.
1927 * @returns If the kind is not for a table, an assertion is raised and 0 is
1928 * returned.
1929 */
1930DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PVM pVM, PGMPOOLKIND enmKind)
1931{
1932 switch (enmKind)
1933 {
1934 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1935 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1936 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1937 case PGMPOOLKIND_ROOT_32BIT_PD:
1938 return 4;
1939
1940 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1941 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1942 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1943 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1944 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1945 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1946 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1947 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1948 case PGMPOOLKIND_ROOT_PAE_PD:
1949 case PGMPOOLKIND_ROOT_PDPTR:
1950 case PGMPOOLKIND_ROOT_PML4:
1951 return 8;
1952
1953 default:
1954 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1955 }
1956}
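
/*
 * Illustration (hypothetical helper, no caller in this file): the entry size
 * returned above is what decides whether a user table is indexed as an array
 * of 32-bit or of 64-bit entries, roughly like this:
 */
#if 0
DECLINLINE(void) pgmPoolClearUserTableEntry(PVM pVM, PPGMPOOLPAGE pUserPage, unsigned iUserTable)
{
    void *pv = PGMPOOL_PAGE_2_PTR(pVM, pUserPage);
    if (pgmPoolTrackGetShadowEntrySize(pVM, (PGMPOOLKIND)pUserPage->enmKind) == 4)
        ((uint32_t *)pv)[iUserTable] = 0;   /* 32-bit PDE/PTE */
    else
        ((uint64_t *)pv)[iUserTable] = 0;   /* PAE / long mode entry */
}
#endif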
1957
1958
1959/**
1960 * Gets the entry size of a guest table.
1961 *
1962 * @param pVM The VM handle.
1963 * @param enmKind The kind of page.
1964 *
1965 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
1966 * @returns If the kind is not for a table, an assertion is raised and 0 is
1967 * returned.
1968 */
1969DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PVM pVM, PGMPOOLKIND enmKind)
1970{
1971 switch (enmKind)
1972 {
1973 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
1974 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
1975 case PGMPOOLKIND_ROOT_32BIT_PD:
1976 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
1977 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
1978 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
1979 return 4;
1980
1981 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
1982 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
1983 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
1984 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
1985 case PGMPOOLKIND_ROOT_PAE_PD:
1986 case PGMPOOLKIND_ROOT_PDPTR:
1987 case PGMPOOLKIND_ROOT_PML4:
1988 return 8;
1989
1990 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
1991 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
1992 /** @todo can we return 0? (nobody is calling this...) */
1993 return 0;
1994
1995 default:
1996 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
1997 }
1998}
1999
2000
2001#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2002/**
2003 * Scans one shadow page table for mappings of a physical page.
2004 *
2005 * @param pVM The VM handle.
2006 * @param pPhysPage The guest page in question.
2007 * @param iShw The shadow page table index.
2008 * @param cRefs The number of references made in that PT.
2009 */
2010static void pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2011{
2012 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2013 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2014
2015 /*
2016 * Assert sanity.
2017 */
2018 Assert(cRefs == 1);
2019 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
2020 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
2021
2022 /*
2023 * Then, clear the actual mappings to the page in the shadow PT.
2024 */
2025 switch (pPage->enmKind)
2026 {
2027 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2028 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2029 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2030 {
2031 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2032 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2033 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2034 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2035 {
2036 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32 cRefs=%#x\n", i, pPT->a[i], cRefs));
2037 pPT->a[i].u = 0;
2038 cRefs--;
2039 if (!cRefs)
2040 return;
2041 }
2042#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2043 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2044 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2045 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2046 {
2047 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2048 pPT->a[i].u = 0;
2049 }
2050#endif
2051 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2052 break;
2053 }
2054
2055 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2056 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2057 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2058 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2059 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2060 {
2061 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2062 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2063 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2064 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2065 {
2066 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64 cRefs=%#x\n", i, pPT->a[i], cRefs));
2067 pPT->a[i].u = 0;
2068 cRefs--;
2069 if (!cRefs)
2070 return;
2071 }
2072#if defined(DEBUG) && !defined(IN_RING0) ///@todo RTLogPrintf is missing in R0.
2073 RTLogPrintf("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent);
2074 for (unsigned i = 0; i < ELEMENTS(pPT->a); i++)
2075 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2076 {
2077 RTLogPrintf("i=%d cRefs=%d\n", i, cRefs--);
2078 pPT->a[i].u = 0;
2079 }
2080#endif
2081 AssertFatalMsgFailed(("cRefs=%d iFirstPresent=%d cPresent=%d\n", cRefs, pPage->iFirstPresent, pPage->cPresent));
2082 break;
2083 }
2084
2085 default:
2086 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
2087 }
2088}
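
/*
 * A note on the match test above as a standalone sketch (hypothetical
 * helper): a shadow PTE maps the physical page exactly when it is present
 * and its frame address equals the page's HCPhys, hence the single masked
 * compare against HCPhys | X86_PTE_P.
 */
#if 0
static bool pgmPoolShwPteMapsPage(uint32_t uShwPte, RTHCPHYS HCPhysPage)
{
    const uint32_t u32 = (uint32_t)HCPhysPage | X86_PTE_P;
    return (uShwPte & (X86_PTE_PG_MASK | X86_PTE_P)) == u32;
}
#endif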
2089
2090
2091/**
2092 * Scans one shadow page table for mappings of a physical page.
2093 *
2094 * @param pVM The VM handle.
2095 * @param pPhysPage The guest page in question.
2096 * @param iShw The shadow page table index.
2097 * @param cRefs The number of references made in that PT.
2098 */
2099void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, uint16_t iShw, uint16_t cRefs)
2100{
2101 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool); NOREF(pPool);
2102 LogFlow(("pgmPoolTrackFlushGCPhysPT: HCPhys=%RHp iShw=%d cRefs=%d\n", pPhysPage->HCPhys, iShw, cRefs));
2103 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
2104 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, iShw, cRefs);
2105 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2106 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
2107}
2108
2109
2110/**
2111 * Flushes a list of shadow page tables mapping the same physical page.
2112 *
2113 * @param pVM The VM handle.
2114 * @param pPhysPage The guest page in question.
2115 * @param iPhysExt The physical cross reference extent list to flush.
2116 */
2117void pgmPoolTrackFlushGCPhysPTs(PVM pVM, PPGMPAGE pPhysPage, uint16_t iPhysExt)
2118{
2119 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2120 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
2121 LogFlow(("pgmPoolTrackFlushGCPhysPTs: HCPhys=%RHp iPhysExt=%d\n", pPhysPage->HCPhys, iPhysExt));
2122
2123 const uint16_t iPhysExtStart = iPhysExt;
2124 PPGMPOOLPHYSEXT pPhysExt;
2125 do
2126 {
2127 Assert(iPhysExt < pPool->cMaxPhysExts);
2128 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2129 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2130 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
2131 {
2132 pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, pPhysExt->aidx[i], 1);
2133 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2134 }
2135
2136 /* next */
2137 iPhysExt = pPhysExt->iNext;
2138 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2139
2140 /* insert the list into the free list and clear the ram range entry. */
2141 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2142 pPool->iPhysExtFreeHead = iPhysExtStart;
2143 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2144
2145 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
2146}
2147#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2148
2149
2150/**
2151 * Scans all shadow page tables for mappings of a physical page.
2152 *
2153 * This may be slow, but it's most likely more efficient than cleaning
2154 * out the entire page pool / cache.
2155 *
2156 * @returns VBox status code.
2157 * @retval VINF_SUCCESS if all references have been successfully cleared.
2158 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
2159 * a page pool cleaning.
2160 *
2161 * @param pVM The VM handle.
2162 * @param pPhysPage The guest page in question.
2163 */
2164int pgmPoolTrackFlushGCPhysPTsSlow(PVM pVM, PPGMPAGE pPhysPage)
2165{
2166 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2167 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2168 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d HCPhys=%RHp\n",
2169 pPool->cUsedPages, pPool->cPresent, pPhysPage->HCPhys));
2170
2171#if 1
2172 /*
2173 * There is a limit to what makes sense.
2174 */
2175 if (pPool->cPresent > 1024)
2176 {
2177 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
2178 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2179 return VINF_PGM_GCPHYS_ALIASED;
2180 }
2181#endif
2182
2183 /*
2184 * Iterate all the pages until we've encountered all those in use.
2185 * This is a simple but not quite optimal solution.
2186 */
2187 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
2188 const uint32_t u32 = u64;
2189 unsigned cLeft = pPool->cUsedPages;
2190 unsigned iPage = pPool->cCurPages;
2191 while (--iPage >= PGMPOOL_IDX_FIRST)
2192 {
2193 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
2194 if (pPage->GCPhys != NIL_RTGCPHYS)
2195 {
2196 switch (pPage->enmKind)
2197 {
2198 /*
2199 * We only care about shadow page tables.
2200 */
2201 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2202 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2203 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2204 {
2205 unsigned cPresent = pPage->cPresent;
2206 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2207 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2208 if (pPT->a[i].n.u1Present)
2209 {
2210 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
2211 {
2212 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
2213 pPT->a[i].u = 0;
2214 }
2215 if (!--cPresent)
2216 break;
2217 }
2218 break;
2219 }
2220
2221 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2222 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2223 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2224 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2225 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2226 {
2227 unsigned cPresent = pPage->cPresent;
2228 PX86PTPAE pPT = (PX86PTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
2229 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pPT->a); i++)
2230 if (pPT->a[i].n.u1Present)
2231 {
2232 if ((pPT->a[i].u & (X86_PTE_PAE_PG_MASK | X86_PTE_P)) == u64)
2233 {
2234 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
2235 pPT->a[i].u = 0;
2236 }
2237 if (!--cPresent)
2238 break;
2239 }
2240 break;
2241 }
2242 }
2243 if (!--cLeft)
2244 break;
2245 }
2246 }
2247
2248 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2249 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
2250 return VINF_SUCCESS;
2251}
2252
2253
2254/**
2255 * Clears the user entry in a user table.
2256 *
2257 * This is used to remove all references to a page when flushing it.
2258 */
2259static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
2260{
2261 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
2262 Assert(pUser->iUser < pPool->cCurPages);
2263
2264 /*
2265 * Map the user page.
2266 */
2267 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
2268 union
2269 {
2270 uint64_t *pau64;
2271 uint32_t *pau32;
2272 } u;
2273 const PVM pVM = pPool->CTXSUFF(pVM);
2274
2275 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pVM, pUserPage);
2276
2277#ifdef VBOX_STRICT
2278 /*
2279 * Some sanity checks.
2280 */
2281 switch (pUserPage->enmKind)
2282 {
2283 case PGMPOOLKIND_ROOT_32BIT_PD:
2284 Assert(!(u.pau32[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2285 Assert(pUser->iUserTable < X86_PG_ENTRIES);
2286 break;
2287 case PGMPOOLKIND_ROOT_PAE_PD:
2288 Assert(!(u.pau64[pUser->iUserTable] & PGM_PDFLAGS_MAPPING));
2289 Assert(pUser->iUserTable < 2048 && pUser->iUser == PGMPOOL_IDX_PAE_PD);
2290 break;
2291 case PGMPOOLKIND_ROOT_PDPTR:
2292 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2293 Assert(pUser->iUserTable < 4);
2294 break;
2295 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2296 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2297 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2298 break;
2299 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2300 case PGMPOOLKIND_ROOT_PML4:
2301 Assert(!(u.pau64[pUser->iUserTable] & PGM_PLXFLAGS_PERMANENT));
2302 Assert(pUser->iUserTable < X86_PG_PAE_ENTRIES);
2303 break;
2304 default:
2305 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
2306 break;
2307 }
2308#endif /* VBOX_STRICT */
2309
2310 /*
2311 * Clear the entry in the user page.
2312 */
2313 switch (pUserPage->enmKind)
2314 {
2315 /* 32-bit entries */
2316 case PGMPOOLKIND_ROOT_32BIT_PD:
2317 u.pau32[pUser->iUserTable] = 0;
2318 break;
2319
2320 /* 64-bit entries */
2321 case PGMPOOLKIND_ROOT_PAE_PD:
2322 case PGMPOOLKIND_ROOT_PDPTR:
2323 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2324 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2325 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2326 case PGMPOOLKIND_ROOT_PML4:
2327 u.pau64[pUser->iUserTable] = 0;
2328 break;
2329
2330 default:
2331 AssertFatalMsgFailed(("enmKind=%d iUser=%#x iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
2332 }
2333}
2334
2335
2336/**
2337 * Clears all users of a page.
2338 */
2339static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2340{
2341 /*
2342 * Free all the user records.
2343 */
2344 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
2345 uint16_t i = pPage->iUserHead;
2346 while (i != NIL_PGMPOOL_USER_INDEX)
2347 {
2348 /* Clear the entry in the user table. */
2349 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
2350
2351 /* Free it. */
2352 const uint16_t iNext = paUsers[i].iNext;
2353 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2354 paUsers[i].iNext = pPool->iUserFreeHead;
2355 pPool->iUserFreeHead = i;
2356
2357 /* Next. */
2358 i = iNext;
2359 }
2360 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2361}
2362
2363
2364#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2365/**
2366 * Allocates a new physical cross reference extent.
2367 *
2368 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
2369 * @param pVM The VM handle.
2370 * @param piPhysExt Where to store the phys ext index.
2371 */
2372PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVM pVM, uint16_t *piPhysExt)
2373{
2374 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2375 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
2376 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
2377 {
2378 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
2379 return NULL;
2380 }
2381 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2382 pPool->iPhysExtFreeHead = pPhysExt->iNext;
2383 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
2384 *piPhysExt = iPhysExt;
2385 return pPhysExt;
2386}
2387
2388
2389/**
2390 * Frees a physical cross reference extent.
2391 *
2392 * @param pVM The VM handle.
2393 * @param iPhysExt The extent to free.
2394 */
2395void pgmPoolTrackPhysExtFree(PVM pVM, uint16_t iPhysExt)
2396{
2397 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2398 Assert(iPhysExt < pPool->cMaxPhysExts);
2399 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2400 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2401 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2402 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2403 pPool->iPhysExtFreeHead = iPhysExt;
2404}
2405
2406
2407/**
2408 * Frees a chain of physical cross reference extents.
2409 *
2410 * @param pVM The VM handle.
2411 * @param iPhysExt The index of the first extent in the chain to free.
2412 */
2413void pgmPoolTrackPhysExtFreeList(PVM pVM, uint16_t iPhysExt)
2414{
2415 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2416
2417 const uint16_t iPhysExtStart = iPhysExt;
2418 PPGMPOOLPHYSEXT pPhysExt;
2419 do
2420 {
2421 Assert(iPhysExt < pPool->cMaxPhysExts);
2422 pPhysExt = &pPool->CTXSUFF(paPhysExts)[iPhysExt];
2423 for (unsigned i = 0; i < ELEMENTS(pPhysExt->aidx); i++)
2424 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
2425
2426 /* next */
2427 iPhysExt = pPhysExt->iNext;
2428 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2429
2430 pPhysExt->iNext = pPool->iPhysExtFreeHead;
2431 pPool->iPhysExtFreeHead = iPhysExtStart;
2432}
2433
2434/**
2435 * Insert a reference into a list of physical cross reference extents.
2436 *
2437 * @returns The new ram range flags (top 16-bits).
2438 *
2439 * @param pVM The VM handle.
2440 * @param iPhysExt The physical extent index of the list head.
2441 * @param iShwPT The shadow page table index.
2442 *
2443 */
2444static uint16_t pgmPoolTrackPhysExtInsert(PVM pVM, uint16_t iPhysExt, uint16_t iShwPT)
2445{
2446 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
2447 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2448
2449 /* special common case. */
2450 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
2451 {
2452 paPhysExts[iPhysExt].aidx[2] = iShwPT;
2453 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2454 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{,,%d}\n", iPhysExt, iShwPT));
2455 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2456 }
2457
2458 /* general treatment. */
2459 const uint16_t iPhysExtStart = iPhysExt;
2460 unsigned cMax = 15;
2461 for (;;)
2462 {
2463 Assert(iPhysExt < pPool->cMaxPhysExts);
2464 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2465 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
2466 {
2467 paPhysExts[iPhysExt].aidx[i] = iShwPT;
2468 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedMany);
2469 LogFlow(("pgmPoolTrackPhysExtAddref: %d:{%d} i=%d cMax=%d\n", iPhysExt, iShwPT, i, cMax));
2470 return iPhysExtStart | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2471 }
2472 if (!--cMax)
2473 {
2474 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2475 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2476 LogFlow(("pgmPoolTrackPhysExtAddref: overflow (1) iShwPT=%d\n", iShwPT));
2477 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2478 }
 /* next - walk the chain; without this the extent-adding code below is unreachable. */
 iPhysExt = paPhysExts[iPhysExt].iNext;
 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
 break;
2479 }
2480
2481 /* add another extent to the list. */
2482 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2483 if (!pNew)
2484 {
2485 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackOverflows);
2486 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
2487 return MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2488 }
2489 pNew->iNext = iPhysExtStart;
2490 pNew->aidx[0] = iShwPT;
2491 LogFlow(("pgmPoolTrackPhysExtAddref: added new extent %d:{%d}->%d\n", iPhysExt, iShwPT, iPhysExtStart));
2492 return iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2493}
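
/*
 * For reference, the u16 tracking word manipulated above, expressed as two
 * hypothetical decode helpers (mirroring the expressions used in this file):
 * the low bits carry an index and the top bits carry the cross reference
 * count code; MM_RAM_FLAGS_CREFS_PHYSEXT in the crefs field marks the index
 * as the head of a phys ext list rather than a single pool page index.
 */
#if 0
static bool pgmPoolU16IsPhysExtList(uint16_t u16)
{
    return (u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == MM_RAM_FLAGS_CREFS_PHYSEXT;
}

static uint16_t pgmPoolU16GetIdx(uint16_t u16)
{
    /* Either a pool page index, a phys ext list head, or MM_RAM_FLAGS_IDX_OVERFLOWED. */
    return u16 & MM_RAM_FLAGS_IDX_MASK;
}
#endif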
2494
2495
2496/**
2497 * Add a reference to guest physical page where extents are in use.
2498 *
2499 * @returns The new ram range flags (top 16-bits).
2500 *
2501 * @param pVM The VM handle.
2502 * @param u16 The ram range flags (top 16-bits).
2503 * @param iShwPT The shadow page table index.
2504 */
2505uint16_t pgmPoolTrackPhysExtAddref(PVM pVM, uint16_t u16, uint16_t iShwPT)
2506{
2507 if ((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) != MM_RAM_FLAGS_CREFS_PHYSEXT)
2508 {
2509 /*
2510 * Convert to extent list.
2511 */
2512 Assert((u16 >> (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT)) == 1);
2513 uint16_t iPhysExt;
2514 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
2515 if (pPhysExt)
2516 {
2517 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT));
2518 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliased);
2519 pPhysExt->aidx[0] = u16 & MM_RAM_FLAGS_IDX_MASK;
2520 pPhysExt->aidx[1] = iShwPT;
2521 u16 = iPhysExt | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2522 }
2523 else
2524 u16 = MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT));
2525 }
2526 else if (u16 != (MM_RAM_FLAGS_IDX_OVERFLOWED | (MM_RAM_FLAGS_CREFS_PHYSEXT << (MM_RAM_FLAGS_CREFS_SHIFT - MM_RAM_FLAGS_IDX_SHIFT))))
2527 {
2528 /*
2529 * Insert into the extent list.
2530 */
2531 u16 = pgmPoolTrackPhysExtInsert(pVM, u16 & MM_RAM_FLAGS_IDX_MASK, iShwPT);
2532 }
2533 else
2534 STAM_COUNTER_INC(&pVM->pgm.s.StatTrackAliasedLots);
2535 return u16;
2536}
2537
2538
2539/**
2540 * Clear references to guest physical memory.
2541 *
2542 * @param pPool The pool.
2543 * @param pPage The page.
2544 * @param pPhysPage Pointer to the aPages entry in the ram range.
2545 */
2546void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage)
2547{
2548 const unsigned cRefs = pPhysPage->HCPhys >> MM_RAM_FLAGS_CREFS_SHIFT; /** @todo PAGE FLAGS */
2549 const PVM pVM = pPool->CTXSUFF(pVM);
2550
2551 AssertFatalMsg(cRefs == MM_RAM_FLAGS_CREFS_PHYSEXT, ("cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2552
2553 uint16_t iPhysExt = (pPhysPage->HCPhys >> MM_RAM_FLAGS_IDX_SHIFT) & MM_RAM_FLAGS_IDX_MASK;
2554 if (iPhysExt != MM_RAM_FLAGS_IDX_OVERFLOWED)
2555 {
2556 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
2557 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
2558 do
2559 {
2560 Assert(iPhysExt < pPool->cMaxPhysExts);
2561
2562 /*
2563 * Look for the shadow page and check if it's all freed.
2564 */
2565 for (unsigned i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2566 {
2567 if (paPhysExts[iPhysExt].aidx[i] == pPage->idx)
2568 {
2569 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
2570
2571 for (i = 0; i < ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
2572 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
2573 {
2574 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2575 return;
2576 }
2577
2578 /* we can free the node. */
2579 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
2580 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
2581 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
2582 {
2583 /* lonely node */
2584 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2585 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d lonely\n", pPhysPage->HCPhys, pPage->idx));
2586 pPhysPage->HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
2587 }
2588 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
2589 {
2590 /* head */
2591 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d head\n", pPhysPage->HCPhys, pPage->idx));
2592 pPhysPage->HCPhys = (pPhysPage->HCPhys & MM_RAM_FLAGS_NO_REFS_MASK) /** @todo PAGE FLAGS */
2593 | ((uint64_t)MM_RAM_FLAGS_CREFS_PHYSEXT << MM_RAM_FLAGS_CREFS_SHIFT)
2594 | ((uint64_t)iPhysExtNext << MM_RAM_FLAGS_IDX_SHIFT);
2595 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2596 }
2597 else
2598 {
2599 /* in list */
2600 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64 idx=%d\n", pPhysPage->HCPhys, pPage->idx));
2601 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
2602 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
2603 }
2604 iPhysExt = iPhysExtNext;
2605 return;
2606 }
2607 }
2608
2609 /* next */
2610 iPhysExtPrev = iPhysExt;
2611 iPhysExt = paPhysExts[iPhysExt].iNext;
2612 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
2613
2614 AssertFatalMsgFailed(("not-found! cRefs=%d HCPhys=%RHp pPage=%p:{.idx=%d}\n", cRefs, pPhysPage->HCPhys, pPage, pPage->idx));
2615 }
2616 else /* nothing to do */
2617 LogFlow(("pgmPoolTrackPhysExtDerefGCPhys: HCPhys=%RX64\n", pPhysPage->HCPhys));
2618}
2619
2620
2621
2622/**
2623 * Clear references to guest physical memory.
2624 *
2625 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest physical address
2626 * is assumed to be correct, so the linear search can be skipped and we can assert
2627 * at an earlier point.
2628 *
2629 * @param pPool The pool.
2630 * @param pPage The page.
2631 * @param HCPhys The host physical address corresponding to the guest page.
2632 * @param GCPhys The guest physical address corresponding to HCPhys.
2633 */
2634static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys)
2635{
2636 const PVM pVM = pPool->CTXSUFF(pVM);
2637 /*
2638 * Walk range list.
2639 */
2640 PPGMRAMRANGE pRam = pVM->pgm.s.CTXSUFF(pRamRanges);
2641 while (pRam)
2642 {
2643 RTGCPHYS off = GCPhys - pRam->GCPhys;
2644 if (off < pRam->cb)
2645 {
2646 /* does it match? */
2647 const unsigned iPage = off >> PAGE_SHIFT;
2648 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2649 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2650 {
2651 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2652 return;
2653 }
2654 break;
2655 }
2656 pRam = CTXSUFF(pRam->pNext);
2657 }
2658 AssertFatalMsgFailed(("HCPhys=%VHp GCPhys=%VGp\n", HCPhys, GCPhys));
2659}
2660
2661
2662/**
2663 * Clear references to guest physical memory.
2664 *
2665 * @param pPool The pool.
2666 * @param pPage The page.
2667 * @param HCPhys The host physical address corresponding to the guest page.
2668 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
2669 */
2670static void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint)
2671{
2672 const PVM pVM = pPool->CTXSUFF(pVM);
2673 /*
2674 * Walk range list.
2675 */
2676 PPGMRAMRANGE pRam = pVM->pgm.s.CTXSUFF(pRamRanges);
2677 while (pRam)
2678 {
2679 RTGCPHYS off = GCPhysHint - pRam->GCPhys;
2680 if (off < pRam->cb)
2681 {
2682 /* does it match? */
2683 const unsigned iPage = off >> PAGE_SHIFT;
2684 Assert(PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]));
2685 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2686 {
2687 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2688 return;
2689 }
2690 break;
2691 }
2692 pRam = CTXSUFF(pRam->pNext);
2693 }
2694
2695 /*
2696 * Damn, the hint didn't work. We'll have to do an expensive linear search.
2697 */
2698 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
2699 pRam = pVM->pgm.s.CTXSUFF(pRamRanges);
2700 while (pRam)
2701 {
2702 unsigned iPage = pRam->cb >> PAGE_SHIFT;
2703 while (iPage-- > 0)
2704 {
2705 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
2706 {
2707 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%VHp GCPhysHint=%VGp GCPhysReal=%VGp\n",
2708 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
2709 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage]);
2710 return;
2711 }
2712 }
2713 pRam = CTXSUFF(pRam->pNext);
2714 }
2715
2716 AssertFatalMsgFailed(("HCPhys=%VHp GCPhysHint=%VGp\n", HCPhys, GCPhysHint));
2717}
2718
2719
2720/**
2721 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
2722 *
2723 * @param pPool The pool.
2724 * @param pPage The page.
2725 * @param pShwPT The shadow page table (mapping of the page).
2726 * @param pGstPT The guest page table.
2727 */
2728DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
2729{
2730 for (unsigned i = pPage->iFirstPresent; i < ELEMENTS(pShwPT->a); i++)
2731 if (pShwPT->a[i].n.u1Present)
2732 {
2733 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
2734 i, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2735 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2736 if (!--pPage->cPresent)
2737 break;
2738 }
2739}
2740
2741
2742/**
2743 * Clear references to guest physical memory in a PAE / 32-bit page table.
2744 *
2745 * @param pPool The pool.
2746 * @param pPage The page.
2747 * @param pShwPT The shadow page table (mapping of the page).
2748 * @param pGstPT The guest page table (just a half one).
2749 */
2750DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PT pGstPT)
2751{
2752 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2753 if (pShwPT->a[i].n.u1Present)
2754 {
2755 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
2756 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
2757 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK);
2758 }
2759}
2760
2761
2762/**
2763 * Clear references to guest physical memory in a PAE / PAE page table.
2764 *
2765 * @param pPool The pool.
2766 * @param pPage The page.
2767 * @param pShwPT The shadow page table (mapping of the page).
2768 * @param pGstPT The guest page table.
2769 */
2770DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
2771{
2772 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++)
2773 if (pShwPT->a[i].n.u1Present)
2774 {
2775 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
2776 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
2777 pgmPoolTracDerefGCPhysHint(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK);
2778 }
2779}
2780
2781
2782/**
2783 * Clear references to guest physical memory in a 32-bit / 4MB page table.
2784 *
2785 * @param pPool The pool.
2786 * @param pPage The page.
2787 * @param pShwPT The shadow page table (mapping of the page).
2788 */
2789DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
2790{
2791 RTGCPHYS GCPhys = pPage->GCPhys;
2792 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2793 if (pShwPT->a[i].n.u1Present)
2794 {
2795 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
2796 i, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys));
2797 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PG_MASK, GCPhys);
2798 }
2799}
2800
2801
2802/**
2803 * Clear references to guest physical memory in a PAE / 2/4MB page table.
2804 *
2805 * @param pPool The pool.
2806 * @param pPage The page.
2807 * @param pShwPT The shadow page table (mapping of the page).
2808 */
2809DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT)
2810{
2811 RTGCPHYS GCPhys = pPage->GCPhys;
2812 for (unsigned i = 0; i < ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
2813 if (pShwPT->a[i].n.u1Present)
2814 {
2815 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 GCPhys=%RGp\n",
2816 i, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys));
2817 pgmPoolTracDerefGCPhys(pPool, pPage, pShwPT->a[i].u & X86_PTE_PAE_PG_MASK, GCPhys);
2818 }
2819}
2820#endif /* PGMPOOL_WITH_GCPHYS_TRACKING */
2821
2822
2823/**
2824 * Clear references to shadowed pages in a PAE page directory.
2825 *
2826 * @param pPool The pool.
2827 * @param pPage The page.
2828 * @param pShwPD The shadow page directory (mapping of the page).
2829 */
2830DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
2831{
2832 const PVM pVM = pPool->CTXSUFF(pVM);
2833
2834 for (unsigned i = 0; i < ELEMENTS(pShwPD->a); i++)
2835 {
2836 if (pShwPD->a[i].n.u1Present)
2837 {
2838 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PAE_PG_MASK);
2839 if (pSubPage)
2840 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2841 else
2842 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & X86_PDE_PAE_PG_MASK));
2843 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2844 }
2845 }
2846}
2847
2848
2849/**
2850 * Clear references to shadowed pages in a 64-bit page directory pointer table.
2851 *
2852 * @param pPool The pool.
2853 * @param pPage The page.
2854 * @param pShwPdPtr The shadow page directory pointer table (mapping of the page).
2855 */
2856DECLINLINE(void) pgmPoolTrackDerefPDPTR64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPTR pShwPdPtr)
2857{
2858 const PVM pVM = pPool->CTXSUFF(pVM);
2859
2860 for (unsigned i = 0; i < ELEMENTS(pShwPdPtr->a); i++)
2861 {
2862 if (pShwPdPtr->a[i].n.u1Present)
2863 {
2864 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPdPtr->a[i].u & X86_PDPE_PG_MASK);
2865 if (pSubPage)
2866 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
2867 else
2868 AssertFatalMsgFailed(("%RX64\n", pShwPdPtr->a[i].u & X86_PDPE_PG_MASK));
2869 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
2870 }
2871 }
2872}
2873
2874
2875/**
2876 * Clears all references made by this page.
2877 *
2878 * This includes other shadow pages and GC physical addresses.
2879 *
2880 * @param pPool The pool.
2881 * @param pPage The page.
2882 */
2883static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2884{
2885 const PVM pVM = pPool->CTXSUFF(pVM);
2886
2887 /*
2888 * Map the shadow page and take action according to the page kind.
2889 */
2890 void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2891 switch (pPage->enmKind)
2892 {
2893#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
2894 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2895 {
2896 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2897 void *pvGst;
2898 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2899 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
2900 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2901 break;
2902 }
2903
2904 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2905 {
2906 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2907 void *pvGst;
2908 int rc = PGM_GCPHYS_2_PTR_EX(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2909 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PT)pvGst);
2910 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2911 break;
2912 }
2913
2914 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2915 {
2916 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2917 void *pvGst;
2918 int rc = PGM_GCPHYS_2_PTR(pPool->CTXSUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
2919 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
2920 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2921 break;
2922 }
2923
2924 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
2925 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2926 {
2927 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2928 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
2929 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2930 break;
2931 }
2932
2933 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 4 MB page */
2934 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2935 {
2936 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
2937 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PX86PTPAE)pvShw);
2938 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
2939 break;
2940 }
2941
2942#else /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2943 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2944 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2945 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2946 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2947 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2948 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2949 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2950 break;
2951#endif /* !PGMPOOL_WITH_GCPHYS_TRACKING */
2952
2953 case PGMPOOLKIND_PAE_PD_FOR_32BIT_PD:
2954 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2955 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
2956 break;
2957
2958 case PGMPOOLKIND_64BIT_PDPTR_FOR_64BIT_PDPTR:
2959 pgmPoolTrackDerefPDPTR64Bit(pPool, pPage, (PX86PDPTR)pvShw);
2960 break;
2961
2962 default:
2963 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
2964 }
2965
2966 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
2967 STAM_PROFILE_START(&pPool->StatZeroPage, z);
2968 ASMMemZeroPage(pvShw);
2969 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
2970 pPage->fZeroed = true;
2971}
2972#endif /* PGMPOOL_WITH_USER_TRACKING */
2973
2974
2975/**
2976 * Flushes all the special root pages as part of a pgmPoolFlushAllInt operation.
2977 *
2978 * @param pPool The pool.
2979 */
2980static void pgmPoolFlushAllSpecialRoots(PPGMPOOL pPool)
2981{
2982 /*
2983 * These special pages are all mapped into the indexes 1..PGMPOOL_IDX_FIRST-1.
2984 */
2985 Assert(NIL_PGMPOOL_IDX == 0);
2986 for (unsigned i = 1; i < PGMPOOL_IDX_FIRST; i++)
2987 {
2988 /*
2989 * Get the page address.
2990 */
2991 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2992 union
2993 {
2994 uint64_t *pau64;
2995 uint32_t *pau32;
2996 } u;
2997 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTXSUFF(pVM), pPage);
2998
2999 /*
3000 * Mark stuff not present.
3001 */
3002 switch (pPage->enmKind)
3003 {
3004 case PGMPOOLKIND_ROOT_32BIT_PD:
3005 for (unsigned iPage = 0; iPage < X86_PG_ENTRIES; iPage++)
3006 if ((u.pau32[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3007 u.pau32[iPage] = 0;
3008 break;
3009
3010 case PGMPOOLKIND_ROOT_PAE_PD:
3011 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES * 4; iPage++)
3012 if ((u.pau64[iPage] & (PGM_PDFLAGS_MAPPING | X86_PDE_P)) == X86_PDE_P)
3013 u.pau64[iPage] = 0;
3014 break;
3015
3016 case PGMPOOLKIND_ROOT_PML4:
3017 for (unsigned iPage = 0; iPage < X86_PG_PAE_ENTRIES; iPage++)
3018 if ((u.pau64[iPage] & (PGM_PLXFLAGS_PERMANENT | X86_PML4E_P)) == X86_PML4E_P)
3019 u.pau64[iPage] = 0;
3020 break;
3021
3022 case PGMPOOLKIND_ROOT_PDPTR:
3023 /* Not root of shadowed pages currently, ignore it. */
3024 break;
3025 }
3026 }
3027
3028 /*
3029 * Paranoia (to be removed), flag a global CR3 sync.
3030 */
3031 VM_FF_SET(pPool->CTXSUFF(pVM), VM_FF_PGM_SYNC_CR3);
3032}
3033
3034
3035/**
3036 * Flushes the entire cache.
3037 *
3038 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3039 * and will execute the CR3 flush.
3040 *
3041 * @param pPool The pool.
3042 */
3043static void pgmPoolFlushAllInt(PPGMPOOL pPool)
3044{
3045 STAM_PROFILE_START(&pPool->StatFlushAllInt, a);
3046 LogFlow(("pgmPoolFlushAllInt:\n"));
3047
3048 /*
3049 * If there are no pages in the pool, there is nothing to do.
3050 */
3051 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
3052 {
3053 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3054 return;
3055 }
3056
3057 /*
3058 * Nuke the free list and reinsert all pages into it.
3059 */
3060 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
3061 {
3062 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3063
3064#ifdef IN_RING3
3065 Assert(pPage->Core.Key == MMPage2Phys(pPool->pVMHC, pPage->pvPageHC));
3066#endif
3067#ifdef PGMPOOL_WITH_MONITORING
3068 if (pPage->fMonitored)
3069 pgmPoolMonitorFlush(pPool, pPage);
3070 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3071 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3072 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3073 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3074 pPage->cModifications = 0;
3075#endif
3076 pPage->GCPhys = NIL_RTGCPHYS;
3077 pPage->enmKind = PGMPOOLKIND_FREE;
3078 Assert(pPage->idx == i);
3079 pPage->iNext = i + 1;
3080 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
3081 pPage->fSeenNonGlobal = false;
3082 pPage->fMonitored = false;
3083 pPage->fCached = false;
3084 pPage->fReusedFlushPending = false;
3085 pPage->fCR3Mix = false;
3086#ifdef PGMPOOL_WITH_USER_TRACKING
3087 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3088#endif
3089#ifdef PGMPOOL_WITH_CACHE
3090 pPage->iAgeNext = NIL_PGMPOOL_IDX;
3091 pPage->iAgePrev = NIL_PGMPOOL_IDX;
3092#endif
3093 }
3094 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
3095 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
3096 pPool->cUsedPages = 0;
3097
3098#ifdef PGMPOOL_WITH_USER_TRACKING
3099 /*
3100 * Zap and reinitialize the user records.
3101 */
3102 pPool->cPresent = 0;
3103 pPool->iUserFreeHead = 0;
3104 PPGMPOOLUSER paUsers = pPool->CTXSUFF(paUsers);
3105 const unsigned cMaxUsers = pPool->cMaxUsers;
3106 for (unsigned i = 0; i < cMaxUsers; i++)
3107 {
3108 paUsers[i].iNext = i + 1;
3109 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3110 paUsers[i].iUserTable = 0xfffe;
3111 }
3112 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
3113#endif
3114
3115#ifdef PGMPOOL_WITH_GCPHYS_TRACKING
3116 /*
3117 * Clear all the GCPhys links and rebuild the phys ext free list.
3118 */
3119 for (PPGMRAMRANGE pRam = pPool->CTXSUFF(pVM)->pgm.s.CTXSUFF(pRamRanges);
3120 pRam;
3121 pRam = pRam->CTXSUFF(pNext))
3122 {
3123 unsigned iPage = pRam->cb >> PAGE_SHIFT;
3124 while (iPage-- > 0)
3125 pRam->aPages[iPage].HCPhys &= MM_RAM_FLAGS_NO_REFS_MASK; /** @todo PAGE FLAGS */
3126 }
3127
3128 pPool->iPhysExtFreeHead = 0;
3129 PPGMPOOLPHYSEXT paPhysExts = pPool->CTXSUFF(paPhysExts);
3130 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
3131 for (unsigned i = 0; i < cMaxPhysExts; i++)
3132 {
3133 paPhysExts[i].iNext = i + 1;
3134 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
3135 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
3136 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
3137 }
3138 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3139#endif
3140
3141#ifdef PGMPOOL_WITH_MONITORING
3142 /*
3143 * Just zap the modified list.
3144 */
3145 pPool->cModifiedPages = 0;
3146 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
3147#endif
3148
3149#ifdef PGMPOOL_WITH_CACHE
3150 /*
3151 * Clear the GCPhys hash and the age list.
3152 */
3153 for (unsigned i = 0; i < ELEMENTS(pPool->aiHash); i++)
3154 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
3155 pPool->iAgeHead = NIL_PGMPOOL_IDX;
3156 pPool->iAgeTail = NIL_PGMPOOL_IDX;
3157#endif
3158
3159 /*
3160 * Flush all the special root pages.
3161 * Reinsert active pages into the hash and ensure monitoring chains are correct.
3162 */
3163 pgmPoolFlushAllSpecialRoots(pPool);
3164 for (unsigned i = PGMPOOL_IDX_FIRST_SPECIAL; i < PGMPOOL_IDX_FIRST; i++)
3165 {
3166 PPGMPOOLPAGE pPage = &pPool->aPages[i];
3167 pPage->iNext = NIL_PGMPOOL_IDX;
3168#ifdef PGMPOOL_WITH_MONITORING
3169 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3170 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3171 pPage->cModifications = 0;
3172 /* ASSUMES that we're not sharing with any of the other special pages (safe for now). */
3173 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
3174 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
3175 if (pPage->fMonitored)
3176 {
3177 const PVM pVM = pPool->CTXSUFF(pVM);
3178 int rc = PGMHandlerPhysicalChangeCallbacks(pVM, pPage->GCPhys & ~(RTGCPHYS)(PAGE_SIZE - 1),
3179 pPool->pfnAccessHandlerR3, MMHyperCCToR3(pVM, pPage),
3180 pPool->pfnAccessHandlerR0, MMHyperCCToR0(pVM, pPage),
3181 pPool->pfnAccessHandlerGC, MMHyperCCToGC(pVM, pPage),
3182 pPool->pszAccessHandler);
3183 AssertFatalRCSuccess(rc);
3184# ifdef PGMPOOL_WITH_CACHE
3185 pgmPoolHashInsert(pPool, pPage);
3186# endif
3187 }
3188#endif
3189#ifdef PGMPOOL_WITH_USER_TRACKING
3190 Assert(pPage->iUserHead == NIL_PGMPOOL_USER_INDEX); /* for now */
3191#endif
3192#ifdef PGMPOOL_WITH_CACHE
3193 Assert(pPage->iAgeNext == NIL_PGMPOOL_IDX);
3194 Assert(pPage->iAgePrev == NIL_PGMPOOL_IDX);
3195#endif
3196 }
3197
3198 STAM_PROFILE_STOP(&pPool->StatFlushAllInt, a);
3199}
3200
3201
3202/**
3203 * Flushes a pool page.
3204 *
3205 * This moves the page to the free list after removing all user references to it.
3206 * In GC this will cause a CR3 reload if the page is traced back to an active root page.
3207 *
3208 * @returns VBox status code.
3209 * @retval VINF_SUCCESS on success.
3210 * @retval VERR_PGM_POOL_CLEARED if the deregistration of the physical handler will cause a light weight pool flush.
3211 * @param pPool The pool.
3212 * @param pPage The shadow page.
3213 */
3214int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3215{
3216 int rc = VINF_SUCCESS;
3217 STAM_PROFILE_START(&pPool->StatFlushPage, f);
3218 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%VHp, .idx=%d, .enmKind=%d, .GCPhys=%VGp}\n",
3219 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, pPage->GCPhys));
3220
3221 /*
3222 * Quietly reject any attempts at flushing any of the special root pages.
3223 */
3224 if (pPage->idx < PGMPOOL_IDX_FIRST)
3225 {
3226 Log(("pgmPoolFlushPage: special root page, rejected. enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
3227 return VINF_SUCCESS;
3228 }
3229
3230 /*
3231 * Mark the page as being in need of an ASMMemZeroPage().
3232 */
3233 pPage->fZeroed = false;
3234
3235#ifdef PGMPOOL_WITH_USER_TRACKING
3236 /*
3237 * Clear the page.
3238 */
3239 pgmPoolTrackClearPageUsers(pPool, pPage);
3240 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
3241 pgmPoolTrackDeref(pPool, pPage);
3242 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
3243#endif
3244
3245#ifdef PGMPOOL_WITH_CACHE
3246 /*
3247 * Flush it from the cache.
3248 */
3249 pgmPoolCacheFlushPage(pPool, pPage);
3250#endif /* PGMPOOL_WITH_CACHE */
3251
3252#ifdef PGMPOOL_WITH_MONITORING
3253 /*
3254 * Deregister the monitoring.
3255 */
3256 if (pPage->fMonitored)
3257 rc = pgmPoolMonitorFlush(pPool, pPage);
3258#endif
3259
3260 /*
3261 * Free the page.
3262 */
3263 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
3264 pPage->iNext = pPool->iFreeHead;
3265 pPool->iFreeHead = pPage->idx;
3266 pPage->enmKind = PGMPOOLKIND_FREE;
3267 pPage->GCPhys = NIL_RTGCPHYS;
3268 pPage->fReusedFlushPending = false;
3269
3270 pPool->cUsedPages--;
3271 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
3272 return rc;
3273}
3274
3275
3276/**
3277 * Frees a usage of a pool page.
3278 *
3279 * The caller is responsible for updating the user table so that it no longer
3280 * references the shadow page.
3281 *
3282 * @param pPool The pool.
3283 * @param pPage The shadow page.
3284 * @param iUser The shadow page pool index of the user table.
3285 * @param iUserTable The index into the user table (shadowed).
3286 */
3287void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint16_t iUserTable)
3288{
3289 STAM_PROFILE_START(&pPool->StatFree, a);
3290 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%VHp, .idx=%d, enmKind=%d} iUser=%#x iUserTable=%#x\n",
3291 pPage, pPage->Core.Key, pPage->idx, pPage->enmKind, iUser, iUserTable));
3292 Assert(pPage->idx >= PGMPOOL_IDX_FIRST);
3293#ifdef PGMPOOL_WITH_USER_TRACKING
3294 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
3295#endif
3296#ifdef PGMPOOL_WITH_CACHE
3297 if (!pPage->fCached)
3298#endif
3299 pgmPoolFlushPage(pPool, pPage); /* ASSUMES that VERR_PGM_POOL_CLEARED can be ignored here. */
3300 STAM_PROFILE_STOP(&pPool->StatFree, a);
3301}
3302
3303
3304/**
3305 * Makes one or more pages free.
3306 *
3307 * @returns VBox status code.
3308 * @retval VINF_SUCCESS on success.
3309 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3310 *
3311 * @param pPool The pool.
3312 * @param iUser The user of the page.
3313 */
3314static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, uint16_t iUser)
3315{
3316 LogFlow(("pgmPoolMakeMoreFreePages: iUser=%#x\n", iUser));
3317
3318 /*
3319 * If the pool isn't fully grown yet, expand it.
3320 */
3321 if (pPool->cCurPages < pPool->cMaxPages)
3322 {
3323 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
3324#ifdef IN_RING3
3325 int rc = PGMR3PoolGrow(pPool->pVMHC);
3326#else
3327 int rc = CTXALLMID(VMM, CallHost)(pPool->CTXSUFF(pVM), VMMCALLHOST_PGM_POOL_GROW, 0);
3328#endif
3329 if (VBOX_FAILURE(rc))
3330 return rc;
3331 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
3332 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
3333 return VINF_SUCCESS;
3334 }
3335
3336#ifdef PGMPOOL_WITH_CACHE
3337 /*
3338 * Free one cached page.
3339 */
3340 return pgmPoolCacheFreeOne(pPool, iUser);
3341#else
3342 /*
3343 * Flush the pool.
3344 * If we have tracking enabled, it should be possible to come up with
3345 * a cheap replacement strategy...
3346 */
3347 pgmPoolFlushAllInt(pPool);
3348 return VERR_PGM_POOL_FLUSHED;
3349#endif
3350}
3351
3352
3353/**
3354 * Allocates a page from the pool.
3355 *
3356 * This page may actually be a cached page and not in need of any processing
3357 * on the caller's part.
3358 *
3359 * @returns VBox status code.
3360 * @retval VINF_SUCCESS if a NEW page was allocated.
3361 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
3362 * @retval VERR_PGM_POOL_FLUSHED if the pool was flushed.
3363 * @param pVM The VM handle.
3364 * @param GCPhys The GC physical address of the page we're going to shadow.
3365 * For 4MB and 2MB PD entries, it's the first address the
3366 * shadow PT is covering.
3367 * @param enmKind The kind of mapping.
3368 * @param iUser The shadow page pool index of the user table.
3369 * @param iUserTable The index into the user table (shadowed).
3370 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
3371 */
3372int pgmPoolAlloc(PVM pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, uint16_t iUser, uint16_t iUserTable, PPPGMPOOLPAGE ppPage)
3373{
3374 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3375 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
3376 LogFlow(("pgmPoolAlloc: GCPhys=%VGp enmKind=%d iUser=%#x iUserTable=%#x\n", GCPhys, enmKind, iUser, iUserTable));
3377
3378 *ppPage = NULL;
3379
3380#ifdef PGMPOOL_WITH_CACHE
3381 if (pPool->fCacheEnabled)
3382 {
3383 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, iUser, iUserTable, ppPage);
3384 if (VBOX_SUCCESS(rc2))
3385 {
3386 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3387 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
3388 return rc2;
3389 }
3390 }
3391#endif
3392
3393 /*
3394 * Allocate a new one.
3395 */
3396 int rc = VINF_SUCCESS;
3397 uint16_t iNew = pPool->iFreeHead;
3398 if (iNew == NIL_PGMPOOL_IDX)
3399 {
3400 rc = pgmPoolMakeMoreFreePages(pPool, iUser);
3401 if (VBOX_FAILURE(rc))
3402 {
3403 if (rc != VERR_PGM_POOL_CLEARED)
3404 {
3405 Log(("pgmPoolAlloc: returns %Vrc (Free)\n", rc));
3406 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3407 return rc;
3408 }
3409 rc = VERR_PGM_POOL_FLUSHED;
3410 }
3411 iNew = pPool->iFreeHead;
3412 AssertReleaseReturn(iNew != NIL_PGMPOOL_IDX, VERR_INTERNAL_ERROR);
3413 }
3414
3415 /* unlink the free head */
3416 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
3417 pPool->iFreeHead = pPage->iNext;
3418 pPage->iNext = NIL_PGMPOOL_IDX;
3419
3420 /*
3421 * Initialize it.
3422 */
3423 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
3424 pPage->enmKind = enmKind;
3425 pPage->GCPhys = GCPhys;
3426 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
3427 pPage->fMonitored = false;
3428 pPage->fCached = false;
3429 pPage->fReusedFlushPending = false;
3430 pPage->fCR3Mix = false;
3431#ifdef PGMPOOL_WITH_MONITORING
3432 pPage->cModifications = 0;
3433 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
3434 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
3435#endif
3436#ifdef PGMPOOL_WITH_USER_TRACKING
3437 pPage->cPresent = 0;
3438 pPage->iFirstPresent = ~0;
3439
3440 /*
3441 * Insert into the tracking and cache. If this fails, free the page.
3442 */
3443 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
3444 if (VBOX_FAILURE(rc3))
3445 {
3446 if (rc3 != VERR_PGM_POOL_CLEARED)
3447 {
3448 pPool->cUsedPages--;
3449 pPage->enmKind = PGMPOOLKIND_FREE;
3450 pPage->GCPhys = NIL_RTGCPHYS;
3451 pPage->iNext = pPool->iFreeHead;
3452 pPool->iFreeHead = pPage->idx;
3453 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3454 Log(("pgmPoolAlloc: returns %Vrc (Insert)\n", rc3));
3455 return rc3;
3456 }
3457 rc = VERR_PGM_POOL_FLUSHED;
3458 }
3459#endif /* PGMPOOL_WITH_USER_TRACKING */
3460
3461 /*
3462 * Commit the allocation, clear the page and return.
3463 */
3464#ifdef VBOX_WITH_STATISTICS
3465 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
3466 pPool->cUsedPagesHigh = pPool->cUsedPages;
3467#endif
3468
3469 if (!pPage->fZeroed)
3470 {
3471 STAM_PROFILE_START(&pPool->StatZeroPage, z);
3472 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
3473 ASMMemZeroPage(pv);
3474 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
3475 }
3476
3477 *ppPage = pPage;
3478 LogFlow(("pgmPoolAlloc: returns %Vrc *ppPage=%p:{.Key=%VHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
3479 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
3480 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
3481 return rc;
3482}
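
/*
 * A typical call pattern as a hedged sketch (GCPhysPT, iShwPD and iPDE are
 * hypothetical caller-side names): a shadow paging sync path would request a
 * shadow PT for a guest PT roughly like this, treating VINF_PGM_CACHED_PAGE
 * as "contents already valid".
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhysPT, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, iShwPD, iPDE, &pShwPage);
    if (rc == VINF_SUCCESS)
    {
        /* New page: it is zeroed, the caller must fill in the shadow PTEs. */
    }
    else if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Cached page: already populated, just link it into the shadow PD. */
    }
    else
        AssertRC(rc); /* e.g. VERR_PGM_POOL_FLUSHED, meaning a CR3 resync is pending. */
#endif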
3483
3484
3485/**
3486 * Frees a usage of a pool page.
3487 *
3488 * @param pVM The VM handle.
3489 * @param HCPhys The HC physical address of the shadow page.
3490 * @param iUser The shadow page pool index of the user table.
3491 * @param iUserTable The index into the user table (shadowed).
3492 */
3493void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint16_t iUserTable)
3494{
3495 LogFlow(("pgmPoolFree: HCPhys=%VHp iUser=%#x iUserTable=%#x\n", HCPhys, iUser, iUserTable));
3496 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3497 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pVM, pPool, HCPhys), iUser, iUserTable);
3498}
3499
3500
3501/**
3502 * Gets an in-use page in the pool by its physical address.
3503 *
3504 * @returns Pointer to the page.
3505 * @param pVM The VM handle.
3506 * @param HCPhys The HC physical address of the shadow page.
3507 * @remark This function will NEVER return NULL. It will assert if HCPhys is invalid.
3508 */
3509PPGMPOOLPAGE pgmPoolGetPageByHCPhys(PVM pVM, RTHCPHYS HCPhys)
3510{
3511 /** @todo profile this! */
3512 PPGMPOOL pPool = pVM->pgm.s.CTXSUFF(pPool);
3513 PPGMPOOLPAGE pPage = pgmPoolGetPage(pVM, pPool, HCPhys);
3514 Log3(("pgmPoolGetPageByHCPhys: HCPhys=%VHp -> %p:{.idx=%d .GCPhys=%VGp .enmKind=%d}\n",
3515 HCPhys, pPage, pPage->idx, pPage->GCPhys, pPage->enmKind));
3516 return pPage;
3517}
3518
3519
3520/**
3521 * Flushes the entire cache.
3522 *
3523 * It will assert a global CR3 flush (FF) and assumes the caller is aware of this
3524 * and will execute the CR3 flush.
3525 *
3526 * @param pVM The VM handle.
3527 */
3528void pgmPoolFlushAll(PVM pVM)
3529{
3530 LogFlow(("pgmPoolFlushAll:\n"));
3531 pgmPoolFlushAllInt(pVM->pgm.s.CTXSUFF(pPool));
3532}
3533