source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp @ 96860

Last change on this file since 96860 was 96407, checked in by vboxsync, 21 months ago: scm copyright and license note update
/* $Id: PGMAllPool.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
/** @file
 * PGM Shadow Page Pool.
 */

/*
 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#define LOG_GROUP LOG_GROUP_PGM_POOL
#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
#include <VBox/vmm/pgm.h>
#include <VBox/vmm/mm.h>
#include <VBox/vmm/em.h>
#include <VBox/vmm/cpum.h>
#include "PGMInternal.h"
#include <VBox/vmm/vmcc.h>
#include "PGMInline.h"
#include <VBox/disopcode.h>
#include <VBox/vmm/hm_vmx.h>

#include <VBox/log.h>
#include <VBox/err.h>
#include <iprt/asm.h>
#include <iprt/string.h>


/*********************************************************************************************************************************
*   Internal Functions                                                                                                           *
*********************************************************************************************************************************/
RT_C_DECLS_BEGIN
#if 0 /* unused */
DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind);
DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind);
#endif /* unused */
static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
static int  pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable);
static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage);
#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
static const char *pgmPoolPoolKindToStr(uint8_t enmKind);
#endif
#if 0 /*defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT)*/
static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT);
#endif

int             pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage);
PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt);
void            pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt);
void            pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt);

RT_C_DECLS_END


#if 0 /* unused */
/**
 * Checks if the specified page pool kind is for a 4MB or 2MB guest page.
 *
 * @returns true if it's the shadow of a 4MB or 2MB guest page, otherwise false.
 * @param   enmKind     The page kind.
 */
DECLINLINE(bool) pgmPoolIsBigPage(PGMPOOLKIND enmKind)
{
    switch (enmKind)
    {
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
            return true;
        default:
            return false;
    }
}
#endif /* unused */


/**
 * Flushes a chain of pages sharing the same access monitor.
 *
 * @param   pPool       The pool.
 * @param   pPage       A page in the chain.
 */
void pgmPoolMonitorChainFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
{
    LogFlow(("pgmPoolMonitorChainFlush: Flush page %RGp type=%d\n", pPage->GCPhys, pPage->enmKind));

    /*
     * Find the list head.
     */
    uint16_t idx = pPage->idx;
    if (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
    {
        while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            idx = pPage->iMonitoredPrev;
            Assert(idx != pPage->idx);
            pPage = &pPool->aPages[idx];
        }
    }

    /*
     * Iterate the list flushing each shadow page.
     */
    for (;;)
    {
        idx = pPage->iMonitoredNext;
        Assert(idx != pPage->idx);
        if (pPage->idx >= PGMPOOL_IDX_FIRST)
        {
            int rc2 = pgmPoolFlushPage(pPool, pPage);
            AssertRC(rc2);
        }
        /* next */
        if (idx == NIL_PGMPOOL_IDX)
            break;
        pPage = &pPool->aPages[idx];
    }
}


/**
 * Wrapper for getting the current context pointer to the entry being modified.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The cross context VM structure.
 * @param   pvDst       Destination address.
 * @param   pvSrc       Pointer to the mapping of @a GCPhysSrc or NULL depending
 *                      on the context (e.g. \#PF in R0 & RC).
 * @param   GCPhysSrc   The source guest physical address.
 * @param   cb          Size of data to read.
 */
DECLINLINE(int) pgmPoolPhysSimpleReadGCPhys(PVMCC pVM, void *pvDst, void const *pvSrc, RTGCPHYS GCPhysSrc, size_t cb)
{
#if defined(IN_RING3)
    NOREF(pVM); NOREF(GCPhysSrc);
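    /* Note: the mask below aligns the source address down to a cb-sized
       boundary; cb is expected to be a power-of-two entry size here, so the
       read covers exactly the entry being modified. */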
    memcpy(pvDst, (RTHCPTR)((uintptr_t)pvSrc & ~(RTHCUINTPTR)(cb - 1)), cb);
    return VINF_SUCCESS;
#else
    /** @todo in RC we could attempt to use the virtual address, although this can cause many faults (PAE Windows XP guest). */
    NOREF(pvSrc);
    return PGMPhysSimpleReadGCPhys(pVM, pvDst, GCPhysSrc & ~(RTGCPHYS)(cb - 1), cb);
#endif
}


/**
 * Process shadow entries before they are changed by the guest.
 *
 * For PT entries we will clear them. For PD entries, we'll simply check
 * for mapping conflicts and set the SyncCR3 FF if found.
 *
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   pPool       The pool.
 * @param   pPage       The head page.
 * @param   GCPhysFault The guest physical fault address.
 * @param   pvAddress   Pointer to the mapping of @a GCPhysFault or NULL
 *                      depending on the context (e.g. \#PF in R0 & RC).
 * @param   cbWrite     Write size; might be zero if the caller knows we're not crossing entry boundaries.
 */
static void pgmPoolMonitorChainChanging(PVMCPU pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhysFault,
                                        void const *pvAddress, unsigned cbWrite)
{
    AssertMsg(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX, ("%u (idx=%u)\n", pPage->iMonitoredPrev, pPage->idx));
    const unsigned  off = GCPhysFault & GUEST_PAGE_OFFSET_MASK;
    PVMCC           pVM = pPool->CTX_SUFF(pVM);
    NOREF(pVCpu);

    LogFlow(("pgmPoolMonitorChainChanging: %RGv phys=%RGp cbWrite=%d\n",
             (RTGCPTR)(CTXTYPE(RTGCPTR, uintptr_t, RTGCPTR))(uintptr_t)pvAddress, GCPhysFault, cbWrite));

    for (;;)
    {
        union
        {
            void           *pv;
            PX86PT          pPT;
            PPGMSHWPTPAE    pPTPae;
            PX86PD          pPD;
            PX86PDPAE       pPDPae;
            PX86PDPT        pPDPT;
            PX86PML4        pPML4;
        } uShw;
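        /* One mapping, many views: the union lets the switch below interpret
           the shadow page according to pPage->enmKind. */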
        LogFlow(("pgmPoolMonitorChainChanging: page idx=%d phys=%RGp (next=%d) kind=%s write=%#x\n",
                 pPage->idx, pPage->GCPhys, pPage->iMonitoredNext, pgmPoolPoolKindToStr(pPage->enmKind), cbWrite));

        uShw.pv = NULL;
        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTE);
                LogFlow(("PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                X86PGUINT const uPde = uShw.pPT->a[iShw].u;
                if (uPde & X86_PTE_P)
                {
                    X86PTE GstPte;
                    int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);
                    Log4(("pgmPoolMonitorChainChanging 32_32: deref %016RX64 GCPhys %08RX32\n", uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage, uPde & X86_PTE_PG_MASK, GstPte.u & X86_PTE_PG_MASK, iShw);
                    ASMAtomicWriteU32(&uShw.pPT->a[iShw].u, 0);
                }
                break;
            }

            /* page/2 sized */
            case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                if (!((off ^ pPage->GCPhys) & (PAGE_SIZE / 2)))
                {
                    const unsigned iShw = (off / sizeof(X86PTE)) & (X86_PG_PAE_ENTRIES - 1);
                    LogFlow(("PGMPOOLKIND_PAE_PT_FOR_32BIT_PT iShw=%x\n", iShw));
                    if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
                    {
                        X86PTE GstPte;
                        int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                        AssertRC(rc);

                        Log4(("pgmPoolMonitorChainChanging pae_32: deref %016RX64 GCPhys %08RX32\n", uShw.pPT->a[iShw].u & X86_PTE_PAE_PG_MASK, GstPte.u & X86_PTE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
                                                   GstPte.u & X86_PTE_PG_MASK,
                                                   iShw);
                        PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
                    }
                }
                break;
            }

            case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
            case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
            {
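                /* A 32-bit guest PD (1024 entries) is shadowed by four PAE PDs
                   of 512 entries each: iShwPdpt picks the quarter, and each
                   4 MB guest PDE corresponds to two 2 MB PAE PDEs, hence the
                   factor of two in iShw. */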
                unsigned iGst     = off / sizeof(X86PDE);
                unsigned iShwPdpt = iGst / 256;
                unsigned iShw     = (iGst % 256) * 2;
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);

                LogFlow(("pgmPoolMonitorChainChanging PAE for 32 bits: iGst=%x iShw=%x idx = %d page idx=%d\n", iGst, iShw, iShwPdpt, pPage->enmKind - PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD));
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
                if (iShwPdpt == pPage->enmKind - (unsigned)PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD)
                {
                    for (unsigned i = 0; i < 2; i++)
                    {
                        X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw + i].u;
                        if (uPde & X86_PDE_P)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw + i, uPde));
                            pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw + i);
                            ASMAtomicWriteU64(&uShw.pPDPae->a[iShw + i].u, 0);
                        }

                        /* paranoia / a bit assumptive. */
                        if (   (off & 3)
                            && (off & 3) + cbWrite > 4)
                        {
                            const unsigned iShw2 = iShw + 2 + i;
                            if (iShw2 < RT_ELEMENTS(uShw.pPDPae->a))
                            {
                                X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
                                if (uPde2 & X86_PDE_P)
                                {
                                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
                                    pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
                                    ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
                                }
                            }
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTEPAE);
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPT));
                if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw]))
                {
                    X86PTEPAE GstPte;
                    int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvAddress, GCPhysFault, sizeof(GstPte));
                    AssertRC(rc);

                    Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]), GstPte.u & X86_PTE_PAE_PG_MASK));
                    pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                               PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw]),
                                               GstPte.u & X86_PTE_PAE_PG_MASK,
                                               iShw);
                    PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw], 0);
                }

                /* paranoia / a bit assumptive. */
                if (   (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PTEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPTPae->a));

                    if (PGMSHWPTEPAE_IS_P(uShw.pPTPae->a[iShw2]))
                    {
                        X86PTEPAE GstPte;
                        int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte,
                                                             pvAddress ? (uint8_t const *)pvAddress + sizeof(GstPte) : NULL,
                                                             GCPhysFault + sizeof(GstPte), sizeof(GstPte));
                        AssertRC(rc);
                        Log4(("pgmPoolMonitorChainChanging pae: deref %016RX64 GCPhys %016RX64\n", PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]), GstPte.u & X86_PTE_PAE_PG_MASK));
                        pgmPoolTracDerefGCPhysHint(pPool, pPage,
                                                   PGMSHWPTEPAE_GET_HCPHYS(uShw.pPTPae->a[iShw2]),
                                                   GstPte.u & X86_PTE_PAE_PG_MASK,
                                                   iShw2);
                        PGMSHWPTEPAE_ATOMIC_SET(uShw.pPTPae->a[iShw2], 0);
                    }
                }
                break;
            }

            case PGMPOOLKIND_32BIT_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PTE);   // ASSUMING 32-bit guest paging!

                LogFlow(("pgmPoolMonitorChainChanging: PGMPOOLKIND_32BIT_PD %x\n", iShw));
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
                X86PGUINT const uPde = uShw.pPD->a[iShw].u;
                if (uPde & X86_PDE_P)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
                    pgmPoolFree(pVM, uPde & X86_PDE_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
                }

                /* paranoia / a bit assumptive. */
                if (   (off & 3)
                    && (off & 3) + cbWrite > sizeof(X86PTE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PTE);
                    if (   iShw2 != iShw
                        && iShw2 < RT_ELEMENTS(uShw.pPD->a))
                    {
                        X86PGUINT const uPde2 = uShw.pPD->a[iShw2].u;
                        if (uPde2 & X86_PDE_P)
                        {
                            LogFlow(("pgmPoolMonitorChainChanging: 32 bit pd iShw=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
                            pgmPoolFree(pVM, uPde2 & X86_PDE_PG_MASK, pPage->idx, iShw2);
                            ASMAtomicWriteU32(&uShw.pPD->a[iShw2].u, 0);
                        }
                    }
                }
#if 0 /* useful when running PGMAssertCR3(), a bit too troublesome for general use (TLBs). - not working any longer... */
                if (    uShw.pPD->a[iShw].n.u1Present
                    &&  !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3))
                {
                    LogFlow(("pgmPoolMonitorChainChanging: iShw=%#x: %RX32 -> freeing it!\n", iShw, uShw.pPD->a[iShw].u));
                    pgmPoolFree(pVM, uShw.pPD->a[iShw].u & X86_PDE_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU32(&uShw.pPD->a[iShw].u, 0);
                }
#endif
                break;
            }

            case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
            {
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));

                /*
                 * Causes trouble when the guest uses a PDE to refer to the whole page table level
                 * structure. (Invalidate here; faults later on when it tries to change the page
                 * table entries -> recheck; probably only applies to the RC case.)
                 */
                X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
                if (uPde & X86_PDE_P)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
                    pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
                }

                /* paranoia / a bit assumptive. */
                if (   (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));

                    X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
                    if (uPde2 & X86_PDE_P)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
                        pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
                        ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
                    }
                }
                break;
            }

            case PGMPOOLKIND_PAE_PDPT:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
                /*
                 * Hopefully this doesn't happen very often:
                 * - touching unused parts of the page
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                /* PDPT roots are not page aligned; 32 byte only! */
                const unsigned offPdpt = GCPhysFault - pPage->GCPhys;

                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = offPdpt / sizeof(X86PDPE);
                if (iShw < X86_PG_PAE_PDPE_ENTRIES)          /* don't use RT_ELEMENTS(uShw.pPDPT->a), because that's for long mode only */
                {
                    X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
                    if (uPdpe & X86_PDPE_P)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uShw.pPDPT->a[iShw].u));
                        pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
                        ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
                    }

                    /* paranoia / a bit assumptive. */
                    if (   (offPdpt & 7)
                        && (offPdpt & 7) + cbWrite > sizeof(X86PDPE))
                    {
                        const unsigned iShw2 = (offPdpt + cbWrite - 1) / sizeof(X86PDPE);
                        if (   iShw2 != iShw
                            && iShw2 < X86_PG_PAE_PDPE_ENTRIES)
                        {
                            X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
                            if (uPdpe2 & X86_PDPE_P)
                            {
                                LogFlow(("pgmPoolMonitorChainChanging: pae pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw2, uShw.pPDPT->a[iShw2].u));
                                pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                                ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
                            }
                        }
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPD));
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDEPAE);
                X86PGPAEUINT const uPde = uShw.pPDPae->a[iShw].u;
                if (uPde & X86_PDE_P)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw=%#x: %RX64 -> freeing it!\n", iShw, uPde));
                    pgmPoolFree(pVM, uPde & X86_PDE_PAE_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU64(&uShw.pPDPae->a[iShw].u, 0);
                }

                /* paranoia / a bit assumptive. */
                if (   (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDEPAE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDEPAE);
                    AssertBreak(iShw2 < RT_ELEMENTS(uShw.pPDPae->a));
                    X86PGPAEUINT const uPde2 = uShw.pPDPae->a[iShw2].u;
                    if (uPde2 & X86_PDE_P)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pae pd iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPde2));
                        pgmPoolFree(pVM, uPde2 & X86_PDE_PAE_PG_MASK, pPage->idx, iShw2);
                        ASMAtomicWriteU64(&uShw.pPDPae->a[iShw2].u, 0);
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPDPT));
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDPE);
                X86PGPAEUINT const uPdpe = uShw.pPDPT->a[iShw].u;
                if (uPdpe & X86_PDPE_P)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw=%#x: %RX64 -> freeing it!\n", iShw, uPdpe));
                    pgmPoolFree(pVM, uPdpe & X86_PDPE_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU64(&uShw.pPDPT->a[iShw].u, 0);
                }
                /* paranoia / a bit assumptive. */
                if (   (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDPE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PDPE);
                    X86PGPAEUINT const uPdpe2 = uShw.pPDPT->a[iShw2].u;
                    if (uPdpe2 & X86_PDPE_P)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pdpt iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPdpe2));
                        pgmPoolFree(pVM, uPdpe2 & X86_PDPE_PG_MASK, pPage->idx, iShw2);
                        ASMAtomicWriteU64(&uShw.pPDPT->a[iShw2].u, 0);
                    }
                }
                break;
            }

            case PGMPOOLKIND_64BIT_PML4:
            {
                STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FaultPML4));
                /*
                 * Hopefully this doesn't happen very often:
                 * - messing with the bits of pd pointers without changing the physical address
                 */
                uShw.pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
                const unsigned iShw = off / sizeof(X86PDPE);
                X86PGPAEUINT const uPml4e = uShw.pPML4->a[iShw].u;
                if (uPml4e & X86_PML4E_P)
                {
                    LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw=%#x: %RX64 -> freeing it!\n", iShw, uPml4e));
                    pgmPoolFree(pVM, uPml4e & X86_PML4E_PG_MASK, pPage->idx, iShw);
                    ASMAtomicWriteU64(&uShw.pPML4->a[iShw].u, 0);
                }
                /* paranoia / a bit assumptive. */
                if (   (off & 7)
                    && (off & 7) + cbWrite > sizeof(X86PDPE))
                {
                    const unsigned iShw2 = (off + cbWrite - 1) / sizeof(X86PML4E);
                    X86PGPAEUINT const uPml4e2 = uShw.pPML4->a[iShw2].u;
                    if (uPml4e2 & X86_PML4E_P)
                    {
                        LogFlow(("pgmPoolMonitorChainChanging: pml4 iShw2=%#x: %RX64 -> freeing it!\n", iShw2, uPml4e2));
                        pgmPoolFree(pVM, uPml4e2 & X86_PML4E_PG_MASK, pPage->idx, iShw2);
                        ASMAtomicWriteU64(&uShw.pPML4->a[iShw2].u, 0);
                    }
                }
                break;
            }

            default:
                AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
        }
        PGM_DYNMAP_UNUSED_HINT_VM(pVM, uShw.pv);

        /* next */
        if (pPage->iMonitoredNext == NIL_PGMPOOL_IDX)
            return;
        pPage = &pPool->aPages[pPage->iMonitoredNext];
    }
}

#ifndef IN_RING3

/**
 * Checks if an access could be a fork operation in progress.
 *
 * Meaning that the guest is setting up the parent process for Copy-On-Write.
 *
 * @returns true if it's likely that we're forking, otherwise false.
 * @param   pPool       The pool.
 * @param   pDis        The disassembled instruction.
 * @param   offFault    The access offset.
 */
DECLINLINE(bool) pgmRZPoolMonitorIsForking(PPGMPOOL pPool, PDISCPUSTATE pDis, unsigned offFault)
{
    /*
     * i386 linux is using btr to clear X86_PTE_RW.
     * The functions involved are (2.6.16 source inspection):
     *     clear_bit
     *     ptep_set_wrprotect
     *     copy_one_pte
     *     copy_pte_range
     *     copy_pmd_range
     *     copy_pud_range
     *     copy_page_range
     *     dup_mmap
     *     dup_mm
     *     copy_mm
     *     copy_process
     *     do_fork
     */
    if (    pDis->pCurInstr->uOpcode == OP_BTR
        &&  !(offFault & 4)
        /** @todo Validate that the bit index is X86_PTE_RW. */
            )
    {
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,Fork)); RT_NOREF_PV(pPool);
        return true;
    }
    return false;
}

/**
 * Determine whether the page is likely to have been reused.
 *
 * @returns true if we consider the page as being reused for a different purpose.
 * @returns false if we consider it to still be a paging page.
 * @param   pVM         The cross context VM structure.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   pRegFrame   Trap register frame.
 * @param   pDis        The disassembly info for the faulting instruction.
 * @param   pvFault     The fault address.
 * @param   pPage       The pool page being accessed.
 *
 * @remark  The REP prefix check is left to the caller because of STOSD/W.
 */
DECLINLINE(bool) pgmRZPoolMonitorIsReused(PVMCC pVM, PVMCPUCC pVCpu, PCPUMCTXCORE pRegFrame, PDISCPUSTATE pDis, RTGCPTR pvFault,
                                          PPGMPOOLPAGE pPage)
{
    /* Locked (CR3, PDPTR*4) should not be reusable. Considering them as
       such may cause loops booting tst-ubuntu-15_10-64-efi, ++. */
    if (pPage->cLocked)
    {
        Log2(("pgmRZPoolMonitorIsReused: %RGv (%p) can't have been reused, because it's locked!\n", pvFault, pPage));
        return false;
    }

    /** @todo could make this general, faulting close to rsp should be a safe reuse heuristic. */
    if (   HMHasPendingIrq(pVM)
        && pRegFrame->rsp - pvFault < 32)
    {
        /* Fault caused by stack writes while trying to inject an interrupt event. */
        Log(("pgmRZPoolMonitorIsReused: reused %RGv for interrupt stack (rsp=%RGv).\n", pvFault, pRegFrame->rsp));
        return true;
    }

    LogFlow(("Reused instr %RGv %d at %RGv param1.fUse=%llx param1.reg=%d\n", pRegFrame->rip, pDis->pCurInstr->uOpcode, pvFault, pDis->Param1.fUse, pDis->Param1.Base.idxGenReg));

    /* Non-supervisor mode write means it's used for something else. */
    if (CPUMGetGuestCPL(pVCpu) == 3)
        return true;

    switch (pDis->pCurInstr->uOpcode)
    {
        /* call implies the actual push of the return address faulted */
        case OP_CALL:
            Log4(("pgmRZPoolMonitorIsReused: CALL\n"));
            return true;
        case OP_PUSH:
            Log4(("pgmRZPoolMonitorIsReused: PUSH\n"));
            return true;
        case OP_PUSHF:
            Log4(("pgmRZPoolMonitorIsReused: PUSHF\n"));
            return true;
        case OP_PUSHA:
            Log4(("pgmRZPoolMonitorIsReused: PUSHA\n"));
            return true;
        case OP_FXSAVE:
            Log4(("pgmRZPoolMonitorIsReused: FXSAVE\n"));
            return true;
        case OP_MOVNTI:     /* solaris - block_zero_no_xmm */
            Log4(("pgmRZPoolMonitorIsReused: MOVNTI\n"));
            return true;
        case OP_MOVNTDQ:    /* solaris - hwblkclr & hwblkpagecopy */
            Log4(("pgmRZPoolMonitorIsReused: MOVNTDQ\n"));
            return true;
        case OP_MOVSWD:
        case OP_STOSWD:
            if (    pDis->fPrefix == (DISPREFIX_REP|DISPREFIX_REX)
                &&  pRegFrame->rcx >= 0x40
               )
            {
                Assert(pDis->uCpuMode == DISCPUMODE_64BIT);

                Log(("pgmRZPoolMonitorIsReused: OP_STOSQ\n"));
                return true;
            }
            break;

        default:
            /*
             * Anything having ESP on the left side means stack writes.
             */
            if (    (   (pDis->Param1.fUse & DISUSE_REG_GEN32)
                     || (pDis->Param1.fUse & DISUSE_REG_GEN64))
                &&  (pDis->Param1.Base.idxGenReg == DISGREG_ESP))
            {
                Log4(("pgmRZPoolMonitorIsReused: ESP\n"));
                return true;
            }
            break;
    }

    /*
     * Page table updates are very unlikely to be crossing page boundaries,
     * and we don't want to deal with that in pgmPoolMonitorChainChanging and such.
     */
    uint32_t const cbWrite = DISGetParamSize(pDis, &pDis->Param1);
    if ( (((uintptr_t)pvFault + cbWrite) >> X86_PAGE_SHIFT) != ((uintptr_t)pvFault >> X86_PAGE_SHIFT) )
    {
        Log4(("pgmRZPoolMonitorIsReused: cross page write\n"));
        return true;
    }

    /*
     * Nobody does an unaligned 8 byte write to a page table, right?
     */
    if (cbWrite >= 8 && ((uintptr_t)pvFault & 7) != 0)
    {
        Log4(("pgmRZPoolMonitorIsReused: Unaligned 8+ byte write\n"));
        return true;
    }

    return false;
}

/**
 * Flushes the page being accessed.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The cross context VM structure.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pDis        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 * @todo    VBOXSTRICTRC
 */
static int pgmRZPoolAccessPfHandlerFlush(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
                                         PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    NOREF(pVM); NOREF(GCPhysFault);

    /*
     * First, do the flushing.
     */
    pgmPoolMonitorChainFlush(pPool, pPage);

    /*
     * Emulate the instruction (xp/w2k problem, requires pc/cr2/sp detection).
     * Must do this in raw mode (!); XP boot will fail otherwise.
     */
    int rc = VINF_SUCCESS;
    VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
    if (rc2 == VINF_SUCCESS)
    { /* do nothing */ }
    else if (rc2 == VINF_EM_RESCHEDULE)
    {
        rc = VBOXSTRICTRC_VAL(rc2);
# ifndef IN_RING3
        VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
# endif
    }
    else if (rc2 == VERR_EM_INTERPRETER)
    {
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
    }
    else if (RT_FAILURE_NP(rc2))
        rc = VBOXSTRICTRC_VAL(rc2);
    else
        AssertMsgFailed(("%Rrc\n", VBOXSTRICTRC_VAL(rc2))); /* ASSUMES no complicated stuff here. */

    LogFlow(("pgmRZPoolAccessPfHandlerFlush: returns %Rrc (flushed)\n", rc));
    return rc;
}

/**
 * Handles the STOSD write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The cross context VM structure.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pDis        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 */
DECLINLINE(int) pgmRZPoolAccessPfHandlerSTOSD(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
                                              PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault)
{
    unsigned uIncrement = pDis->Param1.cb;
    NOREF(pVM);

    Assert(pDis->uCpuMode == DISCPUMODE_32BIT || pDis->uCpuMode == DISCPUMODE_64BIT);
    Assert(pRegFrame->rcx <= 0x20);

# ifdef VBOX_STRICT
    if (pDis->uOpMode == DISCPUMODE_32BIT)
        Assert(uIncrement == 4);
    else
        Assert(uIncrement == 8);
# endif

    Log3(("pgmRZPoolAccessPfHandlerSTOSD\n"));

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Execute REP STOSD.
     *
     * This ASSUMES that we're not invoked by Trap0e in an out-of-sync
     * write situation, meaning that it's safe to write here.
     */
    PVMCPUCC    pVCpu = VMMGetCpu(pPool->CTX_SUFF(pVM));
    RTGCUINTPTR pu32  = (RTGCUINTPTR)pvFault;
    while (pRegFrame->rcx)
    {
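        /* Invalidate the shadow entry for this element first, then replay the
           guest's store, so the monitor processes one entry-sized write at a time. */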
        pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, uIncrement);
        PGMPhysSimpleWriteGCPhys(pVM, GCPhysFault, &pRegFrame->rax, uIncrement);
        pu32           += uIncrement;
        GCPhysFault    += uIncrement;
        pRegFrame->rdi += uIncrement;
        pRegFrame->rcx--;
    }
    pRegFrame->rip += pDis->cbInstr;

    LogFlow(("pgmRZPoolAccessPfHandlerSTOSD: returns\n"));
    return VINF_SUCCESS;
}


/**
 * Handles the simple write accesses.
 *
 * @returns VBox status code suitable for scheduling.
 * @param   pVM         The cross context VM structure.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   pPool       The pool.
 * @param   pPage       The pool page (head).
 * @param   pDis        The disassembly of the write instruction.
 * @param   pRegFrame   The trap register frame.
 * @param   GCPhysFault The fault address as guest physical address.
 * @param   pvFault     The fault address.
 * @param   pfReused    Reused state (in/out)
 */
DECLINLINE(int) pgmRZPoolAccessPfHandlerSimple(PVMCC pVM, PVMCPUCC pVCpu, PPGMPOOL pPool, PPGMPOOLPAGE pPage, PDISCPUSTATE pDis,
                                               PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, RTGCPTR pvFault, bool *pfReused)
{
    Log3(("pgmRZPoolAccessPfHandlerSimple\n"));
    NOREF(pVM);
    NOREF(pfReused); /* initialized by caller */

    /*
     * Increment the modification counter and insert it into the list
     * of modified pages the first time.
     */
    if (!pPage->cModifications++)
        pgmPoolMonitorModifiedInsert(pPool, pPage);

    /*
     * Clear all the pages. ASSUMES that pvFault is readable.
     */
    uint32_t cbWrite = DISGetParamSize(pDis, &pDis->Param1);
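    /* pgmPoolMonitorChainChanging is fed at most 8 bytes per call, so larger
       writes are sliced into 8-byte pieces here. */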
    if (cbWrite <= 8)
        pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, cbWrite);
    else if (cbWrite <= 16)
    {
        pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault, NULL, 8);
        pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + 8, NULL, cbWrite - 8);
    }
    else
    {
        Assert(cbWrite <= 32);
        for (uint32_t off = 0; off < cbWrite; off += 8)
            pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhysFault + off, NULL, RT_MIN(8, cbWrite - off));
    }

    /*
     * Interpret the instruction.
     */
    VBOXSTRICTRC rc = EMInterpretInstructionDisasState(pVCpu, pDis, pRegFrame, pvFault, EMCODETYPE_ALL);
    if (RT_SUCCESS(rc))
        AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rc))); /* ASSUMES no complicated stuff here. */
    else if (rc == VERR_EM_INTERPRETER)
    {
        LogFlow(("pgmRZPoolAccessPfHandlerSimple: Interpretation failed for %04x:%RGv - opcode=%d\n",
                 pRegFrame->cs.Sel, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode));
        rc = VINF_EM_RAW_EMULATE_INSTR;
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitorPf,EmulateInstr));
    }

# if 0 /* experimental code */
    if (rc == VINF_SUCCESS)
    {
        switch (pPage->enmKind)
        {
            case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
            {
                X86PTEPAE GstPte;
                int rc = pgmPoolPhysSimpleReadGCPhys(pVM, &GstPte, pvFault, GCPhysFault, sizeof(GstPte));
                AssertRC(rc);

                /* Check the new value written by the guest. If present and with a bogus physical address, then
                 * it's fairly safe to assume the guest is reusing the PT.
                 */
                if (GstPte.n.u1Present)
                {
                    RTHCPHYS HCPhys = -1;
                    int rc = PGMPhysGCPhys2HCPhys(pVM, GstPte.u & X86_PTE_PAE_PG_MASK, &HCPhys);
                    if (rc != VINF_SUCCESS)
                    {
                        *pfReused = true;
                        STAM_COUNTER_INC(&pPool->StatForceFlushReused);
                    }
                }
                break;
            }
        }
    }
# endif

    LogFlow(("pgmRZPoolAccessPfHandlerSimple: returns %Rrc\n", VBOXSTRICTRC_VAL(rc)));
    return VBOXSTRICTRC_VAL(rc);
}

/**
 * @callback_method_impl{FNPGMRZPHYSPFHANDLER,
 *      \#PF access handler callback for page table pages.}
 *
 * @remarks The @a uUser argument is the index of the PGMPOOLPAGE.
 */
DECLCALLBACK(VBOXSTRICTRC) pgmRZPoolAccessPfHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCUINT uErrorCode, PCPUMCTXCORE pRegFrame,
                                                    RTGCPTR pvFault, RTGCPHYS GCPhysFault, uint64_t uUser)
{
    STAM_PROFILE_START(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorRZ, a);
    PPGMPOOL const      pPool = pVM->pgm.s.CTX_SUFF(pPool);
    AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
    PPGMPOOLPAGE const  pPage = &pPool->aPages[uUser];
    unsigned            cMaxModifications;
    bool                fForcedFlush = false;
    RT_NOREF_PV(uErrorCode);

    LogFlow(("pgmRZPoolAccessPfHandler: pvFault=%RGv pPage=%p:{.idx=%d} GCPhysFault=%RGp\n", pvFault, pPage, pPage->idx, GCPhysFault));

    PGM_LOCK_VOID(pVM);
    if (PHYS_PAGE_ADDRESS(GCPhysFault) != PHYS_PAGE_ADDRESS(pPage->GCPhys))
    {
        /* Pool page changed while we were waiting for the lock; ignore. */
        Log(("CPU%d: pgmRZPoolAccessPfHandler pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhysFault), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
        STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
        PGM_UNLOCK(pVM);
        return VINF_SUCCESS;
    }
# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
    if (pPage->fDirty)
    {
        Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH));
        PGM_UNLOCK(pVM);
        return VINF_SUCCESS;    /* SMP guest case where we were blocking on the pgm lock while the same page was being marked dirty. */
    }
# endif

# if 0 /* test code defined(VBOX_STRICT) && defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) */
    if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
    {
        void *pvShw = PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pPage);
        void *pvGst;
        int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
        pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
    }
# endif

    /*
     * Disassemble the faulting instruction.
     */
    PDISCPUSTATE pDis = &pVCpu->pgm.s.DisState;
    int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
    if (RT_UNLIKELY(rc != VINF_SUCCESS))
    {
        AssertMsg(rc == VERR_PAGE_NOT_PRESENT || rc == VERR_PAGE_TABLE_NOT_PRESENT, ("Unexpected rc %d\n", rc));
        PGM_UNLOCK(pVM);
        return rc;
    }

    Assert(pPage->enmKind != PGMPOOLKIND_FREE);

    /*
     * We should ALWAYS have the list head as user parameter. This
     * is because we use that page to record the changes.
     */
    Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);

# ifdef IN_RING0
    /* Maximum nr of modifications depends on the page type. */
    if (    pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
        ||  pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
        cMaxModifications = 4;
    else
        cMaxModifications = 24;
# else
    cMaxModifications = 48;
# endif

    /*
     * Incremental page table updates should weigh more than random ones.
     * (Only applies when started from offset 0)
     */
    pVCpu->pgm.s.cPoolAccessHandler++;
    if (    pPage->GCPtrLastAccessHandlerRip >= pRegFrame->rip - 0x40      /* observed loops in Windows 7 x64 */
        &&  pPage->GCPtrLastAccessHandlerRip <  pRegFrame->rip + 0x40
        &&  pvFault == (pPage->GCPtrLastAccessHandlerFault + pDis->Param1.cb)
        &&  pVCpu->pgm.s.cPoolAccessHandler == pPage->cLastAccessHandler + 1)
    {
        Log(("Possible page reuse cMods=%d -> %d (locked=%d type=%s)\n", pPage->cModifications, pPage->cModifications * 2, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));
        Assert(pPage->cModifications < 32000);
        pPage->cModifications               = pPage->cModifications * 2;
        pPage->GCPtrLastAccessHandlerFault  = pvFault;
        pPage->cLastAccessHandler           = pVCpu->pgm.s.cPoolAccessHandler;
        if (pPage->cModifications >= cMaxModifications)
        {
            STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushReinit);
            fForcedFlush = true;
        }
    }

    if (pPage->cModifications >= cMaxModifications)
        Log(("Mod overflow %RGv cMods=%d (locked=%d type=%s)\n", pvFault, pPage->cModifications, pgmPoolIsPageLocked(pPage), pgmPoolPoolKindToStr(pPage->enmKind)));

    /*
     * Check if it's worth dealing with.
     */
    bool fReused = false;
    bool fNotReusedNotForking = false;
    if (    (   pPage->cModifications < cMaxModifications   /** @todo \#define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
             || pgmPoolIsPageLocked(pPage)
            )
        &&  !(fReused = pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage))
        &&  !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
    {
        /*
         * Simple instructions, no REP prefix.
         */
        if (!(pDis->fPrefix & (DISPREFIX_REP | DISPREFIX_REPNE)))
        {
            rc = pgmRZPoolAccessPfHandlerSimple(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault, &fReused);
            if (fReused)
                goto flushPage;

            /* A mov instruction to change the first page table entry will be remembered so we can detect
             * full page table changes early on. This will reduce the number of unnecessary traps we'll take.
             */
            if (   rc == VINF_SUCCESS
                && !pPage->cLocked                      /* only applies to unlocked pages as we can't free locked ones (e.g. cr3 root). */
                && pDis->pCurInstr->uOpcode == OP_MOV
                && (pvFault & PAGE_OFFSET_MASK) == 0)
            {
                pPage->GCPtrLastAccessHandlerFault = pvFault;
                pPage->cLastAccessHandler          = pVCpu->pgm.s.cPoolAccessHandler;
                pPage->GCPtrLastAccessHandlerRip   = pRegFrame->rip;
                /* Make sure we don't kick out a page too quickly. */
                if (pPage->cModifications > 8)
                    pPage->cModifications = 2;
            }
            else if (pPage->GCPtrLastAccessHandlerFault == pvFault)
            {
                /* ignore the 2nd write to this page table entry. */
                pPage->cLastAccessHandler = pVCpu->pgm.s.cPoolAccessHandler;
            }
            else
            {
                pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
                pPage->GCPtrLastAccessHandlerRip   = 0;
            }

            STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZHandled, a);
            PGM_UNLOCK(pVM);
            return rc;
        }

        /*
         * Windows is frequently doing small memset() operations (netio test 4k+).
         * We have to deal with these or we'll kill the cache and performance.
         */
        if (    pDis->pCurInstr->uOpcode == OP_STOSWD
            &&  !pRegFrame->eflags.Bits.u1DF
            &&  pDis->uOpMode == pDis->uCpuMode
            &&  pDis->uAddrMode == pDis->uCpuMode)
        {
            bool fValidStosd = false;

            if (    pDis->uCpuMode == DISCPUMODE_32BIT
                &&  pDis->fPrefix == DISPREFIX_REP
                &&  pRegFrame->ecx <= 0x20
                &&  pRegFrame->ecx * 4 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
                &&  !((uintptr_t)pvFault & 3)
                &&  (pRegFrame->eax == 0 || pRegFrame->eax == 0x80) /* the two values observed. */
                )
            {
                fValidStosd = true;
                pRegFrame->rcx &= 0xffffffff;   /* paranoia */
            }
            else
            if (    pDis->uCpuMode == DISCPUMODE_64BIT
                &&  pDis->fPrefix == (DISPREFIX_REP | DISPREFIX_REX)
                &&  pRegFrame->rcx <= 0x20
                &&  pRegFrame->rcx * 8 <= GUEST_PAGE_SIZE - ((uintptr_t)pvFault & GUEST_PAGE_OFFSET_MASK)
                &&  !((uintptr_t)pvFault & 7)
                &&  (pRegFrame->rax == 0 || pRegFrame->rax == 0x80) /* the two values observed. */
                )
            {
                fValidStosd = true;
            }

            if (fValidStosd)
            {
                rc = pgmRZPoolAccessPfHandlerSTOSD(pVM, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
                STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZRepStosd, a);
                PGM_UNLOCK(pVM);
                return rc;
            }
        }

        /* REP prefix, don't bother. */
        STAM_COUNTER_INC(&pPool->StatMonitorPfRZRepPrefix);
        Log4(("pgmRZPoolAccessPfHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
              pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->uOpcode, pDis->fPrefix));
        fNotReusedNotForking = true;
    }

# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) && defined(IN_RING0)
    /* E.g. Windows 7 x64 initializes page tables and touches some pages in the table during the process. This
     * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
     */
    if (    pPage->cModifications >= cMaxModifications
        &&  !fForcedFlush
        &&  (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT || pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
        &&  (   fNotReusedNotForking
             || (   !pgmRZPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault, pPage)
                 && !pgmRZPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
            )
       )
    {
        Assert(!pgmPoolIsPageLocked(pPage));
        Assert(pPage->fDirty == false);

        /* Flush any monitored duplicates as we will disable write protection. */
        if (    pPage->iMonitoredNext != NIL_PGMPOOL_IDX
            ||  pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
        {
            PPGMPOOLPAGE pPageHead = pPage;

            /* Find the monitor head. */
            while (pPageHead->iMonitoredPrev != NIL_PGMPOOL_IDX)
                pPageHead = &pPool->aPages[pPageHead->iMonitoredPrev];

            while (pPageHead)
            {
                unsigned idxNext = pPageHead->iMonitoredNext;

                if (pPageHead != pPage)
                {
                    STAM_COUNTER_INC(&pPool->StatDirtyPageDupFlush);
                    Log(("Flush duplicate page idx=%d GCPhys=%RGp type=%s\n", pPageHead->idx, pPageHead->GCPhys, pgmPoolPoolKindToStr(pPageHead->enmKind)));
                    int rc2 = pgmPoolFlushPage(pPool, pPageHead);
                    AssertRC(rc2);
                }

                if (idxNext == NIL_PGMPOOL_IDX)
                    break;

                pPageHead = &pPool->aPages[idxNext];
            }
        }

        /* The flushing above might fail for locked pages, so double check. */
        if (    pPage->iMonitoredNext == NIL_PGMPOOL_IDX
            &&  pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
        {
            pgmPoolAddDirtyPage(pVM, pPool, pPage);

            /* Temporarily allow write access to the page table again. */
            rc = PGMHandlerPhysicalPageTempOff(pVM,
                                               pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK,
                                               pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
            if (rc == VINF_SUCCESS)
            {
                rc = PGMShwMakePageWritable(pVCpu, pvFault, PGM_MK_PG_IS_WRITE_FAULT);
                AssertMsg(rc == VINF_SUCCESS
                          /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
                          ||    rc == VERR_PAGE_TABLE_NOT_PRESENT
                          ||    rc == VERR_PAGE_NOT_PRESENT,
                          ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
# ifdef VBOX_STRICT
                pPage->GCPtrDirtyFault = pvFault;
# endif

                STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, a);
                PGM_UNLOCK(pVM);
                return rc;
            }
        }
    }
# endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT && IN_RING0 */

    STAM_COUNTER_INC(&pPool->StatMonitorPfRZFlushModOverflow);
flushPage:
    /*
     * Not worth it, so flush it.
     *
     * If we considered it to be reused, don't go back to ring-3
     * to emulate failed instructions since we usually cannot
     * interpret them. This may be a bit risky, in which case
     * the reuse detection must be fixed.
     */
    rc = pgmRZPoolAccessPfHandlerFlush(pVM, pVCpu, pPool, pPage, pDis, pRegFrame, GCPhysFault, pvFault);
    if (    rc == VINF_EM_RAW_EMULATE_INSTR
        &&  fReused)
    {
        /* Make sure that the current instruction still has shadow page backing, otherwise we'll end up in a loop. */
        if (PGMShwGetPage(pVCpu, pRegFrame->rip, NULL, NULL) == VINF_SUCCESS)
            rc = VINF_SUCCESS;  /* safe to restart the instruction. */
    }
    STAM_PROFILE_STOP_EX(&pVM->pgm.s.CTX_SUFF(pPool)->StatMonitorPfRZ, &pPool->StatMonitorPfRZFlushPage, a);
    PGM_UNLOCK(pVM);
    return rc;
}

#endif /* !IN_RING3 */

/**
 * @callback_method_impl{FNPGMPHYSHANDLER,
 *      Access handler for shadowed page table pages.}
 *
 * @remarks Only uses the VINF_PGM_HANDLER_DO_DEFAULT status.
 * @note    The @a uUser argument is the index of the PGMPOOLPAGE.
 */
DECLCALLBACK(VBOXSTRICTRC)
pgmPoolAccessHandler(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhys, void *pvPhys, void *pvBuf, size_t cbBuf,
                     PGMACCESSTYPE enmAccessType, PGMACCESSORIGIN enmOrigin, uint64_t uUser)
{
    PPGMPOOL const      pPool = pVM->pgm.s.CTX_SUFF(pPool);
    STAM_PROFILE_START(&pPool->CTX_SUFF_Z(StatMonitor), a);
    AssertReturn(uUser < pPool->cCurPages, VERR_PGM_POOL_IPE);
    PPGMPOOLPAGE const  pPage = &pPool->aPages[uUser];
    LogFlow(("PGM_ALL_CB_DECL: GCPhys=%RGp %p:{.Core=%RHp, .idx=%d, .GCPhys=%RGp, .enmType=%d}\n",
             GCPhys, pPage, pPage->Core.Key, pPage->idx, pPage->GCPhys, pPage->enmKind));

    NOREF(pvPhys); NOREF(pvBuf); NOREF(enmAccessType);

    PGM_LOCK_VOID(pVM);

#ifdef VBOX_WITH_STATISTICS
    /*
     * Collect stats on the access.
     */
    AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Sizes)) == 19);
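    /* Buckets 0..15 count the exact access sizes 1..16 bytes; buckets 16, 17
       and 18 collect the 17-31, 32-63 and 64+ byte ranges respectively. */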
    if (cbBuf <= 16 && cbBuf > 0)
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[cbBuf - 1]);
    else if (cbBuf >= 17 && cbBuf < 32)
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[16]);
    else if (cbBuf >= 32 && cbBuf < 64)
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[17]);
    else if (cbBuf >= 64)
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Sizes)[18]);

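    /* cbAlign below is really an alignment mask: 7 for the 8-byte (PAE/long
       mode) entry kinds, 3 for the 4-byte 32-bit ones. */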
    uint8_t cbAlign;
    switch (pPage->enmKind)
    {
        default:
            cbAlign = 7;
            break;
        case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
        case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
        case PGMPOOLKIND_32BIT_PD:
        case PGMPOOLKIND_32BIT_PD_PHYS:
            cbAlign = 3;
            break;
    }
    AssertCompile(RT_ELEMENTS(pPool->CTX_MID_Z(aStatMonitor,Misaligned)) == 7);
    if ((uint8_t)GCPhys & cbAlign)
        STAM_COUNTER_INC(&pPool->CTX_MID_Z(aStatMonitor,Misaligned)[((uint8_t)GCPhys & cbAlign) - 1]);
#endif

    /*
     * Make sure the pool page wasn't modified by a different CPU.
     */
    if (PHYS_PAGE_ADDRESS(GCPhys) == PHYS_PAGE_ADDRESS(pPage->GCPhys))
    {
        Assert(pPage->enmKind != PGMPOOLKIND_FREE);

        /* The max modification count before flushing depends on the context and page type. */
#ifdef IN_RING3
        uint16_t const cMaxModifications = 96; /* it's cheaper here, right? */
#else
        uint16_t cMaxModifications;
        if (    pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
            ||  pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
            cMaxModifications = 4;
        else
            cMaxModifications = 24;
#endif

        /*
         * We don't have to be very sophisticated about this since there are relatively few calls here.
         * However, we must try our best to detect any non-cpu accesses (disk / networking).
         */
        if (   (   pPage->cModifications < cMaxModifications
                || pgmPoolIsPageLocked(pPage) )
            && enmOrigin != PGMACCESSORIGIN_DEVICE
            && cbBuf <= 16)
        {
            /* Clear the shadow entry. */
            if (!pPage->cModifications++)
                pgmPoolMonitorModifiedInsert(pPool, pPage);

            if (cbBuf <= 8)
                pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, (uint32_t)cbBuf);
            else
            {
                pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys, pvBuf, 8);
                pgmPoolMonitorChainChanging(pVCpu, pPool, pPage, GCPhys + 8, (uint8_t *)pvBuf + 8, (uint32_t)cbBuf - 8);
            }
        }
        else
            pgmPoolMonitorChainFlush(pPool, pPage);

        STAM_PROFILE_STOP_EX(&pPool->CTX_SUFF_Z(StatMonitor), &pPool->CTX_MID_Z(StatMonitor,FlushPage), a);
    }
    else
        Log(("CPU%d: PGM_ALL_CB_DECL pgm pool page for %RGp changed (to %RGp) while waiting!\n", pVCpu->idCpu, PHYS_PAGE_ADDRESS(GCPhys), PHYS_PAGE_ADDRESS(pPage->GCPhys)));
    PGM_UNLOCK(pVM);
    return VINF_PGM_HANDLER_DO_DEFAULT;
}

#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT

# if defined(VBOX_STRICT) && !defined(IN_RING3)

/**
 * Check references to guest physical memory in a PAE / PAE page table.
 *
 * @param   pPool       The pool.
 * @param   pPage       The page.
 * @param   pShwPT      The shadow page table (mapping of the page).
 * @param   pGstPT      The guest page table.
 */
static void pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
{
    unsigned cErrors    = 0;
    int      LastRc     = -1;            /* initialized to shut up gcc */
    unsigned LastPTE    = ~0U;           /* initialized to shut up gcc */
    RTHCPHYS LastHCPhys = NIL_RTHCPHYS;  /* initialized to shut up gcc */
    PVMCC    pVM        = pPool->CTX_SUFF(pVM);

# ifdef VBOX_STRICT
    for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
        AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
# endif
    for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
    {
        if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
        {
            RTHCPHYS HCPhys = NIL_RTHCPHYS;
            int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
            if (   rc != VINF_SUCCESS
                || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
            {
                Log(("rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
                LastPTE    = i;
                LastRc     = rc;
                LastHCPhys = HCPhys;
                cErrors++;

                RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
                rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
                AssertRC(rc);

                for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
                {
                    PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];

                    if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
                    {
                        PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);

                        for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
                        {
                            if (    PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
                                &&  PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
                            {
                                Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
                            }
                        }

                        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
                    }
                }
            }
        }
    }
    AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %RX64 shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
}


/**
 * Check references to guest physical memory in a PAE / 32-bit page table.
 *
 * @param   pPool       The pool.
 * @param   pPage       The page.
 * @param   pShwPT      The shadow page table (mapping of the page).
 * @param   pGstPT      The guest page table.
 */
static void pgmPoolTrackCheckPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
{
    unsigned cErrors    = 0;
    int      LastRc     = -1;            /* initialized to shut up gcc */
    unsigned LastPTE    = ~0U;           /* initialized to shut up gcc */
    RTHCPHYS LastHCPhys = NIL_RTHCPHYS;  /* initialized to shut up gcc */
    PVMCC    pVM        = pPool->CTX_SUFF(pVM);

# ifdef VBOX_STRICT
    for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
        AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
# endif
    for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
    {
        if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
        {
            RTHCPHYS HCPhys = NIL_RTHCPHYS;
            int rc = PGMPhysGCPhys2HCPhys(pVM, pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
            if (   rc != VINF_SUCCESS
                || PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) != HCPhys)
            {
                Log(("rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", rc, i, pGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
                LastPTE    = i;
                LastRc     = rc;
                LastHCPhys = HCPhys;
                cErrors++;

                RTHCPHYS HCPhysPT = NIL_RTHCPHYS;
                rc = PGMPhysGCPhys2HCPhys(pVM, pPage->GCPhys, &HCPhysPT);
                AssertRC(rc);

                for (unsigned iPage = 0; iPage < pPool->cCurPages; iPage++)
                {
                    PPGMPOOLPAGE pTempPage = &pPool->aPages[iPage];

                    if (pTempPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_32BIT_PT)
                    {
                        PPGMSHWPTPAE pShwPT2 = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pTempPage);

                        for (unsigned j = 0; j < RT_ELEMENTS(pShwPT->a); j++)
                        {
                            if (    PGMSHWPTEPAE_IS_P_RW(pShwPT2->a[j])
                                &&  PGMSHWPTEPAE_GET_HCPHYS(pShwPT2->a[j]) == HCPhysPT)
                            {
                                Log(("GCPhys=%RGp idx=%d %RX64 vs %RX64\n", pTempPage->GCPhys, j, PGMSHWPTEPAE_GET_LOG(pShwPT->a[j]), PGMSHWPTEPAE_GET_LOG(pShwPT2->a[j])));
                            }
                        }

                        PGM_DYNMAP_UNUSED_HINT_VM(pVM, pShwPT2);
                    }
                }
            }
        }
    }
    AssertMsg(!cErrors, ("cErrors=%d: last rc=%d idx=%d guest %x shw=%RX64 vs %RHp\n", cErrors, LastRc, LastPTE, pGstPT->a[LastPTE].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[LastPTE]), LastHCPhys));
}

# endif /* VBOX_STRICT && !IN_RING3 */

/**
 * Clear references to guest physical memory in a PAE / PAE page table.
 *
 * @returns nr of changed PTEs
 * @param   pPool           The pool.
 * @param   pPage           The page.
 * @param   pShwPT          The shadow page table (mapping of the page).
 * @param   pGstPT          The guest page table.
 * @param   pOldGstPT       The old cached guest page table.
 * @param   fAllowRemoval   Bail out as soon as we encounter an invalid PTE
 * @param   pfFlush         Flush reused page table (out)
 */
DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT,
                                               PCX86PTPAE pOldGstPT, bool fAllowRemoval, bool *pfFlush)
{
    unsigned cChanged = 0;

# ifdef VBOX_STRICT
    for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
        AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
# endif
    *pfFlush = false;

    for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
    {
        /* Check the new value written by the guest. If present and with a bogus physical address, then
         * it's fairly safe to assume the guest is reusing the PT.
         */
        if (   fAllowRemoval
            && (pGstPT->a[i].u & X86_PTE_P))
        {
            if (!PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
            {
                *pfFlush = true;
                return ++cChanged;
            }
        }
        if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
        {
            /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
            if ((pGstPT->a[i].u & X86_PTE_PAE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK))
            {
# ifdef VBOX_STRICT
                RTHCPHYS HCPhys = NIL_RTGCPHYS;
                int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
                AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %RX64 old %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
# endif
                uint64_t uHostAttr  = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
                bool     fHostRW    = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
                uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G | X86_PTE_PAE_NX);
                bool     fGuestRW   = !!(pGstPT->a[i].u & X86_PTE_RW);

1546 if ( uHostAttr == uGuestAttr
1547 && fHostRW <= fGuestRW)
1548 continue;
1549 }
1550 cChanged++;
1551 /* Something was changed, so flush it. */
1552            Log4(("pgmPoolTrackFlushPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
1553 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
1554 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PAE_PG_MASK, i);
1555 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1556 }
1557 }
1558 return cChanged;
1559}
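
/*
 * A minimal sketch (not built) of the keep-or-flush test above: a shadow PTE
 * may stand when all tracked attributes match and the shadow is no more
 * writable than the guest entry (fHostRW <= fGuestRW), since a read-only
 * shadow of a writable guest PTE is exactly what dirty-bit write monitoring
 * produces. Names here are illustrative, not pool API.
 */
#if 0 /* illustrative only */
static bool pgmPoolSketchCanKeepShwPte(uint64_t uShw, uint64_t uGst, uint64_t fAttrMask)
{
    uint64_t const uShwAttr = uShw & fAttrMask; /* P, US, A, D, G (+ NX for PAE). */
    uint64_t const uGstAttr = uGst & fAttrMask;
    bool     const fShwRW   = RT_BOOL(uShw & X86_PTE_RW);
    bool     const fGstRW   = RT_BOOL(uGst & X86_PTE_RW);
    return uShwAttr == uGstAttr
        && fShwRW <= fGstRW; /* the shadow may lag the guest, never lead it */
}
#endif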
1560
1561
1562/**
1563 * Clear references to guest physical memory in a PAE / 32-bit page table.
1564 *
1565 * @returns The number of changed (flushed) PTEs.
1566 * @param   pPool           The pool.
1567 * @param   pPage           The page.
1568 * @param   pShwPT          The shadow page table (mapping of the page).
1569 * @param   pGstPT          The guest page table.
1570 * @param   pOldGstPT       The old cached guest page table.
1571 * @param   fAllowRemoval   Whether to bail out and flag the table for removal as soon as an invalid guest PTE is encountered.
1572 * @param   pfFlush         Where to return whether the reused page table must be flushed (out).
1573 */
1574DECLINLINE(unsigned) pgmPoolTrackFlushPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT,
1575 PCX86PT pOldGstPT, bool fAllowRemoval, bool *pfFlush)
1576{
1577 unsigned cChanged = 0;
1578
1579# ifdef VBOX_STRICT
1580 for (unsigned i = 0; i < RT_MIN(RT_ELEMENTS(pShwPT->a), pPage->iFirstPresent); i++)
1581 AssertMsg(!PGMSHWPTEPAE_IS_P(pShwPT->a[i]), ("Unexpected PTE: idx=%d %RX64 (first=%d)\n", i, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), pPage->iFirstPresent));
1582# endif
1583 *pfFlush = false;
1584
1585 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
1586 {
1587 /* Check the new value written by the guest. If present and with a bogus physical address, then
1588 * it's fairly safe to assume the guest is reusing the PT. */
1589 if (fAllowRemoval)
1590 {
1591 X86PGUINT const uPte = pGstPT->a[i].u;
1592 if ( (uPte & X86_PTE_P)
1593 && !PGMPhysIsGCPhysValid(pPool->CTX_SUFF(pVM), uPte & X86_PTE_PG_MASK))
1594 {
1595 *pfFlush = true;
1596 return ++cChanged;
1597 }
1598 }
1599 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
1600 {
1601 /* If the old cached PTE is identical, then there's no need to flush the shadow copy. */
1602 if ((pGstPT->a[i].u & X86_PTE_PG_MASK) == (pOldGstPT->a[i].u & X86_PTE_PG_MASK))
1603 {
1604# ifdef VBOX_STRICT
1605                RTHCPHYS HCPhys = NIL_RTHCPHYS;
1606 int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PG_MASK, &HCPhys);
1607 AssertMsg(rc == VINF_SUCCESS && PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]) == HCPhys, ("rc=%d guest %x old %x shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pOldGstPT->a[i].u, PGMSHWPTEPAE_GET_LOG(pShwPT->a[i]), HCPhys));
1608# endif
1609 uint64_t uHostAttr = PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1610 bool fHostRW = !!(PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & X86_PTE_RW);
1611 uint64_t uGuestAttr = pGstPT->a[i].u & (X86_PTE_P | X86_PTE_US | X86_PTE_A | X86_PTE_D | X86_PTE_G);
1612 bool fGuestRW = !!(pGstPT->a[i].u & X86_PTE_RW);
1613
1614 if ( uHostAttr == uGuestAttr
1615 && fHostRW <= fGuestRW)
1616 continue;
1617 }
1618 cChanged++;
1619 /* Something was changed, so flush it. */
1620            Log4(("pgmPoolTrackFlushPTPae32Bit: i=%d pte=%RX64 hint=%x\n",
1621 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK));
1622 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pOldGstPT->a[i].u & X86_PTE_PG_MASK, i);
1623 PGMSHWPTEPAE_ATOMIC_SET(pShwPT->a[i], 0);
1624 }
1625 }
1626 return cChanged;
1627}
1628
1629
1630/**
1631 * Flushes a dirty page.
1632 *
1633 * @param   pVM             The cross context VM structure.
1634 * @param   pPool           The pool.
1635 * @param   idxSlot         Dirty array slot index.
1636 * @param   fAllowRemoval   Whether a reused page table may be flushed (removed).
1637 */
1638static void pgmPoolFlushDirtyPage(PVMCC pVM, PPGMPOOL pPool, unsigned idxSlot, bool fAllowRemoval = false)
1639{
1640 AssertCompile(RT_ELEMENTS(pPool->aidxDirtyPages) == RT_ELEMENTS(pPool->aDirtyPages));
1641
1642 Assert(idxSlot < RT_ELEMENTS(pPool->aDirtyPages));
1643 unsigned idxPage = pPool->aidxDirtyPages[idxSlot];
1644 if (idxPage == NIL_PGMPOOL_IDX)
1645 return;
1646
1647 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1648 Assert(pPage->idx == idxPage);
1649 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1650
1651 AssertMsg(pPage->fDirty, ("Page %RGp (slot=%d) not marked dirty!", pPage->GCPhys, idxSlot));
1652 Log(("Flush dirty page %RGp cMods=%d\n", pPage->GCPhys, pPage->cModifications));
1653
1654 /* First write protect the page again to catch all write accesses. (before checking for changes -> SMP) */
1655 int rc = PGMHandlerPhysicalReset(pVM, pPage->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1656 Assert(rc == VINF_SUCCESS);
1657 pPage->fDirty = false;
1658
1659# ifdef VBOX_STRICT
1660 uint64_t fFlags = 0;
1661 RTHCPHYS HCPhys;
1662 rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->GCPtrDirtyFault, &fFlags, &HCPhys);
1663 AssertMsg( ( rc == VINF_SUCCESS
1664 && (!(fFlags & X86_PTE_RW) || HCPhys != pPage->Core.Key))
1665 /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
1666 || rc == VERR_PAGE_TABLE_NOT_PRESENT
1667 || rc == VERR_PAGE_NOT_PRESENT,
1668 ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->GCPtrDirtyFault, rc, fFlags));
1669# endif
1670
1671 /* Flush those PTEs that have changed. */
1672 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
1673 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1674 void *pvGst;
1675 rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1676 bool fFlush;
1677 unsigned cChanges;
1678
1679 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1680 cChanges = pgmPoolTrackFlushPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst,
1681 (PCX86PTPAE)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1682 else
1683 cChanges = pgmPoolTrackFlushPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst,
1684 (PCX86PT)&pPool->aDirtyPages[idxSlot].aPage[0], fAllowRemoval, &fFlush);
1685
1686 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1687 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1688 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
1689 /* Note: we might want to consider keeping the dirty page active in case there were many changes. */
1690
1691    /* This page is likely to be modified again, so reduce the number of modifications just a bit here. */
1692 Assert(pPage->cModifications);
1693 if (cChanges < 4)
1694 pPage->cModifications = 1; /* must use > 0 here */
1695 else
1696 pPage->cModifications = RT_MAX(1, pPage->cModifications / 2);
1697
1698 STAM_COUNTER_INC(&pPool->StatResetDirtyPages);
1699 if (pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages))
1700 pPool->idxFreeDirtyPage = idxSlot;
1701
1702 pPool->cDirtyPages--;
1703 pPool->aidxDirtyPages[idxSlot] = NIL_PGMPOOL_IDX;
1704 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1705 if (fFlush)
1706 {
1707 Assert(fAllowRemoval);
1708 Log(("Flush reused page table!\n"));
1709 pgmPoolFlushPage(pPool, pPage);
1710 STAM_COUNTER_INC(&pPool->StatForceFlushReused);
1711 }
1712 else
1713 Log(("Removed dirty page %RGp cMods=%d cChanges=%d\n", pPage->GCPhys, pPage->cModifications, cChanges));
1714}
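
/*
 * A sketch (not built) of the cModifications decay applied above: a page with
 * few changed PTEs restarts at 1 (zero would get it reinserted into the
 * modified list), while a heavily changed one is merely halved so it reaches
 * the dirty-page threshold again sooner.
 */
#if 0 /* illustrative only */
static uint16_t pgmPoolSketchDecayModifications(uint16_t cModifications, unsigned cChanges)
{
    if (cChanges < 4)
        return 1;                                   /* must stay > 0 */
    return RT_MAX(1, cModifications / 2);
}
#endif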
1715
1716
1717# ifndef IN_RING3
1718/**
1719 * Adds a new dirty page.
1720 *
1721 * @param pVM The cross context VM structure.
1722 * @param pPool The pool.
1723 * @param pPage The page.
1724 */
1725void pgmPoolAddDirtyPage(PVMCC pVM, PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1726{
1727 PGM_LOCK_ASSERT_OWNER(pVM);
1728 AssertCompile(RT_ELEMENTS(pPool->aDirtyPages) == 8 || RT_ELEMENTS(pPool->aDirtyPages) == 16);
1729 Assert(!pPage->fDirty);
1730
1731 unsigned idxFree = pPool->idxFreeDirtyPage;
1732 Assert(idxFree < RT_ELEMENTS(pPool->aDirtyPages));
1733 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX && pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
1734
1735 if (pPool->cDirtyPages >= RT_ELEMENTS(pPool->aDirtyPages))
1736 {
1737 STAM_COUNTER_INC(&pPool->StatDirtyPageOverFlowFlush);
1738 pgmPoolFlushDirtyPage(pVM, pPool, idxFree, true /* allow removal of reused page tables*/);
1739 }
1740 Assert(pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages));
1741 AssertMsg(pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
1742
1743 Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
1744
1745 /*
1746 * Make a copy of the guest page table as we require valid GCPhys addresses
1747 * when removing references to physical pages.
1748 * (The HCPhys linear lookup is *extremely* expensive!)
1749 */
1750 void *pvGst;
1751 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
1752 memcpy(&pPool->aDirtyPages[idxFree].aPage[0], pvGst,
1753 pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT ? PAGE_SIZE : PAGE_SIZE / 2);
1754# ifdef VBOX_STRICT
1755 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
1756 if (pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT)
1757 pgmPoolTrackCheckPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
1758 else
1759 pgmPoolTrackCheckPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
1760 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
1761# endif
1762 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
1763
1764 STAM_COUNTER_INC(&pPool->StatDirtyPage);
1765 pPage->fDirty = true;
1766 pPage->idxDirtyEntry = (uint8_t)idxFree; Assert(pPage->idxDirtyEntry == idxFree);
1767 pPool->aidxDirtyPages[idxFree] = pPage->idx;
1768 pPool->cDirtyPages++;
1769
1770 pPool->idxFreeDirtyPage = (pPool->idxFreeDirtyPage + 1) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1771 if ( pPool->cDirtyPages < RT_ELEMENTS(pPool->aDirtyPages)
1772 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1773 {
1774 unsigned i;
1775 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1776 {
1777 idxFree = (pPool->idxFreeDirtyPage + i) & (RT_ELEMENTS(pPool->aDirtyPages) - 1);
1778 if (pPool->aidxDirtyPages[idxFree] == NIL_PGMPOOL_IDX)
1779 {
1780 pPool->idxFreeDirtyPage = idxFree;
1781 break;
1782 }
1783 }
1784 Assert(i != RT_ELEMENTS(pPool->aDirtyPages));
1785 }
1786
1787 Assert(pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages) || pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX);
1788
1789 /*
1790 * Clear all references to this shadow table. See @bugref{7298}.
1791 */
1792 pgmPoolTrackClearPageUsers(pPool, pPage);
1793}
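
/*
 * A sketch (not built) of the free slot search above. It relies on
 * RT_ELEMENTS(aDirtyPages) being a power of two (8 or 16, see the
 * AssertCompile), so the wrap-around is a cheap mask instead of a modulo.
 */
#if 0 /* illustrative only */
static unsigned pgmPoolSketchNextFreeDirtySlot(uint16_t const *paidxDirtyPages, unsigned cSlots, unsigned idxStart)
{
    Assert(!(cSlots & (cSlots - 1)));               /* power of two */
    for (unsigned i = 0; i < cSlots; i++)
    {
        unsigned const idxSlot = (idxStart + i) & (cSlots - 1); /* wraps */
        if (paidxDirtyPages[idxSlot] == NIL_PGMPOOL_IDX)
            return idxSlot;
    }
    return cSlots;                                  /* every slot is in use */
}
#endif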
1794# endif /* !IN_RING3 */
1795
1796
1797/**
1798 * Checks if the specified page is dirty (i.e. write monitoring is temporarily off).
1799 *
1800 * @returns true if dirty, false if not.
1801 * @param   pVM             The cross context VM structure.
1802 * @param   GCPhys          Guest physical address.
1803 */
1804bool pgmPoolIsDirtyPageSlow(PVMCC pVM, RTGCPHYS GCPhys)
1805{
1806 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1807 PGM_LOCK_ASSERT_OWNER(pVM);
1808 if (!pPool->cDirtyPages)
1809 return false;
1810
1811 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1812
1813 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1814 {
1815 unsigned idxPage = pPool->aidxDirtyPages[i];
1816 if (idxPage != NIL_PGMPOOL_IDX)
1817 {
1818 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1819 if (pPage->GCPhys == GCPhys)
1820 return true;
1821 }
1822 }
1823 return false;
1824}
1825
1826
1827/**
1828 * Reset all dirty pages by reinstating page monitoring.
1829 *
1830 * @param pVM The cross context VM structure.
1831 */
1832void pgmPoolResetDirtyPages(PVMCC pVM)
1833{
1834 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1835 PGM_LOCK_ASSERT_OWNER(pVM);
1836 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1837
1838 if (!pPool->cDirtyPages)
1839 return;
1840
1841 Log(("pgmPoolResetDirtyPages\n"));
1842 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1843 pgmPoolFlushDirtyPage(pVM, pPool, i, true /* allow removal of reused page tables*/);
1844
1845 pPool->idxFreeDirtyPage = 0;
1846 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1847 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1848 {
1849 unsigned i;
1850 for (i = 1; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1851 {
1852 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1853 {
1854 pPool->idxFreeDirtyPage = i;
1855 break;
1856 }
1857 }
1858 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1859 }
1860
1861 Assert(pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] == NIL_PGMPOOL_IDX || pPool->cDirtyPages == RT_ELEMENTS(pPool->aDirtyPages));
1862 return;
1863}
1864
1865
1866/**
1867 * Invalidates the PT entry for the specified page.
1868 *
1869 * @param   pVM             The cross context VM structure.
1870 * @param   GCPtrPage       Guest page to invalidate.
1871 */
1872void pgmPoolResetDirtyPage(PVMCC pVM, RTGCPTR GCPtrPage)
1873{
1874 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1875 PGM_LOCK_ASSERT_OWNER(pVM);
1876 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1877
1878 if (!pPool->cDirtyPages)
1879 return;
1880
1881 Log(("pgmPoolResetDirtyPage %RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
1882 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1883 {
1884 /** @todo What was intended here??? This looks incomplete... */
1885 }
1886}
1887
1888
1889/**
1890 * Flushes the dirty page matching the given page table address, reinstating page monitoring for it.
1891 *
1892 * @param   pVM             The cross context VM structure.
1893 * @param   GCPhysPT        Physical address of the page table.
1894 */
1895void pgmPoolInvalidateDirtyPage(PVMCC pVM, RTGCPHYS GCPhysPT)
1896{
1897 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1898 PGM_LOCK_ASSERT_OWNER(pVM);
1899 Assert(pPool->cDirtyPages <= RT_ELEMENTS(pPool->aDirtyPages));
1900 unsigned idxDirtyPage = RT_ELEMENTS(pPool->aDirtyPages);
1901
1902 if (!pPool->cDirtyPages)
1903 return;
1904
1905 GCPhysPT = GCPhysPT & ~(RTGCPHYS)PAGE_OFFSET_MASK;
1906
1907 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1908 {
1909 unsigned idxPage = pPool->aidxDirtyPages[i];
1910 if (idxPage != NIL_PGMPOOL_IDX)
1911 {
1912 PPGMPOOLPAGE pPage = &pPool->aPages[idxPage];
1913 if (pPage->GCPhys == GCPhysPT)
1914 {
1915 idxDirtyPage = i;
1916 break;
1917 }
1918 }
1919 }
1920
1921 if (idxDirtyPage != RT_ELEMENTS(pPool->aDirtyPages))
1922 {
1923 pgmPoolFlushDirtyPage(pVM, pPool, idxDirtyPage, true /* allow removal of reused page tables*/);
1924 if ( pPool->cDirtyPages != RT_ELEMENTS(pPool->aDirtyPages)
1925 && pPool->aidxDirtyPages[pPool->idxFreeDirtyPage] != NIL_PGMPOOL_IDX)
1926 {
1927 unsigned i;
1928 for (i = 0; i < RT_ELEMENTS(pPool->aDirtyPages); i++)
1929 {
1930 if (pPool->aidxDirtyPages[i] == NIL_PGMPOOL_IDX)
1931 {
1932 pPool->idxFreeDirtyPage = i;
1933 break;
1934 }
1935 }
1936 AssertMsg(i != RT_ELEMENTS(pPool->aDirtyPages), ("cDirtyPages %d", pPool->cDirtyPages));
1937 }
1938 }
1939}
1940
1941#endif /* PGMPOOL_WITH_OPTIMIZED_DIRTY_PT */
1942
1943/**
1944 * Inserts a page into the GCPhys hash table.
1945 *
1946 * @param pPool The pool.
1947 * @param pPage The page.
1948 */
1949DECLINLINE(void) pgmPoolHashInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1950{
1951 Log3(("pgmPoolHashInsert: %RGp\n", pPage->GCPhys));
1952 Assert(pPage->GCPhys != NIL_RTGCPHYS); Assert(pPage->iNext == NIL_PGMPOOL_IDX);
1953 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1954 pPage->iNext = pPool->aiHash[iHash];
1955 pPool->aiHash[iHash] = pPage->idx;
1956}
1957
1958
1959/**
1960 * Removes a page from the GCPhys hash table.
1961 *
1962 * @param pPool The pool.
1963 * @param pPage The page.
1964 */
1965DECLINLINE(void) pgmPoolHashRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
1966{
1967 Log3(("pgmPoolHashRemove: %RGp\n", pPage->GCPhys));
1968 uint16_t iHash = PGMPOOL_HASH(pPage->GCPhys);
1969 if (pPool->aiHash[iHash] == pPage->idx)
1970 pPool->aiHash[iHash] = pPage->iNext;
1971 else
1972 {
1973 uint16_t iPrev = pPool->aiHash[iHash];
1974 for (;;)
1975 {
1976 const int16_t i = pPool->aPages[iPrev].iNext;
1977 if (i == pPage->idx)
1978 {
1979 pPool->aPages[iPrev].iNext = pPage->iNext;
1980 break;
1981 }
1982 if (i == NIL_PGMPOOL_IDX)
1983 {
1984 AssertReleaseMsgFailed(("GCPhys=%RGp idx=%d\n", pPage->GCPhys, pPage->idx));
1985 break;
1986 }
1987 iPrev = i;
1988 }
1989 }
1990 pPage->iNext = NIL_PGMPOOL_IDX;
1991}
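
/*
 * Both hash functions above implement a conventional chained hash, except
 * that buckets (aiHash) and chain links (iNext) hold 16-bit page indices
 * rather than pointers, keeping the structure position independent across
 * contexts. A condensed lookup sketch (not built, names illustrative):
 */
#if 0 /* illustrative only */
typedef struct SKETCHPAGE { RTGCPHYS GCPhys; uint16_t idx; uint16_t iNext; } SKETCHPAGE;

static uint16_t sketchHashLookup(uint16_t const *paiHash, SKETCHPAGE const *paPages, RTGCPHYS GCPhys, uint16_t iHash)
{
    uint16_t i = paiHash[iHash];                    /* head of the bucket chain */
    while (i != NIL_PGMPOOL_IDX && paPages[i].GCPhys != GCPhys)
        i = paPages[i].iNext;                       /* follow the index links */
    return i;                                       /* NIL_PGMPOOL_IDX if absent */
}
#endif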
1992
1993
1994/**
1995 * Frees up one cache page.
1996 *
1997 * @returns VBox status code.
1998 * @retval VINF_SUCCESS on success.
1999 * @param pPool The pool.
2000 * @param iUser The user index.
2001 */
2002static int pgmPoolCacheFreeOne(PPGMPOOL pPool, uint16_t iUser)
2003{
2004 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2005    Assert(pPool->iAgeHead != pPool->iAgeTail); /* We shouldn't be here if there are < 2 cached entries! */
2006 STAM_COUNTER_INC(&pPool->StatCacheFreeUpOne);
2007
2008 /*
2009 * Select one page from the tail of the age list.
2010 */
2011 PPGMPOOLPAGE pPage;
2012 for (unsigned iLoop = 0; ; iLoop++)
2013 {
2014 uint16_t iToFree = pPool->iAgeTail;
2015 if (iToFree == iUser && iUser != NIL_PGMPOOL_IDX)
2016 iToFree = pPool->aPages[iToFree].iAgePrev;
2017/* This is the alternative to the SyncCR3 pgmPoolCacheUsed calls.
2018 if (pPool->aPages[iToFree].iUserHead != NIL_PGMPOOL_USER_INDEX)
2019 {
2020 uint16_t i = pPool->aPages[iToFree].iAgePrev;
2021 for (unsigned j = 0; j < 10 && i != NIL_PGMPOOL_USER_INDEX; j++, i = pPool->aPages[i].iAgePrev)
2022 {
2023 if (pPool->aPages[iToFree].iUserHead == NIL_PGMPOOL_USER_INDEX)
2024 continue;
2025 iToFree = i;
2026 break;
2027 }
2028 }
2029*/
2030 Assert(iToFree != iUser);
2031 AssertReleaseMsg(iToFree != NIL_PGMPOOL_IDX,
2032 ("iToFree=%#x (iAgeTail=%#x) iUser=%#x iLoop=%u - pPool=%p LB %#zx\n",
2033 iToFree, pPool->iAgeTail, iUser, iLoop, pPool,
2034 RT_UOFFSETOF_DYN(PGMPOOL, aPages[pPool->cMaxPages])
2035 + pPool->cMaxUsers * sizeof(PGMPOOLUSER)
2036 + pPool->cMaxPhysExts * sizeof(PGMPOOLPHYSEXT) ));
2037
2038 pPage = &pPool->aPages[iToFree];
2039
2040 /*
2041 * Reject any attempts at flushing the currently active shadow CR3 mapping.
2042 * Call pgmPoolCacheUsed to move the page to the head of the age list.
2043 */
2044 if ( !pgmPoolIsPageLocked(pPage)
2045 && pPage->idx >= PGMPOOL_IDX_FIRST /* paranoia (#6349) */)
2046 break;
2047 LogFlow(("pgmPoolCacheFreeOne: refuse CR3 mapping\n"));
2048 pgmPoolCacheUsed(pPool, pPage);
2049 AssertLogRelReturn(iLoop < 8192, VERR_PGM_POOL_TOO_MANY_LOOPS);
2050 }
2051
2052 /*
2053 * Found a usable page, flush it and return.
2054 */
2055 int rc = pgmPoolFlushPage(pPool, pPage);
2056 /* This flush was initiated by us and not the guest, so explicitly flush the TLB. */
2057 /** @todo find out why this is necessary; pgmPoolFlushPage should trigger a flush if one is really needed. */
2058 if (rc == VINF_SUCCESS)
2059 PGM_INVL_ALL_VCPU_TLBS(pVM);
2060 return rc;
2061}
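
/*
 * A sketch (not built) of the age list discipline the eviction above relies
 * on: pgmPoolCacheInsert pushes pages at iAgeHead and pgmPoolCacheUsed moves
 * them back there on use, so iAgeTail always names the least recently used
 * page and picking a victim is O(1) apart from the locked/CR3 skips.
 */
#if 0 /* illustrative only */
typedef struct SKETCHAGENODE { uint16_t iAgePrev, iAgeNext; } SKETCHAGENODE;

static uint16_t sketchAgeListEvictTail(uint16_t *piAgeHead, uint16_t *piAgeTail, SKETCHAGENODE *paNodes)
{
    uint16_t const iVictim = *piAgeTail;            /* least recently used */
    if (iVictim != NIL_PGMPOOL_IDX)
    {
        *piAgeTail = paNodes[iVictim].iAgePrev;
        if (*piAgeTail != NIL_PGMPOOL_IDX)
            paNodes[*piAgeTail].iAgeNext = NIL_PGMPOOL_IDX;
        else
            *piAgeHead = NIL_PGMPOOL_IDX;           /* the list is now empty */
        paNodes[iVictim].iAgePrev = paNodes[iVictim].iAgeNext = NIL_PGMPOOL_IDX;
    }
    return iVictim;
}
#endif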
2062
2063
2064/**
2065 * Checks if a kind mismatch is really a page being reused
2066 * or if it's just a normal remapping.
2067 *
2068 * @returns true if reused and the cached page (enmKind1) should be flushed
2069 * @returns false if not reused.
2070 * @param enmKind1 The kind of the cached page.
2071 * @param enmKind2 The kind of the requested page.
2072 */
2073static bool pgmPoolCacheReusedByKind(PGMPOOLKIND enmKind1, PGMPOOLKIND enmKind2)
2074{
2075 switch (enmKind1)
2076 {
2077 /*
2078 * Never reuse them. There is no remapping in non-paging mode.
2079 */
2080 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2081 case PGMPOOLKIND_32BIT_PD_PHYS:
2082 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2083 case PGMPOOLKIND_PAE_PD_PHYS:
2084 case PGMPOOLKIND_PAE_PDPT_PHYS:
2085 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2086 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2087 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2088 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2089 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2090 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT: /* never reuse them for other types */
2091 return false;
2092
2093 /*
2094 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2095 */
2096 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2097 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2098 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2099 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2100 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2101 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2102 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2103 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2104 case PGMPOOLKIND_32BIT_PD:
2105 case PGMPOOLKIND_PAE_PDPT:
2106 switch (enmKind2)
2107 {
2108 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2109 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2110 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2111 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2112 case PGMPOOLKIND_64BIT_PML4:
2113 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2114 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2115 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2116 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2117 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2118 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2119 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2120 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2121 return true;
2122 default:
2123 return false;
2124 }
2125
2126 /*
2127 * It's perfectly fine to reuse these, except for PAE and non-paging stuff.
2128 */
2129 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2130 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2131 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2132 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2133 case PGMPOOLKIND_64BIT_PML4:
2134 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2135 switch (enmKind2)
2136 {
2137 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2138 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2139 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2140 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2141 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2142 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2143 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2144 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2145 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2146 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2147 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2148 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2149 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2150 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2151 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2152 return true;
2153 default:
2154 return false;
2155 }
2156
2157 /*
2158 * These cannot be flushed, and it's common to reuse the PDs as PTs.
2159 */
2160 case PGMPOOLKIND_ROOT_NESTED:
2161 return false;
2162
2163 default:
2164 AssertFatalMsgFailed(("enmKind1=%d\n", enmKind1));
2165 }
2166}
2167
2168
2169/**
2170 * Attempts to satisfy a pgmPoolAlloc request from the cache.
2171 *
2172 * @returns VBox status code.
2173 * @retval VINF_PGM_CACHED_PAGE on success.
2174 * @retval VERR_FILE_NOT_FOUND if not found.
2175 * @param pPool The pool.
2176 * @param GCPhys The GC physical address of the page we're gonna shadow.
2177 * @param enmKind The kind of mapping.
2178 * @param enmAccess Access type for the mapping (only relevant for big pages)
2179 * @param fA20Enabled Whether the CPU has the A20 gate enabled.
2180 * @param iUser The shadow page pool index of the user table. This is
2181 * NIL_PGMPOOL_IDX for root pages.
2182 * @param   iUserTable  The index into the user table (shadowed). Ignored for
2183 *                      root pages.
2184 * @param ppPage Where to store the pointer to the page.
2185 */
2186static int pgmPoolCacheAlloc(PPGMPOOL pPool, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
2187 uint16_t iUser, uint32_t iUserTable, PPPGMPOOLPAGE ppPage)
2188{
2189 /*
2190 * Look up the GCPhys in the hash.
2191 */
2192 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2193 Log3(("pgmPoolCacheAlloc: %RGp kind %s iUser=%d iUserTable=%x SLOT=%d\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable, i));
2194 if (i != NIL_PGMPOOL_IDX)
2195 {
2196 do
2197 {
2198 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2199 Log4(("pgmPoolCacheAlloc: slot %d found page %RGp\n", i, pPage->GCPhys));
2200 if (pPage->GCPhys == GCPhys)
2201 {
2202 if ( (PGMPOOLKIND)pPage->enmKind == enmKind
2203 && (PGMPOOLACCESS)pPage->enmAccess == enmAccess
2204 && pPage->fA20Enabled == fA20Enabled)
2205 {
2206 /* Put it at the start of the use list to make sure pgmPoolTrackAddUser
2207 * doesn't flush it in case there are no more free use records.
2208 */
2209 pgmPoolCacheUsed(pPool, pPage);
2210
2211 int rc = VINF_SUCCESS;
2212 if (iUser != NIL_PGMPOOL_IDX)
2213 rc = pgmPoolTrackAddUser(pPool, pPage, iUser, iUserTable);
2214 if (RT_SUCCESS(rc))
2215 {
2216 Assert((PGMPOOLKIND)pPage->enmKind == enmKind);
2217 *ppPage = pPage;
2218 if (pPage->cModifications)
2219 pPage->cModifications = 1; /* reset counter (can't use 0, or else it will be reinserted in the modified list) */
2220 STAM_COUNTER_INC(&pPool->StatCacheHits);
2221 return VINF_PGM_CACHED_PAGE;
2222 }
2223 return rc;
2224 }
2225
2226 if ((PGMPOOLKIND)pPage->enmKind != enmKind)
2227 {
2228 /*
2229 * The kind is different. In some cases we should now flush the page
2230 * as it has been reused, but in most cases this is normal remapping
2231 * of PDs as PT or big pages using the GCPhys field in a slightly
2232 * different way than the other kinds.
2233 */
2234 if (pgmPoolCacheReusedByKind((PGMPOOLKIND)pPage->enmKind, enmKind))
2235 {
2236 STAM_COUNTER_INC(&pPool->StatCacheKindMismatches);
2237 pgmPoolFlushPage(pPool, pPage);
2238 break;
2239 }
2240 }
2241 }
2242
2243 /* next */
2244 i = pPage->iNext;
2245 } while (i != NIL_PGMPOOL_IDX);
2246 }
2247
2248 Log3(("pgmPoolCacheAlloc: Missed GCPhys=%RGp enmKind=%s\n", GCPhys, pgmPoolPoolKindToStr(enmKind)));
2249 STAM_COUNTER_INC(&pPool->StatCacheMisses);
2250 return VERR_FILE_NOT_FOUND;
2251}
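
/*
 * In outline, the caller (pgmPoolAlloc; condensed and hypothetical here)
 * consumes this as a fast path: a hit short-circuits allocation entirely,
 * and only a VERR_FILE_NOT_FOUND miss falls through to taking a page off
 * the free list.
 */
#if 0 /* illustrative only */
int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
if (RT_SUCCESS(rc2))
    return rc2;                                     /* VINF_PGM_CACHED_PAGE on a hit */
/* ... cache miss: allocate and initialize a fresh shadow page instead ... */
#endif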
2252
2253
2254/**
2255 * Inserts a page into the cache.
2256 *
2257 * @param pPool The pool.
2258 * @param pPage The cached page.
2259 * @param fCanBeCached Set if the page is fit for caching from the caller's point of view.
2260 */
2261static void pgmPoolCacheInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fCanBeCached)
2262{
2263 /*
2264 * Insert into the GCPhys hash if the page is fit for that.
2265 */
2266 Assert(!pPage->fCached);
2267 if (fCanBeCached)
2268 {
2269 pPage->fCached = true;
2270 pgmPoolHashInsert(pPool, pPage);
2271 Log3(("pgmPoolCacheInsert: Caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2272 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2273 STAM_COUNTER_INC(&pPool->StatCacheCacheable);
2274 }
2275 else
2276 {
2277 Log3(("pgmPoolCacheInsert: Not caching %p:{.Core=%RHp, .idx=%d, .enmKind=%s, GCPhys=%RGp}\n",
2278 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
2279 STAM_COUNTER_INC(&pPool->StatCacheUncacheable);
2280 }
2281
2282 /*
2283 * Insert at the head of the age list.
2284 */
2285 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2286 pPage->iAgeNext = pPool->iAgeHead;
2287 if (pPool->iAgeHead != NIL_PGMPOOL_IDX)
2288 pPool->aPages[pPool->iAgeHead].iAgePrev = pPage->idx;
2289 else
2290 pPool->iAgeTail = pPage->idx;
2291 pPool->iAgeHead = pPage->idx;
2292}
2293
2294
2295/**
2296 * Flushes a cached page.
2297 *
2298 * @param pPool The pool.
2299 * @param pPage The cached page.
2300 */
2301static void pgmPoolCacheFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2302{
2303 Log3(("pgmPoolCacheFlushPage: %RGp\n", pPage->GCPhys));
2304
2305 /*
2306 * Remove the page from the hash.
2307 */
2308 if (pPage->fCached)
2309 {
2310 pPage->fCached = false;
2311 pgmPoolHashRemove(pPool, pPage);
2312 }
2313 else
2314 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
2315
2316 /*
2317 * Remove it from the age list.
2318 */
2319 if (pPage->iAgeNext != NIL_PGMPOOL_IDX)
2320 pPool->aPages[pPage->iAgeNext].iAgePrev = pPage->iAgePrev;
2321 else
2322 pPool->iAgeTail = pPage->iAgePrev;
2323 if (pPage->iAgePrev != NIL_PGMPOOL_IDX)
2324 pPool->aPages[pPage->iAgePrev].iAgeNext = pPage->iAgeNext;
2325 else
2326 pPool->iAgeHead = pPage->iAgeNext;
2327 pPage->iAgeNext = NIL_PGMPOOL_IDX;
2328 pPage->iAgePrev = NIL_PGMPOOL_IDX;
2329}
2330
2331
2332/**
2333 * Looks for pages sharing the monitor.
2334 *
2335 * @returns Pointer to the head page.
2336 * @returns NULL if not found.
2337 * @param   pPool       The pool.
2338 * @param pNewPage The page which is going to be monitored.
2339 */
2340static PPGMPOOLPAGE pgmPoolMonitorGetPageByGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pNewPage)
2341{
2342 /*
2343 * Look up the GCPhys in the hash.
2344 */
2345 RTGCPHYS GCPhys = pNewPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2346 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
2347 if (i == NIL_PGMPOOL_IDX)
2348 return NULL;
2349 do
2350 {
2351 PPGMPOOLPAGE pPage = &pPool->aPages[i];
2352 if ( pPage->GCPhys - GCPhys < PAGE_SIZE
2353 && pPage != pNewPage)
2354 {
2355 switch (pPage->enmKind)
2356 {
2357 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2358 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2359 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2360 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2361 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2362 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2363 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2364 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2365 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2366 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2367 case PGMPOOLKIND_64BIT_PML4:
2368 case PGMPOOLKIND_32BIT_PD:
2369 case PGMPOOLKIND_PAE_PDPT:
2370 {
2371 /* find the head */
2372 while (pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2373 {
2374 Assert(pPage->iMonitoredPrev != pPage->idx);
2375 pPage = &pPool->aPages[pPage->iMonitoredPrev];
2376 }
2377 return pPage;
2378 }
2379
2380 /* ignore, no monitoring. */
2381 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2382 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2383 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2384 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2385 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2386 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2387 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2388 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2389 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2390 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2391 case PGMPOOLKIND_ROOT_NESTED:
2392 case PGMPOOLKIND_PAE_PD_PHYS:
2393 case PGMPOOLKIND_PAE_PDPT_PHYS:
2394 case PGMPOOLKIND_32BIT_PD_PHYS:
2395 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2396 break;
2397 default:
2398 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
2399 }
2400 }
2401
2402 /* next */
2403 i = pPage->iNext;
2404 } while (i != NIL_PGMPOOL_IDX);
2405 return NULL;
2406}
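
/*
 * Note the bounds test above: with unsigned arithmetic,
 * 'pPage->GCPhys - GCPhys < PAGE_SIZE' is a one-compare form of
 * 'GCPhys <= pPage->GCPhys < GCPhys + PAGE_SIZE', because an entry below
 * GCPhys wraps around to a huge value. A sketch (not built):
 */
#if 0 /* illustrative only */
static bool sketchIsInGuestPage(RTGCPHYS GCPhysEntry, RTGCPHYS GCPhysPageStart)
{
    /* GCPhysPageStart is page aligned; the subtraction wraps for entries
       below the page, so the '<' test fails for them as intended. */
    return GCPhysEntry - GCPhysPageStart < PAGE_SIZE;
}
#endif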
2407
2408
2409/**
2410 * Enables write monitoring of a guest page.
2411 *
2412 * @returns VBox status code.
2413 * @retval VINF_SUCCESS on success.
2414 * @param pPool The pool.
2415 * @param pPage The cached page.
2416 */
2417static int pgmPoolMonitorInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2418{
2419 LogFlow(("pgmPoolMonitorInsert %RGp\n", pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK));
2420
2421 /*
2422 * Filter out the relevant kinds.
2423 */
2424 switch (pPage->enmKind)
2425 {
2426 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2427 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2428 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2429 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2430 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2431 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2432 case PGMPOOLKIND_64BIT_PML4:
2433 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2434 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2435 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2436 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2437 case PGMPOOLKIND_32BIT_PD:
2438 case PGMPOOLKIND_PAE_PDPT:
2439 break;
2440
2441 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2442 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2443 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2444 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2445 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2446 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2447 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2448 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2449 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2450 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2451 case PGMPOOLKIND_ROOT_NESTED:
2452 /* Nothing to monitor here. */
2453 return VINF_SUCCESS;
2454
2455 case PGMPOOLKIND_32BIT_PD_PHYS:
2456 case PGMPOOLKIND_PAE_PDPT_PHYS:
2457 case PGMPOOLKIND_PAE_PD_PHYS:
2458 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
2459 /* Nothing to monitor here. */
2460 return VINF_SUCCESS;
2461 default:
2462 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2463 }
2464
2465 /*
2466 * Install handler.
2467 */
2468 int rc;
2469 PPGMPOOLPAGE pPageHead = pgmPoolMonitorGetPageByGCPhys(pPool, pPage);
2470 if (pPageHead)
2471 {
2472 Assert(pPageHead != pPage); Assert(pPageHead->iMonitoredNext != pPage->idx);
2473 Assert(pPageHead->iMonitoredPrev != pPage->idx);
2474
2475#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2476 if (pPageHead->fDirty)
2477 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPageHead->idxDirtyEntry, false /* do not remove */);
2478#endif
2479
2480 pPage->iMonitoredPrev = pPageHead->idx;
2481 pPage->iMonitoredNext = pPageHead->iMonitoredNext;
2482 if (pPageHead->iMonitoredNext != NIL_PGMPOOL_IDX)
2483 pPool->aPages[pPageHead->iMonitoredNext].iMonitoredPrev = pPage->idx;
2484 pPageHead->iMonitoredNext = pPage->idx;
2485 rc = VINF_SUCCESS;
2486 }
2487 else
2488 {
2489 Assert(pPage->iMonitoredNext == NIL_PGMPOOL_IDX); Assert(pPage->iMonitoredPrev == NIL_PGMPOOL_IDX);
2490 PVMCC pVM = pPool->CTX_SUFF(pVM);
2491 const RTGCPHYS GCPhysPage = pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
2492 rc = PGMHandlerPhysicalRegister(pVM, GCPhysPage, GCPhysPage + PAGE_OFFSET_MASK, pPool->hAccessHandlerType,
2493 pPage - &pPool->aPages[0], NIL_RTR3PTR /*pszDesc*/);
2494 /** @todo we should probably deal with out-of-memory conditions here, but for now increasing
2495 * the heap size should suffice. */
2496 AssertFatalMsgRC(rc, ("PGMHandlerPhysicalRegisterEx %RGp failed with %Rrc\n", GCPhysPage, rc));
2497 PVMCPU pVCpu = VMMGetCpu(pVM);
2498 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), ("fSyncFlags=%x syncff=%d\n", pVCpu->pgm.s.fSyncFlags, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)));
2499 }
2500 pPage->fMonitored = true;
2501 return rc;
2502}
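
/*
 * A sketch (not built) of the sharing rule implemented above: at most one
 * physical access handler exists per monitored guest page, registered for
 * the chain head; any later pool page backed by the same guest page merely
 * links itself in right after the head.
 */
#if 0 /* illustrative only */
typedef struct SKETCHMONPAGE { uint16_t iMonitoredPrev, iMonitoredNext; } SKETCHMONPAGE;

static void sketchMonitorChainInsertAfterHead(SKETCHMONPAGE *paPages, uint16_t idxHead, uint16_t idxNew)
{
    paPages[idxNew].iMonitoredPrev  = idxHead;
    paPages[idxNew].iMonitoredNext  = paPages[idxHead].iMonitoredNext;
    if (paPages[idxHead].iMonitoredNext != NIL_PGMPOOL_IDX)
        paPages[paPages[idxHead].iMonitoredNext].iMonitoredPrev = idxNew;
    paPages[idxHead].iMonitoredNext = idxNew;
}
#endif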
2503
2504
2505/**
2506 * Disables write monitoring of a guest page.
2507 *
2508 * @returns VBox status code.
2509 * @retval VINF_SUCCESS on success.
2510 * @param pPool The pool.
2511 * @param pPage The cached page.
2512 */
2513static int pgmPoolMonitorFlush(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2514{
2515 /*
2516 * Filter out the relevant kinds.
2517 */
2518 switch (pPage->enmKind)
2519 {
2520 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
2521 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
2522 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
2523 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
2524 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
2525 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
2526 case PGMPOOLKIND_64BIT_PML4:
2527 case PGMPOOLKIND_32BIT_PD:
2528 case PGMPOOLKIND_PAE_PDPT:
2529 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
2530 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
2531 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
2532 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
2533 break;
2534
2535 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
2536 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
2537 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
2538 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
2539 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
2540 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
2541 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
2542 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
2543 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
2544 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
2545 case PGMPOOLKIND_ROOT_NESTED:
2546 case PGMPOOLKIND_PAE_PD_PHYS:
2547 case PGMPOOLKIND_PAE_PDPT_PHYS:
2548 case PGMPOOLKIND_32BIT_PD_PHYS:
2549 /* Nothing to monitor here. */
2550 Assert(!pPage->fMonitored);
2551 return VINF_SUCCESS;
2552
2553 default:
2554 AssertFatalMsgFailed(("This can't happen! enmKind=%d\n", pPage->enmKind));
2555 }
2556 Assert(pPage->fMonitored);
2557
2558 /*
2559 * Remove the page from the monitored list or uninstall it if last.
2560 */
2561 const PVMCC pVM = pPool->CTX_SUFF(pVM);
2562 int rc;
2563 if ( pPage->iMonitoredNext != NIL_PGMPOOL_IDX
2564 || pPage->iMonitoredPrev != NIL_PGMPOOL_IDX)
2565 {
2566 if (pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
2567 {
2568 PPGMPOOLPAGE pNewHead = &pPool->aPages[pPage->iMonitoredNext];
2569 pNewHead->iMonitoredPrev = NIL_PGMPOOL_IDX;
2570 rc = PGMHandlerPhysicalChangeUserArg(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK, pPage->iMonitoredNext);
2571
2572 AssertFatalRCSuccess(rc);
2573 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2574 }
2575 else
2576 {
2577 pPool->aPages[pPage->iMonitoredPrev].iMonitoredNext = pPage->iMonitoredNext;
2578 if (pPage->iMonitoredNext != NIL_PGMPOOL_IDX)
2579 {
2580 pPool->aPages[pPage->iMonitoredNext].iMonitoredPrev = pPage->iMonitoredPrev;
2581 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
2582 }
2583 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
2584 rc = VINF_SUCCESS;
2585 }
2586 }
2587 else
2588 {
2589 rc = PGMHandlerPhysicalDeregister(pVM, pPage->GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK);
2590 AssertFatalRC(rc);
2591 PVMCPU pVCpu = VMMGetCpu(pVM);
2592 AssertFatalMsg(!(pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3),
2593 ("%#x %#x\n", pVCpu->pgm.s.fSyncFlags, pVM->fGlobalForcedActions));
2594 }
2595 pPage->fMonitored = false;
2596
2597 /*
2598 * Remove it from the list of modified pages (if in it).
2599 */
2600 pgmPoolMonitorModifiedRemove(pPool, pPage);
2601
2602 return rc;
2603}
2604
2605
2606/**
2607 * Inserts the page into the list of modified pages.
2608 *
2609 * @param pPool The pool.
2610 * @param pPage The page.
2611 */
2612void pgmPoolMonitorModifiedInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2613{
2614 Log3(("pgmPoolMonitorModifiedInsert: idx=%d\n", pPage->idx));
2615 AssertMsg( pPage->iModifiedNext == NIL_PGMPOOL_IDX
2616 && pPage->iModifiedPrev == NIL_PGMPOOL_IDX
2617 && pPool->iModifiedHead != pPage->idx,
2618 ("Next=%d Prev=%d idx=%d cModifications=%d Head=%d cModifiedPages=%d\n",
2619 pPage->iModifiedNext, pPage->iModifiedPrev, pPage->idx, pPage->cModifications,
2620 pPool->iModifiedHead, pPool->cModifiedPages));
2621
2622 pPage->iModifiedNext = pPool->iModifiedHead;
2623 if (pPool->iModifiedHead != NIL_PGMPOOL_IDX)
2624 pPool->aPages[pPool->iModifiedHead].iModifiedPrev = pPage->idx;
2625 pPool->iModifiedHead = pPage->idx;
2626 pPool->cModifiedPages++;
2627#ifdef VBOX_WITH_STATISTICS
2628 if (pPool->cModifiedPages > pPool->cModifiedPagesHigh)
2629 pPool->cModifiedPagesHigh = pPool->cModifiedPages;
2630#endif
2631}
2632
2633
2634/**
2635 * Removes the page from the list of modified pages and resets the
2636 * modification counter.
2637 *
2638 * @param pPool The pool.
2639 * @param pPage The page which is believed to be in the list of modified pages.
2640 */
2641static void pgmPoolMonitorModifiedRemove(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
2642{
2643 Log3(("pgmPoolMonitorModifiedRemove: idx=%d cModifications=%d\n", pPage->idx, pPage->cModifications));
2644 if (pPool->iModifiedHead == pPage->idx)
2645 {
2646 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2647 pPool->iModifiedHead = pPage->iModifiedNext;
2648 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2649 {
2650 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = NIL_PGMPOOL_IDX;
2651 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2652 }
2653 pPool->cModifiedPages--;
2654 }
2655 else if (pPage->iModifiedPrev != NIL_PGMPOOL_IDX)
2656 {
2657 pPool->aPages[pPage->iModifiedPrev].iModifiedNext = pPage->iModifiedNext;
2658 if (pPage->iModifiedNext != NIL_PGMPOOL_IDX)
2659 {
2660 pPool->aPages[pPage->iModifiedNext].iModifiedPrev = pPage->iModifiedPrev;
2661 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2662 }
2663 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2664 pPool->cModifiedPages--;
2665 }
2666 else
2667 Assert(pPage->iModifiedPrev == NIL_PGMPOOL_IDX);
2668 pPage->cModifications = 0;
2669}
2670
2671
2672/**
2673 * Zaps the list of modified pages, resetting their modification counters in the process.
2674 *
2675 * @param pVM The cross context VM structure.
2676 */
2677static void pgmPoolMonitorModifiedClearAll(PVMCC pVM)
2678{
2679 PGM_LOCK_VOID(pVM);
2680 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2681 LogFlow(("pgmPoolMonitorModifiedClearAll: cModifiedPages=%d\n", pPool->cModifiedPages));
2682
2683 unsigned cPages = 0; NOREF(cPages);
2684
2685#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2686 pgmPoolResetDirtyPages(pVM);
2687#endif
2688
2689 uint16_t idx = pPool->iModifiedHead;
2690 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
2691 while (idx != NIL_PGMPOOL_IDX)
2692 {
2693 PPGMPOOLPAGE pPage = &pPool->aPages[idx];
2694 idx = pPage->iModifiedNext;
2695 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
2696 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
2697 pPage->cModifications = 0;
2698 Assert(++cPages);
2699 }
2700 AssertMsg(cPages == pPool->cModifiedPages, ("%d != %d\n", cPages, pPool->cModifiedPages));
2701 pPool->cModifiedPages = 0;
2702 PGM_UNLOCK(pVM);
2703}
2704
2705
2706/**
2707 * Handle SyncCR3 pool tasks
2708 *
2709 * @returns VBox status code.
2710 * @retval  VINF_SUCCESS on success.
2711 * @retval  VINF_PGM_SYNC_CR3 if it needs to be deferred to ring 3 (GC only).
2712 * @param pVCpu The cross context virtual CPU structure.
2713 * @remark Should only be used when monitoring is available, thus placed in
2714 * the PGMPOOL_WITH_MONITORING \#ifdef.
2715 */
2716int pgmPoolSyncCR3(PVMCPUCC pVCpu)
2717{
2718 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2719 LogFlow(("pgmPoolSyncCR3 fSyncFlags=%x\n", pVCpu->pgm.s.fSyncFlags));
2720
2721 /*
2722 * When monitoring shadowed pages, we reset the modification counters on CR3 sync.
2723 * Occasionally we will have to clear all the shadow page tables because we wanted
2724 * to monitor a page which was mapped by too many shadowed page tables. This operation
2725 * is sometimes referred to as a 'lightweight flush'.
2726 */
2727# ifdef IN_RING3 /* Don't flush in ring-0 or raw mode, it's taking too long. */
2728 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2729 pgmR3PoolClearAll(pVM, false /*fFlushRemTlb*/);
2730# else /* !IN_RING3 */
2731 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2732 {
2733 Log(("SyncCR3: PGM_SYNC_CLEAR_PGM_POOL is set -> VINF_PGM_SYNC_CR3\n"));
2734 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
2735
2736 /* Make sure all other VCPUs return to ring 3. */
2737 if (pVM->cCpus > 1)
2738 {
2739 VM_FF_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING);
2740 PGM_INVL_ALL_VCPU_TLBS(pVM);
2741 }
2742 return VINF_PGM_SYNC_CR3;
2743 }
2744# endif /* !IN_RING3 */
2745 else
2746 {
2747 pgmPoolMonitorModifiedClearAll(pVM);
2748
2749 /* pgmPoolMonitorModifiedClearAll can cause a pgm pool flush (dirty page clearing), so make sure we handle this! */
2750 if (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)
2751 {
2752 Log(("pgmPoolMonitorModifiedClearAll caused a pgm flush -> call pgmPoolSyncCR3 again!\n"));
2753 return pgmPoolSyncCR3(pVCpu);
2754 }
2755 }
2756 return VINF_SUCCESS;
2757}
2758
2759
2760/**
2761 * Frees up at least one user entry.
2762 *
2763 * @returns VBox status code.
2764 * @retval  VINF_SUCCESS if at least one entry was successfully freed.
2765 *
2766 * @param pPool The pool.
2767 * @param iUser The user index.
2768 */
2769static int pgmPoolTrackFreeOneUser(PPGMPOOL pPool, uint16_t iUser)
2770{
2771 STAM_COUNTER_INC(&pPool->StatTrackFreeUpOneUser);
2772 /*
2773 * Just free cached pages in a braindead fashion.
2774 */
2775 /** @todo walk the age list backwards and free the first with usage. */
2776 int rc = VINF_SUCCESS;
2777 do
2778 {
2779 int rc2 = pgmPoolCacheFreeOne(pPool, iUser);
2780 if (RT_FAILURE(rc2) && rc == VINF_SUCCESS)
2781 rc = rc2;
2782 } while (pPool->iUserFreeHead == NIL_PGMPOOL_USER_INDEX);
2783 return rc;
2784}
2785
2786
2787/**
2788 * Inserts a page into the cache.
2789 *
2790 * This will create a user node for the page, insert it into the GCPhys
2791 * hash, and insert it into the age list.
2792 *
2793 * @returns VBox status code.
2794 * @retval VINF_SUCCESS if successfully added.
2795 *
2796 * @param pPool The pool.
2797 * @param pPage The cached page.
2798 * @param GCPhys The GC physical address of the page we're gonna shadow.
2799 * @param iUser The user index.
2800 * @param iUserTable The user table index.
2801 */
2802DECLINLINE(int) pgmPoolTrackInsert(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTGCPHYS GCPhys, uint16_t iUser, uint32_t iUserTable)
2803{
2804 int rc = VINF_SUCCESS;
2805 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2806
2807 LogFlow(("pgmPoolTrackInsert GCPhys=%RGp iUser=%d iUserTable=%x\n", GCPhys, iUser, iUserTable)); RT_NOREF_PV(GCPhys);
2808
2809 if (iUser != NIL_PGMPOOL_IDX)
2810 {
2811#ifdef VBOX_STRICT
2812 /*
2813     * Check that the entry doesn't already exist.
2814 */
2815 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2816 {
2817 uint16_t i = pPage->iUserHead;
2818 do
2819 {
2820 Assert(i < pPool->cMaxUsers);
2821 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2822 i = paUsers[i].iNext;
2823 } while (i != NIL_PGMPOOL_USER_INDEX);
2824 }
2825#endif
2826
2827 /*
2828     * Find a free user node.
2829 */
2830 uint16_t i = pPool->iUserFreeHead;
2831 if (i == NIL_PGMPOOL_USER_INDEX)
2832 {
2833 rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2834 if (RT_FAILURE(rc))
2835 return rc;
2836 i = pPool->iUserFreeHead;
2837 }
2838
2839 /*
2840 * Unlink the user node from the free list,
2841 * initialize and insert it into the user list.
2842 */
2843 pPool->iUserFreeHead = paUsers[i].iNext;
2844 paUsers[i].iNext = NIL_PGMPOOL_USER_INDEX;
2845 paUsers[i].iUser = iUser;
2846 paUsers[i].iUserTable = iUserTable;
2847 pPage->iUserHead = i;
2848 }
2849 else
2850 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
2851
2852
2853 /*
2854 * Insert into cache and enable monitoring of the guest page if enabled.
2855 *
2856 * Until we implement caching of all levels, including the CR3 one, we'll
2857 * have to make sure we don't try monitor & cache any recursive reuse of
2858 * a monitored CR3 page. Because all Windows versions do this, we'll
2859 * have to be able to do combined access monitoring, CR3 + PT and
2860 * PD + PT (guest PAE).
2861 *
2862 * Update:
2863 * We're now cooperating with the CR3 monitor if an uncachable page is found.
2864 */
2865 const bool fCanBeMonitored = true;
2866 pgmPoolCacheInsert(pPool, pPage, fCanBeMonitored); /* This can be expanded. */
2867 if (fCanBeMonitored)
2868 {
2869 rc = pgmPoolMonitorInsert(pPool, pPage);
2870 AssertRC(rc);
2871 }
2872 return rc;
2873}
2874
2875
2876/**
2877 * Adds a user reference to a page.
2878 *
2879 * This will move the page to the head of the age list.
2880 *
2881 * @returns VBox status code.
2882 * @retval VINF_SUCCESS if successfully added.
2883 *
2884 * @param pPool The pool.
2885 * @param pPage The cached page.
2886 * @param iUser The user index.
2887 * @param iUserTable The user table.
2888 */
2889static int pgmPoolTrackAddUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2890{
2891 Log3(("pgmPoolTrackAddUser: GCPhys=%RGp iUser=%x iUserTable=%x\n", pPage->GCPhys, iUser, iUserTable));
2892 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2893 Assert(iUser != NIL_PGMPOOL_IDX);
2894
2895# ifdef VBOX_STRICT
2896 /*
2897     * Check that the entry doesn't already exist. We only allow multiple
2898 * users of top-level paging structures (SHW_POOL_ROOT_IDX).
2899 */
2900 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
2901 {
2902 uint16_t i = pPage->iUserHead;
2903 do
2904 {
2905 Assert(i < pPool->cMaxUsers);
2906 /** @todo this assertion looks odd... Shouldn't it be && here? */
2907 AssertMsg(paUsers[i].iUser != iUser || paUsers[i].iUserTable != iUserTable, ("%x %x vs new %x %x\n", paUsers[i].iUser, paUsers[i].iUserTable, iUser, iUserTable));
2908 i = paUsers[i].iNext;
2909 } while (i != NIL_PGMPOOL_USER_INDEX);
2910 }
2911# endif
2912
2913 /*
2914 * Allocate a user node.
2915 */
2916 uint16_t i = pPool->iUserFreeHead;
2917 if (i == NIL_PGMPOOL_USER_INDEX)
2918 {
2919 int rc = pgmPoolTrackFreeOneUser(pPool, iUser);
2920 if (RT_FAILURE(rc))
2921 return rc;
2922 i = pPool->iUserFreeHead;
2923 }
2924 pPool->iUserFreeHead = paUsers[i].iNext;
2925
2926 /*
2927 * Initialize the user node and insert it.
2928 */
2929 paUsers[i].iNext = pPage->iUserHead;
2930 paUsers[i].iUser = iUser;
2931 paUsers[i].iUserTable = iUserTable;
2932 pPage->iUserHead = i;
2933
2934# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
2935 if (pPage->fDirty)
2936 pgmPoolFlushDirtyPage(pPool->CTX_SUFF(pVM), pPool, pPage->idxDirtyEntry, false /* do not remove */);
2937# endif
2938
2939 /*
2940 * Tell the cache to update its replacement stats for this page.
2941 */
2942 pgmPoolCacheUsed(pPool, pPage);
2943 return VINF_SUCCESS;
2944}
2945
2946
2947/**
2948 * Frees a user record associated with a page.
2949 *
2950 * This does not clear the entry in the user table, it simply returns the
2951 * user record to the chain of free records.
2952 *
2953 * @param pPool The pool.
2954 * @param pPage The shadow page.
2955 * @param iUser The shadow page pool index of the user table.
2956 * @param iUserTable The index into the user table (shadowed).
2957 *
2958 * @remarks Don't call this for root pages.
2959 */
2960static void pgmPoolTrackFreeUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
2961{
2962 Log3(("pgmPoolTrackFreeUser %RGp %x %x\n", pPage->GCPhys, iUser, iUserTable));
2963 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
2964 Assert(iUser != NIL_PGMPOOL_IDX);
2965
2966 /*
2967 * Unlink and free the specified user entry.
2968 */
2969
2970 /* Special: For PAE and 32-bit paging, there is usually no more than one user. */
2971 uint16_t i = pPage->iUserHead;
2972 if ( i != NIL_PGMPOOL_USER_INDEX
2973 && paUsers[i].iUser == iUser
2974 && paUsers[i].iUserTable == iUserTable)
2975 {
2976 pPage->iUserHead = paUsers[i].iNext;
2977
2978 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2979 paUsers[i].iNext = pPool->iUserFreeHead;
2980 pPool->iUserFreeHead = i;
2981 return;
2982 }
2983
2984 /* General: Linear search. */
2985 uint16_t iPrev = NIL_PGMPOOL_USER_INDEX;
2986 while (i != NIL_PGMPOOL_USER_INDEX)
2987 {
2988 if ( paUsers[i].iUser == iUser
2989 && paUsers[i].iUserTable == iUserTable)
2990 {
2991 if (iPrev != NIL_PGMPOOL_USER_INDEX)
2992 paUsers[iPrev].iNext = paUsers[i].iNext;
2993 else
2994 pPage->iUserHead = paUsers[i].iNext;
2995
2996 paUsers[i].iUser = NIL_PGMPOOL_IDX;
2997 paUsers[i].iNext = pPool->iUserFreeHead;
2998 pPool->iUserFreeHead = i;
2999 return;
3000 }
3001 iPrev = i;
3002 i = paUsers[i].iNext;
3003 }
3004
3005 /* Fatal: didn't find it */
3006 AssertFatalMsgFailed(("Didn't find the user entry! iUser=%d iUserTable=%#x GCPhys=%RGp\n",
3007 iUser, iUserTable, pPage->GCPhys));
3008}
3009
3010
3011#if 0 /* unused */
3012/**
3013 * Gets the entry size of a shadow table.
3014 *
3015 * @param enmKind The kind of page.
3016 *
3017 * @returns The size of the entry in bytes. That is, 4 or 8.
3018 * @returns If the kind is not for a table, an assertion is raised and 0 is
3019 * returned.
3020 */
3021DECLINLINE(unsigned) pgmPoolTrackGetShadowEntrySize(PGMPOOLKIND enmKind)
3022{
3023 switch (enmKind)
3024 {
3025 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3026 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3027 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3028 case PGMPOOLKIND_32BIT_PD:
3029 case PGMPOOLKIND_32BIT_PD_PHYS:
3030 return 4;
3031
3032 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3033 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3034 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3035 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3036 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3037 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3038 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3039 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3040 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3041 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3042 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3043 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3044 case PGMPOOLKIND_64BIT_PML4:
3045 case PGMPOOLKIND_PAE_PDPT:
3046 case PGMPOOLKIND_ROOT_NESTED:
3047 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3048 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3049 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3050 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3051 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3052 case PGMPOOLKIND_PAE_PD_PHYS:
3053 case PGMPOOLKIND_PAE_PDPT_PHYS:
3054 return 8;
3055
3056 default:
3057 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3058 }
3059}
3060#endif /* unused */
3061
3062#if 0 /* unused */
3063/**
3064 * Gets the entry size of a guest table.
3065 *
3066 * @param enmKind The kind of page.
3067 *
3068 * @returns The size of the entry in bytes. That is, 0, 4 or 8.
3069 * @returns If the kind is not for a table, an assertion is raised and 0 is
3070 * returned.
3071 */
3072DECLINLINE(unsigned) pgmPoolTrackGetGuestEntrySize(PGMPOOLKIND enmKind)
3073{
3074 switch (enmKind)
3075 {
3076 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3077 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3078 case PGMPOOLKIND_32BIT_PD:
3079 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3080 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3081 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3082 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3083 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3084 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3085 return 4;
3086
3087 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3088 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3089 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3090 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3091 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3092 case PGMPOOLKIND_64BIT_PML4:
3093 case PGMPOOLKIND_PAE_PDPT:
3094 return 8;
3095
3096 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3097 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3098 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3099 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3100 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3101 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3102 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3103 case PGMPOOLKIND_ROOT_NESTED:
3104 case PGMPOOLKIND_PAE_PD_PHYS:
3105 case PGMPOOLKIND_PAE_PDPT_PHYS:
3106 case PGMPOOLKIND_32BIT_PD_PHYS:
3107 /** @todo can we return 0? (nobody is calling this...) */
3108 AssertFailed();
3109 return 0;
3110
3111 default:
3112 AssertFatalMsgFailed(("enmKind=%d\n", enmKind));
3113 }
3114}
3115#endif /* unused */
3116
3117
3118/**
3119 * Checks one shadow page table entry for a mapping of a physical page.
3120 *
3121 * @returns true / false indicating removal of all relevant PTEs
3122 *
3123 * @param pVM The cross context VM structure.
3124 * @param pPhysPage The guest page in question.
3125 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3126 * @param iShw The shadow page table.
3127 * @param iPte Page table entry or NIL_PGMPOOL_PHYSEXT_IDX_PTE if unknown
3128 */
3129static bool pgmPoolTrackFlushGCPhysPTInt(PVM pVM, PCPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw, uint16_t iPte)
3130{
3131 LogFlow(("pgmPoolTrackFlushGCPhysPTInt: pPhysPage=%RHp iShw=%d iPte=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw, iPte));
3132 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3133 bool fRet = false;
3134
3135 /*
3136 * Assert sanity.
3137 */
3138 Assert(iPte != NIL_PGMPOOL_PHYSEXT_IDX_PTE);
3139 AssertFatalMsg(iShw < pPool->cCurPages && iShw != NIL_PGMPOOL_IDX, ("iShw=%d\n", iShw));
3140 PPGMPOOLPAGE pPage = &pPool->aPages[iShw];
3141
3142 /*
3143 * Then, clear the actual mappings to the page in the shadow PT.
3144 */
3145 switch (pPage->enmKind)
3146 {
3147 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3148 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3149 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3150 {
3151 const uint32_t u32 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3152 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3153 uint32_t u32AndMask = 0;
3154 uint32_t u32OrMask = 0;
3155
3156 if (!fFlushPTEs)
3157 {
3158 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3159 {
3160 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3161 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3162 u32OrMask = X86_PTE_RW;
3163 u32AndMask = UINT32_MAX;
3164 fRet = true;
3165 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3166 break;
3167
3168 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3169 u32OrMask = 0;
3170 u32AndMask = ~X86_PTE_RW;
3171 fRet = true;
3172 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3173 break;
3174 default:
3175 /* (shouldn't be here, will assert below) */
3176 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3177 break;
3178 }
3179 }
3180 else
3181 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3182
3183 /* Update the counter if we're removing references. */
3184 if (!u32AndMask)
3185 {
3186 Assert(pPage->cPresent);
3187 Assert(pPool->cPresent);
3188 pPage->cPresent--;
3189 pPool->cPresent--;
3190 }
3191
3192 if ((pPT->a[iPte].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3193 {
3194 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX32\n", iPte, pPT->a[iPte]));
3195 X86PTE Pte;
3196 Pte.u = (pPT->a[iPte].u & u32AndMask) | u32OrMask;
3197 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3198 Pte.u &= ~(X86PGUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3199 ASMAtomicWriteU32(&pPT->a[iPte].u, Pte.u);
3200 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3201 return fRet;
3202 }
3203#ifdef LOG_ENABLED
3204 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3205 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3206 if ((pPT->a[i].u & (X86_PTE_PG_MASK | X86_PTE_P)) == u32)
3207 {
3208 Log(("i=%d cFound=%d\n", i, ++cFound));
3209 }
3210#endif
3211 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u32=%RX32 poolkind=%x\n", pPage->iFirstPresent, pPage->cPresent, u32, pPage->enmKind));
3212 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3213 break;
3214 }
3215
3216 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3217 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3218 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3219 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3220 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3221 case PGMPOOLKIND_EPT_PT_FOR_PHYS: /* physical mask the same as PAE; RW bit as well; be careful! */
3222 {
3223 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PTE_P;
3224 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3225 uint64_t u64OrMask = 0;
3226 uint64_t u64AndMask = 0;
3227
3228 if (!fFlushPTEs)
3229 {
3230 switch (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage))
3231 {
3232 case PGM_PAGE_HNDL_PHYS_STATE_NONE: /* No handler installed. */
3233 case PGM_PAGE_HNDL_PHYS_STATE_DISABLED: /* Monitoring is temporarily disabled. */
3234 u64OrMask = X86_PTE_RW;
3235 u64AndMask = UINT64_MAX;
3236 fRet = true;
3237 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3238 break;
3239
3240 case PGM_PAGE_HNDL_PHYS_STATE_WRITE: /* Write access is monitored. */
3241 u64OrMask = 0;
3242 u64AndMask = ~(uint64_t)X86_PTE_RW;
3243 fRet = true;
3244 STAM_COUNTER_INC(&pPool->StatTrackFlushEntryKeep);
3245 break;
3246
3247 default:
3248 /* (shouldn't be here, will assert below) */
3249 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3250 break;
3251 }
3252 }
3253 else
3254 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3255
3256 /* Update the counter if we're removing references. */
3257 if (!u64AndMask)
3258 {
3259 Assert(pPage->cPresent);
3260 Assert(pPool->cPresent);
3261 pPage->cPresent--;
3262 pPool->cPresent--;
3263 }
3264
3265 if ((PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3266 {
3267 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pte=%RX64\n", iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3268 X86PTEPAE Pte;
3269 Pte.u = (PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & u64AndMask) | u64OrMask;
3270 if (Pte.u & PGM_PTFLAGS_TRACK_DIRTY)
3271 Pte.u &= ~(X86PGPAEUINT)X86_PTE_RW; /* need to disallow writes when dirty bit tracking is still active. */
3272
3273 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[iPte], Pte.u);
3274 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3275 return fRet;
3276 }
3277#ifdef LOG_ENABLED
3278 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3279 Log(("Found %RX64 expected %RX64\n", PGMSHWPTEPAE_GET_U(pPT->a[iPte]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX), u64));
3280 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPT->a); i++)
3281 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & (X86_PTE_PAE_PG_MASK | X86_PTE_P | X86_PTE_PAE_MBZ_MASK_NX)) == u64)
3282 Log(("i=%d cFound=%d\n", i, ++cFound));
3283#endif
3284 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d u64=%RX64 poolkind=%x iPte=%d PT=%RX64\n", pPage->iFirstPresent, pPage->cPresent, u64, pPage->enmKind, iPte, PGMSHWPTEPAE_GET_LOG(pPT->a[iPte])));
3285 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);*/
3286 break;
3287 }
3288
3289#ifdef PGM_WITH_LARGE_PAGES
3290 /* Large page case only. */
3291 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3292 {
3293 Assert(pVM->pgm.s.fNestedPaging);
3294
3295 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3296 PEPTPD pPD = (PEPTPD)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3297
3298 if ((pPD->a[iPte].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3299 {
3300 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3301 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3302 pPD->a[iPte].u = 0;
3303 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3304
3305 /* Update the counter as we're removing references. */
3306 Assert(pPage->cPresent);
3307 Assert(pPool->cPresent);
3308 pPage->cPresent--;
3309 pPool->cPresent--;
3310
3311 return fRet;
3312 }
3313# ifdef LOG_ENABLED
3314 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3315 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3316 if ((pPD->a[i].u & (EPT_PDE2M_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3317 Log(("i=%d cFound=%d\n", i, ++cFound));
3318# endif
3319 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3320 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3321 break;
3322 }
3323
3324 /* AMD-V nested paging */ /** @todo merge with EPT as we only check the parts that are identical. */
3325 case PGMPOOLKIND_PAE_PD_PHYS:
3326 {
3327 Assert(pVM->pgm.s.fNestedPaging);
3328
3329 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage) | X86_PDE4M_P | X86_PDE4M_PS;
3330 PX86PDPAE pPD = (PX86PDPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3331
3332 if ((pPD->a[iPte].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3333 {
3334 Log4(("pgmPoolTrackFlushGCPhysPTs: i=%d pde=%RX64\n", iPte, pPD->a[iPte]));
3335 STAM_COUNTER_INC(&pPool->StatTrackFlushEntry);
3336 pPD->a[iPte].u = 0;
3337 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);
3338
3339 /* Update the counter as we're removing references. */
3340 Assert(pPage->cPresent);
3341 Assert(pPool->cPresent);
3342 pPage->cPresent--;
3343 pPool->cPresent--;
3344 return fRet;
3345 }
3346# ifdef LOG_ENABLED
3347 Log(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3348 for (unsigned i = 0, cFound = 0; i < RT_ELEMENTS(pPD->a); i++)
3349 if ((pPD->a[i].u & (X86_PDE2M_PAE_PG_MASK | X86_PDE4M_P | X86_PDE4M_PS)) == u64)
3350 Log(("i=%d cFound=%d\n", i, ++cFound));
3351# endif
3352 AssertFatalMsgFailed(("iFirstPresent=%d cPresent=%d\n", pPage->iFirstPresent, pPage->cPresent));
3353 /*PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPD);*/
3354 break;
3355 }
3356#endif /* PGM_WITH_LARGE_PAGES */
3357
3358 default:
3359 AssertFatalMsgFailed(("enmKind=%d iShw=%d\n", pPage->enmKind, iShw));
3360 }
3361
3362 /* not reached. */
3363#ifndef _MSC_VER
3364 return fRet;
3365#endif
3366}
3367
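/*
 * Illustrative sketch (kept under #if 0, never compiled): distills the
 * (fFlushPTEs, handler-state) -> PTE mask decision implemented twice above.
 * pgmPoolSketchPteMasks and its parameters are hypothetical names, not part
 * of the real pool API.
 */
#if 0
static void pgmPoolSketchPteMasks(bool fFlushPTEs, uint8_t enmHndlState,
                                  uint64_t *pfAndMask, uint64_t *pfOrMask, bool *pfKeep)
{
    *pfAndMask = 0; /* AndMask == 0 means: zap the PTE and drop the reference. */
    *pfOrMask  = 0;
    *pfKeep    = false;
    if (!fFlushPTEs)
        switch (enmHndlState)
        {
            case PGM_PAGE_HNDL_PHYS_STATE_NONE:      /* no handler installed */
            case PGM_PAGE_HNDL_PHYS_STATE_DISABLED:  /* monitoring temporarily off */
                *pfOrMask  = X86_PTE_RW;             /* re-enable writes, keep the PTE */
                *pfAndMask = UINT64_MAX;
                *pfKeep    = true;
                break;
            case PGM_PAGE_HNDL_PHYS_STATE_WRITE:     /* writes are monitored */
                *pfAndMask = ~(uint64_t)X86_PTE_RW;  /* write-protect, keep the PTE */
                *pfKeep    = true;
                break;
        }
}
#endif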
3368
3369/**
3370 * Scans one shadow page table for mappings of a physical page.
3371 *
3372 * @param pVM The cross context VM structure.
3373 * @param pPhysPage The guest page in question.
3374 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3375 * @param iShw The index of the shadow page table.
3376 */
3377static void pgmPoolTrackFlushGCPhysPT(PVM pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iShw)
3378{
3379 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3380
3381 /* We should only come here when there's only one reference to this physical page. */
3382 Assert(PGMPOOL_TD_GET_CREFS(PGM_PAGE_GET_TRACKING(pPhysPage)) == 1);
3383
3384 Log2(("pgmPoolTrackFlushGCPhysPT: pPhysPage=%RHp iShw=%d\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iShw));
3385 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPT, f);
3386 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, iShw, PGM_PAGE_GET_PTE_INDEX(pPhysPage));
3387 if (!fKeptPTEs)
3388 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3389 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPT, f);
3390}
3391
3392
3393/**
3394 * Flushes a list of shadow page tables mapping the same physical page.
3395 *
3396 * @param pVM The cross context VM structure.
3397 * @param pPhysPage The guest page in question.
3398 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3399 * @param iPhysExt The physical cross reference extent list to flush.
3400 */
3401static void pgmPoolTrackFlushGCPhysPTs(PVMCC pVM, PPGMPAGE pPhysPage, bool fFlushPTEs, uint16_t iPhysExt)
3402{
3403 PGM_LOCK_ASSERT_OWNER(pVM);
3404 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3405 bool fKeepList = false;
3406
3407 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTs, f);
3408 Log2(("pgmPoolTrackFlushGCPhysPTs: pPhysPage=%RHp iPhysExt=%u\n", PGM_PAGE_GET_HCPHYS(pPhysPage), iPhysExt));
3409
3410 const uint16_t iPhysExtStart = iPhysExt;
3411 PPGMPOOLPHYSEXT pPhysExt;
3412 do
3413 {
3414 Assert(iPhysExt < pPool->cMaxPhysExts);
3415 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3416 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3417 {
3418 if (pPhysExt->aidx[i] != NIL_PGMPOOL_IDX)
3419 {
3420 bool fKeptPTEs = pgmPoolTrackFlushGCPhysPTInt(pVM, pPhysPage, fFlushPTEs, pPhysExt->aidx[i], pPhysExt->apte[i]);
3421 if (!fKeptPTEs)
3422 {
3423 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3424 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3425 }
3426 else
3427 fKeepList = true;
3428 }
3429 }
3430 /* next */
3431 iPhysExt = pPhysExt->iNext;
3432 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3433
3434 if (!fKeepList)
3435 {
3436 /* insert the list into the free list and clear the ram range entry. */
3437 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3438 pPool->iPhysExtFreeHead = iPhysExtStart;
3439 /* Invalidate the tracking data. */
3440 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3441 }
3442
3443 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTs, f);
3444}
3445
3446
3447/**
3448 * Flushes all shadow page table mappings of the given guest page.
3449 *
3450 * This is typically called when the host page backing the guest one has been
3451 * replaced or when the page protection was changed due to a guest access
3452 * caught by the monitoring.
3453 *
3454 * @returns VBox status code.
3455 * @retval VINF_SUCCESS if all references have been successfully cleared.
3456 * @retval VINF_PGM_SYNC_CR3 if we're better off with a CR3 sync and a page
3457 * pool cleaning. FF and sync flags are set.
3458 *
3459 * @param pVM The cross context VM structure.
3460 * @param GCPhysPage GC physical address of the page in question
3461 * @param pPhysPage The guest page in question.
3462 * @param fFlushPTEs Flush PTEs or allow them to be updated (e.g. in case of an RW bit change)
3463 * @param pfFlushTLBs This is set to @a true if the shadow TLBs should be
3464 * flushed; it is NOT touched if flushing isn't necessary.
3465 * The caller MUST initialize this to @a false.
3466 */
3467int pgmPoolTrackUpdateGCPhys(PVMCC pVM, RTGCPHYS GCPhysPage, PPGMPAGE pPhysPage, bool fFlushPTEs, bool *pfFlushTLBs)
3468{
3469 PVMCPUCC pVCpu = VMMGetCpu(pVM);
3470 PGM_LOCK_VOID(pVM);
3471 int rc = VINF_SUCCESS;
3472
3473#ifdef PGM_WITH_LARGE_PAGES
3474 /* Is this page part of a large page? */
3475 if (PGM_PAGE_GET_PDE_TYPE(pPhysPage) == PGM_PAGE_PDE_TYPE_PDE)
3476 {
3477 RTGCPHYS GCPhysBase = GCPhysPage & X86_PDE2M_PAE_PG_MASK;
3478 GCPhysPage &= X86_PDE_PAE_PG_MASK;
3479
3480 /* Fetch the large page base. */
3481 PPGMPAGE pLargePage;
3482 if (GCPhysBase != GCPhysPage)
3483 {
3484 pLargePage = pgmPhysGetPage(pVM, GCPhysBase);
3485 AssertFatal(pLargePage);
3486 }
3487 else
3488 pLargePage = pPhysPage;
3489
3490 Log(("pgmPoolTrackUpdateGCPhys: update large page PDE for %RGp (%RGp)\n", GCPhysBase, GCPhysPage));
3491
3492 if (PGM_PAGE_GET_PDE_TYPE(pLargePage) == PGM_PAGE_PDE_TYPE_PDE)
3493 {
3494 /* Mark the large page as disabled as we need to break it up to change a single page in the 2 MB range. */
3495 PGM_PAGE_SET_PDE_TYPE(pVM, pLargePage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3496 pVM->pgm.s.cLargePagesDisabled++;
3497
3498 /* Update the base as that *only* that one has a reference and there's only one PDE to clear. */
3499 rc = pgmPoolTrackUpdateGCPhys(pVM, GCPhysBase, pLargePage, fFlushPTEs, pfFlushTLBs);
3500
3501 *pfFlushTLBs = true;
3502 PGM_UNLOCK(pVM);
3503 return rc;
3504 }
3505 }
3506#else
3507 NOREF(GCPhysPage);
3508#endif /* PGM_WITH_LARGE_PAGES */
3509
3510 const uint16_t u16 = PGM_PAGE_GET_TRACKING(pPhysPage);
3511 if (u16)
3512 {
3513 /*
3514 * The zero page is currently screwing up the tracking and we'll
3515 * have to flush the whole shebang. Unless VBOX_WITH_NEW_LAZY_PAGE_ALLOC
3516 * is defined, zero pages won't normally be mapped. Some kind of solution
3517 * will be needed for this problem of course, but it will have to wait...
3518 */
3519 if ( PGM_PAGE_IS_ZERO(pPhysPage)
3520 || PGM_PAGE_IS_BALLOONED(pPhysPage))
3521 rc = VINF_PGM_GCPHYS_ALIASED;
3522 else
3523 {
3524 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
3525 {
3526 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
3527 pgmPoolTrackFlushGCPhysPT(pVM,
3528 pPhysPage,
3529 fFlushPTEs,
3530 PGMPOOL_TD_GET_IDX(u16));
3531 }
3532 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
3533 pgmPoolTrackFlushGCPhysPTs(pVM, pPhysPage, fFlushPTEs, PGMPOOL_TD_GET_IDX(u16));
3534 else
3535 rc = pgmPoolTrackFlushGCPhysPTsSlow(pVM, pPhysPage);
3536 *pfFlushTLBs = true;
3537 }
3538 }
3539
3540 if (rc == VINF_PGM_GCPHYS_ALIASED)
3541 {
3542 pVCpu->pgm.s.fSyncFlags |= PGM_SYNC_CLEAR_PGM_POOL;
3543 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
3544 rc = VINF_PGM_SYNC_CR3;
3545 }
3546 PGM_UNLOCK(pVM);
3547 return rc;
3548}
3549
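/*
 * Illustrative sketch (kept under #if 0, never compiled): the three-way
 * dispatch performed above on the 16-bit PGMPAGE tracking word.  The helper
 * name is hypothetical; the macros and constants are the real ones used above.
 */
#if 0
static const char *pgmPoolSketchTrackKind(uint16_t u16)
{
    if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
        return "single ref: the index part is the one shadow PT's pool index";
    if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
        return "several refs: the index part heads a PGMPOOLPHYSEXT extent list";
    return "overflowed: per-reference data was dropped, only the slow scan works";
}
#endif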
3550
3551/**
3552 * Scans all shadow page tables for mappings of a physical page.
3553 *
3554 * This may be slow, but it's most likely more efficient than cleaning
3555 * out the entire page pool / cache.
3556 *
3557 * @returns VBox status code.
3558 * @retval VINF_SUCCESS if all references have been successfully cleared.
3559 * @retval VINF_PGM_GCPHYS_ALIASED if we're better off with a CR3 sync and
3560 * a page pool cleaning.
3561 *
3562 * @param pVM The cross context VM structure.
3563 * @param pPhysPage The guest page in question.
3564 */
3565int pgmPoolTrackFlushGCPhysPTsSlow(PVMCC pVM, PPGMPAGE pPhysPage)
3566{
3567 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3568 STAM_PROFILE_START(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3569 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: cUsedPages=%d cPresent=%d pPhysPage=%R[pgmpage]\n",
3570 pPool->cUsedPages, pPool->cPresent, pPhysPage));
3571
3572 /*
3573 * There is a limit to what makes sense.
3574 */
3575 if ( pPool->cPresent > 1024
3576 && pVM->cCpus == 1)
3577 {
3578 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3579 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3580 return VINF_PGM_GCPHYS_ALIASED;
3581 }
3582
3583 /*
3584 * Iterate all the pages until we've encountered all those in use.
3585 * This is a simple but not quite optimal solution.
3586 */
3587 const uint64_t u64 = PGM_PAGE_GET_HCPHYS(pPhysPage);
3588 unsigned cLeft = pPool->cUsedPages;
3589 unsigned iPage = pPool->cCurPages;
3590 while (--iPage >= PGMPOOL_IDX_FIRST)
3591 {
3592 PPGMPOOLPAGE pPage = &pPool->aPages[iPage];
3593 if ( pPage->GCPhys != NIL_RTGCPHYS
3594 && pPage->cPresent)
3595 {
3596 switch (pPage->enmKind)
3597 {
3598 /*
3599 * We only care about shadow page tables.
3600 */
3601 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
3602 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
3603 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
3604 {
3605 const uint32_t u32 = (uint32_t)u64;
3606 unsigned cPresent = pPage->cPresent;
3607 PX86PT pPT = (PX86PT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3608 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3609 {
3610 const X86PGUINT uPte = pPT->a[i].u;
3611 if (uPte & X86_PTE_P)
3612 {
3613 if ((uPte & X86_PTE_PG_MASK) == u32)
3614 {
3615 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX32\n", iPage, i, pPT->a[i]));
3616 ASMAtomicWriteU32(&pPT->a[i].u, 0);
3617
3618 /* Update the counter as we're removing references. */
3619 Assert(pPage->cPresent);
3620 Assert(pPool->cPresent);
3621 pPage->cPresent--;
3622 pPool->cPresent--;
3623 }
3624 if (!--cPresent)
3625 break;
3626 }
3627 }
3628 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3629 break;
3630 }
3631
3632 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
3633 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
3634 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
3635 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
3636 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
3637 {
3638 unsigned cPresent = pPage->cPresent;
3639 PPGMSHWPTPAE pPT = (PPGMSHWPTPAE)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3640 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3641 if (PGMSHWPTEPAE_IS_P(pPT->a[i]))
3642 {
3643 if ((PGMSHWPTEPAE_GET_U(pPT->a[i]) & X86_PTE_PAE_PG_MASK) == u64)
3644 {
3645 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3646 PGMSHWPTEPAE_ATOMIC_SET(pPT->a[i], 0); /// @todo why not atomic?
3647
3648 /* Update the counter as we're removing references. */
3649 Assert(pPage->cPresent);
3650 Assert(pPool->cPresent);
3651 pPage->cPresent--;
3652 pPool->cPresent--;
3653 }
3654 if (!--cPresent)
3655 break;
3656 }
3657 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3658 break;
3659 }
3660
3661 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
3662 {
3663 unsigned cPresent = pPage->cPresent;
3664 PEPTPT pPT = (PEPTPT)PGMPOOL_PAGE_2_PTR(pVM, pPage);
3665 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pPT->a); i++)
3666 {
3667 X86PGPAEUINT const uPte = pPT->a[i].u;
3668 if (uPte & EPT_E_READ)
3669 {
3670 if ((uPte & EPT_PTE_PG_MASK) == u64)
3671 {
3672 //Log4(("pgmPoolTrackFlushGCPhysPTsSlow: idx=%d i=%d pte=%RX64\n", iPage, i, pPT->a[i]));
3673 ASMAtomicWriteU64(&pPT->a[i].u, 0);
3674
3675 /* Update the counter as we're removing references. */
3676 Assert(pPage->cPresent);
3677 Assert(pPool->cPresent);
3678 pPage->cPresent--;
3679 pPool->cPresent--;
3680 }
3681 if (!--cPresent)
3682 break;
3683 }
3684 }
3685 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pPT);
3686 break;
3687 }
3688 }
3689
3690 if (!--cLeft)
3691 break;
3692 }
3693 }
3694
3695 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
3696 STAM_PROFILE_STOP(&pPool->StatTrackFlushGCPhysPTsSlow, s);
3697
3698 /*
3699 * There is a limit to what makes sense. The above search is very expensive, so force a pgm pool flush.
3700 */
3701 if (pPool->cPresent > 1024)
3702 {
3703 LogFlow(("pgmPoolTrackFlushGCPhysPTsSlow: giving up... (cPresent=%d)\n", pPool->cPresent));
3704 return VINF_PGM_GCPHYS_ALIASED;
3705 }
3706
3707 return VINF_SUCCESS;
3708}
3709
3710
3711/**
3712 * Clears the user entry in a user table.
3713 *
3714 * This is used to remove all references to a page when flushing it.
3715 */
3716static void pgmPoolTrackClearPageUser(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PCPGMPOOLUSER pUser)
3717{
3718 Assert(pUser->iUser != NIL_PGMPOOL_IDX);
3719 Assert(pUser->iUser < pPool->cCurPages);
3720 uint32_t iUserTable = pUser->iUserTable;
3721
3722 /*
3723 * Map the user page. Ignore references made by fictitious pages.
3724 */
3725 PPGMPOOLPAGE pUserPage = &pPool->aPages[pUser->iUser];
3726 LogFlow(("pgmPoolTrackClearPageUser: clear %x in %s (%RGp) (flushing %s)\n", iUserTable, pgmPoolPoolKindToStr(pUserPage->enmKind), pUserPage->Core.Key, pgmPoolPoolKindToStr(pPage->enmKind)));
3727 union
3728 {
3729 uint64_t *pau64;
3730 uint32_t *pau32;
3731 } u;
3732 if (pUserPage->idx < PGMPOOL_IDX_FIRST)
3733 {
3734 Assert(!pUserPage->pvPageR3);
3735 return;
3736 }
3737 u.pau64 = (uint64_t *)PGMPOOL_PAGE_2_PTR(pPool->CTX_SUFF(pVM), pUserPage);
3738
3739
3740 /* Safety precaution in case we change the paging for other modes too in the future. */
3741 Assert(!pgmPoolIsPageLocked(pPage)); RT_NOREF_PV(pPage);
3742
3743#ifdef VBOX_STRICT
3744 /*
3745 * Some sanity checks.
3746 */
3747 switch (pUserPage->enmKind)
3748 {
3749 case PGMPOOLKIND_32BIT_PD:
3750 case PGMPOOLKIND_32BIT_PD_PHYS:
3751 Assert(iUserTable < X86_PG_ENTRIES);
3752 break;
3753 case PGMPOOLKIND_PAE_PDPT:
3754 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3755 case PGMPOOLKIND_PAE_PDPT_PHYS:
3756 Assert(iUserTable < 4);
3757 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3758 break;
3759 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3760 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3761 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3762 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3763 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3764 case PGMPOOLKIND_PAE_PD_PHYS:
3765 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3766 break;
3767 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3768 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3769 break;
3770 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3771 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3772 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3773 break;
3774 case PGMPOOLKIND_64BIT_PML4:
3775 Assert(!(u.pau64[iUserTable] & PGM_PLXFLAGS_PERMANENT));
3776 /* GCPhys >> PAGE_SHIFT is the index here */
3777 break;
3778 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3779 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3780 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3781 break;
3782
3783 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3784 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3785 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3786 break;
3787
3788 case PGMPOOLKIND_ROOT_NESTED:
3789 Assert(iUserTable < X86_PG_PAE_ENTRIES);
3790 break;
3791
3792 default:
3793 AssertMsgFailed(("enmKind=%d\n", pUserPage->enmKind));
3794 break;
3795 }
3796#endif /* VBOX_STRICT */
3797
3798 /*
3799 * Clear the entry in the user page.
3800 */
3801 switch (pUserPage->enmKind)
3802 {
3803 /* 32-bit entries */
3804 case PGMPOOLKIND_32BIT_PD:
3805 case PGMPOOLKIND_32BIT_PD_PHYS:
3806 ASMAtomicWriteU32(&u.pau32[iUserTable], 0);
3807 break;
3808
3809 /* 64-bit entries */
3810 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
3811 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
3812 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
3813 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
3814 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
3815 case PGMPOOLKIND_PAE_PD_PHYS:
3816 case PGMPOOLKIND_PAE_PDPT_PHYS:
3817 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
3818 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
3819 case PGMPOOLKIND_64BIT_PML4:
3820 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
3821 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
3822 case PGMPOOLKIND_PAE_PDPT:
3823 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
3824 case PGMPOOLKIND_ROOT_NESTED:
3825 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
3826 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
3827 ASMAtomicWriteU64(&u.pau64[iUserTable], 0);
3828 break;
3829
3830 default:
3831 AssertFatalMsgFailed(("enmKind=%d iUser=%d iUserTable=%#x\n", pUserPage->enmKind, pUser->iUser, pUser->iUserTable));
3832 }
3833 PGM_DYNMAP_UNUSED_HINT_VM(pPool->CTX_SUFF(pVM), u.pau64);
3834}
3835
3836
3837/**
3838 * Clears all users of a page.
3839 */
3840static void pgmPoolTrackClearPageUsers(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
3841{
3842 /*
3843 * Free all the user records.
3844 */
3845 LogFlow(("pgmPoolTrackClearPageUsers %RGp\n", pPage->GCPhys));
3846
3847 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
3848 uint16_t i = pPage->iUserHead;
3849 while (i != NIL_PGMPOOL_USER_INDEX)
3850 {
3851 /* Clear the entry in the user table. */
3852 pgmPoolTrackClearPageUser(pPool, pPage, &paUsers[i]);
3853
3854 /* Free it. */
3855 const uint16_t iNext = paUsers[i].iNext;
3856 paUsers[i].iUser = NIL_PGMPOOL_IDX;
3857 paUsers[i].iNext = pPool->iUserFreeHead;
3858 pPool->iUserFreeHead = i;
3859
3860 /* Next. */
3861 i = iNext;
3862 }
3863 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
3864}
3865
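/*
 * Illustrative sketch (kept under #if 0, never compiled): the index-based
 * free-list push performed per user record in the loop above, isolated for
 * clarity.  The helper name is hypothetical.
 */
#if 0
static void pgmPoolSketchFreeUserRecord(PPGMPOOL pPool, uint16_t i)
{
    PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
    paUsers[i].iUser = NIL_PGMPOOL_IDX;      /* mark the record as unused */
    paUsers[i].iNext = pPool->iUserFreeHead; /* link it at the head of the free list */
    pPool->iUserFreeHead = i;
}
#endif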
3866
3867/**
3868 * Allocates a new physical cross reference extent.
3869 *
3870 * @returns Pointer to the allocated extent on success. NULL if we're out of them.
3871 * @param pVM The cross context VM structure.
3872 * @param piPhysExt Where to store the phys ext index.
3873 */
3874PPGMPOOLPHYSEXT pgmPoolTrackPhysExtAlloc(PVMCC pVM, uint16_t *piPhysExt)
3875{
3876 PGM_LOCK_ASSERT_OWNER(pVM);
3877 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3878 uint16_t iPhysExt = pPool->iPhysExtFreeHead;
3879 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
3880 {
3881 STAM_COUNTER_INC(&pPool->StamTrackPhysExtAllocFailures);
3882 return NULL;
3883 }
3884 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3885 pPool->iPhysExtFreeHead = pPhysExt->iNext;
3886 pPhysExt->iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
3887 *piPhysExt = iPhysExt;
3888 return pPhysExt;
3889}
3890
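/*
 * Illustrative sketch (kept under #if 0, never compiled): typical use of the
 * allocator above -- grab an extent, seed its first slot, and hand back the
 * packed tracking word.  This mirrors what pgmPoolTrackPhysExtAddref does
 * below; the helper name is hypothetical.
 */
#if 0
static uint16_t pgmPoolSketchNewExtent(PVMCC pVM, uint16_t iShwPT, uint16_t iPte)
{
    uint16_t iPhysExt;
    PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
    if (!pPhysExt) /* out of extents: record the overflow in the tracking word */
        return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
    pPhysExt->aidx[0] = iShwPT; /* the first referencing shadow page table... */
    pPhysExt->apte[0] = iPte;   /* ...and the PTE index within it */
    return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
}
#endif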
3891
3892/**
3893 * Frees a physical cross reference extent.
3894 *
3895 * @param pVM The cross context VM structure.
3896 * @param iPhysExt The extent to free.
3897 */
3898void pgmPoolTrackPhysExtFree(PVMCC pVM, uint16_t iPhysExt)
3899{
3900 PGM_LOCK_ASSERT_OWNER(pVM);
3901 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3902 Assert(iPhysExt < pPool->cMaxPhysExts);
3903 PPGMPOOLPHYSEXT pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3904 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3905 {
3906 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3907 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3908 }
3909 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3910 pPool->iPhysExtFreeHead = iPhysExt;
3911}
3912
3913
3914/**
3915 * Frees a list of physical cross reference extents.
3916 *
3917 * @param pVM The cross context VM structure.
3918 * @param iPhysExt The index of the head of the extent list to free.
3919 */
3920void pgmPoolTrackPhysExtFreeList(PVMCC pVM, uint16_t iPhysExt)
3921{
3922 PGM_LOCK_ASSERT_OWNER(pVM);
3923 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3924
3925 const uint16_t iPhysExtStart = iPhysExt;
3926 PPGMPOOLPHYSEXT pPhysExt;
3927 do
3928 {
3929 Assert(iPhysExt < pPool->cMaxPhysExts);
3930 pPhysExt = &pPool->CTX_SUFF(paPhysExts)[iPhysExt];
3931 for (unsigned i = 0; i < RT_ELEMENTS(pPhysExt->aidx); i++)
3932 {
3933 pPhysExt->aidx[i] = NIL_PGMPOOL_IDX;
3934 pPhysExt->apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
3935 }
3936
3937 /* next */
3938 iPhysExt = pPhysExt->iNext;
3939 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
3940
3941 pPhysExt->iNext = pPool->iPhysExtFreeHead;
3942 pPool->iPhysExtFreeHead = iPhysExtStart;
3943}
3944
3945
3946/**
3947 * Insert a reference into a list of physical cross reference extents.
3948 *
3949 * @returns The new tracking data for PGMPAGE.
3950 *
3951 * @param pVM The cross context VM structure.
3952 * @param iPhysExt The physical extent index of the list head.
3953 * @param iShwPT The shadow page table index.
3954 * @param iPte Page table entry
3955 *
3956 */
3957static uint16_t pgmPoolTrackPhysExtInsert(PVMCC pVM, uint16_t iPhysExt, uint16_t iShwPT, uint16_t iPte)
3958{
3959 PGM_LOCK_ASSERT_OWNER(pVM);
3960 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3961 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
3962
3963 /*
3964 * Special common cases.
3965 */
3966 if (paPhysExts[iPhysExt].aidx[1] == NIL_PGMPOOL_IDX)
3967 {
3968 paPhysExts[iPhysExt].aidx[1] = iShwPT;
3969 paPhysExts[iPhysExt].apte[1] = iPte;
3970 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3971 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,%d pte %d,}\n", iPhysExt, iShwPT, iPte));
3972 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3973 }
3974 if (paPhysExts[iPhysExt].aidx[2] == NIL_PGMPOOL_IDX)
3975 {
3976 paPhysExts[iPhysExt].aidx[2] = iShwPT;
3977 paPhysExts[iPhysExt].apte[2] = iPte;
3978 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3979 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{,,%d pte %d}\n", iPhysExt, iShwPT, iPte));
3980 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
3981 }
3982 AssertCompile(RT_ELEMENTS(paPhysExts[iPhysExt].aidx) == 3);
3983
3984 /*
3985 * General treatment.
3986 */
3987 const uint16_t iPhysExtStart = iPhysExt;
3988 unsigned cMax = 15;
3989 for (;;)
3990 {
3991 Assert(iPhysExt < pPool->cMaxPhysExts);
3992 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
3993 if (paPhysExts[iPhysExt].aidx[i] == NIL_PGMPOOL_IDX)
3994 {
3995 paPhysExts[iPhysExt].aidx[i] = iShwPT;
3996 paPhysExts[iPhysExt].apte[i] = iPte;
3997 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedMany);
3998 LogFlow(("pgmPoolTrackPhysExtInsert: %d:{%d pte %d} i=%d cMax=%d\n", iPhysExt, iShwPT, iPte, i, cMax));
3999 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtStart);
4000 }
4001 if (!--cMax)
4002 {
4003 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackOverflows);
4004 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4005 LogFlow(("pgmPoolTrackPhysExtInsert: overflow (1) iShwPT=%d\n", iShwPT));
4006 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4007 }
4008
4009 /* advance */
4010 iPhysExt = paPhysExts[iPhysExt].iNext;
4011 if (iPhysExt == NIL_PGMPOOL_PHYSEXT_INDEX)
4012 break;
4013 }
4014
4015 /*
4016 * Add another extent to the list.
4017 */
4018 PPGMPOOLPHYSEXT pNew = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4019 if (!pNew)
4020 {
4021 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackNoExtentsLeft);
4022 pgmPoolTrackPhysExtFreeList(pVM, iPhysExtStart);
4023 LogFlow(("pgmPoolTrackPhysExtInsert: pgmPoolTrackPhysExtAlloc failed iShwPT=%d\n", iShwPT));
4024 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4025 }
4026 pNew->iNext = iPhysExtStart;
4027 pNew->aidx[0] = iShwPT;
4028 pNew->apte[0] = iPte;
4029 LogFlow(("pgmPoolTrackPhysExtInsert: added new extent %d:{%d pte %d}->%d\n", iPhysExt, iShwPT, iPte, iPhysExtStart));
4030 return PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4031}
4032
4033
4034/**
4035 * Add a reference to a guest physical page where extents are in use.
4036 *
4037 * @returns The new tracking data for PGMPAGE.
4038 *
4039 * @param pVM The cross context VM structure.
4040 * @param pPhysPage Pointer to the aPages entry in the ram range.
4041 * @param u16 The ram range flags (top 16-bits).
4042 * @param iShwPT The shadow page table index.
4043 * @param iPte Page table entry
4044 */
4045uint16_t pgmPoolTrackPhysExtAddref(PVMCC pVM, PPGMPAGE pPhysPage, uint16_t u16, uint16_t iShwPT, uint16_t iPte)
4046{
4047 PGM_LOCK_VOID(pVM);
4048 if (PGMPOOL_TD_GET_CREFS(u16) != PGMPOOL_TD_CREFS_PHYSEXT)
4049 {
4050 /*
4051 * Convert to extent list.
4052 */
4053 Assert(PGMPOOL_TD_GET_CREFS(u16) == 1);
4054 uint16_t iPhysExt;
4055 PPGMPOOLPHYSEXT pPhysExt = pgmPoolTrackPhysExtAlloc(pVM, &iPhysExt);
4056 if (pPhysExt)
4057 {
4058 LogFlow(("pgmPoolTrackPhysExtAddref: new extent: %d:{%d, %d}\n", iPhysExt, PGMPOOL_TD_GET_IDX(u16), iShwPT));
4059 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliased);
4060 pPhysExt->aidx[0] = PGMPOOL_TD_GET_IDX(u16);
4061 pPhysExt->apte[0] = PGM_PAGE_GET_PTE_INDEX(pPhysPage);
4062 pPhysExt->aidx[1] = iShwPT;
4063 pPhysExt->apte[1] = iPte;
4064 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExt);
4065 }
4066 else
4067 u16 = PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED);
4068 }
4069 else if (u16 != PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, PGMPOOL_TD_IDX_OVERFLOWED))
4070 {
4071 /*
4072 * Insert into the extent list.
4073 */
4074 u16 = pgmPoolTrackPhysExtInsert(pVM, PGMPOOL_TD_GET_IDX(u16), iShwPT, iPte);
4075 }
4076 else
4077 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackAliasedLots);
4078 PGM_UNLOCK(pVM);
4079 return u16;
4080}
4081
4082
4083/**
4084 * Clear references to guest physical memory.
4085 *
4086 * @param pPool The pool.
4087 * @param pPage The page.
4088 * @param pPhysPage Pointer to the aPages entry in the ram range.
4089 * @param iPte Shadow PTE index
4090 */
4091void pgmPoolTrackPhysExtDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMPAGE pPhysPage, uint16_t iPte)
4092{
4093 PVMCC pVM = pPool->CTX_SUFF(pVM);
4094 const unsigned cRefs = PGM_PAGE_GET_TD_CREFS(pPhysPage);
4095 AssertFatalMsg(cRefs == PGMPOOL_TD_CREFS_PHYSEXT, ("cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4096
4097 uint16_t iPhysExt = PGM_PAGE_GET_TD_IDX(pPhysPage);
4098 if (iPhysExt != PGMPOOL_TD_IDX_OVERFLOWED)
4099 {
4100 PGM_LOCK_VOID(pVM);
4101
4102 uint16_t iPhysExtPrev = NIL_PGMPOOL_PHYSEXT_INDEX;
4103 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
4104 do
4105 {
4106 Assert(iPhysExt < pPool->cMaxPhysExts);
4107
4108 /*
4109 * Look for the shadow page and check if it's all freed.
4110 */
4111 for (unsigned i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4112 {
4113 if ( paPhysExts[iPhysExt].aidx[i] == pPage->idx
4114 && paPhysExts[iPhysExt].apte[i] == iPte)
4115 {
4116 paPhysExts[iPhysExt].aidx[i] = NIL_PGMPOOL_IDX;
4117 paPhysExts[iPhysExt].apte[i] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
4118
4119 for (i = 0; i < RT_ELEMENTS(paPhysExts[iPhysExt].aidx); i++)
4120 if (paPhysExts[iPhysExt].aidx[i] != NIL_PGMPOOL_IDX)
4121 {
4122 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d\n", pPhysPage, pPage->idx));
4123 PGM_UNLOCK(pVM);
4124 return;
4125 }
4126
4127 /* we can free the node. */
4128 const uint16_t iPhysExtNext = paPhysExts[iPhysExt].iNext;
4129 if ( iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX
4130 && iPhysExtNext == NIL_PGMPOOL_PHYSEXT_INDEX)
4131 {
4132 /* lonely node */
4133 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4134 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d lonely\n", pPhysPage, pPage->idx));
4135 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, 0);
4136 }
4137 else if (iPhysExtPrev == NIL_PGMPOOL_PHYSEXT_INDEX)
4138 {
4139 /* head */
4140 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d head\n", pPhysPage, pPage->idx));
4141 PGM_PAGE_SET_TRACKING(pVM, pPhysPage, PGMPOOL_TD_MAKE(PGMPOOL_TD_CREFS_PHYSEXT, iPhysExtNext));
4142 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4143 }
4144 else
4145 {
4146 /* in list */
4147 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage] idx=%d in list\n", pPhysPage, pPage->idx));
4148 paPhysExts[iPhysExtPrev].iNext = iPhysExtNext;
4149 pgmPoolTrackPhysExtFree(pVM, iPhysExt);
4150 }
4151 iPhysExt = iPhysExtNext;
4152 PGM_UNLOCK(pVM);
4153 return;
4154 }
4155 }
4156
4157 /* next */
4158 iPhysExtPrev = iPhysExt;
4159 iPhysExt = paPhysExts[iPhysExt].iNext;
4160 } while (iPhysExt != NIL_PGMPOOL_PHYSEXT_INDEX);
4161
4162 PGM_UNLOCK(pVM);
4163 AssertFatalMsgFailed(("not-found! cRefs=%d pPhysPage=%R[pgmpage] pPage=%p:{.idx=%d}\n", cRefs, pPhysPage, pPage, pPage->idx));
4164 }
4165 else /* nothing to do */
4166 Log2(("pgmPoolTrackPhysExtDerefGCPhys: pPhysPage=%R[pgmpage]\n", pPhysPage));
4167}
4168
4169/**
4170 * Clear references to guest physical memory.
4171 *
4172 * This is the same as pgmPoolTracDerefGCPhysHint except that the guest
4173 * physical address is assumed to be correct, so the linear search can be
4174 * skipped and we can assert at an earlier point.
4175 *
4176 * @param pPool The pool.
4177 * @param pPage The page.
4178 * @param HCPhys The host physical address corresponding to the guest page.
4179 * @param GCPhys The guest physical address corresponding to HCPhys.
4180 * @param iPte Shadow PTE index
4181 */
4182static void pgmPoolTracDerefGCPhys(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhys, uint16_t iPte)
4183{
4184 /*
4185 * Lookup the page and check if it checks out before derefing it.
4186 */
4187 PVMCC pVM = pPool->CTX_SUFF(pVM);
4188 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhys);
4189 if (pPhysPage)
4190 {
4191 Assert(PGM_PAGE_GET_HCPHYS(pPhysPage));
4192#ifdef LOG_ENABLED
4193 RTHCPHYS HCPhysPage = PGM_PAGE_GET_HCPHYS(pPhysPage);
4194 Log2(("pgmPoolTracDerefGCPhys %RHp vs %RHp\n", HCPhysPage, HCPhys));
4195#endif
4196 if (PGM_PAGE_GET_HCPHYS(pPhysPage) == HCPhys)
4197 {
4198 Assert(pPage->cPresent);
4199 Assert(pPool->cPresent);
4200 pPage->cPresent--;
4201 pPool->cPresent--;
4202 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4203 return;
4204 }
4205
4206 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp; found page has HCPhys=%RHp\n",
4207 HCPhys, GCPhys, PGM_PAGE_GET_HCPHYS(pPhysPage)));
4208 }
4209 AssertFatalMsgFailed(("HCPhys=%RHp GCPhys=%RGp\n", HCPhys, GCPhys));
4210}
4211
4212
4213/**
4214 * Clear references to guest physical memory.
4215 *
4216 * @param pPool The pool.
4217 * @param pPage The page.
4218 * @param HCPhys The host physical address corresponding to the guest page.
4219 * @param GCPhysHint The guest physical address which may correspond to HCPhys.
4220 * @param iPte Shadow pte index
4221 */
4222void pgmPoolTracDerefGCPhysHint(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhys, RTGCPHYS GCPhysHint, uint16_t iPte)
4223{
4224 Log4(("pgmPoolTracDerefGCPhysHint %RHp %RGp\n", HCPhys, GCPhysHint));
4225
4226 /*
4227 * Try the hint first.
4228 */
4229 RTHCPHYS HCPhysHinted;
4230 PVMCC pVM = pPool->CTX_SUFF(pVM);
4231 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysHint);
4232 if (pPhysPage)
4233 {
4234 HCPhysHinted = PGM_PAGE_GET_HCPHYS(pPhysPage);
4235 Assert(HCPhysHinted);
4236 if (HCPhysHinted == HCPhys)
4237 {
4238 Assert(pPage->cPresent);
4239 Assert(pPool->cPresent);
4240 pPage->cPresent--;
4241 pPool->cPresent--;
4242 pgmTrackDerefGCPhys(pPool, pPage, pPhysPage, iPte);
4243 return;
4244 }
4245 }
4246 else
4247 HCPhysHinted = UINT64_C(0xdeadbeefdeadbeef);
4248
4249 /*
4250 * Damn, the hint didn't work. We'll have to do an expensive linear search.
4251 */
4252 STAM_COUNTER_INC(&pPool->StatTrackLinearRamSearches);
4253 PPGMRAMRANGE pRam = pPool->CTX_SUFF(pVM)->pgm.s.CTX_SUFF(pRamRangesX);
4254 while (pRam)
4255 {
4256 unsigned iPage = pRam->cb >> PAGE_SHIFT;
4257 while (iPage-- > 0)
4258 {
4259 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
4260 {
4261 Log4(("pgmPoolTracDerefGCPhysHint: Linear HCPhys=%RHp GCPhysHint=%RGp GCPhysReal=%RGp\n",
4262 HCPhys, GCPhysHint, pRam->GCPhys + (iPage << PAGE_SHIFT)));
4263 Assert(pPage->cPresent);
4264 Assert(pPool->cPresent);
4265 pPage->cPresent--;
4266 pPool->cPresent--;
4267 pgmTrackDerefGCPhys(pPool, pPage, &pRam->aPages[iPage], iPte);
4268 return;
4269 }
4270 }
4271 pRam = pRam->CTX_SUFF(pNext);
4272 }
4273
4274 AssertFatalMsgFailed(("HCPhys=%RHp GCPhysHint=%RGp (Hinted page has HCPhys = %RHp)\n", HCPhys, GCPhysHint, HCPhysHinted));
4275}
4276
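/*
 * Illustrative sketch (kept under #if 0, never compiled): the linear fallback
 * above reduced to its essence -- reverse-map a host physical address by
 * walking all RAM ranges.  The helper name is hypothetical.
 */
#if 0
static PPGMPAGE pgmPoolSketchReverseLookup(PVMCC pVM, RTHCPHYS HCPhys)
{
    for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX); pRam; pRam = pRam->CTX_SUFF(pNext))
        for (unsigned iPage = pRam->cb >> PAGE_SHIFT; iPage-- > 0;)
            if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
                return &pRam->aPages[iPage];
    return NULL; /* HCPhys does not back any guest RAM page */
}
#endif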
4277
4278/**
4279 * Clear references to guest physical memory in a 32-bit / 32-bit page table.
4280 *
4281 * @param pPool The pool.
4282 * @param pPage The page.
4283 * @param pShwPT The shadow page table (mapping of the page).
4284 * @param pGstPT The guest page table.
4285 */
4286DECLINLINE(void) pgmPoolTrackDerefPT32Bit32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT, PCX86PT pGstPT)
4287{
4288 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4289 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4290 {
4291 const X86PGUINT uPte = pShwPT->a[i].u;
4292 Assert(!(uPte & RT_BIT_32(10)));
4293 if (uPte & X86_PTE_P)
4294 {
4295 Log4(("pgmPoolTrackDerefPT32Bit32Bit: i=%d pte=%RX32 hint=%RX32\n",
4296 i, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & X86_PTE_PG_MASK));
4297 pgmPoolTracDerefGCPhysHint(pPool, pPage, uPte & X86_PTE_PG_MASK, pGstPT->a[i].u & fPgMask, i);
4298 if (!pPage->cPresent)
4299 break;
4300 }
4301 }
4302}
4303
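/*
 * Illustrative sketch (kept under #if 0, never compiled): the fA20Enabled
 * masking used by the deref workers above and below.  With the A20 gate off,
 * physical bit 20 is forced clear so guest addresses aliased 1 MB apart
 * resolve to the same PGMPAGE.  The helper name is hypothetical.
 */
#if 0
static RTGCPHYS pgmPoolSketchApplyA20(bool fA20Enabled, RTGCPHYS GCPhys)
{
    return fA20Enabled ? GCPhys : (GCPhys & ~(RTGCPHYS)RT_BIT_64(20));
}
#endif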
4304
4305/**
4306 * Clear references to guest physical memory in a PAE / 32-bit page table.
4307 *
4308 * @param pPool The pool.
4309 * @param pPage The page.
4310 * @param pShwPT The shadow page table (mapping of the page).
4311 * @param pGstPT The guest page table (just a half one).
4312 */
4313DECLINLINE(void) pgmPoolTrackDerefPTPae32Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PT pGstPT)
4314{
4315 RTGCPHYS32 const fPgMask = pPage->fA20Enabled ? X86_PTE_PG_MASK : X86_PTE_PG_MASK & ~RT_BIT_32(20);
4316 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4317 {
4318 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4319 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4320 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4321 {
4322 Log4(("pgmPoolTrackDerefPTPae32Bit: i=%d pte=%RX64 hint=%RX32\n",
4323 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PG_MASK));
4324 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4325 if (!pPage->cPresent)
4326 break;
4327 }
4328 }
4329}
4330
4331
4332/**
4333 * Clear references to guest physical memory in a PAE / PAE page table.
4334 *
4335 * @param pPool The pool.
4336 * @param pPage The page.
4337 * @param pShwPT The shadow page table (mapping of the page).
4338 * @param pGstPT The guest page table.
4339 */
4340DECLINLINE(void) pgmPoolTrackDerefPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT, PCX86PTPAE pGstPT)
4341{
4342 RTGCPHYS const fPgMask = pPage->fA20Enabled ? X86_PTE_PAE_PG_MASK : X86_PTE_PAE_PG_MASK & ~RT_BIT_64(20);
4343 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++)
4344 {
4345 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4346 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4347 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4348 {
4349 Log4(("pgmPoolTrackDerefPTPaePae: i=%d pte=%RX64 hint=%RX64\n",
4350 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK));
4351 pgmPoolTracDerefGCPhysHint(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), pGstPT->a[i].u & fPgMask, i);
4352 if (!pPage->cPresent)
4353 break;
4354 }
4355 }
4356}
4357
4358
4359/**
4360 * Clear references to guest physical memory in a 32-bit / 4MB page table.
4361 *
4362 * @param pPool The pool.
4363 * @param pPage The page.
4364 * @param pShwPT The shadow page table (mapping of the page).
4365 */
4366DECLINLINE(void) pgmPoolTrackDerefPT32Bit4MB(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PT pShwPT)
4367{
4368 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4369 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4370 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4371 {
4372 const X86PGUINT uPte = pShwPT->a[i].u;
4373 Assert(!(uPte & RT_BIT_32(10)));
4374 if (uPte & X86_PTE_P)
4375 {
4376 Log4(("pgmPoolTrackDerefPT32Bit4MB: i=%d pte=%RX32 GCPhys=%RGp\n",
4377 i, uPte & X86_PTE_PG_MASK, GCPhys));
4378 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & X86_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4379 if (!pPage->cPresent)
4380 break;
4381 }
4382 }
4383}
4384
4385
4386/**
4387 * Clear references to guest physical memory in a PAE / 2/4MB page table.
4388 *
4389 * @param pPool The pool.
4390 * @param pPage The page.
4391 * @param pShwPT The shadow page table (mapping of the page).
4392 */
4393DECLINLINE(void) pgmPoolTrackDerefPTPaeBig(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PPGMSHWPTPAE pShwPT)
4394{
4395 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4396 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4397 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4398 {
4399 Assert( (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == 0
4400 || (PGMSHWPTEPAE_GET_U(pShwPT->a[i]) & UINT64_C(0x7ff0000000000400)) == UINT64_C(0x7ff0000000000000));
4401 if (PGMSHWPTEPAE_IS_P(pShwPT->a[i]))
4402 {
4403 Log4(("pgmPoolTrackDerefPTPaeBig: i=%d pte=%RX64 hint=%RGp\n",
4404 i, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys));
4405 pgmPoolTracDerefGCPhys(pPool, pPage, PGMSHWPTEPAE_GET_HCPHYS(pShwPT->a[i]), GCPhys & GCPhysA20Mask, i);
4406 if (!pPage->cPresent)
4407 break;
4408 }
4409 }
4410}
4411
4412
4413/**
4414 * Clear references to shadowed pages in an EPT page table.
4415 *
4416 * @param pPool The pool.
4417 * @param pPage The page.
4418 * @param pShwPT The shadow page table (mapping of the page).
4419 *
4420 */
4421DECLINLINE(void) pgmPoolTrackDerefPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPT pShwPT)
4422{
4423 RTGCPHYS const GCPhysA20Mask = pPage->fA20Enabled ? UINT64_MAX : ~RT_BIT_64(20);
4424 RTGCPHYS GCPhys = pPage->GCPhys + PAGE_SIZE * pPage->iFirstPresent;
4425 for (unsigned i = pPage->iFirstPresent; i < RT_ELEMENTS(pShwPT->a); i++, GCPhys += PAGE_SIZE)
4426 {
4427 X86PGPAEUINT const uPte = pShwPT->a[i].u;
4428 Assert((uPte & UINT64_C(0xfff0000000000f80)) == 0);
4429 if (uPte & EPT_E_READ)
4430 {
4431 Log4(("pgmPoolTrackDerefPTEPT: i=%d pte=%RX64 GCPhys=%RX64\n",
4432 i, uPte & EPT_PTE_PG_MASK, GCPhys));
4433 pgmPoolTracDerefGCPhys(pPool, pPage, uPte & EPT_PTE_PG_MASK, GCPhys & GCPhysA20Mask, i);
4434 if (!pPage->cPresent)
4435 break;
4436 }
4437 }
4438}
4439
4440
4441/**
4442 * Clear references to shadowed pages in a 32-bit page directory.
4443 *
4444 * @param pPool The pool.
4445 * @param pPage The page.
4446 * @param pShwPD The shadow page directory (mapping of the page).
4447 */
4448DECLINLINE(void) pgmPoolTrackDerefPD(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PD pShwPD)
4449{
4450 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4451 {
4452 X86PGUINT const uPde = pShwPD->a[i].u;
4453 if (uPde & X86_PDE_P)
4454 {
4455 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, pShwPD->a[i].u & X86_PDE_PG_MASK);
4456 if (pSubPage)
4457 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4458 else
4459 AssertFatalMsgFailed(("%x\n", pShwPD->a[i].u & X86_PDE_PG_MASK));
4460 }
4461 }
4462}
4463
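/*
 * Illustrative sketch (kept under #if 0, never compiled): the common
 * "dereference one directory entry" step shared by the walkers above and
 * below -- map the entry's HCPhys back to its pool page via the AVL tree,
 * then drop this table's user record from it.  The helper name is
 * hypothetical.
 */
#if 0
static void pgmPoolSketchDerefDirEntry(PPGMPOOL pPool, PPGMPOOLPAGE pPage, RTHCPHYS HCPhysSub, unsigned iEntry)
{
    PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhysSub);
    if (pSubPage)
        pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, iEntry);
    else
        AssertFatalMsgFailed(("%RHp\n", HCPhysSub)); /* a present entry must map a pool page */
}
#endif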
4464
4465/**
4466 * Clear references to shadowed pages in a PAE (legacy or 64-bit) page directory.
4467 *
4468 * @param pPool The pool.
4469 * @param pPage The page.
4470 * @param pShwPD The shadow page directory (mapping of the page).
4471 */
4472DECLINLINE(void) pgmPoolTrackDerefPDPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPAE pShwPD)
4473{
4474 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4475 {
4476 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4477 if (uPde & X86_PDE_P)
4478 {
4479#ifdef PGM_WITH_LARGE_PAGES
4480 if (uPde & X86_PDE_PS)
4481 {
4482 Log4(("pgmPoolTrackDerefPDPae: i=%d pde=%RX64 GCPhys=%RX64\n",
4483 i, uPde & X86_PDE2M_PAE_PG_MASK, pPage->GCPhys));
4484 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & X86_PDE2M_PAE_PG_MASK,
4485 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4486 i);
4487 }
4488 else
4489#endif
4490 {
4491 Assert((uPde & (X86_PDE_PAE_MBZ_MASK_NX | UINT64_C(0x7ff0000000000000))) == 0);
4492 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & X86_PDE_PAE_PG_MASK);
4493 if (pSubPage)
4494 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4495 else
4496 AssertFatalMsgFailed(("%RX64\n", uPde & X86_PDE_PAE_PG_MASK));
4497 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4498 }
4499 }
4500 }
4501}
4502
4503
4504/**
4505 * Clear references to shadowed pages in a PAE page directory pointer table.
4506 *
4507 * @param pPool The pool.
4508 * @param pPage The page.
4509 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4510 */
4511DECLINLINE(void) pgmPoolTrackDerefPDPTPae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4512{
4513 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4514 {
4515 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4516 Assert((uPdpe & (X86_PDPE_PAE_MBZ_MASK | UINT64_C(0x7ff0000000000200))) == 0);
4517 if (uPdpe & X86_PDPE_P)
4518 {
4519 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4520 if (pSubPage)
4521 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4522 else
4523 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4524 }
4525 }
4526}
4527
4528
4529/**
4530 * Clear references to shadowed pages in a 64-bit page directory pointer table.
4531 *
4532 * @param pPool The pool.
4533 * @param pPage The page.
4534 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4535 */
4536DECLINLINE(void) pgmPoolTrackDerefPDPT64Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PDPT pShwPDPT)
4537{
4538 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4539 {
4540 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4541 Assert((uPdpe & (X86_PDPE_LM_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4542 if (uPdpe & X86_PDPE_P)
4543 {
4544 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & X86_PDPE_PG_MASK);
4545 if (pSubPage)
4546 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4547 else
4548 AssertFatalMsgFailed(("%RX64\n", uPdpe & X86_PDPE_PG_MASK));
4549 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4550 }
4551 }
4552}
4553
4554
4555/**
4556 * Clear references to shadowed pages in a 64-bit level 4 page table.
4557 *
4558 * @param pPool The pool.
4559 * @param pPage The page.
4560 * @param pShwPML4 The shadow PML4 table (mapping of the page).
4561 */
4562DECLINLINE(void) pgmPoolTrackDerefPML464Bit(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PML4 pShwPML4)
4563{
4564 for (unsigned i = 0; i < RT_ELEMENTS(pShwPML4->a); i++)
4565 {
4566 X86PGPAEUINT const uPml4e = pShwPML4->a[i].u;
4567 Assert((uPml4e & (X86_PML4E_MBZ_MASK_NX | UINT64_C(0x7ff0000000000200))) == 0);
4568 if (uPml4e & X86_PML4E_P)
4569 {
4570 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPml4e & X86_PDPE_PG_MASK);
4571 if (pSubPage)
4572 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4573 else
4574 AssertFatalMsgFailed(("%RX64\n", uPml4e & X86_PML4E_PG_MASK));
4575 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4576 }
4577 }
4578}
4579
4580
4581/**
4582 * Clear references to shadowed pages in an EPT page directory.
4583 *
4584 * @param pPool The pool.
4585 * @param pPage The page.
4586 * @param pShwPD The shadow page directory (mapping of the page).
4587 */
4588DECLINLINE(void) pgmPoolTrackDerefPDEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPD pShwPD)
4589{
4590 for (unsigned i = 0; i < RT_ELEMENTS(pShwPD->a); i++)
4591 {
4592 X86PGPAEUINT const uPde = pShwPD->a[i].u;
4593 Assert((uPde & UINT64_C(0xfff0000000000f80)) == 0);
4594 if (uPde & EPT_E_READ)
4595 {
4596#ifdef PGM_WITH_LARGE_PAGES
4597 if (uPde & EPT_E_LEAF)
4598 {
4599 Log4(("pgmPoolTrackDerefPDEPT: i=%d pde=%RX64 GCPhys=%RX64\n",
4600 i, uPde & EPT_PDE2M_PG_MASK, pPage->GCPhys));
4601 pgmPoolTracDerefGCPhys(pPool, pPage, uPde & EPT_PDE2M_PG_MASK,
4602 pPage->GCPhys + i * 2 * _1M /* pPage->GCPhys = base address of the memory described by the PD */,
4603 i);
4604 }
4605 else
4606#endif
4607 {
4608 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPde & EPT_PDE_PG_MASK);
4609 if (pSubPage)
4610 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4611 else
4612 AssertFatalMsgFailed(("%RX64\n", pShwPD->a[i].u & EPT_PDE_PG_MASK));
4613 }
4614 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4615 }
4616 }
4617}
4618
4619
4620/**
4621 * Clear references to shadowed pages in an EPT page directory pointer table.
4622 *
4623 * @param pPool The pool.
4624 * @param pPage The page.
4625 * @param pShwPDPT The shadow page directory pointer table (mapping of the page).
4626 */
4627DECLINLINE(void) pgmPoolTrackDerefPDPTEPT(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PEPTPDPT pShwPDPT)
4628{
4629 for (unsigned i = 0; i < RT_ELEMENTS(pShwPDPT->a); i++)
4630 {
4631 X86PGPAEUINT const uPdpe = pShwPDPT->a[i].u;
4632 Assert((uPdpe & UINT64_C(0xfff0000000000f80)) == 0);
4633 if (uPdpe & EPT_E_READ)
4634 {
4635 PPGMPOOLPAGE pSubPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, uPdpe & EPT_PDPTE_PG_MASK);
4636 if (pSubPage)
4637 pgmPoolTrackFreeUser(pPool, pSubPage, pPage->idx, i);
4638 else
4639 AssertFatalMsgFailed(("%RX64\n", uPdpe & EPT_PDPTE_PG_MASK));
4640 /** @todo 64-bit guests: have to ensure that we're not exhausting the dynamic mappings! */
4641 }
4642 }
4643}
4644
4645
4646/**
4647 * Clears all references made by this page.
4648 *
4649 * This includes other shadow pages and GC physical addresses.
4650 *
4651 * @param pPool The pool.
4652 * @param pPage The page.
4653 */
4654static void pgmPoolTrackDeref(PPGMPOOL pPool, PPGMPOOLPAGE pPage)
4655{
4656 /*
4657 * Map the shadow page and take action according to the page kind.
4658 */
4659 PVMCC pVM = pPool->CTX_SUFF(pVM);
4660 void *pvShw = PGMPOOL_PAGE_2_PTR(pVM, pPage);
4661 switch (pPage->enmKind)
4662 {
4663 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
4664 {
4665 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4666 void *pvGst;
4667 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4668 pgmPoolTrackDerefPT32Bit32Bit(pPool, pPage, (PX86PT)pvShw, (PCX86PT)pvGst);
4669 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4670 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4671 break;
4672 }
4673
4674 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
4675 {
4676 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4677 void *pvGst;
4678 int rc = PGM_GCPHYS_2_PTR_EX(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4679 pgmPoolTrackDerefPTPae32Bit(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PT)pvGst);
4680 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4681 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4682 break;
4683 }
4684
4685 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
4686 {
4687 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4688 void *pvGst;
4689 int rc = PGM_GCPHYS_2_PTR(pVM, pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
4690 pgmPoolTrackDerefPTPaePae(pPool, pPage, (PPGMSHWPTPAE)pvShw, (PCX86PTPAE)pvGst);
4691 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvGst);
4692 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4693 break;
4694 }
4695
4696 case PGMPOOLKIND_32BIT_PT_FOR_PHYS: /* treat it like a 4 MB page */
4697 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
4698 {
4699 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4700 pgmPoolTrackDerefPT32Bit4MB(pPool, pPage, (PX86PT)pvShw);
4701 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4702 break;
4703 }
4704
4705 case PGMPOOLKIND_PAE_PT_FOR_PHYS: /* treat it like a 2 MB page */
4706 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
4707 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
4708 {
4709 STAM_PROFILE_START(&pPool->StatTrackDerefGCPhys, g);
4710 pgmPoolTrackDerefPTPaeBig(pPool, pPage, (PPGMSHWPTPAE)pvShw);
4711 STAM_PROFILE_STOP(&pPool->StatTrackDerefGCPhys, g);
4712 break;
4713 }
4714
4715 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
4716 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
4717 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
4718 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
4719 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
4720 case PGMPOOLKIND_PAE_PD_PHYS:
4721 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
4722 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
4723 pgmPoolTrackDerefPDPae(pPool, pPage, (PX86PDPAE)pvShw);
4724 break;
4725
4726 case PGMPOOLKIND_32BIT_PD_PHYS:
4727 case PGMPOOLKIND_32BIT_PD:
4728 pgmPoolTrackDerefPD(pPool, pPage, (PX86PD)pvShw);
4729 break;
4730
4731 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
4732 case PGMPOOLKIND_PAE_PDPT:
4733 case PGMPOOLKIND_PAE_PDPT_PHYS:
4734 pgmPoolTrackDerefPDPTPae(pPool, pPage, (PX86PDPT)pvShw);
4735 break;
4736
4737 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
4738 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
4739 pgmPoolTrackDerefPDPT64Bit(pPool, pPage, (PX86PDPT)pvShw);
4740 break;
4741
4742 case PGMPOOLKIND_64BIT_PML4:
4743 pgmPoolTrackDerefPML464Bit(pPool, pPage, (PX86PML4)pvShw);
4744 break;
4745
4746 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
4747 pgmPoolTrackDerefPTEPT(pPool, pPage, (PEPTPT)pvShw);
4748 break;
4749
4750 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
4751 pgmPoolTrackDerefPDEPT(pPool, pPage, (PEPTPD)pvShw);
4752 break;
4753
4754 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
4755 pgmPoolTrackDerefPDPTEPT(pPool, pPage, (PEPTPDPT)pvShw);
4756 break;
4757
4758 default:
4759 AssertFatalMsgFailed(("enmKind=%d\n", pPage->enmKind));
4760 }
4761
4762 /* paranoia, clear the shadow page. Remove this later (i.e. let Alloc and ClearAll do it). */
4763 STAM_PROFILE_START(&pPool->StatZeroPage, z);
4764 ASMMemZeroPage(pvShw);
4765 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
4766 pPage->fZeroed = true;
4767 Assert(!pPage->cPresent);
4768 PGM_DYNMAP_UNUSED_HINT_VM(pVM, pvShw);
4769}
4770
4771
4772/**
4773 * Flushes a pool page.
4774 *
4775 * This moves the page to the free list after removing all user references to it.
4776 *
4777 * @returns VBox status code.
4778 * @retval VINF_SUCCESS on success.
4779 * @param pPool The pool.
4780 * @param pPage The shadow page.
4781 * @param fFlush Flush the TLBs when required (should only be false in very specific use cases!!)
4782 */
4783int pgmPoolFlushPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, bool fFlush)
4784{
4785 PVMCC pVM = pPool->CTX_SUFF(pVM);
4786 bool fFlushRequired = false;
4787
4788 int rc = VINF_SUCCESS;
4789 STAM_PROFILE_START(&pPool->StatFlushPage, f);
4790 LogFlow(("pgmPoolFlushPage: pPage=%p:{.Key=%RHp, .idx=%d, .enmKind=%s, .GCPhys=%RGp}\n",
4791 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), pPage->GCPhys));
4792
4793 /*
4794 * Reject any attempts at flushing any of the special root pages (shall
4795 * not happen).
4796 */
4797 AssertMsgReturn(pPage->idx >= PGMPOOL_IDX_FIRST,
4798 ("pgmPoolFlushPage: special root page, rejected. enmKind=%s idx=%d\n",
4799 pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx),
4800 VINF_SUCCESS);
4801
4802 PGM_LOCK_VOID(pVM);
4803
4804 /*
4805 * Quietly reject any attempts at flushing the currently active shadow CR3 mapping
4806 */
4807 if (pgmPoolIsPageLocked(pPage))
4808 {
4809 AssertMsg( pPage->enmKind == PGMPOOLKIND_64BIT_PML4
4810 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT
4811 || pPage->enmKind == PGMPOOLKIND_PAE_PDPT_FOR_32BIT
4812 || pPage->enmKind == PGMPOOLKIND_32BIT_PD
4813 || pPage->enmKind == PGMPOOLKIND_PAE_PD_FOR_PAE_PD
4814 || pPage->enmKind == PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD
4815 || pPage->enmKind == PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD
4816 || pPage->enmKind == PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD
4817 || pPage->enmKind == PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD
4818 || pPage->enmKind == PGMPOOLKIND_ROOT_NESTED,
4819 ("Can't free the shadow CR3! (%RHp vs %RHp kind=%d)\n", PGMGetHyperCR3(VMMGetCpu(pVM)), pPage->Core.Key, pPage->enmKind));
4820 Log(("pgmPoolFlushPage: current active shadow CR3, rejected. enmKind=%s idx=%d\n", pgmPoolPoolKindToStr(pPage->enmKind), pPage->idx));
4821 PGM_UNLOCK(pVM);
4822 return VINF_SUCCESS;
4823 }
4824
4825 /*
4826 * Mark the page as being in need of an ASMMemZeroPage().
4827 */
4828 pPage->fZeroed = false;
4829
4830#ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
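    /* Deal with any pending dirty-page tracking state for this page before it is torn down. */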
4831 if (pPage->fDirty)
4832 pgmPoolFlushDirtyPage(pVM, pPool, pPage->idxDirtyEntry, false /* do not remove */);
4833#endif
4834
4835 /* If there are any users of this table, then we *must* issue a tlb flush on all VCPUs. */
4836 if (pPage->iUserHead != NIL_PGMPOOL_USER_INDEX)
4837 fFlushRequired = true;
4838
4839 /*
4840 * Clear the page.
4841 */
4842 pgmPoolTrackClearPageUsers(pPool, pPage);
4843 STAM_PROFILE_START(&pPool->StatTrackDeref,a);
4844 pgmPoolTrackDeref(pPool, pPage);
4845 STAM_PROFILE_STOP(&pPool->StatTrackDeref,a);
4846
4847 /*
4848 * Flush it from the cache.
4849 */
4850 pgmPoolCacheFlushPage(pPool, pPage);
4851
4852 /*
4853 * Deregister the monitoring.
4854 */
4855 if (pPage->fMonitored)
4856 rc = pgmPoolMonitorFlush(pPool, pPage);
4857
4858 /*
4859 * Free the page.
4860 */
4861 Assert(pPage->iNext == NIL_PGMPOOL_IDX);
4862 pPage->iNext = pPool->iFreeHead;
4863 pPool->iFreeHead = pPage->idx;
4864 pPage->enmKind = PGMPOOLKIND_FREE;
4865 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
4866 pPage->GCPhys = NIL_RTGCPHYS;
4867 pPage->fReusedFlushPending = false;
4868
4869 pPool->cUsedPages--;
4870
4871 /* Flush the TLBs of all VCPUs if required. */
4872 if ( fFlushRequired
4873 && fFlush)
4874 {
4875 PGM_INVL_ALL_VCPU_TLBS(pVM);
4876 }
4877
4878 PGM_UNLOCK(pVM);
4879 STAM_PROFILE_STOP(&pPool->StatFlushPage, f);
4880 return rc;
4881}
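
/*
 * Usage sketch (illustrative only, kept out of the build): callers that
 * flush several pages back to back may pass fFlush=false and issue a single
 * combined TLB flush afterwards.  The page pointers here are made up.
 */
#if 0
    pgmPoolFlushPage(pPool, pPage1, false /*fFlush*/);
    pgmPoolFlushPage(pPool, pPage2, false /*fFlush*/);
    PGM_INVL_ALL_VCPU_TLBS(pVM);        /* one flush covers both pages */
#endif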
4882
4883
4884/**
4885 * Frees a usage of a pool page.
4886 *
4887 * The caller is responsible for updating the user table so that it no longer
4888 * references the shadow page.
4889 *
4890 * @param pPool The pool.
4891 * @param pPage The shadow page.
4892 * @param iUser The shadow page pool index of the user table.
4893 * NIL_PGMPOOL_IDX for root pages.
4894 * @param iUserTable The index into the user table (shadowed). Ignored if
4895 * root page.
4896 */
4897void pgmPoolFreeByPage(PPGMPOOL pPool, PPGMPOOLPAGE pPage, uint16_t iUser, uint32_t iUserTable)
4898{
4899 PVMCC pVM = pPool->CTX_SUFF(pVM);
4900
4901 STAM_PROFILE_START(&pPool->StatFree, a);
4902 LogFlow(("pgmPoolFreeByPage: pPage=%p:{.Key=%RHp, .idx=%d, enmKind=%s} iUser=%d iUserTable=%#x\n",
4903 pPage, pPage->Core.Key, pPage->idx, pgmPoolPoolKindToStr(pPage->enmKind), iUser, iUserTable));
4904 AssertReturnVoid(pPage->idx >= PGMPOOL_IDX_FIRST); /* paranoia (#6349) */
4905
4906 PGM_LOCK_VOID(pVM);
4907 if (iUser != NIL_PGMPOOL_IDX)
4908 pgmPoolTrackFreeUser(pPool, pPage, iUser, iUserTable);
4909 if (!pPage->fCached)
4910 pgmPoolFlushPage(pPool, pPage);
4911 PGM_UNLOCK(pVM);
4912 STAM_PROFILE_STOP(&pPool->StatFree, a);
4913}
4914
4915
4916/**
4917 * Makes one or more free pages available.
4918 *
4919 * @returns VBox status code.
4920 * @retval VINF_SUCCESS on success.
4921 *
4922 * @param pPool The pool.
4923 * @param enmKind Page table kind
4924 * @param iUser The user of the page.
4925 */
4926static int pgmPoolMakeMoreFreePages(PPGMPOOL pPool, PGMPOOLKIND enmKind, uint16_t iUser)
4927{
4928 PVMCC pVM = pPool->CTX_SUFF(pVM);
4929 LogFlow(("pgmPoolMakeMoreFreePages: enmKind=%d iUser=%d\n", enmKind, iUser));
4930 NOREF(enmKind);
4931
4932 /*
4933 * If the pool isn't fully grown yet, expand it.
4934 */
4935 if (pPool->cCurPages < pPool->cMaxPages)
4936 {
4937 STAM_PROFILE_ADV_SUSPEND(&pPool->StatAlloc, a);
4938#ifdef IN_RING3
4939 int rc = PGMR3PoolGrow(pVM, VMMGetCpu(pVM));
4940#else
4941 int rc = PGMR0PoolGrow(pVM, VMMGetCpuId(pVM));
4942#endif
4943 if (RT_FAILURE(rc))
4944 return rc;
4945 STAM_PROFILE_ADV_RESUME(&pPool->StatAlloc, a);
4946 if (pPool->iFreeHead != NIL_PGMPOOL_IDX)
4947 return VINF_SUCCESS;
4948 }
4949
4950 /*
4951 * Free one cached page.
4952 */
4953 return pgmPoolCacheFreeOne(pPool, iUser);
4954}
4955
4956
4957/**
4958 * Allocates a page from the pool.
4959 *
4960 * This page may actually be a cached page and not in need of any processing
4961 * on the caller's part.
4962 *
4963 * @returns VBox status code.
4964 * @retval VINF_SUCCESS if a NEW page was allocated.
4965 * @retval VINF_PGM_CACHED_PAGE if a CACHED page was returned.
4966 *
4967 * @param pVM The cross context VM structure.
4968 * @param GCPhys The GC physical address of the page we're going to shadow.
4969 * For 4MB and 2MB PD entries, it's the first address the
4970 * shadow PT is covering.
4971 * @param enmKind The kind of mapping.
4972 * @param enmAccess Access type for the mapping (only relevant for big pages)
4973 * @param fA20Enabled Whether the A20 gate is enabled or not.
4974 * @param iUser The shadow page pool index of the user table. Root
4975 * pages should pass NIL_PGMPOOL_IDX.
4976 * @param iUserTable The index into the user table (shadowed). Ignored for
4977 * root pages (iUser == NIL_PGMPOOL_IDX).
4978 * @param fLockPage Lock the page
4979 * @param ppPage Where to store the pointer to the page. NULL is stored here on failure.
4980 */
4981int pgmPoolAlloc(PVMCC pVM, RTGCPHYS GCPhys, PGMPOOLKIND enmKind, PGMPOOLACCESS enmAccess, bool fA20Enabled,
4982 uint16_t iUser, uint32_t iUserTable, bool fLockPage, PPPGMPOOLPAGE ppPage)
4983{
4984 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4985 STAM_PROFILE_ADV_START(&pPool->StatAlloc, a);
4986 LogFlow(("pgmPoolAlloc: GCPhys=%RGp enmKind=%s iUser=%d iUserTable=%#x\n", GCPhys, pgmPoolPoolKindToStr(enmKind), iUser, iUserTable));
4987 *ppPage = NULL;
4988 /** @todo CSAM/PGMPrefetchPage messes up here during CSAMR3CheckGates
4989 * (TRPMR3SyncIDT) because of FF priority. Try fix that?
4990 * Assert(!(pVM->pgm.s.fGlobalSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)); */
4991
4992 PGM_LOCK_VOID(pVM);
4993
4994 if (pPool->fCacheEnabled)
4995 {
4996 int rc2 = pgmPoolCacheAlloc(pPool, GCPhys, enmKind, enmAccess, fA20Enabled, iUser, iUserTable, ppPage);
4997 if (RT_SUCCESS(rc2))
4998 {
4999 if (fLockPage)
5000 pgmPoolLockPage(pPool, *ppPage);
5001 PGM_UNLOCK(pVM);
5002 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5003 LogFlow(("pgmPoolAlloc: cached returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d}\n", rc2, *ppPage, (*ppPage)->Core.Key, (*ppPage)->idx));
5004 return rc2;
5005 }
5006 }
5007
5008 /*
5009 * Allocate a new one.
5010 */
5011 int rc = VINF_SUCCESS;
5012 uint16_t iNew = pPool->iFreeHead;
5013 if (iNew == NIL_PGMPOOL_IDX)
5014 {
5015 rc = pgmPoolMakeMoreFreePages(pPool, enmKind, iUser);
5016 if (RT_FAILURE(rc))
5017 {
5018 PGM_UNLOCK(pVM);
5019 Log(("pgmPoolAlloc: returns %Rrc (Free)\n", rc));
5020 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5021 return rc;
5022 }
5023 iNew = pPool->iFreeHead;
5024 AssertReleaseMsgReturn(iNew != NIL_PGMPOOL_IDX, ("iNew=%#x\n", iNew), VERR_PGM_POOL_IPE);
5025 }
5026
5027 /* unlink the free head */
5028 PPGMPOOLPAGE pPage = &pPool->aPages[iNew];
5029 pPool->iFreeHead = pPage->iNext;
5030 pPage->iNext = NIL_PGMPOOL_IDX;
5031
5032 /*
5033 * Initialize it.
5034 */
5035 pPool->cUsedPages++; /* physical handler registration / pgmPoolTrackFlushGCPhysPTsSlow requirement. */
5036 pPage->enmKind = enmKind;
5037 pPage->enmAccess = enmAccess;
5038 pPage->GCPhys = GCPhys;
5039 pPage->fA20Enabled = fA20Enabled;
5040 pPage->fSeenNonGlobal = false; /* Set this to 'true' to disable this feature. */
5041 pPage->fMonitored = false;
5042 pPage->fCached = false;
5043 pPage->fDirty = false;
5044 pPage->fReusedFlushPending = false;
5045 pPage->cModifications = 0;
5046 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5047 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5048 pPage->cPresent = 0;
5049 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5050 pPage->idxDirtyEntry = 0;
5051 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5052 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5053 pPage->cLastAccessHandler = 0;
5054 pPage->cLocked = 0;
5055# ifdef VBOX_STRICT
5056 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5057# endif
5058
5059 /*
5060 * Insert into the tracking and cache. If this fails, free the page.
5061 */
5062 int rc3 = pgmPoolTrackInsert(pPool, pPage, GCPhys, iUser, iUserTable);
5063 if (RT_FAILURE(rc3))
5064 {
5065 pPool->cUsedPages--;
5066 pPage->enmKind = PGMPOOLKIND_FREE;
5067 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5068 pPage->GCPhys = NIL_RTGCPHYS;
5069 pPage->iNext = pPool->iFreeHead;
5070 pPool->iFreeHead = pPage->idx;
5071 PGM_UNLOCK(pVM);
5072 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5073 Log(("pgmPoolAlloc: returns %Rrc (Insert)\n", rc3));
5074 return rc3;
5075 }
5076
5077 /*
5078 * Commit the allocation, clear the page and return.
5079 */
5080#ifdef VBOX_WITH_STATISTICS
5081 if (pPool->cUsedPages > pPool->cUsedPagesHigh)
5082 pPool->cUsedPagesHigh = pPool->cUsedPages;
5083#endif
5084
5085 if (!pPage->fZeroed)
5086 {
5087 STAM_PROFILE_START(&pPool->StatZeroPage, z);
5088 void *pv = PGMPOOL_PAGE_2_PTR(pVM, pPage);
5089 ASMMemZeroPage(pv);
5090 STAM_PROFILE_STOP(&pPool->StatZeroPage, z);
5091 }
5092
5093 *ppPage = pPage;
5094 if (fLockPage)
5095 pgmPoolLockPage(pPool, pPage);
5096 PGM_UNLOCK(pVM);
5097 LogFlow(("pgmPoolAlloc: returns %Rrc *ppPage=%p:{.Key=%RHp, .idx=%d, .fCached=%RTbool, .fMonitored=%RTbool}\n",
5098 rc, pPage, pPage->Core.Key, pPage->idx, pPage->fCached, pPage->fMonitored));
5099 STAM_PROFILE_ADV_STOP(&pPool->StatAlloc, a);
5100 return rc;
5101}
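
/*
 * Caller sketch (hypothetical, kept out of the build): VINF_SUCCESS means a
 * fresh, zeroed page whose entries the caller must fill in, while
 * VINF_PGM_CACHED_PAGE means the contents are already valid.  pShwPde and
 * iPde are made-up names for this illustration.
 */
#if 0
    PPGMPOOLPAGE pShwPage;
    int rc = pgmPoolAlloc(pVM, GCPhys, PGMPOOLKIND_PAE_PT_FOR_PAE_PT, PGMPOOLACCESS_DONTCARE,
                          true /*fA20Enabled*/, pShwPde->idx, iPde, false /*fLockPage*/, &pShwPage);
    if (rc == VINF_SUCCESS)
    {
        /* Fresh, zeroed page: the caller fills in the shadow entries. */
    }
    else if (rc == VINF_PGM_CACHED_PAGE)
    {
        /* Cached page: contents are already valid, just link it up. */
    }
#endif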
5102
5103
5104/**
5105 * Frees a usage of a pool page.
5106 *
5107 * @param pVM The cross context VM structure.
5108 * @param HCPhys The HC physical address of the shadow page.
5109 * @param iUser The shadow page pool index of the user table.
5110 * NIL_PGMPOOL_IDX if root page.
5111 * @param iUserTable The index into the user table (shadowed). Ignored if
5112 * root page.
5113 */
5114void pgmPoolFree(PVM pVM, RTHCPHYS HCPhys, uint16_t iUser, uint32_t iUserTable)
5115{
5116 LogFlow(("pgmPoolFree: HCPhys=%RHp iUser=%d iUserTable=%#x\n", HCPhys, iUser, iUserTable));
5117 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5118 pgmPoolFreeByPage(pPool, pgmPoolGetPage(pPool, HCPhys), iUser, iUserTable);
5119}
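
/*
 * Usage sketch (hypothetical names, kept out of the build): a caller first
 * unhooks the shadow page from its user table entry, then returns the usage
 * via pgmPoolFree, which resolves the page by its host physical address.
 */
#if 0
    /* pShwPD, iPde and iUserIdx are made-up names for this illustration. */
    RTHCPHYS const HCPhysPT = pShwPD->a[iPde].u & X86_PDE_PAE_PG_MASK;
    pShwPD->a[iPde].u = 0;                        /* unhook the shadow PT from its user first */
    pgmPoolFree(pVM, HCPhysPT, iUserIdx, iPde);   /* then return the usage to the pool */
#endif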
5120
5121
5122/**
5123 * Internal worker for finding an 'in-use' shadow page given by its physical address.
5124 *
5125 * @returns Pointer to the shadow page structure.
5126 * @param pPool The pool.
5127 * @param HCPhys The HC physical address of the shadow page.
5128 */
5129PPGMPOOLPAGE pgmPoolGetPage(PPGMPOOL pPool, RTHCPHYS HCPhys)
5130{
5131 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5132
5133 /*
5134 * Look up the page.
5135 */
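    /* The AVL tree is keyed on the page-aligned host physical address, so mask off the offset bits. */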
5136 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5137
5138 AssertFatalMsg(pPage && pPage->enmKind != PGMPOOLKIND_FREE, ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0));
5139 return pPage;
5140}
5141
5142
5143/**
5144 * Internal worker for finding a page for debugging purposes, no assertions.
5145 *
5146 * @returns Pointer to the shadow page structure. NULL if not found.
5147 * @param pPool The pool.
5148 * @param HCPhys The HC physical address of the shadow page.
5149 */
5150PPGMPOOLPAGE pgmPoolQueryPageForDbg(PPGMPOOL pPool, RTHCPHYS HCPhys)
5151{
5152 PGM_LOCK_ASSERT_OWNER(pPool->CTX_SUFF(pVM));
5153 return (PPGMPOOLPAGE)RTAvloHCPhysGet(&pPool->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5154}
5155
5156
5157/**
5158 * Internal worker for PGM_HCPHYS_2_PTR.
5159 *
5160 * @returns VBox status code.
5161 * @param pVM The cross context VM structure.
5162 * @param HCPhys The HC physical address of the shadow page.
5163 * @param ppv Where to return the address.
5164 */
5165int pgmPoolHCPhys2Ptr(PVM pVM, RTHCPHYS HCPhys, void **ppv)
5166{
5167 PPGMPOOLPAGE pPage = (PPGMPOOLPAGE)RTAvloHCPhysGet(&pVM->pgm.s.CTX_SUFF(pPool)->HCPhysTree, HCPhys & X86_PTE_PAE_PG_MASK);
5168 AssertMsgReturn(pPage && pPage->enmKind != PGMPOOLKIND_FREE,
5169 ("HCPhys=%RHp pPage=%p idx=%d\n", HCPhys, pPage, (pPage) ? pPage->idx : 0),
5170 VERR_PGM_POOL_GET_PAGE_FAILED);
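    /* Return the ring-context mapping of the page plus the byte offset into it. */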
5171 *ppv = (uint8_t *)pPage->CTX_SUFF(pvPage) + (HCPhys & PAGE_OFFSET_MASK);
5172 return VINF_SUCCESS;
5173}
5174
5175#ifdef IN_RING3 /* currently only used in ring 3; save some space in the R0 & GC modules (left it here as we might need it elsewhere later on) */
5176
5177/**
5178 * Flush the specified page if present
5179 *
5180 * @param pVM The cross context VM structure.
5181 * @param GCPhys Guest physical address of the page to flush
5182 */
5183void pgmPoolFlushPageByGCPhys(PVM pVM, RTGCPHYS GCPhys)
5184{
5185 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5186
5187 VM_ASSERT_EMT(pVM);
5188
5189 /*
5190 * Look up the GCPhys in the hash.
5191 */
5192 GCPhys = GCPhys & ~(RTGCPHYS)PAGE_OFFSET_MASK;
5193 unsigned i = pPool->aiHash[PGMPOOL_HASH(GCPhys)];
5194 if (i == NIL_PGMPOOL_IDX)
5195 return;
5196
5197 do
5198 {
5199 PPGMPOOLPAGE pPage = &pPool->aPages[i];
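        /* Unsigned wrap-around makes this a one-compare range check: true iff pPage->GCPhys lies within the page starting at GCPhys. */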
5200 if (pPage->GCPhys - GCPhys < PAGE_SIZE)
5201 {
5202 switch (pPage->enmKind)
5203 {
5204 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5205 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5206 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5207 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5208 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5209 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5210 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5211 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5212 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5213 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5214 case PGMPOOLKIND_64BIT_PML4:
5215 case PGMPOOLKIND_32BIT_PD:
5216 case PGMPOOLKIND_PAE_PDPT:
5217 {
5218 Log(("PGMPoolFlushPage: found pgm pool pages for %RGp\n", GCPhys));
5219# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5220 if (pPage->fDirty)
5221 STAM_COUNTER_INC(&pPool->StatForceFlushDirtyPage);
5222 else
5223# endif
5224 STAM_COUNTER_INC(&pPool->StatForceFlushPage);
5225 Assert(!pgmPoolIsPageLocked(pPage));
5226 pgmPoolMonitorChainFlush(pPool, pPage);
5227 return;
5228 }
5229
5230 /* ignore, no monitoring. */
5231 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5232 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5233 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5234 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5235 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5236 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5237 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5238 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5239 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5240 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5241 case PGMPOOLKIND_ROOT_NESTED:
5242 case PGMPOOLKIND_PAE_PD_PHYS:
5243 case PGMPOOLKIND_PAE_PDPT_PHYS:
5244 case PGMPOOLKIND_32BIT_PD_PHYS:
5245 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5246 break;
5247
5248 default:
5249 AssertFatalMsgFailed(("enmKind=%d idx=%d\n", pPage->enmKind, pPage->idx));
5250 }
5251 }
5252
5253 /* next */
5254 i = pPage->iNext;
5255 } while (i != NIL_PGMPOOL_IDX);
5256 return;
5257}
5258
5259
5260/**
5261 * Reset CPU on hot plugging.
5262 *
5263 * @param pVM The cross context VM structure.
5264 * @param pVCpu The cross context virtual CPU structure.
5265 */
5266void pgmR3PoolResetUnpluggedCpu(PVM pVM, PVMCPU pVCpu)
5267{
5268 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5269
5270 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5271 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5272 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5273}
5274
5275
5276/**
5277 * Flushes the entire cache.
5278 *
5279 * It asserts a global CR3 flush (FF) and assumes the caller is aware of
5280 * this and will execute the CR3 flush.
5281 *
5282 * @param pVM The cross context VM structure.
5283 */
5284void pgmR3PoolReset(PVM pVM)
5285{
5286 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
5287
5288 PGM_LOCK_ASSERT_OWNER(pVM);
5289 STAM_PROFILE_START(&pPool->StatR3Reset, a);
5290 LogFlow(("pgmR3PoolReset:\n"));
5291
5292 /*
5293 * If there are no pages in the pool, there is nothing to do.
5294 */
5295 if (pPool->cCurPages <= PGMPOOL_IDX_FIRST)
5296 {
5297 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5298 return;
5299 }
5300
5301 /*
5302 * Exit the shadow mode since we're going to clear everything,
5303 * including the root page.
5304 */
5305 VMCC_FOR_EACH_VMCPU(pVM)
5306 pgmR3ExitShadowModeBeforePoolFlush(pVCpu);
5307 VMCC_FOR_EACH_VMCPU_END(pVM);
5308
5309
5310 /*
5311 * Nuke the free list and reinsert all pages into it.
5312 */
5313 for (unsigned i = pPool->cCurPages - 1; i >= PGMPOOL_IDX_FIRST; i--)
5314 {
5315 PPGMPOOLPAGE pPage = &pPool->aPages[i];
5316
5317 if (pPage->fMonitored)
5318 pgmPoolMonitorFlush(pPool, pPage);
5319 pPage->iModifiedNext = NIL_PGMPOOL_IDX;
5320 pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
5321 pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
5322 pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
5323 pPage->GCPhys = NIL_RTGCPHYS;
5324 pPage->enmKind = PGMPOOLKIND_FREE;
5325 pPage->enmAccess = PGMPOOLACCESS_DONTCARE;
5326 Assert(pPage->idx == i);
5327 pPage->iNext = i + 1;
5328 pPage->fA20Enabled = true;
5329 pPage->fZeroed = false; /* This could probably be optimized, but better safe than sorry. */
5330 pPage->fSeenNonGlobal = false;
5331 pPage->fMonitored = false;
5332 pPage->fDirty = false;
5333 pPage->fCached = false;
5334 pPage->fReusedFlushPending = false;
5335 pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
5336 pPage->cPresent = 0;
5337 pPage->iFirstPresent = NIL_PGMPOOL_PRESENT_INDEX;
5338 pPage->cModifications = 0;
5339 pPage->iAgeNext = NIL_PGMPOOL_IDX;
5340 pPage->iAgePrev = NIL_PGMPOOL_IDX;
5341 pPage->idxDirtyEntry = 0;
5342 pPage->GCPtrLastAccessHandlerRip = NIL_RTGCPTR;
5343 pPage->GCPtrLastAccessHandlerFault = NIL_RTGCPTR;
5344 pPage->cLastAccessHandler = 0;
5345 pPage->cLocked = 0;
5346# ifdef VBOX_STRICT
5347 pPage->GCPtrDirtyFault = NIL_RTGCPTR;
5348# endif
5349 }
5350 pPool->aPages[pPool->cCurPages - 1].iNext = NIL_PGMPOOL_IDX;
5351 pPool->iFreeHead = PGMPOOL_IDX_FIRST;
5352 pPool->cUsedPages = 0;
5353
5354 /*
5355 * Zap and reinitialize the user records.
5356 */
5357 pPool->cPresent = 0;
5358 pPool->iUserFreeHead = 0;
5359 PPGMPOOLUSER paUsers = pPool->CTX_SUFF(paUsers);
5360 const unsigned cMaxUsers = pPool->cMaxUsers;
5361 for (unsigned i = 0; i < cMaxUsers; i++)
5362 {
5363 paUsers[i].iNext = i + 1;
5364 paUsers[i].iUser = NIL_PGMPOOL_IDX;
5365 paUsers[i].iUserTable = 0xfffffffe;
5366 }
5367 paUsers[cMaxUsers - 1].iNext = NIL_PGMPOOL_USER_INDEX;
5368
5369 /*
5370 * Clear all the GCPhys links and rebuild the phys ext free list.
5371 */
5372 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
5373 pRam;
5374 pRam = pRam->CTX_SUFF(pNext))
5375 {
5376 unsigned iPage = pRam->cb >> PAGE_SHIFT;
5377 while (iPage-- > 0)
5378 PGM_PAGE_SET_TRACKING(pVM, &pRam->aPages[iPage], 0);
5379 }
5380
5381 pPool->iPhysExtFreeHead = 0;
5382 PPGMPOOLPHYSEXT paPhysExts = pPool->CTX_SUFF(paPhysExts);
5383 const unsigned cMaxPhysExts = pPool->cMaxPhysExts;
5384 for (unsigned i = 0; i < cMaxPhysExts; i++)
5385 {
5386 paPhysExts[i].iNext = i + 1;
5387 paPhysExts[i].aidx[0] = NIL_PGMPOOL_IDX;
5388 paPhysExts[i].apte[0] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5389 paPhysExts[i].aidx[1] = NIL_PGMPOOL_IDX;
5390 paPhysExts[i].apte[1] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5391 paPhysExts[i].aidx[2] = NIL_PGMPOOL_IDX;
5392 paPhysExts[i].apte[2] = NIL_PGMPOOL_PHYSEXT_IDX_PTE;
5393 }
5394 paPhysExts[cMaxPhysExts - 1].iNext = NIL_PGMPOOL_PHYSEXT_INDEX;
5395
5396 /*
5397 * Just zap the modified list.
5398 */
5399 pPool->cModifiedPages = 0;
5400 pPool->iModifiedHead = NIL_PGMPOOL_IDX;
5401
5402 /*
5403 * Clear the GCPhys hash and the age list.
5404 */
5405 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aiHash); i++)
5406 pPool->aiHash[i] = NIL_PGMPOOL_IDX;
5407 pPool->iAgeHead = NIL_PGMPOOL_IDX;
5408 pPool->iAgeTail = NIL_PGMPOOL_IDX;
5409
5410# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
5411 /* Clear all dirty pages. */
5412 pPool->idxFreeDirtyPage = 0;
5413 pPool->cDirtyPages = 0;
5414 for (unsigned i = 0; i < RT_ELEMENTS(pPool->aidxDirtyPages); i++)
5415 pPool->aidxDirtyPages[i] = NIL_PGMPOOL_IDX;
5416# endif
5417
5418 /*
5419 * Reinsert active pages into the hash and ensure monitoring chains are correct.
5420 */
5421 VMCC_FOR_EACH_VMCPU(pVM)
5422 {
5423 /*
5424 * Re-enter the shadowing mode and assert Sync CR3 FF.
5425 */
5426 pgmR3ReEnterShadowModeAfterPoolFlush(pVM, pVCpu);
5427 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3);
5428 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5429 }
5430 VMCC_FOR_EACH_VMCPU_END(pVM);
5431
5432 STAM_PROFILE_STOP(&pPool->StatR3Reset, a);
5433}
5434
5435#endif /* IN_RING3 */
5436
5437#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5438/**
5439 * Stringifies a PGMPOOLKIND value.
5440 */
5441static const char *pgmPoolPoolKindToStr(uint8_t enmKind)
5442{
5443 switch ((PGMPOOLKIND)enmKind)
5444 {
5445 case PGMPOOLKIND_INVALID:
5446 return "PGMPOOLKIND_INVALID";
5447 case PGMPOOLKIND_FREE:
5448 return "PGMPOOLKIND_FREE";
5449 case PGMPOOLKIND_32BIT_PT_FOR_PHYS:
5450 return "PGMPOOLKIND_32BIT_PT_FOR_PHYS";
5451 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT:
5452 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_PT";
5453 case PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB:
5454 return "PGMPOOLKIND_32BIT_PT_FOR_32BIT_4MB";
5455 case PGMPOOLKIND_PAE_PT_FOR_PHYS:
5456 return "PGMPOOLKIND_PAE_PT_FOR_PHYS";
5457 case PGMPOOLKIND_PAE_PT_FOR_32BIT_PT:
5458 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_PT";
5459 case PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB:
5460 return "PGMPOOLKIND_PAE_PT_FOR_32BIT_4MB";
5461 case PGMPOOLKIND_PAE_PT_FOR_PAE_PT:
5462 return "PGMPOOLKIND_PAE_PT_FOR_PAE_PT";
5463 case PGMPOOLKIND_PAE_PT_FOR_PAE_2MB:
5464 return "PGMPOOLKIND_PAE_PT_FOR_PAE_2MB";
5465 case PGMPOOLKIND_32BIT_PD:
5466 return "PGMPOOLKIND_32BIT_PD";
5467 case PGMPOOLKIND_32BIT_PD_PHYS:
5468 return "PGMPOOLKIND_32BIT_PD_PHYS";
5469 case PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD:
5470 return "PGMPOOLKIND_PAE_PD0_FOR_32BIT_PD";
5471 case PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD:
5472 return "PGMPOOLKIND_PAE_PD1_FOR_32BIT_PD";
5473 case PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD:
5474 return "PGMPOOLKIND_PAE_PD2_FOR_32BIT_PD";
5475 case PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD:
5476 return "PGMPOOLKIND_PAE_PD3_FOR_32BIT_PD";
5477 case PGMPOOLKIND_PAE_PD_FOR_PAE_PD:
5478 return "PGMPOOLKIND_PAE_PD_FOR_PAE_PD";
5479 case PGMPOOLKIND_PAE_PD_PHYS:
5480 return "PGMPOOLKIND_PAE_PD_PHYS";
5481 case PGMPOOLKIND_PAE_PDPT_FOR_32BIT:
5482 return "PGMPOOLKIND_PAE_PDPT_FOR_32BIT";
5483 case PGMPOOLKIND_PAE_PDPT:
5484 return "PGMPOOLKIND_PAE_PDPT";
5485 case PGMPOOLKIND_PAE_PDPT_PHYS:
5486 return "PGMPOOLKIND_PAE_PDPT_PHYS";
5487 case PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT:
5488 return "PGMPOOLKIND_64BIT_PDPT_FOR_64BIT_PDPT";
5489 case PGMPOOLKIND_64BIT_PDPT_FOR_PHYS:
5490 return "PGMPOOLKIND_64BIT_PDPT_FOR_PHYS";
5491 case PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD:
5492 return "PGMPOOLKIND_64BIT_PD_FOR_64BIT_PD";
5493 case PGMPOOLKIND_64BIT_PD_FOR_PHYS:
5494 return "PGMPOOLKIND_64BIT_PD_FOR_PHYS";
5495 case PGMPOOLKIND_64BIT_PML4:
5496 return "PGMPOOLKIND_64BIT_PML4";
5497 case PGMPOOLKIND_EPT_PDPT_FOR_PHYS:
5498 return "PGMPOOLKIND_EPT_PDPT_FOR_PHYS";
5499 case PGMPOOLKIND_EPT_PD_FOR_PHYS:
5500 return "PGMPOOLKIND_EPT_PD_FOR_PHYS";
5501 case PGMPOOLKIND_EPT_PT_FOR_PHYS:
5502 return "PGMPOOLKIND_EPT_PT_FOR_PHYS";
5503 case PGMPOOLKIND_ROOT_NESTED:
5504 return "PGMPOOLKIND_ROOT_NESTED";
5505 }
5506 return "Unknown kind!";
5507}
5508#endif /* LOG_ENABLED || VBOX_STRICT */
5509