source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@94508

Last change on this file was r94508, checked in by vboxsync:

VMM/PGM: Removed unnecessary assertion in BthEnter broken by r150131. bugref:9898

1/* $Id: PGMAllBth.h 94508 2022-04-07 11:08:43Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
12
13/*
14 * Copyright (C) 2006-2022 Oracle Corporation
15 *
16 * This file is part of VirtualBox Open Source Edition (OSE), as
17 * available from http://www.virtualbox.org. This file is free software;
18 * you can redistribute it and/or modify it under the terms of the GNU
19 * General Public License (GPL) as published by the Free Software
20 * Foundation, in version 2 as it comes in the "COPYING" file of the
21 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
22 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
23 */
24
25#ifdef _MSC_VER
26/** @todo we're generating unnecessary code in nested/ept shadow mode and for
27 * real/prot-guest+RC mode. */
28# pragma warning(disable: 4505)
29#endif
30
31
32/*********************************************************************************************************************************
33* Internal Functions *
34*********************************************************************************************************************************/
35RT_C_DECLS_BEGIN
36PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
37#ifndef IN_RING3
38PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
39PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
40 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken);
41#endif
42PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
43static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
44static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
45static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
46#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
47static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
48#else
49static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
50#endif
51PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
52PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
53PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
54#ifdef VBOX_STRICT
55PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
56#endif
57PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
58PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
59
60#ifdef IN_RING3
61PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
62#endif
63RT_C_DECLS_END
64
65
66
67
68/*
69 * Filter out some illegal combinations of guest and shadow paging, so we can
70 * remove redundant checks inside functions.
71 */
72#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
73 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
74# error "Invalid combination; PAE guest implies PAE shadow"
75#endif
76
77#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
78 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
79 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
80# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
81#endif
82
83#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
84 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
85 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
86# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
87#endif
88
89#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
90 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
91# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
92#endif
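/*
 * Overview note: this header is a paging-mode template.  The including code
 * (PGMAll.cpp and friends) compiles it once for every supported
 * (PGM_GST_TYPE, PGM_SHW_TYPE) pair, and PGM_BTH_DECL()/PGM_BTH_NAME() splice
 * a mode-specific prefix into each function name so that every instantiation
 * gets its own Enter/Trap0eHandler/SyncPage/... symbols.  The #error blocks
 * above merely reject guest/shadow combinations that PGM never instantiates,
 * which is what allows the code below to omit redundant runtime checks.
 */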
93
94
95/**
96 * Enters the shadow+guest mode.
97 *
98 * @returns VBox status code.
99 * @param pVCpu The cross context virtual CPU structure.
100 * @param GCPhysCR3 The physical address from the CR3 register.
101 */
102PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
103{
104 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches;
105 * Other modes rely on MapCR3/UnmapCR3 to setup the shadow root page tables.
106 */
107#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
108 || PGM_SHW_TYPE == PGM_TYPE_PAE \
109 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
110 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
111 || PGM_GST_TYPE == PGM_TYPE_PROT))
112
113 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
114
115 Assert(!pVM->pgm.s.fNestedPaging);
116
117 PGM_LOCK_VOID(pVM);
118 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
119 * but any calls to GC need a proper shadow page setup as well.
120 */
121 /* Free the previous root mapping if still active. */
122 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
123 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
124 if (pOldShwPageCR3)
125 {
126 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
127
128 /* Mark the page as unlocked; allow flushing again. */
129 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
130
131 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
132 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
133 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
134 }
135
136 /* construct a fake address. */
137 GCPhysCR3 = RT_BIT_64(63);
138 PPGMPOOLPAGE pNewShwPageCR3;
139 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
140 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
141 &pNewShwPageCR3);
142 AssertRCReturn(rc, rc);
143
144 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
145 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
146
147 /* Mark the page as locked; disallow flushing. */
148 pgmPoolLockPage(pPool, pNewShwPageCR3);
149
150 /* Set the current hypervisor CR3. */
151 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
152
153 PGM_UNLOCK(pVM);
154 return rc;
155#else
156 NOREF(pVCpu); NOREF(GCPhysCR3);
157 return VINF_SUCCESS;
158#endif
159}
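/*
 * Note on Enter above: real-mode and protected-mode-without-paging guests have
 * no guest CR3 to mirror, so the root shadow page is allocated straight from
 * the pool using an impossible "fake" physical address (bit 63 set) as the
 * pool key, locked so the pool cannot flush it, and installed as the
 * hypervisor CR3.  Guest modes with paging enabled instead build and tear
 * down their shadow root in MapCR3/UnmapCR3, and with nested paging/EPT this
 * path is skipped entirely (hence the Assert on fNestedPaging).
 */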
160
161
162#ifndef IN_RING3
163
164# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
165/**
166 * Deal with a guest page fault.
167 *
168 * @returns Strict VBox status code.
169 * @retval VINF_EM_RAW_GUEST_TRAP
170 * @retval VINF_EM_RAW_EMULATE_INSTR
171 *
172 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
173 * @param pWalk The guest page table walk result.
174 * @param uErr The error code.
175 */
176PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
177{
178 /*
179 * Calc the error code for the guest trap.
180 */
181 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
182 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
183 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
184 if ( pWalk->fRsvdError
185 || pWalk->fBadPhysAddr)
186 {
187 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
188 Assert(!pWalk->fNotPresent);
189 }
190 else if (!pWalk->fNotPresent)
191 uNewErr |= X86_TRAP_PF_P;
192 TRPMSetErrorCode(pVCpu, uNewErr);
193
194 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
195 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
196 return VINF_EM_RAW_GUEST_TRAP;
197}
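/*
 * For reference, the x86 #PF error code bits used above are:
 *   X86_TRAP_PF_P    (bit 0) - 0 = page not present, 1 = protection violation
 *   X86_TRAP_PF_RW   (bit 1) - the access was a write
 *   X86_TRAP_PF_US   (bit 2) - the access originated in user mode
 *   X86_TRAP_PF_RSVD (bit 3) - a reserved bit was set in a paging entry
 *   X86_TRAP_PF_ID   (bit 4) - instruction fetch (relevant with NX/SMEP)
 *
 * The error code delivered by the CPU reflects the shadow page tables, so the
 * function above rebuilds one that matches the guest's own tables: it keeps
 * the RW/US (and, with NX active, ID) qualifiers from the real fault and then
 * sets P or RSVD|P according to the guest walk.  Example: a user-mode write
 * that faulted in the shadow tables but hits a not-present guest PTE is
 * reported to the guest as uNewErr = RW | US with P clear.
 */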
198# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
199
200
201#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
202/**
203 * Deal with a guest page fault.
204 *
205 * The caller has taken the PGM lock.
206 *
207 * @returns Strict VBox status code.
208 *
209 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
210 * @param uErr The error code.
211 * @param pRegFrame The register frame.
212 * @param pvFault The fault address.
213 * @param pPage The guest page at @a pvFault.
214 * @param pWalk The guest page table walk result.
215 * @param pGstWalk The guest paging-mode specific walk information.
216 * @param pfLockTaken PGM lock taken here or not (out). This is true
217 * when we're called.
218 */
219static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
220 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
221# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
222 , PPGMPTWALK pWalk
223 , PGSTPTWALK pGstWalk
224# endif
225 )
226{
227# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
228 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
229# endif
230 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
231 VBOXSTRICTRC rcStrict;
232
233 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
234 {
235 /*
236 * Physical page access handler.
237 */
238# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
239 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
240# else
241 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
242# endif
243 PPGMPHYSHANDLER pCur;
244 rcStrict = pgmHandlerPhysicalLookup(pVM, GCPhysFault, &pCur);
245 if (RT_SUCCESS(rcStrict))
246 {
247 PCPGMPHYSHANDLERTYPEINT const pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
248
249# ifdef PGM_SYNC_N_PAGES
250 /*
251 * If the region is write protected and we got a page not present fault, then sync
252 * the pages. If the fault was caused by a read, then restart the instruction.
253 * In case of write access continue to the GC write handler.
254 *
255 * ASSUMES that there is only one handler per page or that they have similar write properties.
256 */
257 if ( !(uErr & X86_TRAP_PF_P)
258 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
259 {
260# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
261 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
262# else
263 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
264# endif
265 if ( RT_FAILURE(rcStrict)
266 || !(uErr & X86_TRAP_PF_RW)
267 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
268 {
269 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
270 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
271 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
272 return rcStrict;
273 }
274 }
275# endif
276# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
277 /*
278 * If the access was not thru a #PF(RSVD|...) resync the page.
279 */
280 if ( !(uErr & X86_TRAP_PF_RSVD)
281 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
282# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
283 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
284 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
285# endif
286 )
287 {
288# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
289 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
290# else
291 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
292# endif
293 if ( RT_FAILURE(rcStrict)
294 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
295 {
296 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
297 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
298 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
299 return rcStrict;
300 }
301 }
302# endif
303
304 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
305 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
306 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
307 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
308 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
309 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
310 else
311 {
312 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
313 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
314 }
315
316 if (pCurType->pfnPfHandler)
317 {
318 STAM_PROFILE_START(&pCur->Stat, h);
319
320 if (pCurType->fKeepPgmLock)
321 {
322 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault,
323 !pCurType->fRing0DevInsIdx ? pCur->uUser
324 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser));
325
326 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
327 }
328 else
329 {
330 uint64_t const uUser = !pCurType->fRing0DevInsIdx ? pCur->uUser
331 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser);
332 PGM_UNLOCK(pVM);
333 *pfLockTaken = false;
334
335 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, uUser);
336
337 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
338 }
339 }
340 else
341 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
342
343 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
344 return rcStrict;
345 }
346 AssertMsgReturn(rcStrict == VERR_NOT_FOUND, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict);
347 }
348
349 /*
350 * There is a handled area of the page, but this fault doesn't belong to it.
351 * We must emulate the instruction.
352 *
353 * To avoid crashing (non-fatal) in the interpreter and going back to the recompiler,
354 * we first check if this was a page-not-present fault for a page with only
355 * write access handlers. Restart the instruction if it wasn't a write access.
356 */
357 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
358
359 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
360 && !(uErr & X86_TRAP_PF_P))
361 {
362# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
363 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
364# else
365 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
366# endif
367 if ( RT_FAILURE(rcStrict)
368 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
369 || !(uErr & X86_TRAP_PF_RW))
370 {
371 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
372 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
373 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
374 return rcStrict;
375 }
376 }
377
378 /** @todo This particular case can cause quite a lot of overhead. E.g. early stage of kernel booting in Ubuntu 6.06
379 * It's writing to an unhandled part of the LDT page several million times.
380 */
381 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
382 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
383 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
384 return rcStrict;
385} /* if any kind of handler */
386# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
387
388
389/**
390 * \#PF Handler for raw-mode guest execution.
391 *
392 * @returns VBox status code (appropriate for trap handling and GC return).
393 *
394 * @param pVCpu The cross context virtual CPU structure.
395 * @param uErr The trap error code.
396 * @param pRegFrame Trap register frame.
397 * @param pvFault The fault address.
398 * @param pfLockTaken PGM lock taken here or not (out)
399 */
400PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
401{
402 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
403
404 *pfLockTaken = false;
405
406# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
407 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
408 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
409 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
410 && PGM_SHW_TYPE != PGM_TYPE_NONE
411 int rc;
412
413# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
414 /*
415 * Walk the guest page translation tables and check if it's a guest fault.
416 */
417 PGMPTWALK Walk;
418 GSTPTWALK GstWalk;
419 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
420 if (RT_FAILURE_NP(rc))
421 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
422
423 /* assert some GstWalk sanity. */
424# if PGM_GST_TYPE == PGM_TYPE_AMD64
425 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
426# endif
427# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
428 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
429# endif
430 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
431 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
432 Assert(Walk.fSucceeded);
433 Assert(Walk.fEffective & PGM_PTATTRS_R_MASK);
434
435 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
436 {
437 if ( ( (uErr & X86_TRAP_PF_RW)
438 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
439 && ( (uErr & X86_TRAP_PF_US)
440 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
441 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
442 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
443 )
444 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
445 }
446
447 /* Take the big lock now before we update flags. */
448 *pfLockTaken = true;
449 PGM_LOCK_VOID(pVM);
450
451 /*
452 * Set the accessed and dirty flags.
453 */
454 /** @todo Should probably use cmpxchg logic here as we're potentially racing
455 * other CPUs in SMP configs. (the lock isn't enough, since we take it
456 * after walking and the page tables could be stale already) */
457# if PGM_GST_TYPE == PGM_TYPE_AMD64
458 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
459 {
460 GstWalk.Pml4e.u |= X86_PML4E_A;
461 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
462 }
463 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
464 {
465 GstWalk.Pdpe.u |= X86_PDPE_A;
466 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
467 }
468# endif
469 if (Walk.fBigPage)
470 {
471 Assert(GstWalk.Pde.u & X86_PDE_PS);
472 if (uErr & X86_TRAP_PF_RW)
473 {
474 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
475 {
476 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
477 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
478 }
479 }
480 else
481 {
482 if (!(GstWalk.Pde.u & X86_PDE4M_A))
483 {
484 GstWalk.Pde.u |= X86_PDE4M_A;
485 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
486 }
487 }
488 }
489 else
490 {
491 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
492 if (!(GstWalk.Pde.u & X86_PDE_A))
493 {
494 GstWalk.Pde.u |= X86_PDE_A;
495 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
496 }
497
498 if (uErr & X86_TRAP_PF_RW)
499 {
500# ifdef VBOX_WITH_STATISTICS
501 if (GstWalk.Pte.u & X86_PTE_D)
502 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
503 else
504 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
505# endif
506 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
507 {
508 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
509 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
510 }
511 }
512 else
513 {
514 if (!(GstWalk.Pte.u & X86_PTE_A))
515 {
516 GstWalk.Pte.u |= X86_PTE_A;
517 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
518 }
519 }
520 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
521 }
522#if 0
523 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
524 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
525 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
526#endif
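 /*
  * Why the block above touches the guest structures at all: with shadow
  * paging the CPU only walks, and only updates A/D bits in, the shadow page
  * tables, so PGM has to emulate what real hardware would have done to the
  * guest's own tables: set the Accessed bit at every level it walked and the
  * Dirty bit in the PTE (or big-page PDE) on write faults.  GST_ATOMIC_OR
  * keeps each individual update atomic, but as the @todo above notes the walk
  * itself may already be stale on SMP guests, which is why a cmpxchg-style
  * recheck is on the wish list.
  */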
527
528# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
529 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
530
531 /* Take the big lock now. */
532 *pfLockTaken = true;
533 PGM_LOCK_VOID(pVM);
534# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
535
536# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
537 /*
538 * If it is a reserved bit fault we know that it is an MMIO (access
539 * handler) related fault and can skip some 200 lines of code.
540 */
541 if (uErr & X86_TRAP_PF_RSVD)
542 {
543 Assert(uErr & X86_TRAP_PF_P);
544 PPGMPAGE pPage;
545# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
546 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
547 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
548 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
549 pfLockTaken, &Walk, &GstWalk));
550 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
551# else
552 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
553 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
554 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
555 pfLockTaken));
556 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
557# endif
558 AssertRC(rc);
559 PGM_INVL_PG(pVCpu, pvFault);
560 return rc; /* Restart with the corrected entry. */
561 }
562# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
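 /*
  * Background for the X86_TRAP_PF_RSVD short cut above: with the MMIO
  * optimizations enabled, the shadow PTEs covering fully handled (MMIO) pages
  * are set up so that any guest access trips a reserved-bit page fault rather
  * than a plain not-present or protection fault.  That identifies the access
  * as MMIO without a further walk; if the lookup shows the page is no longer
  * all-access handled, SyncPage rewrites the stale shadow entry and the
  * instruction is simply restarted.
  */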
563
564 /*
565 * Fetch the guest PDE, PDPE and PML4E.
566 */
567# if PGM_SHW_TYPE == PGM_TYPE_32BIT
568 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
569 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
570
571# elif PGM_SHW_TYPE == PGM_TYPE_PAE
572 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
573 PX86PDPAE pPDDst;
574# if PGM_GST_TYPE == PGM_TYPE_PAE
575 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
576# else
577 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
578# endif
579 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
580
581# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
582 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
583 PX86PDPAE pPDDst;
584# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
585 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
586 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
587# else
588 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
589# endif
590 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
591
592# elif PGM_SHW_TYPE == PGM_TYPE_EPT
593 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
594 PEPTPD pPDDst;
595 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
596 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
597# endif
598 Assert(pPDDst);
599
600# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
601 /*
602 * Dirty page handling.
603 *
604 * If we successfully correct the write protection fault due to dirty bit
605 * tracking, then return immediately.
606 */
607 if (uErr & X86_TRAP_PF_RW) /* write fault? */
608 {
609 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
610 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
611 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
612 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
613 {
614 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
615 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
616 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
617 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
618 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
619 return VINF_SUCCESS;
620 }
621#ifdef DEBUG_bird
622 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
623 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
624#endif
625 }
626
627# if 0 /* rarely useful; leave for debugging. */
628 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
629# endif
630# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
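 /*
  * A word on the dirty-bit tracking handled above (rough sketch; the details
  * live in CheckDirtyPageFault and SyncPage): when PGM shadows a guest page
  * whose Dirty bit is still clear, it deliberately write-protects the shadow
  * entry.  The guest's first write then traps here, CheckDirtyPageFault sets
  * the guest A/D bits, restores write access in the shadow entry and returns
  * VINF_PGM_HANDLED_DIRTY_BIT_FAULT, after which the faulting instruction is
  * simply restarted.
  */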
631
632 /*
633 * A common case is the not-present error caused by lazy page table syncing.
634 *
635 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
636 * here so we can safely assume that the shadow PT is present when calling
637 * SyncPage later.
638 *
639 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
640 * of mapping conflict and defer to SyncCR3 in R3.
641 * (Again, we do NOT support access handlers for non-present guest pages.)
642 *
643 */
644# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
645 Assert(GstWalk.Pde.u & X86_PDE_P);
646# endif
647 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
648 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
649 {
650 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
651# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
652 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
653 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
654# else
655 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
656 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
657# endif
658 if (RT_SUCCESS(rc))
659 return rc;
660 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
661 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
662 return VINF_PGM_SYNC_CR3;
663 }
664
665 /*
666 * Check if this fault address is flagged for special treatment,
667 * which means we'll have to figure out the physical address and
668 * check flags associated with it.
669 *
670 * ASSUME that we can limit any special access handling to pages
671 * in page tables which the guest believes to be present.
672 */
673# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
674 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
675# else
676 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
677# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
678 PPGMPAGE pPage;
679 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
680 if (RT_FAILURE(rc))
681 {
682 /*
683 * When the guest accesses invalid physical memory (e.g. probing
684 * of RAM or accessing a remapped MMIO range), then we'll fall
685 * back to the recompiler to emulate the instruction.
686 */
687 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
688 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
689 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
690 return VINF_EM_RAW_EMULATE_INSTR;
691 }
692
693 /*
694 * Any handlers for this page?
695 */
696 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
697# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
698 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
699 &Walk, &GstWalk));
700# else
701 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
702# endif
703
704 /*
705 * We are here only if the page is present in the guest page tables and
706 * the trap is not handled by our handlers.
707 *
708 * Check it for page out-of-sync situation.
709 */
710 if (!(uErr & X86_TRAP_PF_P))
711 {
712 /*
713 * Page is not present in our page tables. Try to sync it!
714 */
715 if (uErr & X86_TRAP_PF_US)
716 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
717 else /* supervisor */
718 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
719
720 if (PGM_PAGE_IS_BALLOONED(pPage))
721 {
722 /* Emulate reads from ballooned pages as they are not present in
723 our shadow page tables. (Required for e.g. Solaris guests; soft
724 ecc, random nr generator.) */
725 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
726 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
727 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
728 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
729 return rc;
730 }
731
732# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
733 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
734# else
735 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
736# endif
737 if (RT_SUCCESS(rc))
738 {
739 /* The page was successfully synced, return to the guest. */
740 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
741 return VINF_SUCCESS;
742 }
743 }
744 else /* uErr & X86_TRAP_PF_P: */
745 {
746 /*
747 * Write protected pages are made writable when the guest makes the
748 * first write to it. This happens for pages that are shared, write
749 * monitored or not yet allocated.
750 *
751 * We may also end up here when CR0.WP=0 in the guest.
752 *
753 * Also, a side effect of not flushing global PDEs is out-of-sync
754 * pages due to monitored physical regions that are no longer valid.
755 * Assume for now it only applies to the read/write flag.
756 */
757 if (uErr & X86_TRAP_PF_RW)
758 {
759 /*
760 * Check if it is a read-only page.
761 */
762 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
763 {
764 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
765 Assert(!PGM_PAGE_IS_ZERO(pPage));
766 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
767 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
768
769 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
770 if (rc != VINF_SUCCESS)
771 {
772 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
773 return rc;
774 }
775 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
776 return VINF_EM_NO_MEMORY;
777 }
778
779# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
780 /*
781 * Check to see if we need to emulate the instruction if CR0.WP=0.
782 */
783 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
784 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
785 && CPUMGetGuestCPL(pVCpu) < 3)
786 {
787 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
788
789 /*
790 * The Netware WP0+RO+US hack.
791 *
792 * Netware sometimes(/always?) runs with WP0. It has been observed doing
793 * excessive write accesses to pages which are mapped with US=1 and RW=0
794 * while WP=0. This causes a lot of exits and extremely slow execution.
795 * To avoid trapping and emulating every write here, we change the shadow
796 * page table entry to map it as US=0 and RW=1 until user mode tries to
797 * access it again (see further below). We count these shadow page table
798 * changes so we can avoid having to clear the page pool every time the WP
799 * bit changes to 1 (see PGMCr0WpEnabled()).
800 */
801# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
802 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
803 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
804 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
805 {
806 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
807 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
808 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
809 {
810 PGM_INVL_PG(pVCpu, pvFault);
811 pVCpu->pgm.s.cNetwareWp0Hacks++;
812 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
813 return rc;
814 }
815 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
816 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
817 }
818# endif
819
820 /* Interpret the access. */
821 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
822 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
823 if (RT_SUCCESS(rc))
824 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
825 else
826 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
827 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
828 return rc;
829 }
830# endif
831 /// @todo count the above case; else
832 if (uErr & X86_TRAP_PF_US)
833 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
834 else /* supervisor */
835 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
836
837 /*
838 * Sync the page.
839 *
840 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
841 * page is not present, which is not true in this case.
842 */
843# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
844 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
845# else
846 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
847# endif
848 if (RT_SUCCESS(rc))
849 {
850 /*
851 * Page was successfully synced, return to guest but invalidate
852 * the TLB first as the page is very likely to be in it.
853 */
854# if PGM_SHW_TYPE == PGM_TYPE_EPT
855 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
856# else
857 PGM_INVL_PG(pVCpu, pvFault);
858# endif
859# ifdef VBOX_STRICT
860 PGMPTWALK GstPageWalk;
861 GstPageWalk.GCPhys = RTGCPHYS_MAX;
862 if (!pVM->pgm.s.fNestedPaging)
863 {
864 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
865 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
866 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
867 }
868# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
869 uint64_t fPageShw = 0;
870 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
871 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
872 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
873# endif
874# endif /* VBOX_STRICT */
875 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
876 return VINF_SUCCESS;
877 }
878 }
879# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
880 /*
881 * Check for Netware WP0+RO+US hack from above and undo it when user
882 * mode accesses the page again.
883 */
884 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
885 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
886 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
887 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
888 && CPUMGetGuestCPL(pVCpu) == 3
889 && pVM->cCpus == 1
890 )
891 {
892 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
893 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
894 if (RT_SUCCESS(rc))
895 {
896 PGM_INVL_PG(pVCpu, pvFault);
897 pVCpu->pgm.s.cNetwareWp0Hacks--;
898 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
899 return VINF_SUCCESS;
900 }
901 }
902# endif /* PGM_WITH_PAGING */
903
904 /** @todo else: why are we here? */
905
906# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
907 /*
908 * Check for VMM page flags vs. Guest page flags consistency.
909 * Currently only for debug purposes.
910 */
911 if (RT_SUCCESS(rc))
912 {
913 /* Get guest page flags. */
914 PGMPTWALK GstPageWalk;
915 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
916 if (RT_SUCCESS(rc2))
917 {
918 uint64_t fPageShw = 0;
919 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
920
921#if 0
922 /*
923 * Compare page flags.
924 * Note: we have AVL, A, D bits desynced.
925 */
926 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
927 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
928 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
929 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
930 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
931 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
932 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
933 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
934 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
935 01:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
936 01:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
937
938 01:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
939 01:01:15.625516 00:08:43.268051 Location :
940 e:\vbox\svn\trunk\srcPage flags mismatch!
941 pvFault=fffff801b0d7b000
942 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
943 GCPhys=0000000019b52000
944 fPageShw=0
945 fPageGst=77b0000000000121
946 rc=0
947#endif
948
949 }
950 else
951 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
952 }
953 else
954 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
955# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
956 }
957
958
959 /*
960 * If we get here it is because something failed above, i.e. most likely guru
961 * meditation time.
962 */
963 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
964 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
965 return rc;
966
967# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
968 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
969 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
970 return VERR_PGM_NOT_USED_IN_MODE;
971# endif
972}
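/*
 * Rough road map of Trap0eHandler above: (1) walk the guest tables and reflect
 * pure guest faults via Trap0eHandlerGuestFault; (2) set the guest A/D bits
 * the hardware could not set itself; (3) take the RSVD/MMIO short cut when
 * applicable; (4) fix up write faults caused by dirty-bit tracking; (5) lazily
 * create the shadow page table via SyncPT if the shadow PDE is not present;
 * (6) dispatch to physical access handlers; otherwise (7) resync out-of-sync
 * pages, make write-monitored pages writable, and deal with the CR0.WP=0 /
 * Netware special cases.
 */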
973
974
975/**
976 * Nested \#PF handler for nested-guest hardware-assisted execution using nested
977 * paging.
978 *
979 * @returns VBox status code (appropriate for trap handling and GC return).
980 * @param pVCpu The cross context virtual CPU structure.
981 * @param uErr The fault error (X86_TRAP_PF_*).
982 * @param pRegFrame The register frame.
983 * @param GCPhysNested The nested-guest physical address being accessed.
984 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
985 * caused this fault. If @c false, GCPtrNested must be
986 * 0.
987 * @param GCPtrNested The nested-guest linear address that caused this
988 * fault.
989 * @param pWalk The guest page table walk result.
990 * @param pfLockTaken Where to store whether the PGM lock is still held
991 * when this function completes.
992 */
993PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
994 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken)
995{
996 *pfLockTaken = false;
997# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) \
998 && ( PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_32BIT \
999 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1000 && PGM_SHW_TYPE == PGM_TYPE_EPT
1001
1002 Assert(CPUMIsGuestVmxEptPagingEnabled(pVCpu));
1003
1004 /*
1005 * Walk the guest EPT tables and check if it's an EPT violation or misconfiguration.
1006 */
1007 PGMPTWALKGST GstWalkAll;
1008 int rc = pgmGstSlatWalk(pVCpu, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk, &GstWalkAll);
1009 if (RT_FAILURE(rc))
1010 return rc;
1011
1012 Assert(GstWalkAll.enmType == PGMPTWALKGSTTYPE_EPT);
1013 Assert(pWalk->fSucceeded);
1014 Assert(pWalk->fEffective & PGM_PTATTRS_R_MASK);
1015 Assert(pWalk->fIsSlat);
1016
1017 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
1018 {
1019 if ( ( (uErr & X86_TRAP_PF_RW)
1020 && !(pWalk->fEffective & PGM_PTATTRS_W_MASK)
1021 && ( (uErr & X86_TRAP_PF_US)
1022 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
1023 || ((uErr & X86_TRAP_PF_US) && !(pWalk->fEffective & PGM_PTATTRS_US_MASK))
1024 || ((uErr & X86_TRAP_PF_ID) && (pWalk->fEffective & PGM_PTATTRS_NX_MASK))
1025 )
1026 return VERR_ACCESS_DENIED;
1027 }
1028
1029 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1030 RTGCPHYS const GCPhysFault = PGM_A20_APPLY(pVCpu, GCPhysNested & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1031 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
1032
1033 /* Take the big lock now. */
1034 *pfLockTaken = true;
1035 PGM_LOCK_VOID(pVM);
1036
1037 /*
1038 * Check if this is an APIC-access page access (VMX specific).
1039 */
1040 RTGCPHYS const GCPhysApicAccess = CPUMGetGuestVmxApicAccessPageAddr(pVCpu);
1041 if ((pWalk->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysApicAccess)
1042 {
1043 PPGMPAGE pPage;
1044 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPhysApicAccess), &pPage);
1045 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1046 {
1047 rc = VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pWalk->GCPhys, pPage,
1048 pfLockTaken));
1049 return rc;
1050 }
1051 }
1052
1053# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1054 /*
1055 * Check if this is an MMIO access.
1056 */
1057 if (uErr & X86_TRAP_PF_RSVD)
1058 {
1059 PPGMPAGE pPage;
1060 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)GCPhysFault), &pPage);
1061 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1062 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1063 pfLockTaken));
1064 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1065 AssertRC(rc);
1066 HMInvalidatePhysPage(pVM, GCPhysFault);
1067 return rc; /* Restart with the corrected entry. */
1068 }
1069# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1070
1071 /*
1072 * Fetch the guest EPT page directory pointer.
1073 */
1074 const unsigned iPDDst = ((GCPhysFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
1075 PEPTPD pPDDst;
1076 rc = pgmShwGetEPTPDPtr(pVCpu, GCPhysFault, NULL /* ppPdpt */, &pPDDst);
1077 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
1078 Assert(pPDDst);
1079
1080 /*
1081 * A common case is the not-present error caused by lazy page table syncing.
1082 *
1083 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
1084 * here so we can safely assume that the shadow PT is present when calling
1085 * SyncPage later.
1086 *
1087 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
1088 * of mapping conflict and defer to SyncCR3 in R3.
1089 * (Again, we do NOT support access handlers for non-present guest pages.)
1090 *
1091 */
1092 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
1093 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
1094 {
1095 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
1096 LogFlow(("=>SyncPT GCPhysFault=%RGp\n", GCPhysFault));
1097 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0 /* iPDSrc */, NULL /* pPDSrc */, GCPhysFault);
1098 if (RT_SUCCESS(rc))
1099 return rc;
1100 Log(("SyncPT: %RGp failed!! rc=%Rrc\n", GCPhysFault, rc));
1101 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1102 return VINF_PGM_SYNC_CR3;
1103 }
1104
1105 /*
1106 * Check if this fault address is flagged for special treatment,
1107 * which means we'll have to figure out the physical address and
1108 * check flags associated with it.
1109 *
1110 * ASSUME that we can limit any special access handling to pages
1111 * in page tables which the guest believes to be present.
1112 */
1113 PPGMPAGE pPage;
1114 rc = pgmPhysGetPageEx(pVM, GCPhysFault, &pPage);
1115 if (RT_FAILURE(rc))
1116 {
1117 /*
1118 * When the guest accesses invalid physical memory (e.g. probing
1119 * of RAM or accessing a remapped MMIO range), then we'll fall
1120 * back to the recompiler to emulate the instruction.
1121 */
1122 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhysFault, rc));
1123 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
1124 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
1125 return VINF_EM_RAW_EMULATE_INSTR;
1126 }
1127
1128 /*
1129 * Any handlers for this page?
1130 */
1131 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1132 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1133 pfLockTaken));
1134
1135 /*
1136 * We are here only if the page is present in the guest page tables and
1137 * the trap is not handled by our handlers.
1138 *
1139 * Check it for page out-of-sync situation.
1140 */
1141 if (!(uErr & X86_TRAP_PF_P))
1142 {
1143 /*
1144 * Page is not present in our page tables. Try to sync it!
1145 */
1146 if (uErr & X86_TRAP_PF_US)
1147 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
1148 else /* supervisor */
1149 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
1150
1151 if (PGM_PAGE_IS_BALLOONED(pPage))
1152 {
1153 /* Emulate reads from ballooned pages as they are not present in
1154 our shadow page tables. (Required for e.g. Solaris guests; soft
1155 ecc, random nr generator.) */
1156 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, GCPhysFault));
1157 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
1158 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
1159 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
1160 return rc;
1161 }
1162
1163 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, PGM_SYNC_NR_PAGES, uErr);
1164 if (RT_SUCCESS(rc))
1165 {
1166 /* The page was successfully synced, return to the guest. */
1167 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
1168 return VINF_SUCCESS;
1169 }
1170 }
1171 else
1172 {
1173 /*
1174 * Write protected pages are made writable when the guest makes the
1175 * first write to it. This happens for pages that are shared, write
1176 * monitored or not yet allocated.
1177 *
1178 * We may also end up here when CR0.WP=0 in the guest.
1179 *
1180 * Also, a side effect of not flushing global PDEs is out-of-sync
1181 * pages due to monitored physical regions that are no longer valid.
1182 * Assume for now it only applies to the read/write flag.
1183 */
1184 if (uErr & X86_TRAP_PF_RW)
1185 {
1186 /*
1187 * Check if it is a read-only page.
1188 */
1189 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1190 {
1191 Assert(!PGM_PAGE_IS_ZERO(pPage));
1192 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhysFault));
1193 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
1194
1195 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysFault);
1196 if (rc != VINF_SUCCESS)
1197 {
1198 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
1199 return rc;
1200 }
1201 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
1202 return VINF_EM_NO_MEMORY;
1203 }
1204
1205 if (uErr & X86_TRAP_PF_US)
1206 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
1207 else
1208 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1209
1210 /*
1211 * Sync the page.
1212 *
1213 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1214 * page is not present, which is not true in this case.
1215 */
1216 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1217 if (RT_SUCCESS(rc))
1218 {
1219 /*
1220 * Page was successfully synced, return to guest but invalidate
1221 * the TLB first as the page is very likely to be in it.
1222 */
1223 HMInvalidatePhysPage(pVM, GCPhysFault);
1224 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
1225 return VINF_SUCCESS;
1226 }
1227 }
1228 }
1229
1230 /*
1231 * If we get here it is because something failed above, i.e. most likely guru meditation time.
1232 */
1233 LogRelFunc(("returns rc=%Rrc GCPhysFault=%RGp uErr=%RX64 cs:rip=%04x:%08RX64\n", rc, GCPhysFault, (uint64_t)uErr,
1234 pRegFrame->cs.Sel, pRegFrame->rip));
1235 return rc;
1236
1237# else
1238 RT_NOREF7(pVCpu, uErr, pRegFrame, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk);
1239 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1240 return VERR_PGM_NOT_USED_IN_MODE;
1241# endif
1242}
1243
1244#endif /* !IN_RING3 */
1245
1246
1247/**
1248 * Emulation of the invlpg instruction.
1249 *
1250 *
1251 * @returns VBox status code.
1252 *
1253 * @param pVCpu The cross context virtual CPU structure.
1254 * @param GCPtrPage Page to invalidate.
1255 *
1256 * @remark ASSUMES that the guest is updating before invalidating. This order
1257 * isn't required by the CPU, so this is speculative and could cause
1258 * trouble.
1259 * @remark No TLB shootdown is done on any other VCPU as we assume that
1260 * invlpg emulation is the *only* reason for calling this function.
1261 * (The guest has to shoot down TLB entries on other CPUs itself)
1262 * Currently true, but keep in mind!
1263 *
1264 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1265 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1266 */
1267PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1268{
1269#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1270 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1271 && PGM_SHW_TYPE != PGM_TYPE_NONE
1272 int rc;
1273 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1274 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1275
1276 PGM_LOCK_ASSERT_OWNER(pVM);
1277
1278 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1279
1280 /*
1281 * Get the shadow PD entry and skip out if this PD isn't present.
1282 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1283 */
1284# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1285 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1286 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1287
1288 /* Fetch the pgm pool shadow descriptor. */
1289 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1290# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1291 if (!pShwPde)
1292 {
1293 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1294 return VINF_SUCCESS;
1295 }
1296# else
1297 Assert(pShwPde);
1298# endif
1299
1300# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1301 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1302 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1303
1304 /* If the shadow PDPE isn't present, then skip the invalidate. */
1305# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1306 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1307# else
1308 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1309# endif
1310 {
1311 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1312 PGM_INVL_PG(pVCpu, GCPtrPage);
1313 return VINF_SUCCESS;
1314 }
1315
1316 /* Fetch the pgm pool shadow descriptor. */
1317 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1318 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1319
1320 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1321 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1322 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1323
1324# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1325 /* PML4 */
1326 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1327 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1328 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1329 PX86PDPAE pPDDst;
1330 PX86PDPT pPdptDst;
1331 PX86PML4E pPml4eDst;
1332 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1333 if (rc != VINF_SUCCESS)
1334 {
1335 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1336 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1337 PGM_INVL_PG(pVCpu, GCPtrPage);
1338 return VINF_SUCCESS;
1339 }
1340 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1341 Assert(pPDDst);
1342 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1343
1344 /* Fetch the pgm pool shadow descriptor. */
1345 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1346 Assert(pShwPde);
1347
1348# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1349
1350 const SHWPDE PdeDst = *pPdeDst;
1351 if (!(PdeDst.u & X86_PDE_P))
1352 {
1353 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1354 PGM_INVL_PG(pVCpu, GCPtrPage);
1355 return VINF_SUCCESS;
1356 }
1357
1358 /*
1359 * Get the guest PD entry and calc big page.
1360 */
1361# if PGM_GST_TYPE == PGM_TYPE_32BIT
1362 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1363 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1364 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1365# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1366 unsigned iPDSrc = 0;
1367# if PGM_GST_TYPE == PGM_TYPE_PAE
1368 X86PDPE PdpeSrcIgn;
1369 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1370# else /* AMD64 */
1371 PX86PML4E pPml4eSrcIgn;
1372 X86PDPE PdpeSrcIgn;
1373 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1374# endif
1375 GSTPDE PdeSrc;
1376
1377 if (pPDSrc)
1378 PdeSrc = pPDSrc->a[iPDSrc];
1379 else
1380 PdeSrc.u = 0;
1381# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1382 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1383 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
1384 if (fWasBigPage != fIsBigPage)
1385 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1386
1387# ifdef IN_RING3
1388 /*
1389 * If a CR3 Sync is pending we may ignore the invalidate page operation
1390 * depending on the kind of sync and if it's a global page or not.
1391 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1392 */
1393# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1394 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1395 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1396 && fIsBigPage
1397 && (PdeSrc.u & X86_PDE4M_G)
1398 )
1399 )
1400# else
1401 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1402# endif
1403 {
1404 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1405 return VINF_SUCCESS;
1406 }
1407# endif /* IN_RING3 */
1408
1409 /*
1410 * Deal with the Guest PDE.
1411 */
1412 rc = VINF_SUCCESS;
1413 if (PdeSrc.u & X86_PDE_P)
1414 {
1415 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1416 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1417 if (!fIsBigPage)
1418 {
1419 /*
1420 * 4KB - page.
1421 */
1422 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1423 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1424
1425# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1426 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1427 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1428# endif
1429 if (pShwPage->GCPhys == GCPhys)
1430 {
1431 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1432 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1433
1434 PGSTPT pPTSrc;
1435 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1436 if (RT_SUCCESS(rc))
1437 {
1438 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1439 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1440 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1441 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1442 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1443 GCPtrPage, PteSrc.u & X86_PTE_P,
1444                     !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
1445                     !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
1446 (uint64_t)PteSrc.u,
1447 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1448 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1449 }
1450 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1451 PGM_INVL_PG(pVCpu, GCPtrPage);
1452 }
1453 else
1454 {
1455 /*
1456 * The page table address changed.
1457 */
1458 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1459 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1460 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1461 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1462 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1463 PGM_INVL_VCPU_TLBS(pVCpu);
1464 }
1465 }
1466 else
1467 {
1468 /*
1469 * 2/4MB - page.
1470 */
1471 /* Before freeing the page, check if anything really changed. */
1472 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1473 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1474# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1475 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1476 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1477# endif
1478 if ( pShwPage->GCPhys == GCPhys
1479 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1480 {
1481                /* ASSUMES the given bits are identical for 4M and normal PDEs */
1482 /** @todo This test is wrong as it cannot check the G bit!
1483 * FIXME */
1484 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1485 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1486 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1487 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1488 {
1489 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1490 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1491 return VINF_SUCCESS;
1492 }
1493 }
1494
1495 /*
1496 * Ok, the page table is present and it's been changed in the guest.
1497 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1498             * We could do this for some flushes in GC too, but we need an algorithm for
1499             * deciding which 4MB pages contain code that is likely to be executed very soon.
1500 */
1501 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1502 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1503 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1504 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1505 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1506 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1507 }
1508 }
1509 else
1510 {
1511 /*
1512 * Page directory is not present, mark shadow PDE not present.
1513 */
1514 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1515 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1516 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1517 PGM_INVL_PG(pVCpu, GCPtrPage);
1518 }
1519 return rc;
1520
1521#else /* guest real and protected mode, nested + ept, none. */
1522 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1523 NOREF(pVCpu); NOREF(GCPtrPage);
1524 return VINF_SUCCESS;
1525#endif
1526}
1527
1528#if PGM_SHW_TYPE != PGM_TYPE_NONE
1529
1530/**
1531 * Update the tracking of shadowed pages.
1532 *
1533 * @param pVCpu The cross context virtual CPU structure.
1534 * @param pShwPage The shadow page.
1535 * @param HCPhys The physical page that is being dereferenced.
1536 * @param iPte Shadow PTE index
1537 * @param GCPhysPage Guest physical address (only valid if pShwPage->fDirty is set)
1538 */
1539DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1540 RTGCPHYS GCPhysPage)
1541{
1542 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1543
1544# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1545 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1546 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1547
1548 /* Use the hint we retrieved from the cached guest PT. */
1549 if (pShwPage->fDirty)
1550 {
1551 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1552
1553 Assert(pShwPage->cPresent);
1554 Assert(pPool->cPresent);
1555 pShwPage->cPresent--;
1556 pPool->cPresent--;
1557
1558 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1559 AssertRelease(pPhysPage);
1560 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1561 return;
1562 }
1563# else
1564 NOREF(GCPhysPage);
1565# endif
1566
1567 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1568 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1569
1570    /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1571 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1572 * 2. write protect all shadowed pages. I.e. implement caching.
1573 */
1574 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1575
1576 /*
1577 * Find the guest address.
1578 */
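    /* Fallback: a plain linear scan over every RAM range and every page in it,
       comparing host physical addresses until HCPhys turns up.  Correct, but
       O(number of guest pages), which is why the fDirty hint path above is
       preferred whenever the cached guest PT is available. */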
1579 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1580 pRam;
1581 pRam = pRam->CTX_SUFF(pNext))
1582 {
1583 unsigned iPage = pRam->cb >> GUEST_PAGE_SHIFT;
1584 while (iPage-- > 0)
1585 {
1586 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1587 {
1588 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1589
1590 Assert(pShwPage->cPresent);
1591 Assert(pPool->cPresent);
1592 pShwPage->cPresent--;
1593 pPool->cPresent--;
1594
1595 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1596 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1597 return;
1598 }
1599 }
1600 }
1601
1602 for (;;)
1603 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1604}
1605
1606
1607/**
1608 * Update the tracking of shadowed pages.
1609 *
1610 * @param pVCpu The cross context virtual CPU structure.
1611 * @param pShwPage The shadow page.
1612 * @param u16 The top 16 bits of the pPage->HCPhys.
1613 * @param pPage Pointer to the guest page. This will be modified.
1614 * @param iPTDst The index into the shadow table.
1615 */
1616DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1617 PPGMPAGE pPage, const unsigned iPTDst)
1618{
1619 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1620
1621 /*
1622 * Just deal with the simple first time here.
1623 */
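    /* First reference: PGMPOOL_TD_MAKE(1, pShwPage->idx) packs a reference count
       of one together with the shadow pool page index into the 16-bit tracking
       field; any further references for this guest page go through
       pgmPoolTrackPhysExtAddref in the else branch below. */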
1624 if (!u16)
1625 {
1626 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1627 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1628 /* Save the page table index. */
1629 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1630 }
1631 else
1632 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1633
1634 /* write back */
1635 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1636 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1637
1638 /* update statistics. */
1639 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1640 pShwPage->cPresent++;
1641 if (pShwPage->iFirstPresent > iPTDst)
1642 pShwPage->iFirstPresent = iPTDst;
1643}
1644
1645
1646/**
1647 * Modifies a shadow PTE to account for access handlers.
1648 *
1649 * @param pVM The cross context VM structure.
1650 * @param pPage The page in question.
1651 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1652 * A (accessed) bit so it can be emulated correctly.
1653 * @param pPteDst The shadow PTE (output). This is temporary storage and
1654 * does not need to be set atomically.
1655 */
1656DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1657{
1658 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1659
1660 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1661 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
1662 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1663 {
1664 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1665# if PGM_SHW_TYPE == PGM_TYPE_EPT
1666 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1667# else
1668 if (fPteSrc & X86_PTE_A)
1669 {
1670 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1671 SHW_PTE_SET_RO(*pPteDst);
1672 }
1673 else
1674 SHW_PTE_SET(*pPteDst, 0);
1675# endif
1676 }
1677# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1678# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1679 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1680 && ( BTH_IS_NP_ACTIVE(pVM)
1681 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1682# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1683 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1684# endif
1685 )
1686 {
1687 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1688# if PGM_SHW_TYPE == PGM_TYPE_EPT
1689 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1690 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1691 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1692 | EPT_E_WRITE
1693 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1694 | EPT_E_MEMTYPE_INVALID_3;
1695# else
1696 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1697 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1698# endif
1699 }
1700# endif
1701# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1702 else
1703 {
1704 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1705 SHW_PTE_SET(*pPteDst, 0);
1706 }
1707 /** @todo count these kinds of entries. */
1708}
1709
1710
1711/**
1712 * Creates a 4K shadow page for a guest page.
1713 *
1714 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1715 * physical address. Of the PdeSrc argument only the flags are used. No page
1716 * structures will be mapped in this function.
1717 *
1718 * @param pVCpu The cross context virtual CPU structure.
1719 * @param pPteDst Destination page table entry.
1720 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1721 * Can safely assume that only the flags are being used.
1722 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1723 * @param pShwPage Pointer to the shadow page.
1724 * @param iPTDst The index into the shadow table.
1725 *
1726 * @remark Not used for 2/4MB pages!
1727 */
1728# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1729static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1730 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1731# else
1732static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1733 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1734# endif
1735{
1736 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1737 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1738
1739# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1740 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1741 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1742
1743 if (pShwPage->fDirty)
1744 {
1745 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1746 PGSTPT pGstPT;
1747
1748 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1749 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1750 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1751 pGstPT->a[iPTDst].u = PteSrc.u;
1752 }
1753# else
1754 Assert(!pShwPage->fDirty);
1755# endif
1756
1757# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1758 if ( (PteSrc.u & X86_PTE_P)
1759 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1760# endif
1761 {
1762# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1763 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1764# endif
1765 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1766
1767 /*
1768 * Find the ram range.
1769 */
1770 PPGMPAGE pPage;
1771 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1772 if (RT_SUCCESS(rc))
1773 {
1774 /* Ignore ballooned pages.
1775 Don't return errors or use a fatal assert here as part of a
1776                shadow sync range might include ballooned pages. */
1777 if (PGM_PAGE_IS_BALLOONED(pPage))
1778 {
1779 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1780 return;
1781 }
1782
1783# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1784 /* Make the page writable if necessary. */
1785 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1786 && ( PGM_PAGE_IS_ZERO(pPage)
1787# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1788 || ( (PteSrc.u & X86_PTE_RW)
1789# else
1790 || ( 1
1791# endif
1792 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1793# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1794 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1795# endif
1796# ifdef VBOX_WITH_PAGE_SHARING
1797 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1798# endif
1799 )
1800 )
1801 )
1802 {
1803 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1804 AssertRC(rc);
1805 }
1806# endif
1807
1808 /*
1809 * Make page table entry.
1810 */
1811 SHWPTE PteDst;
1812# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1813 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1814# else
1815 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1816# endif
1817 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1818 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1819 else
1820 {
1821# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1822 /*
1823 * If the page or page directory entry is not marked accessed,
1824 * we mark the page not present.
1825 */
1826 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1827 {
1828                    LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1829 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1830 SHW_PTE_SET(PteDst, 0);
1831 }
1832 /*
1833 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1834 * when the page is modified.
1835 */
1836 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1837 {
1838 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1839 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1840 SHW_PTE_SET(PteDst,
1841 fGstShwPteFlags
1842 | PGM_PAGE_GET_HCPHYS(pPage)
1843 | PGM_PTFLAGS_TRACK_DIRTY);
1844 SHW_PTE_SET_RO(PteDst);
1845 }
1846 else
1847# endif
1848 {
1849 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1850# if PGM_SHW_TYPE == PGM_TYPE_EPT
1851 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1852 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1853# else
1854 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1855# endif
1856 }
1857
1858 /*
1859 * Make sure only allocated pages are mapped writable.
1860 */
1861 if ( SHW_PTE_IS_P_RW(PteDst)
1862 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1863 {
1864 /* Still applies to shared pages. */
1865 Assert(!PGM_PAGE_IS_ZERO(pPage));
1866                    SHW_PTE_SET_RO(PteDst);   /** @todo this isn't quite working yet. Why isn't it? */
1867                    Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage] at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1868 }
1869 }
1870
1871 /*
1872 * Keep user track up to date.
1873 */
1874 if (SHW_PTE_IS_P(PteDst))
1875 {
1876 if (!SHW_PTE_IS_P(*pPteDst))
1877 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1878 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1879 {
1880 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1881 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1882 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1883 }
1884 }
1885 else if (SHW_PTE_IS_P(*pPteDst))
1886 {
1887 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1888 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1889 }
1890
1891 /*
1892 * Update statistics and commit the entry.
1893 */
1894# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1895 if (!(PteSrc.u & X86_PTE_G))
1896 pShwPage->fSeenNonGlobal = true;
1897# endif
1898 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1899 return;
1900 }
1901
1902/** @todo count these three different kinds. */
1903 Log2(("SyncPageWorker: invalid address in Pte\n"));
1904 }
1905# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1906 else if (!(PteSrc.u & X86_PTE_P))
1907 Log2(("SyncPageWorker: page not present in Pte\n"));
1908 else
1909 Log2(("SyncPageWorker: invalid Pte\n"));
1910# endif
1911
1912 /*
1913 * The page is not present or the PTE is bad. Replace the shadow PTE by
1914 * an empty entry, making sure to keep the user tracking up to date.
1915 */
1916 if (SHW_PTE_IS_P(*pPteDst))
1917 {
1918 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1919 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1920 }
1921 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1922}
1923
1924
1925/**
1926 * Syncs a guest OS page.
1927 *
1928 * There are no conflicts at this point, neither is there any need for
1929 * page table allocations.
1930 *
1931 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1932 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1933 *
1934 * @returns VBox status code.
1935 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1936 * @param pVCpu The cross context virtual CPU structure.
1937 * @param PdeSrc Page directory entry of the guest.
1938 * @param GCPtrPage Guest context page address.
1939 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1940 * @param uErr Fault error (X86_TRAP_PF_*).
1941 */
1942static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1943{
1944 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1945 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1946 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1947 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1948
1949 PGM_LOCK_ASSERT_OWNER(pVM);
1950
1951# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1952 || PGM_GST_TYPE == PGM_TYPE_PAE \
1953 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1954 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1955
1956 /*
1957 * Assert preconditions.
1958 */
1959 Assert(PdeSrc.u & X86_PDE_P);
1960 Assert(cPages);
1961# if 0 /* rarely useful; leave for debugging. */
1962 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1963# endif
1964
1965 /*
1966 * Get the shadow PDE, find the shadow page table in the pool.
1967 */
1968# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1969 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1970 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1971
1972 /* Fetch the pgm pool shadow descriptor. */
1973 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1974 Assert(pShwPde);
1975
1976# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1977 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1978 PPGMPOOLPAGE pShwPde = NULL;
1979 PX86PDPAE pPDDst;
1980
1981 /* Fetch the pgm pool shadow descriptor. */
1982 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1983 AssertRCSuccessReturn(rc2, rc2);
1984 Assert(pShwPde);
1985
1986 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1987 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1988
1989# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
1990 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1991 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1992 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
1993 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
1994
1995 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
1996 AssertRCSuccessReturn(rc2, rc2);
1997 Assert(pPDDst && pPdptDst);
1998 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1999# endif
2000 SHWPDE PdeDst = *pPdeDst;
2001
2002 /*
2003 * - In the guest SMP case we could have blocked while another VCPU reused
2004 * this page table.
2005 * - With W7-64 we may also take this path when the A bit is cleared on
2006 * higher level tables (PDPE/PML4E). The guest does not invalidate the
2007 * relevant TLB entries. If we're write monitoring any page mapped by
2008 * the modified entry, we may end up here with a "stale" TLB entry.
2009 */
2010 if (!(PdeDst.u & X86_PDE_P))
2011 {
2012 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2013 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
2014 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2015 if (uErr & X86_TRAP_PF_P)
2016 PGM_INVL_PG(pVCpu, GCPtrPage);
2017 return VINF_SUCCESS; /* force the instruction to be executed again. */
2018 }
2019
2020 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2021 Assert(pShwPage);
2022
2023# if PGM_GST_TYPE == PGM_TYPE_AMD64
2024 /* Fetch the pgm pool shadow descriptor. */
2025 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2026 Assert(pShwPde);
2027# endif
2028
2029 /*
2030 * Check that the page is present and that the shadow PDE isn't out of sync.
2031 */
2032 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
2033 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
2034 RTGCPHYS GCPhys;
2035 if (!fBigPage)
2036 {
2037 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2038# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2039 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2040 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2041# endif
2042 }
2043 else
2044 {
2045 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2046# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2047 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2048 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2049# endif
2050 }
2051 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
2052 if ( fPdeValid
2053 && pShwPage->GCPhys == GCPhys
2054 && (PdeSrc.u & X86_PDE_P)
2055 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
2056 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
2057# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2058 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
2059# endif
2060 )
2061 {
2062 /*
2063 * Check that the PDE is marked accessed already.
2064 * Since we set the accessed bit *before* getting here on a #PF, this
2065 * check is only meant for dealing with non-#PF'ing paths.
2066 */
2067 if (PdeSrc.u & X86_PDE_A)
2068 {
2069 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2070 if (!fBigPage)
2071 {
2072 /*
2073 * 4KB Page - Map the guest page table.
2074 */
2075 PGSTPT pPTSrc;
2076 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2077 if (RT_SUCCESS(rc))
2078 {
2079# ifdef PGM_SYNC_N_PAGES
2080 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2081 if ( cPages > 1
2082 && !(uErr & X86_TRAP_PF_P)
2083 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2084 {
2085 /*
2086 * This code path is currently only taken when the caller is PGMTrap0eHandler
2087 * for non-present pages!
2088 *
2089 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2090 * deal with locality.
2091 */
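                    /* I.e. a window of roughly [iPTDst - PGM_SYNC_NR_PAGES / 2,
                       iPTDst + PGM_SYNC_NR_PAGES / 2), clamped to the bounds of
                       the shadow page table by the adjustments below. */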
2092 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2093# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2094 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2095 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2096# else
2097 const unsigned offPTSrc = 0;
2098# endif
2099 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2100 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2101 iPTDst = 0;
2102 else
2103 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2104
2105 for (; iPTDst < iPTDstEnd; iPTDst++)
2106 {
2107 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
2108
2109 if ( (pPteSrc->u & X86_PTE_P)
2110 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2111 {
2112 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT))
2113 | ((offPTSrc + iPTDst) << GUEST_PAGE_SHIFT);
2114 NOREF(GCPtrCurPage);
2115 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
2116 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2117 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
2118 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
2119 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
2120 (uint64_t)pPteSrc->u,
2121 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2122 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2123 }
2124 }
2125 }
2126 else
2127# endif /* PGM_SYNC_N_PAGES */
2128 {
2129 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2130 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2131 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2132 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2133 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2134 GCPtrPage, PteSrc.u & X86_PTE_P,
2135 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2136 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2137 (uint64_t)PteSrc.u,
2138 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2139 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2140 }
2141 }
2142 else /* MMIO or invalid page: emulated in #PF handler. */
2143 {
2144 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
2145 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
2146 }
2147 }
2148 else
2149 {
2150 /*
2151 * 4/2MB page - lazy syncing shadow 4K pages.
2152 * (There are many causes of getting here, it's no longer only CSAM.)
2153 */
2154 /* Calculate the GC physical address of this 4KB shadow page. */
2155 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
2156 /* Find ram range. */
2157 PPGMPAGE pPage;
2158 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
2159 if (RT_SUCCESS(rc))
2160 {
2161 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2162
2163# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2164 /* Try to make the page writable if necessary. */
2165 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2166 && ( PGM_PAGE_IS_ZERO(pPage)
2167 || ( (PdeSrc.u & X86_PDE_RW)
2168 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2169# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2170 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2171# endif
2172# ifdef VBOX_WITH_PAGE_SHARING
2173 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2174# endif
2175 )
2176 )
2177 )
2178 {
2179 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2180 AssertRC(rc);
2181 }
2182# endif
2183
2184 /*
2185 * Make shadow PTE entry.
2186 */
2187 SHWPTE PteDst;
2188 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2189 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2190 else
2191 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2192
2193 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2194 if ( SHW_PTE_IS_P(PteDst)
2195 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2196 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2197
2198 /* Make sure only allocated pages are mapped writable. */
2199 if ( SHW_PTE_IS_P_RW(PteDst)
2200 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2201 {
2202 /* Still applies to shared pages. */
2203 Assert(!PGM_PAGE_IS_ZERO(pPage));
2204 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2205 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2206 }
2207
2208 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2209
2210 /*
2211 * If the page is not flagged as dirty and is writable, then make it read-only
2212 * at PD level, so we can set the dirty bit when the page is modified.
2213 *
2214 * ASSUMES that page access handlers are implemented on page table entry level.
2215 * Thus we will first catch the dirty access and set PDE.D and restart. If
2216 * there is an access handler, we'll trap again and let it work on the problem.
2217 */
2218 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2219 * As for invlpg, it simply frees the whole shadow PT.
2220 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
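                /* Sketch of the mechanism: clearing X86_PDE_RW while setting
                   PGM_PDFLAGS_TRACK_DIRTY makes the next guest write to this
                   range fault; CheckDirtyPageFault then drops the tracking flag,
                   restores RW (and A) on the shadow PDE and restarts the
                   instruction. */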
2221 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
2222 {
2223 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2224 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2225 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2226 }
2227 else
2228 {
2229 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
2230 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
2231 }
2232 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2233 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2234 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
2235 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2236 }
2237 else
2238 {
2239 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2240 /** @todo must wipe the shadow page table entry in this
2241 * case. */
2242 }
2243 }
2244 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2245 return VINF_SUCCESS;
2246 }
2247
2248 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
2249 }
2250 else if (fPdeValid)
2251 {
2252 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2253 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2254 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2255 }
2256 else
2257 {
2258/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2259 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2260 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2261 }
2262
2263 /*
2264 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2265 * Yea, I'm lazy.
2266 */
2267 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2268 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2269
2270 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2271 PGM_INVL_VCPU_TLBS(pVCpu);
2272 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2273
2274
2275# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2276 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2277 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2278 NOREF(PdeSrc);
2279
2280# ifdef PGM_SYNC_N_PAGES
2281 /*
2282 * Get the shadow PDE, find the shadow page table in the pool.
2283 */
2284# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2285 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2286
2287# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2288 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2289
2290# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2291 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2292 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2293 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2294 X86PDEPAE PdeDst;
2295 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2296
2297 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2298 AssertRCSuccessReturn(rc, rc);
2299 Assert(pPDDst && pPdptDst);
2300 PdeDst = pPDDst->a[iPDDst];
2301
2302# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2303 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2304 PEPTPD pPDDst;
2305 EPTPDE PdeDst;
2306
2307 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2308 if (rc != VINF_SUCCESS)
2309 {
2310 AssertRC(rc);
2311 return rc;
2312 }
2313 Assert(pPDDst);
2314 PdeDst = pPDDst->a[iPDDst];
2315# endif
2316 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2317 if (!SHW_PDE_IS_P(PdeDst))
2318 {
2319 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2320 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2321 return VINF_SUCCESS; /* force the instruction to be executed again. */
2322 }
2323
2324 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2325 if (SHW_PDE_IS_BIG(PdeDst))
2326 {
2327 Assert(pVM->pgm.s.fNestedPaging);
2328 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2329 return VINF_SUCCESS;
2330 }
2331
2332 /* Mask away the page offset. */
2333 GCPtrPage &= ~((RTGCPTR)0xfff);
2334
2335 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2336 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2337
2338 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2339 if ( cPages > 1
2340 && !(uErr & X86_TRAP_PF_P)
2341 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2342 {
2343 /*
2344 * This code path is currently only taken when the caller is PGMTrap0eHandler
2345 * for non-present pages!
2346 *
2347 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2348 * deal with locality.
2349 */
2350 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2351 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2352 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2353 iPTDst = 0;
2354 else
2355 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2356 for (; iPTDst < iPTDstEnd; iPTDst++)
2357 {
2358 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2359 {
2360 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2361 | (iPTDst << GUEST_PAGE_SHIFT));
2362
2363 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2364 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2365 GCPtrCurPage,
2366 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2367 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2368
2369 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2370 break;
2371 }
2372 else
2373 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n",
2374 (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << GUEST_PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2375 }
2376 }
2377 else
2378# endif /* PGM_SYNC_N_PAGES */
2379 {
2380 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2381 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2382 | (iPTDst << GUEST_PAGE_SHIFT));
2383
2384 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2385
2386        Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2387 GCPtrPage,
2388 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2389 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2390 }
2391 return VINF_SUCCESS;
2392
2393# else
2394 NOREF(PdeSrc);
2395 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2396 return VERR_PGM_NOT_USED_IN_MODE;
2397# endif
2398}
2399
2400#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2401#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2402
2403/**
2404 * Handle dirty bit tracking faults.
2405 *
2406 * @returns VBox status code.
2407 * @param pVCpu The cross context virtual CPU structure.
2408 * @param uErr Page fault error code.
2409 * @param pPdeSrc Guest page directory entry.
2410 * @param pPdeDst Shadow page directory entry.
2411 * @param GCPtrPage Guest context page address.
2412 */
2413static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2414 RTGCPTR GCPtrPage)
2415{
2416 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2417 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2418 NOREF(uErr);
2419
2420 PGM_LOCK_ASSERT_OWNER(pVM);
2421
2422 /*
2423 * Handle big page.
2424 */
2425 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2426 {
2427 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2428 {
2429 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2430 Assert(pPdeSrc->u & X86_PDE_RW);
2431
2432 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2433 * fault again and take this path to only invalidate the entry (see below). */
2434 SHWPDE PdeDst = *pPdeDst;
2435 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2436 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2437 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2438 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2439 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2440 }
2441
2442# ifdef IN_RING0
2443 /* Check for stale TLB entry; only applies to the SMP guest case. */
2444 if ( pVM->cCpus > 1
2445 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2446 {
2447 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2448 if (pShwPage)
2449 {
2450 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2451 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2452 if (SHW_PTE_IS_P_RW(*pPteDst))
2453 {
2454 /* Stale TLB entry. */
2455 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2456 PGM_INVL_PG(pVCpu, GCPtrPage);
2457 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2458 }
2459 }
2460 }
2461# endif /* IN_RING0 */
2462 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2463 }
2464
2465 /*
2466 * Map the guest page table.
2467 */
2468 PGSTPT pPTSrc;
2469 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2470 AssertRCReturn(rc, rc);
2471
2472 if (SHW_PDE_IS_P(*pPdeDst))
2473 {
2474 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2475 const GSTPTE PteSrc = *pPteSrc;
2476
2477 /*
2478 * Map shadow page table.
2479 */
2480 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2481 if (pShwPage)
2482 {
2483 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2484 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2485 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2486 {
2487 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2488 {
2489 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2490 SHWPTE PteDst = *pPteDst;
2491
2492 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2493 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2494
2495 Assert(PteSrc.u & X86_PTE_RW);
2496
2497 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2498 * entry will not harm; write access will simply fault again and
2499 * take this path to only invalidate the entry.
2500 */
2501 if (RT_LIKELY(pPage))
2502 {
2503 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2504 {
2505 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2506 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2507 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2508 SHW_PTE_SET_RO(PteDst);
2509 }
2510 else
2511 {
2512 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2513 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2514 {
2515 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2516 AssertRC(rc);
2517 }
2518 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2519 SHW_PTE_SET_RW(PteDst);
2520 else
2521 {
2522 /* Still applies to shared pages. */
2523 Assert(!PGM_PAGE_IS_ZERO(pPage));
2524 SHW_PTE_SET_RO(PteDst);
2525 }
2526 }
2527 }
2528 else
2529 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2530
2531 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2532 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2533 PGM_INVL_PG(pVCpu, GCPtrPage);
2534 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2535 }
2536
2537# ifdef IN_RING0
2538 /* Check for stale TLB entry; only applies to the SMP guest case. */
2539 if ( pVM->cCpus > 1
2540 && SHW_PTE_IS_RW(*pPteDst)
2541 && SHW_PTE_IS_A(*pPteDst))
2542 {
2543 /* Stale TLB entry. */
2544 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2545 PGM_INVL_PG(pVCpu, GCPtrPage);
2546 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2547 }
2548# endif
2549 }
2550 }
2551 else
2552 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2553 }
2554
2555 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2556}
2557
2558#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2559
2560/**
2561 * Sync a shadow page table.
2562 *
2563 * The shadow page table is not present in the shadow PDE.
2564 *
2565 * Handles mapping conflicts.
2566 *
2567 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2568 * conflict), and Trap0eHandler.
2569 *
2570 * A precondition for this method is that the shadow PDE is not present. The
2571 * caller must take the PGM lock before checking this and continue to hold it
2572 * when calling this method.
2573 *
2574 * @returns VBox status code.
2575 * @param pVCpu The cross context virtual CPU structure.
2576 * @param iPDSrc Page directory index.
2577 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2578 * Assume this is a temporary mapping.
2579 * @param GCPtrPage GC Pointer of the page that caused the fault
2580 */
2581static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2582{
2583 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2584 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2585
2586#if 0 /* rarely useful; leave for debugging. */
2587 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2588#endif
2589 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2590
2591 PGM_LOCK_ASSERT_OWNER(pVM);
2592
2593#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2594 || PGM_GST_TYPE == PGM_TYPE_PAE \
2595 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2596 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2597 && PGM_SHW_TYPE != PGM_TYPE_NONE
2598 int rc = VINF_SUCCESS;
2599
2600 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2601
2602 /*
2603 * Some input validation first.
2604 */
2605 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2606
2607 /*
2608 * Get the relevant shadow PDE entry.
2609 */
2610# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2611 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2612 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2613
2614 /* Fetch the pgm pool shadow descriptor. */
2615 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2616 Assert(pShwPde);
2617
2618# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2619 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2620 PPGMPOOLPAGE pShwPde = NULL;
2621 PX86PDPAE pPDDst;
2622 PSHWPDE pPdeDst;
2623
2624 /* Fetch the pgm pool shadow descriptor. */
2625 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2626 AssertRCSuccessReturn(rc, rc);
2627 Assert(pShwPde);
2628
2629 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2630 pPdeDst = &pPDDst->a[iPDDst];
2631
2632# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2633 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2634 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2635 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2636 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2637 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2638 AssertRCSuccessReturn(rc, rc);
2639 Assert(pPDDst);
2640 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2641
2642# endif
2643 SHWPDE PdeDst = *pPdeDst;
2644
2645# if PGM_GST_TYPE == PGM_TYPE_AMD64
2646 /* Fetch the pgm pool shadow descriptor. */
2647 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2648 Assert(pShwPde);
2649# endif
2650
2651 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P.*/
2652
2653 /*
2654 * Sync the page directory entry.
2655 */
2656 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2657 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
2658 if ( (PdeSrc.u & X86_PDE_P)
2659 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2660 {
2661 /*
2662 * Allocate & map the page table.
2663 */
2664 PSHWPT pPTDst;
2665 PPGMPOOLPAGE pShwPage;
2666 RTGCPHYS GCPhys;
2667 if (fPageTable)
2668 {
2669 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2670# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2671 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2672 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2673# endif
2674 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2675 pShwPde->idx, iPDDst, false /*fLockPage*/,
2676 &pShwPage);
2677 }
2678 else
2679 {
2680 PGMPOOLACCESS enmAccess;
2681# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2682 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2683# else
2684 const bool fNoExecute = false;
2685# endif
2686
2687 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2688# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2689 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2690 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2691# endif
2692 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2693 if (PdeSrc.u & X86_PDE_US)
2694 {
2695 if (PdeSrc.u & X86_PDE_RW)
2696 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2697 else
2698 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2699 }
2700 else
2701 {
2702 if (PdeSrc.u & X86_PDE_RW)
2703 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2704 else
2705 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2706 }
2707 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2708 pShwPde->idx, iPDDst, false /*fLockPage*/,
2709 &pShwPage);
2710 }
2711 if (rc == VINF_SUCCESS)
2712 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2713 else if (rc == VINF_PGM_CACHED_PAGE)
2714 {
2715 /*
2716 * The PT was cached, just hook it up.
2717 */
2718 if (fPageTable)
2719 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2720 else
2721 {
2722 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2723 /* (see explanation and assumptions further down.) */
2724 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2725 {
2726 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2727 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2728 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2729 }
2730 }
2731 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2732 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2733 return VINF_SUCCESS;
2734 }
2735 else
2736 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2737 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2738 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2739 * irrelevant at this point. */
2740 PdeDst.u &= X86_PDE_AVL_MASK;
2741 PdeDst.u |= pShwPage->Core.Key;
2742
2743 /*
2744 * Page directory has been accessed (this is a fault situation, remember).
2745 */
2746 /** @todo
2747     * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2748 * fault situation. What's more, the Trap0eHandler has already set the
2749 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2750 * might need setting the accessed flag.
2751 *
2752 * The best idea is to leave this change to the caller and add an
2753 * assertion that it's set already. */
2754 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2755 if (fPageTable)
2756 {
2757 /*
2758 * Page table - 4KB.
2759 *
2760 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2761 */
2762 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2763 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2764 PGSTPT pPTSrc;
2765 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2766 if (RT_SUCCESS(rc))
2767 {
2768 /*
2769 * Start by syncing the page directory entry so CSAM's TLB trick works.
2770 */
2771 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2772 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2773 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2774 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2775
2776 /*
2777 * Directory/page user or supervisor privilege: (same goes for read/write)
2778 *
2779 * Directory Page Combined
2780 * U/S U/S U/S
2781 * 0 0 0
2782 * 0 1 0
2783 * 1 0 0
2784 * 1 1 1
2785 *
2786 * Simple AND operation. Table listed for completeness.
2787 *
2788 */
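            /* I.e. the combined privilege is just the bitwise AND of the PDE and
               PTE bits, e.g. (PdeSrc.u & PteSrc.u & X86_PTE_US) and
               (PdeSrc.u & PteSrc.u & X86_PTE_RW), which is what the Log2
               statements below print (as booleans). */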
2789 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2790# ifdef PGM_SYNC_N_PAGES
2791 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2792 unsigned iPTDst = iPTBase;
2793 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2794 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2795 iPTDst = 0;
2796 else
2797 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2798# else /* !PGM_SYNC_N_PAGES */
2799 unsigned iPTDst = 0;
2800 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2801# endif /* !PGM_SYNC_N_PAGES */
2802 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2803 | ((RTGCPTR)iPTDst << GUEST_PAGE_SHIFT);
2804# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2805 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2806 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2807# else
2808 const unsigned offPTSrc = 0;
2809# endif
2810 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += GUEST_PAGE_SIZE)
2811 {
2812 const unsigned iPTSrc = iPTDst + offPTSrc;
2813 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2814 if (PteSrc.u & X86_PTE_P)
2815 {
2816 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2817 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2818 GCPtrCur,
2819 PteSrc.u & X86_PTE_P,
2820 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2821 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2822 (uint64_t)PteSrc.u,
2823 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2824 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2825 }
2826 /* else: the page table was cleared by the pool */
2827 } /* for PTEs */
2828 }
2829 }
2830 else
2831 {
2832 /*
2833 * Big page - 2/4MB.
2834 *
2835 * We'll walk the ram range list in parallel and optimize lookups.
2836 * We will only sync one shadow page table at a time.
2837 */
2838 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2839
2840 /**
2841 * @todo It might be more efficient to sync only a part of the 4MB
2842 * page (similar to what we do for 4KB PDs).
2843 */
2844
2845 /*
2846 * Start by syncing the page directory entry.
2847 */
2848 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2849 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2850
2851 /*
2852 * If the page is not flagged as dirty and is writable, then make it read-only
2853 * at PD level, so we can set the dirty bit when the page is modified.
2854 *
2855 * ASSUMES that page access handlers are implemented on page table entry level.
2856 * Thus we will first catch the dirty access and set PDE.D and restart. If
2857 * there is an access handler, we'll trap again and let it work on the problem.
2858 */
2859 /** @todo move the above stuff to a section in the PGM documentation. */
2860 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2861 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2862 {
2863 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2864 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2865 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2866 }
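            /* The first guest write to the now read-only big page will fault; the dirty
               tracking code (see CheckDirtyPageFault) then sets PDE.D for the guest,
               clears PGM_PDFLAGS_TRACK_DIRTY and restores write access. */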
2867 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2868 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2869
2870 /*
2871 * Fill the shadow page table.
2872 */
2873 /* Get address and flags from the source PDE. */
2874 SHWPTE PteDstBase;
2875 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2876
2877 /* Loop thru the entries in the shadow PT. */
2878 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2879 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2880 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2881 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2882 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2883 unsigned iPTDst = 0;
2884 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2885 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2886 {
2887 if (pRam && GCPhys >= pRam->GCPhys)
2888 {
2889# ifndef PGM_WITH_A20
2890 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT;
2891# endif
2892 do
2893 {
2894 /* Make shadow PTE. */
2895# ifdef PGM_WITH_A20
2896 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
2897# else
2898 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2899# endif
2900 SHWPTE PteDst;
2901
2902# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2903 /* Try to make the page writable if necessary. */
2904 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2905 && ( PGM_PAGE_IS_ZERO(pPage)
2906 || ( SHW_PTE_IS_RW(PteDstBase)
2907 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2908# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2909 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2910# endif
2911# ifdef VBOX_WITH_PAGE_SHARING
2912 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2913# endif
2914 && !PGM_PAGE_IS_BALLOONED(pPage))
2915 )
2916 )
2917 {
2918 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2919 AssertRCReturn(rc, rc);
2920 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2921 break;
2922 }
2923# endif
2924
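                    /* Classify the page: pages with active access handlers get their
                       protection derived from the handler state (SyncHandlerPte),
                       ballooned pages stay not-present until touched, and ordinary RAM
                       maps straight to its host physical page. */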
2925 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2926 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2927 else if (PGM_PAGE_IS_BALLOONED(pPage))
2928 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2929 else
2930 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2931
2932 /* Only map writable pages writable. */
2933 if ( SHW_PTE_IS_P_RW(PteDst)
2934 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2935 {
2936 /* Still applies to shared pages. */
2937 Assert(!PGM_PAGE_IS_ZERO(pPage));
2938 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2939 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2940 }
2941
2942 if (SHW_PTE_IS_P(PteDst))
2943 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2944
2945 /* commit it (not atomic, new table) */
2946 pPTDst->a[iPTDst] = PteDst;
2947 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2948 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2949 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2950
2951 /* advance */
2952 GCPhys += GUEST_PAGE_SIZE;
2953 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2954# ifndef PGM_WITH_A20
2955 iHCPage++;
2956# endif
2957 iPTDst++;
2958 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2959 && GCPhys <= pRam->GCPhysLast);
2960
2961 /* Advance ram range list. */
2962 while (pRam && GCPhys > pRam->GCPhysLast)
2963 pRam = pRam->CTX_SUFF(pNext);
2964 }
2965 else if (pRam)
2966 {
2967 Log(("Invalid pages at %RGp\n", GCPhys));
2968 do
2969 {
2970 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2971 GCPhys += GUEST_PAGE_SIZE;
2972 iPTDst++;
2973 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2974 && GCPhys < pRam->GCPhys);
2975 PGM_A20_APPLY_TO_VAR(pVCpu,GCPhys);
2976 }
2977 else
2978 {
2979 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2980 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2981 SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle them manually. */
2982 }
2983 } /* while more PTEs */
2984 } /* 4KB / 4MB */
2985 }
2986 else
2987 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2988
2989 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2990 if (RT_FAILURE(rc))
2991 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
2992 return rc;
2993
2994#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2995 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2996 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
2997 && PGM_SHW_TYPE != PGM_TYPE_NONE
2998 NOREF(iPDSrc); NOREF(pPDSrc);
2999
3000 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3001
3002 /*
3003 * Validate input a little bit.
3004 */
3005 int rc = VINF_SUCCESS;
3006# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3007 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3008 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3009
3010 /* Fetch the pgm pool shadow descriptor. */
3011 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3012 Assert(pShwPde);
3013
3014# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3015 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3016 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3017 PX86PDPAE pPDDst;
3018 PSHWPDE pPdeDst;
3019
3020 /* Fetch the pgm pool shadow descriptor. */
3021 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3022 AssertRCSuccessReturn(rc, rc);
3023 Assert(pShwPde);
3024
3025 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3026 pPdeDst = &pPDDst->a[iPDDst];
3027
3028# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3029 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3030 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3031 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3032 PX86PDPT pPdptDst= NULL; /* initialized to shut up gcc */
3033 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3034 AssertRCSuccessReturn(rc, rc);
3035 Assert(pPDDst);
3036 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3037
3038 /* Fetch the pgm pool shadow descriptor. */
3039 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3040 Assert(pShwPde);
3041
3042# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3043 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3044 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3045 PEPTPD pPDDst;
3046 PEPTPDPT pPdptDst;
3047
3048 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3049 if (rc != VINF_SUCCESS)
3050 {
3051 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3052 AssertRC(rc);
3053 return rc;
3054 }
3055 Assert(pPDDst);
3056 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3057
3058 /* Fetch the pgm pool shadow descriptor. */
3059 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
3060 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3061 Assert(pShwPde);
3062# endif
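    /* Whatever the shadow mode, the blocks above leave pPdeDst pointing at the shadow
       PDE slot for GCPtrPage and pShwPde identifying the pool page that owns it. */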
3063 SHWPDE PdeDst = *pPdeDst;
3064
3065 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3066
3067# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3068 if (BTH_IS_NP_ACTIVE(pVM))
3069 {
3070 Assert(!VM_IS_NEM_ENABLED(pVM));
3071
3072 /* Check if we allocated a big page before for this 2 MB range. */
3073 PPGMPAGE pPage;
3074 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
3075 if (RT_SUCCESS(rc))
3076 {
3077 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3078 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3079 {
3080 if (PGM_A20_IS_ENABLED(pVCpu))
3081 {
3082 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3083 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3084 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3085 }
3086 else
3087 {
3088 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3089 pVM->pgm.s.cLargePagesDisabled++;
3090 }
3091 }
3092 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
3093 && PGM_A20_IS_ENABLED(pVCpu))
3094 {
3095 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3096 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
3097 if (RT_SUCCESS(rc))
3098 {
3099 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3100 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3101 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3102 }
3103 }
3104 else if ( PGMIsUsingLargePages(pVM)
3105 && PGM_A20_IS_ENABLED(pVCpu))
3106 {
3107 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3108 if (RT_SUCCESS(rc))
3109 {
3110 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3111 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3112 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3113 }
3114 else
3115 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3116 }
3117
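            /* HCPhys is only set above when the whole 2 MB range is backed by a single
               large host page; otherwise we fall through and build an ordinary 4 KB
               shadow page table further down. */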
3118 if (HCPhys != NIL_RTHCPHYS)
3119 {
3120# if PGM_SHW_TYPE == PGM_TYPE_EPT
3121 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
3122 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
3123# else
3124 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
3125 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
3126# endif
3127 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3128
3129 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3130 /* Add a reference to the first page only. */
3131 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3132
3133 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3134 return VINF_SUCCESS;
3135 }
3136 }
3137 }
3138# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3139
3140 /*
3141 * Allocate & map the page table.
3142 */
3143 PSHWPT pPTDst;
3144 PPGMPOOLPAGE pShwPage;
3145 RTGCPHYS GCPhys;
3146
3147 /* Virtual address = physical address */
3148 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
3149 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
3150 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
3151 &pShwPage);
3152 if ( rc == VINF_SUCCESS
3153 || rc == VINF_PGM_CACHED_PAGE)
3154 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3155 else
3156 {
3157 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3158 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3159 }
3160
3161 if (rc == VINF_SUCCESS)
3162 {
3163 /* New page table; fully set it up. */
3164 Assert(pPTDst);
3165
3166 /* Mask away the page offset. */
3167 GCPtrPage &= ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK;
3168
3169 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3170 {
3171 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
3172 | (iPTDst << GUEST_PAGE_SHIFT));
3173
3174 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
3175 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
3176 GCPtrCurPage,
3177 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3178 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3179
3180 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3181 break;
3182 }
3183 }
3184 else
3185 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3186
3187 /* Save the new PDE. */
3188# if PGM_SHW_TYPE == PGM_TYPE_EPT
3189 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
3190 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
3191# else
3192 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
3193 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
3194# endif
3195 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3196
3197 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3198 if (RT_FAILURE(rc))
3199 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3200 return rc;
3201
3202#else
3203 NOREF(iPDSrc); NOREF(pPDSrc);
3204 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3205 return VERR_PGM_NOT_USED_IN_MODE;
3206#endif
3207}
3208
3209
3210
3211/**
3212 * Prefetch a page/set of pages.
3213 *
3214 * Typically used to sync commonly used pages before entering raw mode
3215 * after a CR3 reload.
3216 *
3217 * @returns VBox status code.
3218 * @param pVCpu The cross context virtual CPU structure.
3219 * @param GCPtrPage Page to prefetch.
3220 */
3221PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3222{
3223#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3224 || PGM_GST_TYPE == PGM_TYPE_REAL \
3225 || PGM_GST_TYPE == PGM_TYPE_PROT \
3226 || PGM_GST_TYPE == PGM_TYPE_PAE \
3227 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3228 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3229 && PGM_SHW_TYPE != PGM_TYPE_NONE
3230 /*
3231 * Check that all Guest levels thru the PDE are present, getting the
3232 * PD and PDE in the process.
3233 */
3234 int rc = VINF_SUCCESS;
3235# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3236# if PGM_GST_TYPE == PGM_TYPE_32BIT
3237 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3238 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3239# elif PGM_GST_TYPE == PGM_TYPE_PAE
3240 unsigned iPDSrc;
3241 X86PDPE PdpeSrc;
3242 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3243 if (!pPDSrc)
3244 return VINF_SUCCESS; /* not present */
3245# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3246 unsigned iPDSrc;
3247 PX86PML4E pPml4eSrc;
3248 X86PDPE PdpeSrc;
3249 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3250 if (!pPDSrc)
3251 return VINF_SUCCESS; /* not present */
3252# endif
3253 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3254# else
3255 PGSTPD pPDSrc = NULL;
3256 const unsigned iPDSrc = 0;
3257 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3258# endif
3259
3260 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3261 {
3262 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3263 PGM_LOCK_VOID(pVM);
3264
3265# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3266 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3267# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3268 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3269 PX86PDPAE pPDDst;
3270 X86PDEPAE PdeDst;
3271# if PGM_GST_TYPE != PGM_TYPE_PAE
3272 X86PDPE PdpeSrc;
3273
3274 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3275 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3276# endif
3277 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3278 if (rc != VINF_SUCCESS)
3279 {
3280 PGM_UNLOCK(pVM);
3281 AssertRC(rc);
3282 return rc;
3283 }
3284 Assert(pPDDst);
3285 PdeDst = pPDDst->a[iPDDst];
3286
3287# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3288 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3289 PX86PDPAE pPDDst;
3290 X86PDEPAE PdeDst;
3291
3292# if PGM_GST_TYPE == PGM_TYPE_PROT
3293 /* AMD-V nested paging */
3294 X86PML4E Pml4eSrc;
3295 X86PDPE PdpeSrc;
3296 PX86PML4E pPml4eSrc = &Pml4eSrc;
3297
3298 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3299 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3300 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3301# endif
3302
3303 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3304 if (rc != VINF_SUCCESS)
3305 {
3306 PGM_UNLOCK(pVM);
3307 AssertRC(rc);
3308 return rc;
3309 }
3310 Assert(pPDDst);
3311 PdeDst = pPDDst->a[iPDDst];
3312# endif
3313 if (!(PdeDst.u & X86_PDE_P))
3314 {
3315 /** @todo r=bird: This guy will set the A bit on the PDE,
3316 * probably harmless. */
3317 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3318 }
3319 else
3320 {
3321 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3322 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3323 * makes no sense to prefetch more than one page.
3324 */
3325 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3326 if (RT_SUCCESS(rc))
3327 rc = VINF_SUCCESS;
3328 }
3329 PGM_UNLOCK(pVM);
3330 }
3331 return rc;
3332
3333#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3334 NOREF(pVCpu); NOREF(GCPtrPage);
3335 return VINF_SUCCESS; /* ignore */
3336#else
3337 AssertCompile(0);
3338#endif
3339}
3340
3341
3342
3343
3344/**
3345 * Syncs a page during a PGMVerifyAccess() call.
3346 *
3347 * @returns VBox status code (informational included).
3348 * @param pVCpu The cross context virtual CPU structure.
3349 * @param GCPtrPage The address of the page to sync.
3350 * @param fPage The effective guest page flags.
3351 * @param uErr The trap error code.
3352 * @remarks This will normally never be called on invalid guest page
3353 * translation entries.
3354 */
3355PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3356{
3357 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3358
3359 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3360 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3361
3362 Assert(!pVM->pgm.s.fNestedPaging);
3363#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3364 || PGM_GST_TYPE == PGM_TYPE_REAL \
3365 || PGM_GST_TYPE == PGM_TYPE_PROT \
3366 || PGM_GST_TYPE == PGM_TYPE_PAE \
3367 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3368 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3369 && PGM_SHW_TYPE != PGM_TYPE_NONE
3370
3371 /*
3372 * Get guest PD and index.
3373 */
3374 /** @todo Performance: We've done all this a jiffy ago in the
3375 * PGMGstGetPage call. */
3376# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3377# if PGM_GST_TYPE == PGM_TYPE_32BIT
3378 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3379 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3380
3381# elif PGM_GST_TYPE == PGM_TYPE_PAE
3382 unsigned iPDSrc = 0;
3383 X86PDPE PdpeSrc;
3384 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3385 if (RT_UNLIKELY(!pPDSrc))
3386 {
3387 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3388 return VINF_EM_RAW_GUEST_TRAP;
3389 }
3390
3391# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3392 unsigned iPDSrc = 0; /* shut up gcc */
3393 PX86PML4E pPml4eSrc = NULL; /* ditto */
3394 X86PDPE PdpeSrc;
3395 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3396 if (RT_UNLIKELY(!pPDSrc))
3397 {
3398 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3399 return VINF_EM_RAW_GUEST_TRAP;
3400 }
3401# endif
3402
3403# else /* !PGM_WITH_PAGING */
3404 PGSTPD pPDSrc = NULL;
3405 const unsigned iPDSrc = 0;
3406# endif /* !PGM_WITH_PAGING */
3407 int rc = VINF_SUCCESS;
3408
3409 PGM_LOCK_VOID(pVM);
3410
3411 /*
3412 * First check if the shadow pd is present.
3413 */
3414# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3415 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3416
3417# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3418 PX86PDEPAE pPdeDst;
3419 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3420 PX86PDPAE pPDDst;
3421# if PGM_GST_TYPE != PGM_TYPE_PAE
3422 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3423 X86PDPE PdpeSrc;
3424 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3425# endif
3426 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3427 if (rc != VINF_SUCCESS)
3428 {
3429 PGM_UNLOCK(pVM);
3430 AssertRC(rc);
3431 return rc;
3432 }
3433 Assert(pPDDst);
3434 pPdeDst = &pPDDst->a[iPDDst];
3435
3436# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3437 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3438 PX86PDPAE pPDDst;
3439 PX86PDEPAE pPdeDst;
3440
3441# if PGM_GST_TYPE == PGM_TYPE_PROT
3442 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3443 X86PML4E Pml4eSrc;
3444 X86PDPE PdpeSrc;
3445 PX86PML4E pPml4eSrc = &Pml4eSrc;
3446 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3447 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3448# endif
3449
3450 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3451 if (rc != VINF_SUCCESS)
3452 {
3453 PGM_UNLOCK(pVM);
3454 AssertRC(rc);
3455 return rc;
3456 }
3457 Assert(pPDDst);
3458 pPdeDst = &pPDDst->a[iPDDst];
3459# endif
3460
3461 if (!(pPdeDst->u & X86_PDE_P))
3462 {
3463 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3464 if (rc != VINF_SUCCESS)
3465 {
3466 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3467 PGM_UNLOCK(pVM);
3468 AssertRC(rc);
3469 return rc;
3470 }
3471 }
3472
3473# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3474 /* Check for dirty bit fault */
3475 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3476 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3477 Log(("PGMVerifyAccess: success (dirty)\n"));
3478 else
3479# endif
3480 {
3481# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3482 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3483# else
3484 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3485# endif
3486
3487 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3488 if (uErr & X86_TRAP_PF_US)
3489 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3490 else /* supervisor */
3491 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3492
3493 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3494 if (RT_SUCCESS(rc))
3495 {
3496 /* Page was successfully synced */
3497 Log2(("PGMVerifyAccess: success (sync)\n"));
3498 rc = VINF_SUCCESS;
3499 }
3500 else
3501 {
3502 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3503 rc = VINF_EM_RAW_GUEST_TRAP;
3504 }
3505 }
3506 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3507 PGM_UNLOCK(pVM);
3508 return rc;
3509
3510#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3511
3512 AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3513 return VERR_PGM_NOT_USED_IN_MODE;
3514#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3515}
3516
3517
3518/**
3519 * Syncs the paging hierarchy starting at CR3.
3520 *
3521 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
3522 * informational status codes.
3523 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3524 * the VMM into guest context.
3525 * @param pVCpu The cross context virtual CPU structure.
3526 * @param cr0 Guest context CR0 register.
3527 * @param cr3 Guest context CR3 register. Not subjected to the A20
3528 * mask.
3529 * @param cr4 Guest context CR4 register.
3530 * @param fGlobal Including global page directories or not
3531 */
3532PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3533{
3534 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3535 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3536
3537 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3538
3539#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3540# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3541 PGM_LOCK_VOID(pVM);
3542 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3543 if (pPool->cDirtyPages)
3544 pgmPoolResetDirtyPages(pVM);
3545 PGM_UNLOCK(pVM);
3546# endif
3547#endif /* !NESTED && !EPT */
3548
3549#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3550 /*
3551 * Nested / EPT / None - No work.
3552 */
3553 return VINF_SUCCESS;
3554
3555#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3556 /*
3557 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3558 * out the shadow parts when the guest modifies its tables.
3559 */
3560 return VINF_SUCCESS;
3561
3562#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3563
3564 return VINF_SUCCESS;
3565#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3566}
3567
3568
3569
3570
3571#ifdef VBOX_STRICT
3572
3573/**
3574 * Checks that the shadow page table is in sync with the guest one.
3575 *
3576 * @returns The number of errors.
3577 * @param pVCpu The cross context virtual CPU structure.
3578 * @param cr3 Guest context CR3 register.
3579 * @param cr4 Guest context CR4 register.
3580 * @param GCPtr Where to start. Defaults to 0.
3581 * @param cb How much to check. Defaults to everything.
3582 */
3583PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3584{
3585 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3586#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3587 return 0;
3588#else
3589 unsigned cErrors = 0;
3590 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3591 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3592
3593# if PGM_GST_TYPE == PGM_TYPE_PAE
3594 /** @todo currently broken; crashes below somewhere */
3595 AssertFailed();
3596# endif
3597
3598# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3599 || PGM_GST_TYPE == PGM_TYPE_PAE \
3600 || PGM_GST_TYPE == PGM_TYPE_AMD64
3601
3602 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3603 PPGMCPU pPGM = &pVCpu->pgm.s;
3604 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3605 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3606# ifndef IN_RING0
3607 RTHCPHYS HCPhys; /* general usage. */
3608# endif
3609 int rc;
3610
3611 /*
3612 * Check that the Guest CR3 and all its mappings are correct.
3613 */
3614 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3615 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3616 false);
3617# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3618# if 0
3619# if PGM_GST_TYPE == PGM_TYPE_32BIT
3620 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3621# else
3622 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3623# endif
3624 AssertRCReturn(rc, 1);
3625 HCPhys = NIL_RTHCPHYS;
3626 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3627 AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhyswShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3628# endif
3629# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3630 pgmGstGet32bitPDPtr(pVCpu);
3631 RTGCPHYS GCPhys;
3632 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3633 AssertRCReturn(rc, 1);
3634 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3635# endif
3636# endif /* !IN_RING0 */
3637
3638 /*
3639 * Get and check the Shadow CR3.
3640 */
3641# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3642 unsigned cPDEs = X86_PG_ENTRIES;
3643 unsigned cIncrement = X86_PG_ENTRIES * GUEST_PAGE_SIZE;
3644# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3645# if PGM_GST_TYPE == PGM_TYPE_32BIT
3646 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3647# else
3648 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3649# endif
3650 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3651# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3652 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3653 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3654# endif
3655 if (cb != ~(RTGCPTR)0)
3656 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3657
3658/** @todo call the other two PGMAssert*() functions. */
3659
3660# if PGM_GST_TYPE == PGM_TYPE_AMD64
3661 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3662
3663 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3664 {
3665 PPGMPOOLPAGE pShwPdpt = NULL;
3666 PX86PML4E pPml4eSrc;
3667 PX86PML4E pPml4eDst;
3668 RTGCPHYS GCPhysPdptSrc;
3669
3670 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3671 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3672
3673 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3674 if (!(pPml4eDst->u & X86_PML4E_P))
3675 {
3676 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3677 continue;
3678 }
3679
3680 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3681 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3682
3683 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3684 {
3685 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3686 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3687 cErrors++;
3688 continue;
3689 }
3690
3691 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3692 {
3693 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3694 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3695 cErrors++;
3696 continue;
3697 }
3698
3699 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3700 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3701 {
3702 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3703 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3704 cErrors++;
3705 continue;
3706 }
3707# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3708 {
3709# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3710
3711# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3712 /*
3713 * Check the PDPTEs too.
3714 */
3715 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3716
3717 for (;iPdpt <= SHW_PDPT_MASK; iPdpt++)
3718 {
3719 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3720 PPGMPOOLPAGE pShwPde = NULL;
3721 PX86PDPE pPdpeDst;
3722 RTGCPHYS GCPhysPdeSrc;
3723 X86PDPE PdpeSrc;
3724 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3725# if PGM_GST_TYPE == PGM_TYPE_PAE
3726 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3727 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3728# else
3729 PX86PML4E pPml4eSrcIgn;
3730 PX86PDPT pPdptDst;
3731 PX86PDPAE pPDDst;
3732 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3733
3734 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3735 if (rc != VINF_SUCCESS)
3736 {
3737 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3738 GCPtr += 512 * _2M;
3739 continue; /* next PDPTE */
3740 }
3741 Assert(pPDDst);
3742# endif
3743 Assert(iPDSrc == 0);
3744
3745 pPdpeDst = &pPdptDst->a[iPdpt];
3746
3747 if (!(pPdpeDst->u & X86_PDPE_P))
3748 {
3749 GCPtr += 512 * _2M;
3750 continue; /* next PDPTE */
3751 }
3752
3753 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3754 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3755
3756 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3757 {
3758 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3759 GCPtr += 512 * _2M;
3760 cErrors++;
3761 continue;
3762 }
3763
3764 if (GCPhysPdeSrc != pShwPde->GCPhys)
3765 {
3766# if PGM_GST_TYPE == PGM_TYPE_AMD64
3767 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3768# else
3769 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3770# endif
3771 GCPtr += 512 * _2M;
3772 cErrors++;
3773 continue;
3774 }
3775
3776# if PGM_GST_TYPE == PGM_TYPE_AMD64
3777 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3778 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3779 {
3780 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3781 GCPtr += 512 * _2M;
3782 cErrors++;
3783 continue;
3784 }
3785# endif
3786
3787# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3788 {
3789# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3790# if PGM_GST_TYPE == PGM_TYPE_32BIT
3791 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3792# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3793 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3794# endif
3795# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3796 /*
3797 * Iterate the shadow page directory.
3798 */
3799 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3800 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3801
3802 for (;
3803 iPDDst < cPDEs;
3804 iPDDst++, GCPtr += cIncrement)
3805 {
3806# if PGM_SHW_TYPE == PGM_TYPE_PAE
3807 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3808# else
3809 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3810# endif
3811 if ( (PdeDst.u & X86_PDE_P)
3812 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3813 {
3814 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3815 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3816 if (!pPoolPage)
3817 {
3818 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3819 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3820 cErrors++;
3821 continue;
3822 }
3823 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3824
3825 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3826 {
3827 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3828 GCPtr, (uint64_t)PdeDst.u));
3829 cErrors++;
3830 }
3831
3832 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3833 {
3834 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3835 GCPtr, (uint64_t)PdeDst.u));
3836 cErrors++;
3837 }
3838
3839 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3840 if (!(PdeSrc.u & X86_PDE_P))
3841 {
3842 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3843 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3844 cErrors++;
3845 continue;
3846 }
3847
3848 if ( !(PdeSrc.u & X86_PDE_PS)
3849 || !fBigPagesSupported)
3850 {
3851 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3852# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3853 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
3854# endif
3855 }
3856 else
3857 {
3858# if PGM_GST_TYPE == PGM_TYPE_32BIT
3859 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3860 {
3861 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3862 GCPtr, (uint64_t)PdeSrc.u));
3863 cErrors++;
3864 continue;
3865 }
3866# endif
3867 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3868# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3869 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3870# endif
3871 }
3872
3873 if ( pPoolPage->enmKind
3874 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3875 {
3876 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3877 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3878 cErrors++;
3879 }
3880
3881 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3882 if (!pPhysPage)
3883 {
3884 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3885 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3886 cErrors++;
3887 continue;
3888 }
3889
3890 if (GCPhysGst != pPoolPage->GCPhys)
3891 {
3892 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3893 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3894 cErrors++;
3895 continue;
3896 }
3897
3898 if ( !(PdeSrc.u & X86_PDE_PS)
3899 || !fBigPagesSupported)
3900 {
3901 /*
3902 * Page Table.
3903 */
3904 const GSTPT *pPTSrc;
3905 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(GUEST_PAGE_SIZE - 1)),
3906 &pPTSrc);
3907 if (RT_FAILURE(rc))
3908 {
3909 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3910 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3911 cErrors++;
3912 continue;
3913 }
3914 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3915 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3916 {
3917 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3918 // (This problem will go away when/if we shadow multiple CR3s.)
3919 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3920 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3921 cErrors++;
3922 continue;
3923 }
3924 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3925 {
3926 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3927 GCPtr, (uint64_t)PdeDst.u));
3928 cErrors++;
3929 continue;
3930 }
3931
3932 /* iterate the page table. */
3933# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3934 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3935 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3936# else
3937 const unsigned offPTSrc = 0;
3938# endif
3939 for (unsigned iPT = 0, off = 0;
3940 iPT < RT_ELEMENTS(pPTDst->a);
3941 iPT++, off += GUEST_PAGE_SIZE)
3942 {
3943 const SHWPTE PteDst = pPTDst->a[iPT];
3944
3945 /* skip not-present and dirty tracked entries. */
3946 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3947 continue;
3948 Assert(SHW_PTE_IS_P(PteDst));
3949
3950 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3951 if (!(PteSrc.u & X86_PTE_P))
3952 {
3953# ifdef IN_RING3
3954 PGMAssertHandlerAndFlagsInSync(pVM);
3955 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3956 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3957 0, 0, UINT64_MAX, 99, NULL);
3958# endif
3959 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3960 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3961 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3962 cErrors++;
3963 continue;
3964 }
3965
3966 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3967# if 1 /** @todo sync accessed bit properly... */
3968 fIgnoreFlags |= X86_PTE_A;
3969# endif
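                        /* fIgnoreFlags collects the bits that may legitimately differ
                           between guest and shadow PTEs (address bits, caching bits and
                           bits PGM doesn't shadow); they are masked out of the compare. */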
3970
3971 /* match the physical addresses */
3972 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3973 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3974
3975# ifdef IN_RING3
3976 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3977 if (RT_FAILURE(rc))
3978 {
3979# if 0
3980 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3981 {
3982 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3983 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3984 cErrors++;
3985 continue;
3986 }
3987# endif
3988 }
3989 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
3990 {
3991 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
3992 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3993 cErrors++;
3994 continue;
3995 }
3996# endif
3997
3998 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3999 if (!pPhysPage)
4000 {
4001# if 0
4002 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4003 {
4004 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4005 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4006 cErrors++;
4007 continue;
4008 }
4009# endif
4010 if (SHW_PTE_IS_RW(PteDst))
4011 {
4012 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4013 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4014 cErrors++;
4015 }
4016 fIgnoreFlags |= X86_PTE_RW;
4017 }
4018 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4019 {
4020 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4021 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4022 cErrors++;
4023 continue;
4024 }
4025
4026 /* flags */
4027 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4028 {
4029 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4030 {
4031 if (SHW_PTE_IS_RW(PteDst))
4032 {
4033 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4034 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4035 cErrors++;
4036 continue;
4037 }
4038 fIgnoreFlags |= X86_PTE_RW;
4039 }
4040 else
4041 {
4042 if ( SHW_PTE_IS_P(PteDst)
4043# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4044 && !PGM_PAGE_IS_MMIO(pPhysPage)
4045# endif
4046 )
4047 {
4048 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4049 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4050 cErrors++;
4051 continue;
4052 }
4053 fIgnoreFlags |= X86_PTE_P;
4054 }
4055 }
4056 else
4057 {
4058 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
4059 {
4060 if (SHW_PTE_IS_RW(PteDst))
4061 {
4062 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4063 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4064 cErrors++;
4065 continue;
4066 }
4067 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4068 {
4069 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4070 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4071 cErrors++;
4072 continue;
4073 }
4074 if (SHW_PTE_IS_D(PteDst))
4075 {
4076 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4077 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4078 cErrors++;
4079 }
4080# if 0 /** @todo sync access bit properly... */
4081 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4082 {
4083 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4084 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4085 cErrors++;
4086 }
4087 fIgnoreFlags |= X86_PTE_RW;
4088# else
4089 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4090# endif
4091 }
4092 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4093 {
4094 /* access bit emulation (not implemented). */
4095 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
4096 {
4097 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4098 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4099 cErrors++;
4100 continue;
4101 }
4102 if (!SHW_PTE_IS_A(PteDst))
4103 {
4104 AssertMsgFailed(("!ACCESSED page at %RGv doesn't have the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4105 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4106 cErrors++;
4107 }
4108 fIgnoreFlags |= X86_PTE_P;
4109 }
4110# ifdef DEBUG_sandervl
4111 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4112# endif
4113 }
4114
4115 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4116 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4117 )
4118 {
4119 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4120 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4121 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4122 cErrors++;
4123 continue;
4124 }
4125 } /* foreach PTE */
4126 }
4127 else
4128 {
4129 /*
4130 * Big Page.
4131 */
4132 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4133 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
4134 {
4135 if (PdeDst.u & X86_PDE_RW)
4136 {
4137 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4138 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4139 cErrors++;
4140 continue;
4141 }
4142 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4143 {
4144 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4145 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4146 cErrors++;
4147 continue;
4148 }
4149# if 0 /** @todo sync access bit properly... */
4150 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4151 {
4152 AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4153 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4154 cErrors++;
4155 }
4156 fIgnoreFlags |= X86_PTE_RW;
4157# else
4158 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4159# endif
4160 }
4161 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4162 {
4163 /* access bit emulation (not implemented). */
4164 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
4165 {
4166 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4167 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4168 cErrors++;
4169 continue;
4170 }
4171 if (!SHW_PDE_IS_A(PdeDst))
4172 {
4173 AssertMsgFailed(("!ACCESSED page at %RGv doesn't have the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4174 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4175 cErrors++;
4176 }
4177 fIgnoreFlags |= X86_PTE_P;
4178 }
4179
4180 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4181 {
4182 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4183 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4184 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4185 cErrors++;
4186 }
4187
4188 /* iterate the page table. */
4189 for (unsigned iPT = 0, off = 0;
4190 iPT < RT_ELEMENTS(pPTDst->a);
4191 iPT++, off += GUEST_PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + GUEST_PAGE_SIZE))
4192 {
4193 const SHWPTE PteDst = pPTDst->a[iPT];
4194
4195 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4196 {
4197 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4198 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4199 cErrors++;
4200 }
4201
4202 /* skip not-present entries. */
4203 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4204 continue;
4205
4206 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4207
4208 /* match the physical addresses */
4209 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4210
4211# ifdef IN_RING3
4212 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4213 if (RT_FAILURE(rc))
4214 {
4215# if 0
4216 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4217 {
4218 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4219 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4220 cErrors++;
4221 }
4222# endif
4223 }
4224 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4225 {
4226 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4227 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4228 cErrors++;
4229 continue;
4230 }
4231# endif
4232 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4233 if (!pPhysPage)
4234 {
4235# if 0 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4236 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4237 {
4238 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4239 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4240 cErrors++;
4241 continue;
4242 }
4243# endif
4244 if (SHW_PTE_IS_RW(PteDst))
4245 {
4246 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4247 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4248 cErrors++;
4249 }
4250 fIgnoreFlags |= X86_PTE_RW;
4251 }
4252 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4253 {
4254 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4255 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4256 cErrors++;
4257 continue;
4258 }
4259
4260 /* flags */
4261 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4262 {
4263 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4264 {
4265 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4266 {
4267 if (SHW_PTE_IS_RW(PteDst))
4268 {
4269 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4270 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4271 cErrors++;
4272 continue;
4273 }
4274 fIgnoreFlags |= X86_PTE_RW;
4275 }
4276 }
4277 else
4278 {
4279 if ( SHW_PTE_IS_P(PteDst)
4280# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4281 && !PGM_PAGE_IS_MMIO(pPhysPage)
4282# endif
4283 )
4284 {
4285 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4286 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4287 cErrors++;
4288 continue;
4289 }
4290 fIgnoreFlags |= X86_PTE_P;
4291 }
4292 }
4293
4294 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4295 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4296 )
4297 {
4298 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4299 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4300 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4301 cErrors++;
4302 continue;
4303 }
4304 } /* for each PTE */
4305 }
4306 }
4307 /* not present */
4308
4309 } /* for each PDE */
4310
4311 } /* for each PDPTE */
4312
4313 } /* for each PML4E */
4314
4315# ifdef DEBUG
4316 if (cErrors)
4317 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4318# endif
4319# endif /* GST is in {32BIT, PAE, AMD64} */
4320 return cErrors;
4321#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4322}
4323#endif /* VBOX_STRICT */
4324
4325
4326/**
4327 * Sets up the CR3 for shadow paging
4328 *
4329 * @returns Strict VBox status code.
4330 * @retval VINF_SUCCESS.
4331 *
4332 * @param pVCpu The cross context virtual CPU structure.
4333 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4334 * already applied.)
4335 */
4336PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
4337{
4338 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4339 int rc = VINF_SUCCESS;
4340
4341 /* Update guest paging info. */
4342#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4343 || PGM_GST_TYPE == PGM_TYPE_PAE \
4344 || PGM_GST_TYPE == PGM_TYPE_AMD64
4345
4346 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4347 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4348
4349# if PGM_GST_TYPE == PGM_TYPE_PAE
4350 if ( !pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped)
4351 || pVCpu->pgm.s.GCPhysPaeCR3 != GCPhysCR3)
4352# endif
4353 {
4354 /*
4355 * Map the page CR3 points at.
4356 */
4357 RTHCPTR HCPtrGuestCR3;
4358 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
4359 if (RT_SUCCESS(rc))
4360 {
4361# if PGM_GST_TYPE == PGM_TYPE_32BIT
4362# ifdef IN_RING3
4363 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4364 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4365# else
4366 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4367 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4368# endif
4369
4370# elif PGM_GST_TYPE == PGM_TYPE_PAE
4371# ifdef IN_RING3
4372 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4373 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4374# else
4375 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4376 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4377# endif
4378
4379 /*
4380 * Update CPUM and map the 4 PDs too.
4381 */
4382 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4383 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4384 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4385 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
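            /* The guest's four PDPTEs are now cached in CPUM and the corresponding page
               directories mapped, so later guest page-table walks can use them directly
               instead of re-reading them from guest RAM. */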
4386
4387 pVCpu->pgm.s.GCPhysPaeCR3 = GCPhysCR3;
4388# ifdef IN_RING3
4389 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = true;
4390 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4391# else
4392 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4393 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = true;
4394# endif
4395
4396# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4397# ifdef IN_RING3
4398 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4399 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4400# else
4401 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4402 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4403# endif
4404# endif
4405 }
4406 else
4407 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4408 }
4409#endif
4410
4411 /*
4412 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4413 */
4414# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4415 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4416 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4417 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4418 && PGM_GST_TYPE != PGM_TYPE_PROT))
4419
4420 Assert(!pVM->pgm.s.fNestedPaging);
4421 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4422
4423 /*
4424 * Update the shadow root page as well since that's not fixed.
4425 */
4426 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4427 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4428 PPGMPOOLPAGE pNewShwPageCR3;
4429
4430 PGM_LOCK_VOID(pVM);
4431
4432# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4433 if (pPool->cDirtyPages)
4434 pgmPoolResetDirtyPages(pVM);
4435# endif
4436
4437 Assert(!(GCPhysCR3 >> (GUEST_PAGE_SHIFT + 32))); /** @todo what is this for? */
4438 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4439 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4440 AssertFatalRC(rc2);
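    /* The new root was allocated with fLockPage=true so the pool won't flush it while it
       is the active CR3; the previous root is unlocked and freed further down. */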
4441
4442 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
4443 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
4444
4445 /* Set the current hypervisor CR3. */
4446 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4447
4448 /* Clean up the old CR3 root. */
4449 if ( pOldShwPageCR3
4450 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4451 {
4452 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4453
4454 /* Mark the page as unlocked; allow flushing again. */
4455 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4456
4457 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4458 }
4459 PGM_UNLOCK(pVM);
4460# else
4461 NOREF(GCPhysCR3);
4462# endif
4463
4464 return rc;
4465}
4466
4467/**
4468 * Unmaps the shadow CR3.
4469 *
4470 * @returns VBox status, no specials.
4471 * @param pVCpu The cross context virtual CPU structure.
4472 */
4473PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4474{
4475 LogFlow(("UnmapCR3\n"));
4476
4477 int rc = VINF_SUCCESS;
4478 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4479
4480 /*
4481 * Update guest paging info.
4482 */
4483#if PGM_GST_TYPE == PGM_TYPE_32BIT
4484 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4485 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4486
4487#elif PGM_GST_TYPE == PGM_TYPE_PAE
4488 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4489 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4490 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4491 {
4492 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4493 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4494 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4495 }
4496
4497#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4498 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4499 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4500
4501#else /* prot/real mode stub */
4502 /* nothing to do */
4503#endif
4504
4505 /*
4506 * Update second-level address translation info.
4507 */
4508#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4509 pVCpu->pgm.s.pGstEptPml4R3 = 0;
4510 pVCpu->pgm.s.pGstEptPml4R0 = 0;
4511#endif
4512
4513 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4514 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4515 pVCpu->pgm.s.GCPhysPaeCR3 = NIL_RTGCPHYS;
4516
4517 /*
4518 * Update shadow paging info.
4519 */
4520#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4521 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4522 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4523# if PGM_GST_TYPE != PGM_TYPE_REAL
4524 Assert(!pVM->pgm.s.fNestedPaging);
4525# endif
4526 PGM_LOCK_VOID(pVM);
4527
4528 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4529 {
4530 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4531
4532# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4533 if (pPool->cDirtyPages)
4534 pgmPoolResetDirtyPages(pVM);
4535# endif
4536
4537 /* Mark the page as unlocked; allow flushing again. */
4538 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4539
4540 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4541 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4542 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4543 }
4544
4545 PGM_UNLOCK(pVM);
4546#endif
4547
4548 return rc;
4549}
4550