VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/PGMAllBth.h@96860

Last change on this file since 96860 was 96407, checked in by vboxsync, 21 months ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 198.2 KB
1/* $Id: PGMAllBth.h 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * VBox - Page Manager, Shadow+Guest Paging Template - All context code.
4 *
5 * @remarks Extended page tables (intel) are built with PGM_GST_TYPE set to
6 * PGM_TYPE_PROT (and PGM_SHW_TYPE set to PGM_TYPE_EPT).
7 * bird: WTF does this mean these days? Looking at PGMAll.cpp it's
8 *
9 * @remarks This file is one big \#ifdef-orgy!
10 *
11 */
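/* Editor's note: a minimal sketch of how this template is typically instantiated.
 * PGMAll.cpp defines the PGM_GST_TYPE/PGM_SHW_TYPE pair plus the name-mangling
 * macros and then includes this file once per supported combination; the exact
 * macro spellings below are illustrative assumptions, not copied from PGMAll.cpp. */
#if 0
# define PGM_SHW_TYPE           PGM_TYPE_EPT
# define PGM_SHW_NAME(name)     PGM_SHW_NAME_EPT(name)
# define PGM_GST_TYPE           PGM_TYPE_PROT
# define PGM_GST_NAME(name)     PGM_GST_NAME_PROT(name)
# define PGM_BTH_NAME(name)     PGM_BTH_NAME_EPT_PROT(name)
# include "PGMAllBth.h"         /* emits the EPT-shadow / PROT-guest variant */
# undef PGM_BTH_NAME
# undef PGM_GST_NAME
# undef PGM_GST_TYPE
# undef PGM_SHW_NAME
# undef PGM_SHW_TYPE
#endif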
12
13/*
14 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
15 *
16 * This file is part of VirtualBox base platform packages, as
17 * available from https://www.virtualbox.org.
18 *
19 * This program is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU General Public License
21 * as published by the Free Software Foundation, in version 3 of the
22 * License.
23 *
24 * This program is distributed in the hope that it will be useful, but
25 * WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, see <https://www.gnu.org/licenses>.
31 *
32 * SPDX-License-Identifier: GPL-3.0-only
33 */
34
35#ifdef _MSC_VER
36/** @todo we're generating unnecessary code in nested/ept shadow mode and for
37 * real/prot-guest+RC mode. */
38# pragma warning(disable: 4505)
39#endif
40
41
42/*********************************************************************************************************************************
43* Internal Functions *
44*********************************************************************************************************************************/
45RT_C_DECLS_BEGIN
46PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
47#ifndef IN_RING3
48PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
49PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
50 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken);
51#endif
52PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
53static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr);
54static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc, RTGCPTR GCPtrPage);
55static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPD, PGSTPD pPDSrc, RTGCPTR GCPtrPage);
56#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
57static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
58#else
59static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage, PPGMPOOLPAGE pShwPage, unsigned iPTDst);
60#endif
61PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR Addr, unsigned fPage, unsigned uErr);
62PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage);
63PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal);
64#ifdef VBOX_STRICT
65PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr = 0, RTGCPTR cb = ~(RTGCPTR)0);
66#endif
67PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3);
68PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu);
69
70#ifdef IN_RING3
71PGM_BTH_DECL(int, Relocate)(PVMCPUCC pVCpu, RTGCPTR offDelta);
72#endif
73RT_C_DECLS_END
74
75
76
77
78/*
79 * Filter out some illegal combinations of guest and shadow paging, so we can
80 * remove redundant checks inside functions.
81 */
82#if PGM_GST_TYPE == PGM_TYPE_PAE && PGM_SHW_TYPE != PGM_TYPE_PAE \
83 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
84# error "Invalid combination; PAE guest implies PAE shadow"
85#endif
86
87#if (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
88 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64 \
89 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
90# error "Invalid combination; real or protected mode without paging implies 32 bits or PAE shadow paging."
91#endif
92
93#if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) \
94 && !( PGM_SHW_TYPE == PGM_TYPE_32BIT || PGM_SHW_TYPE == PGM_TYPE_PAE \
95 || PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE)
96# error "Invalid combination; 32 bits guest paging or PAE implies 32 bits or PAE shadow paging."
97#endif
98
99#if (PGM_GST_TYPE == PGM_TYPE_AMD64 && PGM_SHW_TYPE != PGM_TYPE_AMD64 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE) \
100 || (PGM_SHW_TYPE == PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PROT)
101# error "Invalid combination; AMD64 guest implies AMD64 shadow and vice versa"
102#endif
103
104
105/**
106 * Enters the shadow+guest mode.
107 *
108 * @returns VBox status code.
109 * @param pVCpu The cross context virtual CPU structure.
110 * @param GCPhysCR3 The physical address from the CR3 register.
111 */
112PGM_BTH_DECL(int, Enter)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
113{
114 /* Here we deal with allocation of the root shadow page table for real and protected mode during mode switches.
115 * Other modes rely on MapCR3/UnmapCR3 to set up the shadow root page tables.
116 */
117#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
118 || PGM_SHW_TYPE == PGM_TYPE_PAE \
119 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
120 && ( PGM_GST_TYPE == PGM_TYPE_REAL \
121 || PGM_GST_TYPE == PGM_TYPE_PROT))
122
123 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
124
125 Assert(!pVM->pgm.s.fNestedPaging);
126
127 PGM_LOCK_VOID(pVM);
128 /* Note: we only really need shadow paging in real and protected mode for VT-x and AMD-V (excluding nested paging/EPT modes),
129 * but any calls to GC need a proper shadow page setup as well.
130 */
131 /* Free the previous root mapping if still active. */
132 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
133 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
134 if (pOldShwPageCR3)
135 {
136 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
137
138 /* Mark the page as unlocked; allow flushing again. */
139 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
140
141 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
142 pVCpu->pgm.s.pShwPageCR3R3 = NIL_RTR3PTR;
143 pVCpu->pgm.s.pShwPageCR3R0 = NIL_RTR0PTR;
144 }
145
146 /* construct a fake address. */
147 GCPhysCR3 = RT_BIT_64(63);
148 PPGMPOOLPAGE pNewShwPageCR3;
149 int rc = pgmPoolAlloc(pVM, GCPhysCR3, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
150 NIL_PGMPOOL_IDX, UINT32_MAX, false /*fLockPage*/,
151 &pNewShwPageCR3);
152 AssertRCReturn(rc, rc);
153
154 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
155 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
156
157 /* Mark the page as locked; disallow flushing. */
158 pgmPoolLockPage(pPool, pNewShwPageCR3);
159
160 /* Set the current hypervisor CR3. */
161 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
162
163 PGM_UNLOCK(pVM);
164 return rc;
165#else
166 NOREF(pVCpu); NOREF(GCPhysCR3);
167 return VINF_SUCCESS;
168#endif
169}
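/* Editor's note: a hedged sketch of how the mode-specific Enter above is typically
 * reached. PGM keeps per-mode function tables built from these template
 * instantiations and dispatches through them when the guest/shadow mode pair
 * changes; the table and field names below are assumptions for illustration only. */
#if 0
static int pgmExampleEnterCurrentMode(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
{
    uintptr_t const idxBth = pVCpu->pgm.s.idxBothModeData; /* selected on mode change */
    return g_aPgmBothModeData[idxBth].pfnEnter(pVCpu, GCPhysCR3);
}
#endif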
170
171
172#ifndef IN_RING3
173
174# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
175/**
176 * Deal with a guest page fault.
177 *
178 * @returns Strict VBox status code.
179 * @retval VINF_EM_RAW_GUEST_TRAP
180 * @retval VINF_EM_RAW_EMULATE_INSTR
181 *
182 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
183 * @param pWalk The guest page table walk result.
184 * @param uErr The error code.
185 */
186PGM_BTH_DECL(VBOXSTRICTRC, Trap0eHandlerGuestFault)(PVMCPUCC pVCpu, PPGMPTWALK pWalk, RTGCUINT uErr)
187{
188 /*
189 * Calc the error code for the guest trap.
190 */
191 uint32_t uNewErr = GST_IS_NX_ACTIVE(pVCpu)
192 ? uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID)
193 : uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US);
194 if ( pWalk->fRsvdError
195 || pWalk->fBadPhysAddr)
196 {
197 uNewErr |= X86_TRAP_PF_RSVD | X86_TRAP_PF_P;
198 Assert(!pWalk->fNotPresent);
199 }
200 else if (!pWalk->fNotPresent)
201 uNewErr |= X86_TRAP_PF_P;
202 TRPMSetErrorCode(pVCpu, uNewErr);
203
204 LogFlow(("Guest trap; cr2=%RGv uErr=%RGv lvl=%d\n", pWalk->GCPtr, uErr, pWalk->uLevel));
205 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
206 return VINF_EM_RAW_GUEST_TRAP;
207}
208# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
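/* Editor's note: for reference when reading the uErr tests in this file, the
 * architectural x86 \#PF error code bits (mirrored by the X86_TRAP_PF_* constants
 * used here) are:
 *      bit 0  P     - 0: non-present page, 1: protection violation
 *      bit 1  RW    - the access was a write
 *      bit 2  US    - the access originated in user mode
 *      bit 3  RSVD  - a reserved bit was set in a paging structure
 *      bit 4  ID    - instruction fetch (relevant with NX)
 */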
209
210
211#if !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
212/**
213 * Deal with a guest page fault.
214 *
215 * The caller has taken the PGM lock.
216 *
217 * @returns Strict VBox status code.
218 *
219 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
220 * @param uErr The error code.
221 * @param pRegFrame The register frame.
222 * @param pvFault The fault address.
223 * @param pPage The guest page at @a pvFault.
224 * @param pWalk The guest page table walk result.
225 * @param pGstWalk The guest paging-mode specific walk information.
226 * @param pfLockTaken PGM lock taken here or not (out). This is true
227 * when we're called.
228 */
229static VBOXSTRICTRC PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame,
230 RTGCPTR pvFault, PPGMPAGE pPage, bool *pfLockTaken
231# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
232 , PPGMPTWALK pWalk
233 , PGSTPTWALK pGstWalk
234# endif
235 )
236{
237# if !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
238 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
239# endif
240 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
241 VBOXSTRICTRC rcStrict;
242
243 if (PGM_PAGE_HAS_ANY_PHYSICAL_HANDLERS(pPage))
244 {
245 /*
246 * Physical page access handler.
247 */
248# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
249 const RTGCPHYS GCPhysFault = pWalk->GCPhys;
250# else
251 const RTGCPHYS GCPhysFault = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault);
252# endif
253 PPGMPHYSHANDLER pCur;
254 rcStrict = pgmHandlerPhysicalLookup(pVM, GCPhysFault, &pCur);
255 if (RT_SUCCESS(rcStrict))
256 {
257 PCPGMPHYSHANDLERTYPEINT const pCurType = PGMPHYSHANDLER_GET_TYPE(pVM, pCur);
258
259# ifdef PGM_SYNC_N_PAGES
260 /*
261 * If the region is write protected and we got a page not present fault, then sync
262 * the pages. If the fault was caused by a read, then restart the instruction.
263 * In case of write access continue to the GC write handler.
264 *
265 * ASSUMES that there is only one handler per page or that they have similar write properties.
266 */
267 if ( !(uErr & X86_TRAP_PF_P)
268 && pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
269 {
270# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
271 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
272# else
273 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
274# endif
275 if ( RT_FAILURE(rcStrict)
276 || !(uErr & X86_TRAP_PF_RW)
277 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
278 {
279 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
280 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
281 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
282 return rcStrict;
283 }
284 }
285# endif
286# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
287 /*
288 * If the access was not thru a #PF(RSVD|...) resync the page.
289 */
290 if ( !(uErr & X86_TRAP_PF_RSVD)
291 && pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
292# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
293 && (pWalk->fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK))
294 == PGM_PTATTRS_W_MASK /** @todo Remove pGstWalk->Core.fEffectiveUS and X86_PTE_US further down in the sync code. */
295# endif
296 )
297 {
298# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
299 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
300# else
301 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
302# endif
303 if ( RT_FAILURE(rcStrict)
304 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
305 {
306 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
307 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
308 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
309 return rcStrict;
310 }
311 }
312# endif
313
314 AssertMsg( pCurType->enmKind != PGMPHYSHANDLERKIND_WRITE
315 || (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE && (uErr & X86_TRAP_PF_RW)),
316 ("Unexpected trap for physical handler: %08X (phys=%08x) pPage=%R[pgmpage] uErr=%X, enmKind=%d\n",
317 pvFault, GCPhysFault, pPage, uErr, pCurType->enmKind));
318 if (pCurType->enmKind == PGMPHYSHANDLERKIND_WRITE)
319 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysWrite);
320 else
321 {
322 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAll);
323 if (uErr & X86_TRAP_PF_RSVD) STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersPhysAllOpt);
324 }
325
326 if (pCurType->pfnPfHandler)
327 {
328 STAM_PROFILE_START(&pCur->Stat, h);
329
330 if (pCurType->fKeepPgmLock)
331 {
332 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault,
333 !pCurType->fRing0DevInsIdx ? pCur->uUser
334 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser));
335
336 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
337 }
338 else
339 {
340 uint64_t const uUser = !pCurType->fRing0DevInsIdx ? pCur->uUser
341 : (uintptr_t)PDMDeviceRing0IdxToInstance(pVM, pCur->uUser);
342 PGM_UNLOCK(pVM);
343 *pfLockTaken = false;
344
345 rcStrict = pCurType->pfnPfHandler(pVM, pVCpu, uErr, pRegFrame, pvFault, GCPhysFault, uUser);
346
347 STAM_PROFILE_STOP(&pCur->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
348 }
349 }
350 else
351 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
352
353 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndPhys; });
354 return rcStrict;
355 }
356 AssertMsgReturn(rcStrict == VERR_NOT_FOUND, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict);
357 }
358
359 /*
360 * There is a handled area of the page, but this fault doesn't belong to it.
361 * We must emulate the instruction.
362 *
363 * To avoid crashing (non-fatally) in the interpreter and going back to the recompiler,
364 * we first check if this was a page-not-present fault for a page with only
365 * write access handlers. Restart the instruction if it wasn't a write access.
366 */
367 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersUnhandled);
368
369 if ( !PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
370 && !(uErr & X86_TRAP_PF_P))
371 {
372# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
373 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, pGstWalk->Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
374# else
375 rcStrict = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
376# endif
377 if ( RT_FAILURE(rcStrict)
378 || rcStrict == VINF_PGM_SYNCPAGE_MODIFIED_PDE
379 || !(uErr & X86_TRAP_PF_RW))
380 {
381 AssertMsgRC(rcStrict, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
382 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersOutOfSync);
383 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndPhys; });
384 return rcStrict;
385 }
386 }
387
388 /** @todo This particular case can cause quite a lot of overhead. E.g. the early stage of kernel booting in Ubuntu 6.06
389 * writes to an unhandled part of the LDT page several million times.
390 */
391 rcStrict = PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault);
392 LogFlow(("PGM: PGMInterpretInstruction -> rcStrict=%d pPage=%R[pgmpage]\n", VBOXSTRICTRC_VAL(rcStrict), pPage));
393 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2HndUnhandled; });
394 return rcStrict;
395} /* if any kind of handler */
396# endif /* !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE*/
397
398
399/**
400 * \#PF Handler for raw-mode guest execution.
401 *
402 * @returns VBox status code (appropriate for trap handling and GC return).
403 *
404 * @param pVCpu The cross context virtual CPU structure.
405 * @param uErr The trap error code.
406 * @param pRegFrame Trap register frame.
407 * @param pvFault The fault address.
408 * @param pfLockTaken PGM lock taken here or not (out)
409 */
410PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken)
411{
412 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
413
414 *pfLockTaken = false;
415
416# if ( PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT \
417 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
418 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
419 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
420 && PGM_SHW_TYPE != PGM_TYPE_NONE
421 int rc;
422
423# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
424 /*
425 * Walk the guest page translation tables and check if it's a guest fault.
426 */
427 PGMPTWALK Walk;
428 GSTPTWALK GstWalk;
429 rc = PGM_GST_NAME(Walk)(pVCpu, pvFault, &Walk, &GstWalk);
430 if (RT_FAILURE_NP(rc))
431 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
432
433 /* assert some GstWalk sanity. */
434# if PGM_GST_TYPE == PGM_TYPE_AMD64
435 /*AssertMsg(GstWalk.Pml4e.u == GstWalk.pPml4e->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pml4e.u, (uint64_t)GstWalk.pPml4e->u)); - not always true with SMP guests. */
436# endif
437# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
438 /*AssertMsg(GstWalk.Pdpe.u == GstWalk.pPdpe->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pdpe.u, (uint64_t)GstWalk.pPdpe->u)); - ditto */
439# endif
440 /*AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); - ditto */
441 /*AssertMsg(GstWalk.Core.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); - ditto */
442 Assert(Walk.fSucceeded);
443 Assert(Walk.fEffective & PGM_PTATTRS_R_MASK);
444
445 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
446 {
447 if ( ( (uErr & X86_TRAP_PF_RW)
448 && !(Walk.fEffective & PGM_PTATTRS_W_MASK)
449 && ( (uErr & X86_TRAP_PF_US)
450 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
451 || ((uErr & X86_TRAP_PF_US) && !(Walk.fEffective & PGM_PTATTRS_US_MASK))
452 || ((uErr & X86_TRAP_PF_ID) && (Walk.fEffective & PGM_PTATTRS_NX_MASK))
453 )
454 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerGuestFault)(pVCpu, &Walk, uErr));
455 }
456
457 /* Take the big lock now before we update flags. */
458 *pfLockTaken = true;
459 PGM_LOCK_VOID(pVM);
460
461 /*
462 * Set the accessed and dirty flags.
463 */
464 /** @todo Should probably use cmpxchg logic here as we're potentially racing
465 * other CPUs in SMP configs. (the lock isn't enough, since we take it
466 * after walking and the page tables could be stale already) */
467# if PGM_GST_TYPE == PGM_TYPE_AMD64
468 if (!(GstWalk.Pml4e.u & X86_PML4E_A))
469 {
470 GstWalk.Pml4e.u |= X86_PML4E_A;
471 GST_ATOMIC_OR(&GstWalk.pPml4e->u, X86_PML4E_A);
472 }
473 if (!(GstWalk.Pdpe.u & X86_PDPE_A))
474 {
475 GstWalk.Pdpe.u |= X86_PDPE_A;
476 GST_ATOMIC_OR(&GstWalk.pPdpe->u, X86_PDPE_A);
477 }
478# endif
479 if (Walk.fBigPage)
480 {
481 Assert(GstWalk.Pde.u & X86_PDE_PS);
482 if (uErr & X86_TRAP_PF_RW)
483 {
484 if ((GstWalk.Pde.u & (X86_PDE4M_A | X86_PDE4M_D)) != (X86_PDE4M_A | X86_PDE4M_D))
485 {
486 GstWalk.Pde.u |= X86_PDE4M_A | X86_PDE4M_D;
487 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A | X86_PDE4M_D);
488 }
489 }
490 else
491 {
492 if (!(GstWalk.Pde.u & X86_PDE4M_A))
493 {
494 GstWalk.Pde.u |= X86_PDE4M_A;
495 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE4M_A);
496 }
497 }
498 }
499 else
500 {
501 Assert(!(GstWalk.Pde.u & X86_PDE_PS));
502 if (!(GstWalk.Pde.u & X86_PDE_A))
503 {
504 GstWalk.Pde.u |= X86_PDE_A;
505 GST_ATOMIC_OR(&GstWalk.pPde->u, X86_PDE_A);
506 }
507
508 if (uErr & X86_TRAP_PF_RW)
509 {
510# ifdef VBOX_WITH_STATISTICS
511 if (GstWalk.Pte.u & X86_PTE_D)
512 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageAlreadyDirty));
513 else
514 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtiedPage));
515# endif
516 if ((GstWalk.Pte.u & (X86_PTE_A | X86_PTE_D)) != (X86_PTE_A | X86_PTE_D))
517 {
518 GstWalk.Pte.u |= X86_PTE_A | X86_PTE_D;
519 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A | X86_PTE_D);
520 }
521 }
522 else
523 {
524 if (!(GstWalk.Pte.u & X86_PTE_A))
525 {
526 GstWalk.Pte.u |= X86_PTE_A;
527 GST_ATOMIC_OR(&GstWalk.pPte->u, X86_PTE_A);
528 }
529 }
530 Assert(GstWalk.Pte.u == GstWalk.pPte->u);
531 }
532#if 0
533 /* Disabling this since it's not reliable for SMP, see @bugref{10092#c22}. */
534 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u,
535 ("%RX64 %RX64 pPte=%p pPde=%p Pte=%RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u, GstWalk.pPte, GstWalk.pPde, (uint64_t)GstWalk.pPte->u));
536#endif
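    /* Editor's note: a rough sketch of the cmpxchg-style update the @todo above
     * alludes to, assuming a 64-bit guest PTE (PAE/AMD64); purely illustrative,
     * not what the code currently does. */
# if 0
    for (;;)
    {
        uint64_t const uPteOld = ASMAtomicReadU64(&GstWalk.pPte->u);
        uint64_t const uPteNew = uPteOld | X86_PTE_A | X86_PTE_D;
        if (uPteOld == uPteNew)
            break;              /* A and D already set. */
        if (ASMAtomicCmpXchgU64(&GstWalk.pPte->u, uPteNew, uPteOld))
            break;              /* We won the race. */
        /* else: another CPU changed the PTE; re-read and retry. */
    }
# endif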
537
538# else /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
539 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A}; /** @todo eliminate this */
540
541 /* Take the big lock now. */
542 *pfLockTaken = true;
543 PGM_LOCK_VOID(pVM);
544# endif /* !PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
545
546# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
547 /*
548 * If it is a reserved bit fault we know that it is an MMIO (access
549 * handler) related fault and can skip some 200 lines of code.
550 */
551 if (uErr & X86_TRAP_PF_RSVD)
552 {
553 Assert(uErr & X86_TRAP_PF_P);
554 PPGMPAGE pPage;
555# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
556 rc = pgmPhysGetPageEx(pVM, Walk.GCPhys, &pPage);
557 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
558 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
559 pfLockTaken, &Walk, &GstWalk));
560 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
561# else
562 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault), &pPage);
563 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
564 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage,
565 pfLockTaken));
566 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
567# endif
568 AssertRC(rc);
569 PGM_INVL_PG(pVCpu, pvFault);
570 return rc; /* Restart with the corrected entry. */
571 }
572# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
573
574 /*
575 * Fetch the guest PDE, PDPE and PML4E.
576 */
577# if PGM_SHW_TYPE == PGM_TYPE_32BIT
578 const unsigned iPDDst = pvFault >> SHW_PD_SHIFT;
579 PX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
580
581# elif PGM_SHW_TYPE == PGM_TYPE_PAE
582 const unsigned iPDDst = (pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK; /* pPDDst index, not used with the pool. */
583 PX86PDPAE pPDDst;
584# if PGM_GST_TYPE == PGM_TYPE_PAE
585 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, GstWalk.Pdpe.u, &pPDDst);
586# else
587 rc = pgmShwSyncPaePDPtr(pVCpu, pvFault, X86_PDPE_P, &pPDDst); /* RW, US and A are reserved in PAE mode. */
588# endif
589 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
590
591# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
592 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
593 PX86PDPAE pPDDst;
594# if PGM_GST_TYPE == PGM_TYPE_PROT /* (AMD-V nested paging) */
595 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A,
596 X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A, &pPDDst);
597# else
598 rc = pgmShwSyncLongModePDPtr(pVCpu, pvFault, GstWalk.Pml4e.u, GstWalk.Pdpe.u, &pPDDst);
599# endif
600 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
601
602# elif PGM_SHW_TYPE == PGM_TYPE_EPT
603 const unsigned iPDDst = ((pvFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
604 PEPTPD pPDDst;
605 rc = pgmShwGetEPTPDPtr(pVCpu, pvFault, NULL, &pPDDst);
606 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
607# endif
608 Assert(pPDDst);
609
610# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
611 /*
612 * Dirty page handling.
613 *
614 * If we successfully correct the write protection fault due to dirty bit
615 * tracking, then return immediately.
616 */
617 if (uErr & X86_TRAP_PF_RW) /* write fault? */
618 {
619 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
620 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, &pPDDst->a[iPDDst], GstWalk.pPde, pvFault);
621 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyBitTracking), a);
622 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
623 {
624 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0
625 = rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT
626 ? &pVCpu->pgm.s.Stats.StatRZTrap0eTime2DirtyAndAccessed
627 : &pVCpu->pgm.s.Stats.StatRZTrap0eTime2GuestTrap; });
628 Log8(("Trap0eHandler: returns VINF_SUCCESS\n"));
629 return VINF_SUCCESS;
630 }
631#ifdef DEBUG_bird
632 AssertMsg(GstWalk.Pde.u == GstWalk.pPde->u || GstWalk.pPte->u == GstWalk.pPde->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pde.u, (uint64_t)GstWalk.pPde->u)); // - triggers with smp w7 guests.
633 AssertMsg(Walk.fBigPage || GstWalk.Pte.u == GstWalk.pPte->u || pVM->cCpus > 1, ("%RX64 %RX64\n", (uint64_t)GstWalk.Pte.u, (uint64_t)GstWalk.pPte->u)); // - ditto.
634#endif
635 }
636
637# if 0 /* rarely useful; leave for debugging. */
638 STAM_COUNTER_INC(&pVCpu->pgm.s.StatRZTrap0ePD[iPDSrc]);
639# endif
640# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
641
642 /*
643 * A common case is the not-present error caused by lazy page table syncing.
644 *
645 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
646 * here so we can safely assume that the shadow PT is present when calling
647 * SyncPage later.
648 *
649 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
650 * of mapping conflict and defer to SyncCR3 in R3.
651 * (Again, we do NOT support access handlers for non-present guest pages.)
652 *
653 */
654# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
655 Assert(GstWalk.Pde.u & X86_PDE_P);
656# endif
657 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
658 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
659 {
660 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
661# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
662 LogFlow(("=>SyncPT %04x = %08RX64\n", (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, (uint64_t)GstWalk.Pde.u));
663 rc = PGM_BTH_NAME(SyncPT)(pVCpu, (pvFault >> GST_PD_SHIFT) & GST_PD_MASK, GstWalk.pPd, pvFault);
664# else
665 LogFlow(("=>SyncPT pvFault=%RGv\n", pvFault));
666 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0, NULL, pvFault);
667# endif
668 if (RT_SUCCESS(rc))
669 return rc;
670 Log(("SyncPT: %RGv failed!! rc=%Rrc\n", pvFault, rc));
671 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
672 return VINF_PGM_SYNC_CR3;
673 }
674
675 /*
676 * Check if this fault address is flagged for special treatment,
677 * which means we'll have to figure out the physical address and
678 * check flags associated with it.
679 *
680 * ASSUME that we can limit any special access handling to pages
681 * in page tables which the guest believes to be present.
682 */
683# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
684 RTGCPHYS GCPhys = Walk.GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
685# else
686 RTGCPHYS GCPhys = PGM_A20_APPLY(pVCpu, (RTGCPHYS)pvFault & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
687# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) */
688 PPGMPAGE pPage;
689 rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
690 if (RT_FAILURE(rc))
691 {
692 /*
693 * When the guest accesses invalid physical memory (e.g. probing
694 * of RAM or accessing a remapped MMIO range), then we'll fall
695 * back to the recompiler to emulate the instruction.
696 */
697 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhys, rc));
698 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
699 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
700 return VINF_EM_RAW_EMULATE_INSTR;
701 }
702
703 /*
704 * Any handlers for this page?
705 */
706 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
707# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
708 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken,
709 &Walk, &GstWalk));
710# else
711 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pvFault, pPage, pfLockTaken));
712# endif
713
714 /*
715 * We are here only if page is present in Guest page tables and
716 * trap is not handled by our handlers.
717 *
718 * Check it for page out-of-sync situation.
719 */
720 if (!(uErr & X86_TRAP_PF_P))
721 {
722 /*
723 * Page is not present in our page tables. Try to sync it!
724 */
725 if (uErr & X86_TRAP_PF_US)
726 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
727 else /* supervisor */
728 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
729
730 if (PGM_PAGE_IS_BALLOONED(pPage))
731 {
732 /* Emulate reads from ballooned pages as they are not present in
733 our shadow page tables. (Required for e.g. Solaris guests; soft
734 ecc, random nr generator.) */
735 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
736 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
737 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
738 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
739 return rc;
740 }
741
742# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
743 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, PGM_SYNC_NR_PAGES, uErr);
744# else
745 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, PGM_SYNC_NR_PAGES, uErr);
746# endif
747 if (RT_SUCCESS(rc))
748 {
749 /* The page was successfully synced, return to the guest. */
750 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
751 return VINF_SUCCESS;
752 }
753 }
754 else /* uErr & X86_TRAP_PF_P: */
755 {
756 /*
757 * Write protected pages are made writable when the guest makes the
758 * first write to it. This happens for pages that are shared, write
759 * monitored or not yet allocated.
760 *
761 * We may also end up here when CR0.WP=0 in the guest.
762 *
763 * Also, a side effect of not flushing global PDEs is out-of-sync
764 * pages due to physically monitored regions that are no longer valid.
765 * Assume for now it only applies to the read/write flag.
766 */
767 if (uErr & X86_TRAP_PF_RW)
768 {
769 /*
770 * Check if it is a read-only page.
771 */
772 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
773 {
774 Log(("PGM #PF: Make writable: %RGp %R[pgmpage] pvFault=%RGp uErr=%#x\n", GCPhys, pPage, pvFault, uErr));
775 Assert(!PGM_PAGE_IS_ZERO(pPage));
776 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
777 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
778
779 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
780 if (rc != VINF_SUCCESS)
781 {
782 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
783 return rc;
784 }
785 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
786 return VINF_EM_NO_MEMORY;
787 }
788
789# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
790 /*
791 * Check to see if we need to emulate the instruction if CR0.WP=0.
792 */
793 if ( !(Walk.fEffective & PGM_PTATTRS_W_MASK)
794 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
795 && CPUMGetGuestCPL(pVCpu) < 3)
796 {
797 Assert((uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_P)) == (X86_TRAP_PF_RW | X86_TRAP_PF_P));
798
799 /*
800 * The Netware WP0+RO+US hack.
801 *
802 * Netware sometimes(/always?) runs with WP0. It has been observed doing
803 * excessive write accesses to pages which are mapped with US=1 and RW=0
804 * while WP=0. This causes a lot of exits and extremely slow execution.
805 * To avoid trapping and emulating every write here, we change the shadow
806 * page table entry to map it as US=0 and RW=1 until user mode tries to
807 * access it again (see further below). We count these shadow page table
808 * changes so we can avoid having to clear the page pool every time the WP
809 * bit changes to 1 (see PGMCr0WpEnabled()).
810 */
811# if (PGM_GST_TYPE == PGM_TYPE_32BIT || PGM_GST_TYPE == PGM_TYPE_PAE) && 1
812 if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
813 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
814 && pVM->cCpus == 1 /* Sorry, no go on SMP. Add CFGM option? */)
815 {
816 Log(("PGM #PF: Netware WP0+RO+US hack: pvFault=%RGp uErr=%#x (big=%d)\n", pvFault, uErr, Walk.fBigPage));
817 rc = pgmShwMakePageSupervisorAndWritable(pVCpu, pvFault, Walk.fBigPage, PGM_MK_PG_IS_WRITE_FAULT);
818 if (rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3)
819 {
820 PGM_INVL_PG(pVCpu, pvFault);
821 pVCpu->pgm.s.cNetwareWp0Hacks++;
822 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsHack; });
823 return rc;
824 }
825 AssertMsg(RT_FAILURE_NP(rc), ("%Rrc\n", rc));
826 Log(("pgmShwMakePageSupervisorAndWritable(%RGv) failed with rc=%Rrc - ignored\n", pvFault, rc));
827 }
828# endif
829
830 /* Interpret the access. */
831 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, pvFault));
832 Log(("PGM #PF: WP0 emulation (pvFault=%RGp uErr=%#x cpl=%d fBig=%d fEffUs=%d)\n", pvFault, uErr, CPUMGetGuestCPL(pVCpu), Walk.fBigPage, !!(Walk.fEffective & PGM_PTATTRS_US_MASK)));
833 if (RT_SUCCESS(rc))
834 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulInRZ);
835 else
836 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eWPEmulToR3);
837 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2WPEmulation; });
838 return rc;
839 }
840# endif
841 /// @todo count the above case; else
842 if (uErr & X86_TRAP_PF_US)
843 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
844 else /* supervisor */
845 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
846
847 /*
848 * Sync the page.
849 *
850 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
851 * page is not present, which is not true in this case.
852 */
853# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
854 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
855# else
856 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, pvFault, 1, uErr);
857# endif
858 if (RT_SUCCESS(rc))
859 {
860 /*
861 * Page was successfully synced, return to guest but invalidate
862 * the TLB first as the page is very likely to be in it.
863 */
864# if PGM_SHW_TYPE == PGM_TYPE_EPT
865 HMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
866# else
867 PGM_INVL_PG(pVCpu, pvFault);
868# endif
869# ifdef VBOX_STRICT
870 PGMPTWALK GstPageWalk;
871 GstPageWalk.GCPhys = RTGCPHYS_MAX;
872 if (!pVM->pgm.s.fNestedPaging)
873 {
874 rc = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
875 AssertMsg(RT_SUCCESS(rc) && ((GstPageWalk.fEffective & X86_PTE_RW) || ((CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG && CPUMGetGuestCPL(pVCpu) < 3)), ("rc=%Rrc fPageGst=%RX64\n", rc, GstPageWalk.fEffective));
876 LogFlow(("Obsolete physical monitor page out of sync %RGv - phys %RGp flags=%08llx\n", pvFault, GstPageWalk.GCPhys, GstPageWalk.fEffective));
877 }
878# if 0 /* Bogus! Triggers incorrectly with w7-64 and later for the SyncPage case: "Pde at %RGv changed behind our back?" */
879 uint64_t fPageShw = 0;
880 rc = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
881 AssertMsg((RT_SUCCESS(rc) && (fPageShw & X86_PTE_RW)) || pVM->cCpus > 1 /* new monitor can be installed/page table flushed between the trap exit and PGMTrap0eHandler */,
882 ("rc=%Rrc fPageShw=%RX64 GCPhys2=%RGp fPageGst=%RX64 pvFault=%RGv\n", rc, fPageShw, GstPageWalk.GCPhys, fPageGst, pvFault));
883# endif
884# endif /* VBOX_STRICT */
885 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
886 return VINF_SUCCESS;
887 }
888 }
889# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
890 /*
891 * Check for Netware WP0+RO+US hack from above and undo it when user
892 * mode accesses the page again.
893 */
894 else if ( (Walk.fEffective & (PGM_PTATTRS_W_MASK | PGM_PTATTRS_US_MASK)) == PGM_PTATTRS_US_MASK
895 && (Walk.fBigPage || (GstWalk.Pde.u & X86_PDE_RW))
896 && pVCpu->pgm.s.cNetwareWp0Hacks > 0
897 && (CPUMGetGuestCR0(pVCpu) & (X86_CR0_WP | X86_CR0_PG)) == X86_CR0_PG
898 && CPUMGetGuestCPL(pVCpu) == 3
899 && pVM->cCpus == 1
900 )
901 {
902 Log(("PGM #PF: Undo netware WP0+RO+US hack: pvFault=%RGp uErr=%#x\n", pvFault, uErr));
903 rc = PGM_BTH_NAME(SyncPage)(pVCpu, GstWalk.Pde, pvFault, 1, uErr);
904 if (RT_SUCCESS(rc))
905 {
906 PGM_INVL_PG(pVCpu, pvFault);
907 pVCpu->pgm.s.cNetwareWp0Hacks--;
908 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Wp0RoUsUnhack; });
909 return VINF_SUCCESS;
910 }
911 }
912# endif /* PGM_WITH_PAGING */
913
914 /** @todo else: why are we here? */
915
916# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && defined(VBOX_STRICT)
917 /*
918 * Check for VMM page flags vs. Guest page flags consistency.
919 * Currently only for debug purposes.
920 */
921 if (RT_SUCCESS(rc))
922 {
923 /* Get guest page flags. */
924 PGMPTWALK GstPageWalk;
925 int rc2 = PGMGstGetPage(pVCpu, pvFault, &GstPageWalk);
926 if (RT_SUCCESS(rc2))
927 {
928 uint64_t fPageShw = 0;
929 rc2 = PGMShwGetPage(pVCpu, pvFault, &fPageShw, NULL);
930
931#if 0
932 /*
933 * Compare page flags.
934 * Note: we have AVL, A, D bits desynced.
935 */
936 AssertMsg( (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
937 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK))
938 || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0
939 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
940 == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US))
941 && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW
942 && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US),
943 ("Page flags mismatch! pvFault=%RGv uErr=%x GCPhys=%RGp fPageShw=%RX64 fPageGst=%RX64 rc=%d\n",
944 pvFault, (uint32_t)uErr, GCPhys, fPageShw, fPageGst, rc));
94501:01:15.623511 00:08:43.266063 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94601:01:15.623511 00:08:43.266064 Location : e:\vbox\svn\trunk\srcPage flags mismatch! pvFault=fffff801b0d7b000 uErr=11 GCPhys=0000000019b52000 fPageShw=0 fPageGst=77b0000000000121 rc=0
947
94801:01:15.625516 00:08:43.268051 Expression: (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK)) || ( pVCpu->pgm.s.cNetwareWp0Hacks > 0 && (fPageShw & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) == (fPageGst & ~(X86_PTE_A | X86_PTE_D | X86_PTE_AVL_MASK | X86_PTE_RW | X86_PTE_US)) && (fPageShw & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW && (fPageGst & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_US)
94901:01:15.625516 00:08:43.268051 Location :
950e:\vbox\svn\trunk\srcPage flags mismatch!
951pvFault=fffff801b0d7b000
952 uErr=11 X86_TRAP_PF_ID | X86_TRAP_PF_P
953GCPhys=0000000019b52000
954fPageShw=0
955fPageGst=77b0000000000121
956rc=0
957#endif
958
959 }
960 else
961 AssertMsgFailed(("PGMGstGetPage rc=%Rrc\n", rc));
962 }
963 else
964 AssertMsgFailed(("PGMGCGetPage rc=%Rrc\n", rc));
965# endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && VBOX_STRICT */
966 }
967
968
969 /*
970 * If we get here it is because something failed above, i.e. most likely guru
971 * meditation time.
972 */
973 LogRel(("%s: returns rc=%Rrc pvFault=%RGv uErr=%RX64 cs:rip=%04x:%08RX64\n",
974 __PRETTY_FUNCTION__, rc, pvFault, (uint64_t)uErr, pRegFrame->cs.Sel, pRegFrame->rip));
975 return rc;
976
977# else /* Nested paging, EPT except PGM_GST_TYPE = PROT, NONE. */
978 NOREF(uErr); NOREF(pRegFrame); NOREF(pvFault);
979 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
980 return VERR_PGM_NOT_USED_IN_MODE;
981# endif
982}
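/* Editor's note, summarizing the Trap0eHandler flow above for readers: walk the
 * guest tables and reflect genuine guest faults; set the guest A/D bits; take the
 * RSVD-bit shortcut straight to the access-handler/SyncPage path for MMIO; try the
 * dirty-bit write-fault fix-up; lazily SyncPT not-present shadow PDEs; dispatch
 * physical access handlers; otherwise sync the out-of-sync page, handling the
 * CR0.WP=0 and Netware WP0+RO+US special cases along the way. */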
983
984
985/**
986 * Nested \#PF handler for nested-guest hardware-assisted execution using nested
987 * paging.
988 *
989 * @returns VBox status code (appropriate for trap handling and GC return).
990 * @param pVCpu The cross context virtual CPU structure.
991 * @param uErr The fault error (X86_TRAP_PF_*).
992 * @param pRegFrame The register frame.
993 * @param GCPhysNested The nested-guest physical address being accessed.
994 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
995 * caused this fault. If @c false, GCPtrNested must be
996 * 0.
997 * @param GCPtrNested The nested-guest linear address that caused this
998 * fault.
999 * @param pWalk The guest page table walk result.
1000 * @param pfLockTaken Where to store whether the PGM lock is still held
1001 * when this function completes.
1002 */
1003PGM_BTH_DECL(int, NestedTrap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysNested,
1004 bool fIsLinearAddrValid, RTGCPTR GCPtrNested, PPGMPTWALK pWalk, bool *pfLockTaken)
1005{
1006 *pfLockTaken = false;
1007# if defined(VBOX_WITH_NESTED_HWVIRT_VMX_EPT) \
1008 && ( PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT || PGM_GST_TYPE == PGM_TYPE_32BIT \
1009 || PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1010 && PGM_SHW_TYPE == PGM_TYPE_EPT
1011
1012 Assert(CPUMIsGuestVmxEptPagingEnabled(pVCpu));
1013
1014 /*
1015 * Walk the guest EPT tables and check if it's an EPT violation or misconfiguration.
1016 */
1017 PGMPTWALKGST GstWalkAll;
1018 int rc = pgmGstSlatWalk(pVCpu, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk, &GstWalkAll);
1019 if (RT_FAILURE(rc))
1020 return rc;
1021
1022 Assert(GstWalkAll.enmType == PGMPTWALKGSTTYPE_EPT);
1023 Assert(pWalk->fSucceeded);
1024 Assert(pWalk->fEffective & PGM_PTATTRS_R_MASK);
1025 Assert(pWalk->fIsSlat);
1026
1027 if (uErr & (X86_TRAP_PF_RW | X86_TRAP_PF_US | X86_TRAP_PF_ID))
1028 {
1029 if ( ( (uErr & X86_TRAP_PF_RW)
1030 && !(pWalk->fEffective & PGM_PTATTRS_W_MASK)
1031 && ( (uErr & X86_TRAP_PF_US)
1032 || CPUMIsGuestR0WriteProtEnabled(pVCpu)) )
1033 || ((uErr & X86_TRAP_PF_US) && !(pWalk->fEffective & PGM_PTATTRS_US_MASK))
1034 || ((uErr & X86_TRAP_PF_ID) && (pWalk->fEffective & PGM_PTATTRS_NX_MASK))
1035 )
1036 return VERR_ACCESS_DENIED;
1037 }
1038
1039 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1040 RTGCPHYS const GCPhysFault = PGM_A20_APPLY(pVCpu, GCPhysNested & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
1041 GSTPDE const PdeSrcDummy = { X86_PDE_P | X86_PDE_US | X86_PDE_RW | X86_PDE_A };
1042
1043 /* Take the big lock now. */
1044 *pfLockTaken = true;
1045 PGM_LOCK_VOID(pVM);
1046
1047 /*
1048 * Check if this is an APIC-access page access (VMX specific).
1049 */
1050 RTGCPHYS const GCPhysApicAccess = CPUMGetGuestVmxApicAccessPageAddr(pVCpu);
1051 if ((pWalk->GCPhys & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysApicAccess)
1052 {
1053 PPGMPAGE pPage;
1054 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPhysApicAccess), &pPage);
1055 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1056 {
1057 rc = VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, pWalk->GCPhys, pPage,
1058 pfLockTaken));
1059 return rc;
1060 }
1061 }
1062
1063# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1064 /*
1065 * Check if this is an MMIO access.
1066 */
1067 if (uErr & X86_TRAP_PF_RSVD)
1068 {
1069 PPGMPAGE pPage;
1070 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, (RTGCPHYS)GCPhysFault), &pPage);
1071 if (RT_SUCCESS(rc) && PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1072 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1073 pfLockTaken));
1074 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1075 AssertRC(rc);
1076 HMInvalidatePhysPage(pVM, GCPhysFault);
1077 return rc; /* Restart with the corrected entry. */
1078 }
1079# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1080
1081 /*
1082 * Fetch the guest EPT page directory pointer.
1083 */
1084 const unsigned iPDDst = ((GCPhysFault >> SHW_PD_SHIFT) & SHW_PD_MASK);
1085 PEPTPD pPDDst;
1086 rc = pgmShwGetEPTPDPtr(pVCpu, GCPhysFault, NULL /* ppPdpt */, &pPDDst);
1087 AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
1088 Assert(pPDDst);
1089
1090 /*
1091 * A common case is the not-present error caused by lazy page table syncing.
1092 *
1093 * It is IMPORTANT that we weed out any access to non-present shadow PDEs
1094 * here so we can safely assume that the shadow PT is present when calling
1095 * SyncPage later.
1096 *
1097 * On failure, we ASSUME that SyncPT is out of memory or detected some kind
1098 * of mapping conflict and defer to SyncCR3 in R3.
1099 * (Again, we do NOT support access handlers for non-present guest pages.)
1100 *
1101 */
1102 if ( !(uErr & X86_TRAP_PF_P) /* not set means page not present instead of page protection violation */
1103 && !SHW_PDE_IS_P(pPDDst->a[iPDDst]))
1104 {
1105 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2SyncPT; });
1106 LogFlow(("=>SyncPT GCPhysFault=%RGp\n", GCPhysFault));
1107 rc = PGM_BTH_NAME(SyncPT)(pVCpu, 0 /* iPDSrc */, NULL /* pPDSrc */, GCPhysFault);
1108 if (RT_SUCCESS(rc))
1109 return rc;
1110 Log(("SyncPT: %RGp failed!! rc=%Rrc\n", GCPhysFault, rc));
1111 VMCPU_FF_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3); /** @todo no need to do global sync, right? */
1112 return VINF_PGM_SYNC_CR3;
1113 }
1114
1115 /*
1116 * Check if this fault address is flagged for special treatment,
1117 * which means we'll have to figure out the physical address and
1118 * check flags associated with it.
1119 *
1120 * ASSUME that we can limit any special access handling to pages
1121 * in page tables which the guest believes to be present.
1122 */
1123 PPGMPAGE pPage;
1124 rc = pgmPhysGetPageEx(pVM, GCPhysFault, &pPage);
1125 if (RT_FAILURE(rc))
1126 {
1127 /*
1128 * When the guest accesses invalid physical memory (e.g. probing
1129 * of RAM or accessing a remapped MMIO range), then we'll fall
1130 * back to the recompiler to emulate the instruction.
1131 */
1132 LogFlow(("PGM #PF: pgmPhysGetPageEx(%RGp) failed with %Rrc\n", GCPhysFault, rc));
1133 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.StatRZTrap0eHandlersInvalid);
1134 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2InvalidPhys; });
1135 return VINF_EM_RAW_EMULATE_INSTR;
1136 }
1137
1138 /*
1139 * Any handlers for this page?
1140 */
1141 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1142 return VBOXSTRICTRC_TODO(PGM_BTH_NAME(Trap0eHandlerDoAccessHandlers)(pVCpu, uErr, pRegFrame, GCPhysFault, pPage,
1143 pfLockTaken));
1144
1145 /*
1146 * We are here only if page is present in Guest page tables and
1147 * trap is not handled by our handlers.
1148 *
1149 * Check it for page out-of-sync situation.
1150 */
1151 if (!(uErr & X86_TRAP_PF_P))
1152 {
1153 /*
1154 * Page is not present in our page tables. Try to sync it!
1155 */
1156 if (uErr & X86_TRAP_PF_US)
1157 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
1158 else /* supervisor */
1159 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
1160
1161 if (PGM_PAGE_IS_BALLOONED(pPage))
1162 {
1163 /* Emulate reads from ballooned pages as they are not present in
1164 our shadow page tables. (Required for e.g. Solaris guests; soft
1165 ecc, random nr generator.) */
1166 rc = VBOXSTRICTRC_TODO(PGMInterpretInstruction(pVM, pVCpu, pRegFrame, GCPhysFault));
1167 LogFlow(("PGM: PGMInterpretInstruction balloon -> rc=%d pPage=%R[pgmpage]\n", rc, pPage));
1168 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncBallloon));
1169 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2Ballooned; });
1170 return rc;
1171 }
1172
1173 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, PGM_SYNC_NR_PAGES, uErr);
1174 if (RT_SUCCESS(rc))
1175 {
1176 /* The page was successfully synced, return to the guest. */
1177 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSync; });
1178 return VINF_SUCCESS;
1179 }
1180 }
1181 else
1182 {
1183 /*
1184 * Write protected pages are made writable when the guest makes the
1185 * first write to it. This happens for pages that are shared, write
1186 * monitored or not yet allocated.
1187 *
1188 * We may also end up here when CR0.WP=0 in the guest.
1189 *
1190 * Also, a side effect of not flushing global PDEs is out-of-sync
1191 * pages due to physically monitored regions that are no longer valid.
1192 * Assume for now it only applies to the read/write flag.
1193 */
1194 if (uErr & X86_TRAP_PF_RW)
1195 {
1196 /*
1197 * Check if it is a read-only page.
1198 */
1199 if (PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1200 {
1201 Assert(!PGM_PAGE_IS_ZERO(pPage));
1202 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhysFault));
1203 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2MakeWritable; });
1204
1205 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysFault);
1206 if (rc != VINF_SUCCESS)
1207 {
1208 AssertMsg(rc == VINF_PGM_SYNC_CR3 || RT_FAILURE(rc), ("%Rrc\n", rc));
1209 return rc;
1210 }
1211 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
1212 return VINF_EM_NO_MEMORY;
1213 }
1214
1215 if (uErr & X86_TRAP_PF_US)
1216 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUserWrite));
1217 else
1218 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisorWrite));
1219
1220 /*
1221 * Sync the page.
1222 *
1223 * Note: Do NOT use PGM_SYNC_NR_PAGES here. That only works if the
1224 * page is not present, which is not true in this case.
1225 */
1226 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrcDummy, GCPhysFault, 1, uErr);
1227 if (RT_SUCCESS(rc))
1228 {
1229 /*
1230 * Page was successfully synced, return to guest but invalidate
1231 * the TLB first as the page is very likely to be in it.
1232 */
1233 HMInvalidatePhysPage(pVM, GCPhysFault);
1234 STAM_STATS({ pVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pVCpu->pgm.s.Stats.StatRZTrap0eTime2OutOfSyncHndObs; });
1235 return VINF_SUCCESS;
1236 }
1237 }
1238 }
1239
1240 /*
1241 * If we get here it is because something failed above, i.e. most likely guru meditation time.
1242 */
1243 LogRelFunc(("returns rc=%Rrc GCPhysFault=%RGp uErr=%RX64 cs:rip=%04x:%08RX64\n", rc, GCPhysFault, (uint64_t)uErr,
1244 pRegFrame->cs.Sel, pRegFrame->rip));
1245 return rc;
1246
1247# else
1248 RT_NOREF7(pVCpu, uErr, pRegFrame, GCPhysNested, fIsLinearAddrValid, GCPtrNested, pWalk);
1249 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
1250 return VERR_PGM_NOT_USED_IN_MODE;
1251# endif
1252}
1253
1254#endif /* !IN_RING3 */
1255
1256
1257/**
1258 * Emulation of the invlpg instruction.
1259 *
1260 *
1261 * @returns VBox status code.
1262 *
1263 * @param pVCpu The cross context virtual CPU structure.
1264 * @param GCPtrPage Page to invalidate.
1265 *
1266 * @remark ASSUMES that the guest is updating before invalidating. This order
1267 * isn't required by the CPU, so this is speculative and could cause
1268 * trouble.
1269 * @remark No TLB shootdown is done on any other VCPU as we assume that
1270 * invlpg emulation is the *only* reason for calling this function.
1271 * (The guest has to shoot down TLB entries on other CPUs itself)
1272 * Currently true, but keep in mind!
1273 *
1274 * @todo Clean this up! Most of it is (or should be) no longer necessary as we catch all page table accesses.
1275 * Should only be required when PGMPOOL_WITH_OPTIMIZED_DIRTY_PT is active (PAE or AMD64 (for now))
1276 */
1277PGM_BTH_DECL(int, InvalidatePage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
1278{
1279#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1280 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
1281 && PGM_SHW_TYPE != PGM_TYPE_NONE
1282 int rc;
1283 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1284 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1285
1286 PGM_LOCK_ASSERT_OWNER(pVM);
1287
1288 LogFlow(("InvalidatePage %RGv\n", GCPtrPage));
1289
1290 /*
1291 * Get the shadow PD entry and skip out if this PD isn't present.
1292 * (Guessing that it is frequent for a shadow PDE to not be present, do this first.)
1293 */
1294# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1295 const unsigned iPDDst = (uint32_t)GCPtrPage >> SHW_PD_SHIFT;
1296 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1297
1298 /* Fetch the pgm pool shadow descriptor. */
1299 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1300# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1301 if (!pShwPde)
1302 {
1303 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1304 return VINF_SUCCESS;
1305 }
1306# else
1307 Assert(pShwPde);
1308# endif
1309
1310# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1311 const unsigned iPdpt = (uint32_t)GCPtrPage >> X86_PDPT_SHIFT;
1312 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
1313
1314 /* If the shadow PDPE isn't present, then skip the invalidate. */
1315# ifdef IN_RING3 /* Possible we didn't resync yet when called from REM. */
1316 if (!pPdptDst || !(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1317# else
1318 if (!(pPdptDst->a[iPdpt].u & X86_PDPE_P))
1319# endif
1320 {
1321 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1322 PGM_INVL_PG(pVCpu, GCPtrPage);
1323 return VINF_SUCCESS;
1324 }
1325
1326 /* Fetch the pgm pool shadow descriptor. */
1327 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
1328 AssertReturn(pShwPde, VERR_PGM_POOL_GET_PAGE_FAILED);
1329
1330 PX86PDPAE pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1331 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1332 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1333
1334# else /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1335 /* PML4 */
1336 /*const unsigned iPml4 = (GCPtrPage >> X86_PML4_SHIFT) & X86_PML4_MASK;*/
1337 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
1338 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1339 PX86PDPAE pPDDst;
1340 PX86PDPT pPdptDst;
1341 PX86PML4E pPml4eDst;
1342 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eDst, &pPdptDst, &pPDDst);
1343 if (rc != VINF_SUCCESS)
1344 {
1345 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
1346 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1347 PGM_INVL_PG(pVCpu, GCPtrPage);
1348 return VINF_SUCCESS;
1349 }
1350 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1351 Assert(pPDDst);
1352 Assert(pPdptDst->a[iPdpt].u & X86_PDPE_P);
1353
1354 /* Fetch the pgm pool shadow descriptor. */
1355 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & SHW_PDPE_PG_MASK);
1356 Assert(pShwPde);
1357
1358# endif /* PGM_SHW_TYPE == PGM_TYPE_AMD64 */
1359
1360 const SHWPDE PdeDst = *pPdeDst;
1361 if (!(PdeDst.u & X86_PDE_P))
1362 {
1363 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1364 PGM_INVL_PG(pVCpu, GCPtrPage);
1365 return VINF_SUCCESS;
1366 }
1367
1368 /*
1369 * Get the guest PD entry and calc big page.
1370 */
1371# if PGM_GST_TYPE == PGM_TYPE_32BIT
1372 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
1373 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
1374 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
1375# else /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1376 unsigned iPDSrc = 0;
1377# if PGM_GST_TYPE == PGM_TYPE_PAE
1378 X86PDPE PdpeSrcIgn;
1379 PX86PDPAE pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrcIgn);
1380# else /* AMD64 */
1381 PX86PML4E pPml4eSrcIgn;
1382 X86PDPE PdpeSrcIgn;
1383 PX86PDPAE pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrcIgn, &PdpeSrcIgn, &iPDSrc);
1384# endif
1385 GSTPDE PdeSrc;
1386
1387 if (pPDSrc)
1388 PdeSrc = pPDSrc->a[iPDSrc];
1389 else
1390 PdeSrc.u = 0;
1391# endif /* PGM_GST_TYPE != PGM_TYPE_32BIT */
1392 const bool fWasBigPage = RT_BOOL(PdeDst.u & PGM_PDFLAGS_BIG_PAGE);
1393 const bool fIsBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
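 /* Note: a 4KB <-> 2/4MB transition is only counted here; the stale shadow page
    table is caught further down when its GCPhys/kind no longer matches the guest
    PDE, and is then freed. */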
1394 if (fWasBigPage != fIsBigPage)
1395 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1396
1397# ifdef IN_RING3
1398 /*
1399 * If a CR3 Sync is pending we may ignore the invalidate page operation
1400 * depending on the kind of sync and whether it's a global page or not.
1401 * This doesn't make sense in GC/R0 so we'll skip it entirely there.
1402 */
1403# ifdef PGM_SKIP_GLOBAL_PAGEDIRS_ON_NONGLOBAL_FLUSH
1404 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
1405 || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)
1406 && fIsBigPage
1407 && (PdeSrc.u & X86_PDE4M_G)
1408 )
1409 )
1410# else
1411 if (VM_FF_IS_ANY_SET(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL) )
1412# endif
1413 {
1414 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePageSkipped));
1415 return VINF_SUCCESS;
1416 }
1417# endif /* IN_RING3 */
1418
1419 /*
1420 * Deal with the Guest PDE.
1421 */
1422 rc = VINF_SUCCESS;
1423 if (PdeSrc.u & X86_PDE_P)
1424 {
1425 Assert( (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
1426 && ((PdeSrc.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW) || pVCpu->pgm.s.cNetwareWp0Hacks > 0));
1427 if (!fIsBigPage)
1428 {
1429 /*
1430 * 4KB - page.
1431 */
1432 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1433 RTGCPHYS GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
1434
1435# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1436 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
1437 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
1438# endif
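 /* If the shadow PT still maps the same guest PT, just refresh the one PTE;
    otherwise the guest has repointed this PDE and the whole shadow PT is thrown
    away in the else branch below. */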
1439 if (pShwPage->GCPhys == GCPhys)
1440 {
1441 /* Syncing it here isn't 100% safe and it's probably not worth spending time syncing it. */
1442 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
1443
1444 PGSTPT pPTSrc;
1445 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
1446 if (RT_SUCCESS(rc))
1447 {
1448 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
1449 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
1450 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
1451 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
1452 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
1453 GCPtrPage, PteSrc.u & X86_PTE_P,
1454 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
1455 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
1456 (uint64_t)PteSrc.u,
1457 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
1458 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
1459 }
1460 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4KBPages));
1461 PGM_INVL_PG(pVCpu, GCPtrPage);
1462 }
1463 else
1464 {
1465 /*
1466 * The page table address changed.
1467 */
1468 LogFlow(("InvalidatePage: Out-of-sync at %RGp PdeSrc=%RX64 PdeDst=%RX64 ShwGCPhys=%RGp iPDDst=%#x\n",
1469 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, iPDDst));
1470 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1471 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1472 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDOutOfSync));
1473 PGM_INVL_VCPU_TLBS(pVCpu);
1474 }
1475 }
1476 else
1477 {
1478 /*
1479 * 2/4MB - page.
1480 */
1481 /* Before freeing the page, check if anything really changed. */
1482 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
1483 RTGCPHYS GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
1484# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
1485 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
1486 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
1487# endif
1488 if ( pShwPage->GCPhys == GCPhys
1489 && pShwPage->enmKind == BTH_PGMPOOLKIND_PT_FOR_BIG)
1490 {
1491 /* ASSUMES the given bits are identical for 4M and normal PDEs */
1492 /** @todo This test is wrong as it cannot check the G bit!
1493 * FIXME */
1494 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1495 == (PdeDst.u & (X86_PDE_P | X86_PDE_RW | X86_PDE_US))
1496 && ( (PdeSrc.u & X86_PDE4M_D) /** @todo rainy day: What about read-only 4M pages? not very common, but still... */
1497 || (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)))
1498 {
1499 LogFlow(("Skipping flush for big page containing %RGv (PD=%X .u=%RX64)-> nothing has changed!\n", GCPtrPage, iPDSrc, PdeSrc.u));
1500 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPagesSkip));
1501 return VINF_SUCCESS;
1502 }
1503 }
1504
1505 /*
1506 * Ok, the page table is present and it's been changed in the guest.
1507 * If we're in host context, we'll just mark it as not present taking the lazy approach.
1508 * We could do this for some flushes in GC too, but we need an algorithm for
1509 * deciding which 4MB pages contain code that is likely to be executed very soon.
1510 */
1511 LogFlow(("InvalidatePage: Out-of-sync PD at %RGp PdeSrc=%RX64 PdeDst=%RX64\n",
1512 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
1513 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1514 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1515 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePage4MBPages));
1516 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
1517 }
1518 }
1519 else
1520 {
1521 /*
1522 * Page directory is not present, mark shadow PDE not present.
1523 */
1524 pgmPoolFree(pVM, PdeDst.u & SHW_PDE_PG_MASK, pShwPde->idx, iPDDst);
1525 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
1526 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,InvalidatePagePDNPs));
1527 PGM_INVL_PG(pVCpu, GCPtrPage);
1528 }
1529 return rc;
1530
1531#else /* guest real and protected mode, nested + ept, none. */
1532 /* There's no such thing as InvalidatePage when paging is disabled, so just ignore. */
1533 NOREF(pVCpu); NOREF(GCPtrPage);
1534 return VINF_SUCCESS;
1535#endif
1536}
1537
1538#if PGM_SHW_TYPE != PGM_TYPE_NONE
1539
1540/**
1541 * Update the tracking of shadowed pages.
1542 *
1543 * @param pVCpu The cross context virtual CPU structure.
1544 * @param pShwPage The shadow page.
1545 * @param HCPhys The physical page that is being dereferenced.
1546 * @param iPte The shadow PTE index.
1547 * @param GCPhysPage The guest physical address (only valid if pShwPage->fDirty is set).
1548 */
1549DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackDeref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, RTHCPHYS HCPhys, uint16_t iPte,
1550 RTGCPHYS GCPhysPage)
1551{
1552 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1553
1554# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1555 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1556 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1557
1558 /* Use the hint we retrieved from the cached guest PT. */
1559 if (pShwPage->fDirty)
1560 {
1561 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1562
1563 Assert(pShwPage->cPresent);
1564 Assert(pPool->cPresent);
1565 pShwPage->cPresent--;
1566 pPool->cPresent--;
1567
1568 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysPage);
1569 AssertRelease(pPhysPage);
1570 pgmTrackDerefGCPhys(pPool, pShwPage, pPhysPage, iPte);
1571 return;
1572 }
1573# else
1574 NOREF(GCPhysPage);
1575# endif
1576
1577 STAM_PROFILE_START(&pVM->pgm.s.Stats.StatTrackDeref, a);
1578 LogFlow(("SyncPageWorkerTrackDeref: Damn HCPhys=%RHp pShwPage->idx=%#x!!!\n", HCPhys, pShwPage->idx));
1579
1580 /** @todo If this turns out to be a bottleneck (*very* likely) two things can be done:
1581 * 1. have a medium sized HCPhys -> GCPhys TLB (hash?)
1582 * 2. write protect all shadowed pages. I.e. implement caching.
1583 */
1584 /** @todo duplicated in the 2nd half of pgmPoolTracDerefGCPhysHint */
1585
1586 /*
1587 * Find the guest address.
1588 */
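 /* Slow path: no guest physical hint is available, so walk every RAM range and
    compare host physical addresses until the page is found (see the @todo above
    about caching this lookup). */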
1589 for (PPGMRAMRANGE pRam = pVM->pgm.s.CTX_SUFF(pRamRangesX);
1590 pRam;
1591 pRam = pRam->CTX_SUFF(pNext))
1592 {
1593 unsigned iPage = pRam->cb >> GUEST_PAGE_SHIFT;
1594 while (iPage-- > 0)
1595 {
1596 if (PGM_PAGE_GET_HCPHYS(&pRam->aPages[iPage]) == HCPhys)
1597 {
1598 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1599
1600 Assert(pShwPage->cPresent);
1601 Assert(pPool->cPresent);
1602 pShwPage->cPresent--;
1603 pPool->cPresent--;
1604
1605 pgmTrackDerefGCPhys(pPool, pShwPage, &pRam->aPages[iPage], iPte);
1606 STAM_PROFILE_STOP(&pVM->pgm.s.Stats.StatTrackDeref, a);
1607 return;
1608 }
1609 }
1610 }
1611
1612 for (;;)
1613 AssertReleaseMsgFailed(("HCPhys=%RHp wasn't found!\n", HCPhys));
1614}
1615
1616
1617/**
1618 * Update the tracking of shadowed pages.
1619 *
1620 * @param pVCpu The cross context virtual CPU structure.
1621 * @param pShwPage The shadow page.
1622 * @param u16 The top 16 bits of the pPage->HCPhys.
1623 * @param pPage Pointer to the guest page. This will be modified.
1624 * @param iPTDst The index into the shadow table.
1625 */
1626DECLINLINE(void) PGM_BTH_NAME(SyncPageWorkerTrackAddref)(PVMCPUCC pVCpu, PPGMPOOLPAGE pShwPage, uint16_t u16,
1627 PPGMPAGE pPage, const unsigned iPTDst)
1628{
1629 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1630
1631 /*
1632 * Just deal with the simple first time here.
1633 */
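 /* A zero tracking word means no shadow PTE references this page yet: record the
    single reference inline (pool page index in the tracking word, PTE index on the
    page). Additional references are handled by pgmPoolTrackPhysExtAddref below. */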
1634 if (!u16)
1635 {
1636 STAM_COUNTER_INC(&pVM->pgm.s.Stats.StatTrackVirgin);
1637 u16 = PGMPOOL_TD_MAKE(1, pShwPage->idx);
1638 /* Save the page table index. */
1639 PGM_PAGE_SET_PTE_INDEX(pVM, pPage, iPTDst);
1640 }
1641 else
1642 u16 = pgmPoolTrackPhysExtAddref(pVM, pPage, u16, pShwPage->idx, iPTDst);
1643
1644 /* write back */
1645 Log2(("SyncPageWorkerTrackAddRef: u16=%#x->%#x iPTDst=%#x\n", u16, PGM_PAGE_GET_TRACKING(pPage), iPTDst));
1646 PGM_PAGE_SET_TRACKING(pVM, pPage, u16);
1647
1648 /* update statistics. */
1649 pVM->pgm.s.CTX_SUFF(pPool)->cPresent++;
1650 pShwPage->cPresent++;
1651 if (pShwPage->iFirstPresent > iPTDst)
1652 pShwPage->iFirstPresent = iPTDst;
1653}
1654
1655
1656/**
1657 * Modifies a shadow PTE to account for access handlers.
1658 *
1659 * @param pVM The cross context VM structure.
1660 * @param pPage The page in question.
1661 * @param fPteSrc The shadowed flags of the source PTE. Must include the
1662 * A (accessed) bit so it can be emulated correctly.
1663 * @param pPteDst The shadow PTE (output). This is temporary storage and
1664 * does not need to be set atomically.
1665 */
1666DECLINLINE(void) PGM_BTH_NAME(SyncHandlerPte)(PVMCC pVM, PCPGMPAGE pPage, uint64_t fPteSrc, PSHWPTE pPteDst)
1667{
1668 NOREF(pVM); RT_NOREF_PV(fPteSrc);
1669
1670 /** @todo r=bird: Are we actually handling dirty and access bits for pages with access handlers correctly? No.
1671 * Update: \#PF should deal with this before or after calling the handlers. It has all the info to do the job efficiently. */
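 /* Three cases below: pages with only some (write) handlers get a read-only
    mapping, MMIO pages with all-access handlers may get the special invalid-MMIO
    PTE when that optimization applies, and everything else is left not-present
    so any access traps. */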
1672 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage))
1673 {
1674 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark read-only\n", pPage));
1675# if PGM_SHW_TYPE == PGM_TYPE_EPT
1676 pPteDst->u = PGM_PAGE_GET_HCPHYS(pPage) | EPT_E_READ | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1677# else
1678 if (fPteSrc & X86_PTE_A)
1679 {
1680 SHW_PTE_SET(*pPteDst, fPteSrc | PGM_PAGE_GET_HCPHYS(pPage));
1681 SHW_PTE_SET_RO(*pPteDst);
1682 }
1683 else
1684 SHW_PTE_SET(*pPteDst, 0);
1685# endif
1686 }
1687# ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1688# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
1689 else if ( PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage)
1690 && ( BTH_IS_NP_ACTIVE(pVM)
1691 || (fPteSrc & (X86_PTE_RW | X86_PTE_US)) == X86_PTE_RW) /** @todo Remove X86_PTE_US here and pGstWalk->Core.fEffectiveUS before the sync page test. */
1692# if PGM_SHW_TYPE == PGM_TYPE_AMD64
1693 && pVM->pgm.s.fLessThan52PhysicalAddressBits
1694# endif
1695 )
1696 {
1697 LogFlow(("SyncHandlerPte: MMIO page -> invalid \n"));
1698# if PGM_SHW_TYPE == PGM_TYPE_EPT
1699 /* 25.2.3.1: Reserved physical address bit -> EPT Misconfiguration (exit 49) */
1700 pPteDst->u = pVM->pgm.s.HCPhysInvMmioPg
1701 /* 25.2.3.1: bits 2:0 = 010b -> EPT Misconfiguration (exit 49) */
1702 | EPT_E_WRITE
1703 /* 25.2.3.1: leaf && 2:0 != 0 && u3Emt in {2, 3, 7} -> EPT Misconfiguration */
1704 | EPT_E_MEMTYPE_INVALID_3;
1705# else
1706 /* Set high page frame bits that MBZ (bankers on PAE, CPU dependent on AMD64). */
1707 SHW_PTE_SET(*pPteDst, pVM->pgm.s.HCPhysInvMmioPg | X86_PTE_PAE_MBZ_MASK_NO_NX | X86_PTE_P);
1708# endif
1709 }
1710# endif
1711# endif /* PGM_WITH_MMIO_OPTIMIZATIONS */
1712 else
1713 {
1714 LogFlow(("SyncHandlerPte: monitored page (%R[pgmpage]) -> mark not present\n", pPage));
1715 SHW_PTE_SET(*pPteDst, 0);
1716 }
1717 /** @todo count these kinds of entries. */
1718}
1719
1720
1721/**
1722 * Creates a 4K shadow page for a guest page.
1723 *
1724 * For 4M pages the caller must convert the PDE4M to a PTE; this includes adjusting the
1725 * physical address. Of the PdeSrc argument only the flags are used. No page
1726 * structures will be mapped in this function.
1727 *
1728 * @param pVCpu The cross context virtual CPU structure.
1729 * @param pPteDst Destination page table entry.
1730 * @param PdeSrc Source page directory entry (i.e. Guest OS page directory entry).
1731 * Can safely assume that only the flags are being used.
1732 * @param PteSrc Source page table entry (i.e. Guest OS page table entry).
1733 * @param pShwPage Pointer to the shadow page.
1734 * @param iPTDst The index into the shadow table.
1735 *
1736 * @remark Not used for 2/4MB pages!
1737 */
1738# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) || defined(DOXYGEN_RUNNING)
1739static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, GSTPDE PdeSrc, GSTPTE PteSrc,
1740 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1741# else
1742static void PGM_BTH_NAME(SyncPageWorker)(PVMCPUCC pVCpu, PSHWPTE pPteDst, RTGCPHYS GCPhysPage,
1743 PPGMPOOLPAGE pShwPage, unsigned iPTDst)
1744# endif
1745{
1746 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1747 RTGCPHYS GCPhysOldPage = NIL_RTGCPHYS;
1748
1749# if defined(PGMPOOL_WITH_OPTIMIZED_DIRTY_PT) \
1750 && PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) \
1751 && (PGM_GST_TYPE == PGM_TYPE_PAE || PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_SHW_TYPE == PGM_TYPE_PAE /* pae/32bit combo */)
1752
1753 if (pShwPage->fDirty)
1754 {
1755 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
1756 PGSTPT pGstPT;
1757
1758 /* Note that iPTDst can be used to index the guest PT even in the pae/32bit combo as we copy only half the table; see pgmPoolAddDirtyPage. */
1759 pGstPT = (PGSTPT)&pPool->aDirtyPages[pShwPage->idxDirtyEntry].aPage[0];
1760 GCPhysOldPage = GST_GET_PTE_GCPHYS(pGstPT->a[iPTDst]);
1761 pGstPT->a[iPTDst].u = PteSrc.u;
1762 }
1763# else
1764 Assert(!pShwPage->fDirty);
1765# endif
1766
1767# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1768 if ( (PteSrc.u & X86_PTE_P)
1769 && GST_IS_PTE_VALID(pVCpu, PteSrc))
1770# endif
1771 {
1772# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1773 RTGCPHYS GCPhysPage = GST_GET_PTE_GCPHYS(PteSrc);
1774# endif
1775 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysPage);
1776
1777 /*
1778 * Find the ram range.
1779 */
1780 PPGMPAGE pPage;
1781 int rc = pgmPhysGetPageEx(pVM, GCPhysPage, &pPage);
1782 if (RT_SUCCESS(rc))
1783 {
1784 /* Ignore ballooned pages.
1785 Don't return errors or use a fatal assert here as part of a
1786 shadow sync range might include ballooned pages. */
1787 if (PGM_PAGE_IS_BALLOONED(pPage))
1788 {
1789 Assert(!SHW_PTE_IS_P(*pPteDst)); /** @todo user tracking needs updating if this triggers. */
1790 return;
1791 }
1792
1793# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
1794 /* Make the page writable if necessary. */
1795 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1796 && ( PGM_PAGE_IS_ZERO(pPage)
1797# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1798 || ( (PteSrc.u & X86_PTE_RW)
1799# else
1800 || ( 1
1801# endif
1802 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
1803# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
1804 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
1805# endif
1806# ifdef VBOX_WITH_PAGE_SHARING
1807 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
1808# endif
1809 )
1810 )
1811 )
1812 {
1813 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhysPage);
1814 AssertRC(rc);
1815 }
1816# endif
1817
1818 /*
1819 * Make page table entry.
1820 */
1821 SHWPTE PteDst;
1822# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1823 uint64_t fGstShwPteFlags = GST_GET_PTE_SHW_FLAGS(pVCpu, PteSrc);
1824# else
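 /* No guest paging: there are no guest attributes to honour, so grant full
    access with the A and D bits pre-set (the accessed/dirty emulation further
    down is compiled out in this configuration). */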
1825 uint64_t fGstShwPteFlags = X86_PTE_P | X86_PTE_RW | X86_PTE_US | X86_PTE_A | X86_PTE_D;
1826# endif
1827 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
1828 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, fGstShwPteFlags, &PteDst);
1829 else
1830 {
1831# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1832 /*
1833 * If the page or page directory entry is not marked accessed,
1834 * we mark the page not present.
1835 */
1836 if (!(PteSrc.u & X86_PTE_A) || !(PdeSrc.u & X86_PDE_A))
1837 {
1838 LogFlow(("SyncPageWorker: page and/or page directory not accessed -> mark not present\n"));
1839 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,AccessedPage));
1840 SHW_PTE_SET(PteDst, 0);
1841 }
1842 /*
1843 * If the page is not flagged as dirty and is writable, then make it read-only, so we can set the dirty bit
1844 * when the page is modified.
1845 */
1846 else if (!(PteSrc.u & X86_PTE_D) && (PdeSrc.u & PteSrc.u & X86_PTE_RW))
1847 {
1848 AssertCompile(X86_PTE_RW == X86_PDE_RW);
1849 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPage));
1850 SHW_PTE_SET(PteDst,
1851 fGstShwPteFlags
1852 | PGM_PAGE_GET_HCPHYS(pPage)
1853 | PGM_PTFLAGS_TRACK_DIRTY);
1854 SHW_PTE_SET_RO(PteDst);
1855 }
1856 else
1857# endif
1858 {
1859 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageSkipped));
1860# if PGM_SHW_TYPE == PGM_TYPE_EPT
1861 PteDst.u = PGM_PAGE_GET_HCPHYS(pPage)
1862 | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_MEMTYPE_WB | EPT_E_IGNORE_PAT;
1863# else
1864 SHW_PTE_SET(PteDst, fGstShwPteFlags | PGM_PAGE_GET_HCPHYS(pPage));
1865# endif
1866 }
1867
1868 /*
1869 * Make sure only allocated pages are mapped writable.
1870 */
1871 if ( SHW_PTE_IS_P_RW(PteDst)
1872 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
1873 {
1874 /* Still applies to shared pages. */
1875 Assert(!PGM_PAGE_IS_ZERO(pPage));
1876 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet. Why isn't it? */
1877 Log3(("SyncPageWorker: write-protecting %RGp pPage=%R[pgmpage]at iPTDst=%d\n", GCPhysPage, pPage, iPTDst));
1878 }
1879 }
1880
1881 /*
1882 * Keep user tracking up to date.
1883 */
1884 if (SHW_PTE_IS_P(PteDst))
1885 {
1886 if (!SHW_PTE_IS_P(*pPteDst))
1887 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1888 else if (SHW_PTE_GET_HCPHYS(*pPteDst) != SHW_PTE_GET_HCPHYS(PteDst))
1889 {
1890 Log2(("SyncPageWorker: deref! *pPteDst=%RX64 PteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst), SHW_PTE_LOG64(PteDst)));
1891 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1892 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
1893 }
1894 }
1895 else if (SHW_PTE_IS_P(*pPteDst))
1896 {
1897 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1898 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1899 }
1900
1901 /*
1902 * Update statistics and commit the entry.
1903 */
1904# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1905 if (!(PteSrc.u & X86_PTE_G))
1906 pShwPage->fSeenNonGlobal = true;
1907# endif
1908 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
1909 return;
1910 }
1911
1912/** @todo count these three different kinds. */
1913 Log2(("SyncPageWorker: invalid address in Pte\n"));
1914 }
1915# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
1916 else if (!(PteSrc.u & X86_PTE_P))
1917 Log2(("SyncPageWorker: page not present in Pte\n"));
1918 else
1919 Log2(("SyncPageWorker: invalid Pte\n"));
1920# endif
1921
1922 /*
1923 * The page is not present or the PTE is bad. Replace the shadow PTE by
1924 * an empty entry, making sure to keep the user tracking up to date.
1925 */
1926 if (SHW_PTE_IS_P(*pPteDst))
1927 {
1928 Log2(("SyncPageWorker: deref! *pPteDst=%RX64\n", SHW_PTE_LOG64(*pPteDst)));
1929 PGM_BTH_NAME(SyncPageWorkerTrackDeref)(pVCpu, pShwPage, SHW_PTE_GET_HCPHYS(*pPteDst), iPTDst, GCPhysOldPage);
1930 }
1931 SHW_PTE_ATOMIC_SET(*pPteDst, 0);
1932}
1933
1934
1935/**
1936 * Syncs a guest OS page.
1937 *
1938 * There are no conflicts at this point, nor is there any need for
1939 * page table allocations.
1940 *
1941 * When called in PAE or AMD64 guest mode, the guest PDPE shall be valid.
1942 * When called in AMD64 guest mode, the guest PML4E shall be valid.
1943 *
1944 * @returns VBox status code.
1945 * @returns VINF_PGM_SYNCPAGE_MODIFIED_PDE if it modifies the PDE in any way.
1946 * @param pVCpu The cross context virtual CPU structure.
1947 * @param PdeSrc Page directory entry of the guest.
1948 * @param GCPtrPage Guest context page address.
1949 * @param cPages Number of pages to sync (PGM_SYNC_N_PAGES) (default=1).
1950 * @param uErr Fault error (X86_TRAP_PF_*).
1951 */
1952static int PGM_BTH_NAME(SyncPage)(PVMCPUCC pVCpu, GSTPDE PdeSrc, RTGCPTR GCPtrPage, unsigned cPages, unsigned uErr)
1953{
1954 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1955 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
1956 LogFlow(("SyncPage: GCPtrPage=%RGv cPages=%u uErr=%#x\n", GCPtrPage, cPages, uErr));
1957 RT_NOREF_PV(uErr); RT_NOREF_PV(cPages); RT_NOREF_PV(GCPtrPage);
1958
1959 PGM_LOCK_ASSERT_OWNER(pVM);
1960
1961# if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
1962 || PGM_GST_TYPE == PGM_TYPE_PAE \
1963 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
1964 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE)
1965
1966 /*
1967 * Assert preconditions.
1968 */
1969 Assert(PdeSrc.u & X86_PDE_P);
1970 Assert(cPages);
1971# if 0 /* rarely useful; leave for debugging. */
1972 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPagePD[(GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK]);
1973# endif
1974
1975 /*
1976 * Get the shadow PDE, find the shadow page table in the pool.
1977 */
1978# if PGM_SHW_TYPE == PGM_TYPE_32BIT
1979 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1980 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
1981
1982 /* Fetch the pgm pool shadow descriptor. */
1983 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
1984 Assert(pShwPde);
1985
1986# elif PGM_SHW_TYPE == PGM_TYPE_PAE
1987 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
1988 PPGMPOOLPAGE pShwPde = NULL;
1989 PX86PDPAE pPDDst;
1990
1991 /* Fetch the pgm pool shadow descriptor. */
1992 int rc2 = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
1993 AssertRCSuccessReturn(rc2, rc2);
1994 Assert(pShwPde);
1995
1996 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
1997 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
1998
1999# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2000 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2001 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2002 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2003 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2004
2005 int rc2 = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2006 AssertRCSuccessReturn(rc2, rc2);
2007 Assert(pPDDst && pPdptDst);
2008 PX86PDEPAE pPdeDst = &pPDDst->a[iPDDst];
2009# endif
2010 SHWPDE PdeDst = *pPdeDst;
2011
2012 /*
2013 * - In the guest SMP case we could have blocked while another VCPU reused
2014 * this page table.
2015 * - With W7-64 we may also take this path when the A bit is cleared on
2016 * higher level tables (PDPE/PML4E). The guest does not invalidate the
2017 * relevant TLB entries. If we're write monitoring any page mapped by
2018 * the modified entry, we may end up here with a "stale" TLB entry.
2019 */
2020 if (!(PdeDst.u & X86_PDE_P))
2021 {
2022 Log(("CPU%u: SyncPage: Pde at %RGv changed behind our back? (pPdeDst=%p/%RX64) uErr=%#x\n", pVCpu->idCpu, GCPtrPage, pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2023 AssertMsg(pVM->cCpus > 1 || (uErr & (X86_TRAP_PF_P | X86_TRAP_PF_RW)) == (X86_TRAP_PF_P | X86_TRAP_PF_RW),
2024 ("Unexpected missing PDE p=%p/%RX64 uErr=%#x\n", pPdeDst, (uint64_t)PdeDst.u, (uint32_t)uErr));
2025 if (uErr & X86_TRAP_PF_P)
2026 PGM_INVL_PG(pVCpu, GCPtrPage);
2027 return VINF_SUCCESS; /* force the instruction to be executed again. */
2028 }
2029
2030 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2031 Assert(pShwPage);
2032
2033# if PGM_GST_TYPE == PGM_TYPE_AMD64
2034 /* Fetch the pgm pool shadow descriptor. */
2035 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2036 Assert(pShwPde);
2037# endif
2038
2039 /*
2040 * Check that the page is present and that the shadow PDE isn't out of sync.
2041 */
2042 const bool fBigPage = (PdeSrc.u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu);
2043 const bool fPdeValid = !fBigPage ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc);
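 /* GST_IS_(BIG_)PDE_VALID checks for reserved bits; an invalid guest PDE takes
    the out-of-sync path at the bottom, which frees the shadow page table. */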
2044 RTGCPHYS GCPhys;
2045 if (!fBigPage)
2046 {
2047 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2048# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2049 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2050 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2051# endif
2052 }
2053 else
2054 {
2055 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2056# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2057 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2058 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2059# endif
2060 }
2061 /** @todo This doesn't check the G bit of 2/4MB pages. FIXME */
2062 if ( fPdeValid
2063 && pShwPage->GCPhys == GCPhys
2064 && (PdeSrc.u & X86_PDE_P)
2065 && (PdeSrc.u & X86_PDE_US) == (PdeDst.u & X86_PDE_US)
2066 && ((PdeSrc.u & X86_PDE_RW) == (PdeDst.u & X86_PDE_RW) || !(PdeDst.u & X86_PDE_RW))
2067# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2068 && ((PdeSrc.u & X86_PDE_PAE_NX) == (PdeDst.u & X86_PDE_PAE_NX) || !GST_IS_NX_ACTIVE(pVCpu))
2069# endif
2070 )
2071 {
2072 /*
2073 * Check that the PDE is marked accessed already.
2074 * Since we set the accessed bit *before* getting here on a #PF, this
2075 * check is only meant for dealing with non-#PF'ing paths.
2076 */
2077 if (PdeSrc.u & X86_PDE_A)
2078 {
2079 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2080 if (!fBigPage)
2081 {
2082 /*
2083 * 4KB Page - Map the guest page table.
2084 */
2085 PGSTPT pPTSrc;
2086 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2087 if (RT_SUCCESS(rc))
2088 {
2089# ifdef PGM_SYNC_N_PAGES
2090 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2091 if ( cPages > 1
2092 && !(uErr & X86_TRAP_PF_P)
2093 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2094 {
2095 /*
2096 * This code path is currently only taken when the caller is PGMTrap0eHandler
2097 * for non-present pages!
2098 *
2099 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2100 * deal with locality.
2101 */
2102 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2103# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2104 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2105 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2106# else
2107 const unsigned offPTSrc = 0;
2108# endif
2109 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2110 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2111 iPTDst = 0;
2112 else
2113 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2114
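 /* Opportunistic sync: only entries where the guest PTE is present and the
    shadow PTE is not yet present are filled in; existing shadow entries are
    left untouched. */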
2115 for (; iPTDst < iPTDstEnd; iPTDst++)
2116 {
2117 const PGSTPTE pPteSrc = &pPTSrc->a[offPTSrc + iPTDst];
2118
2119 if ( (pPteSrc->u & X86_PTE_P)
2120 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2121 {
2122 RTGCPTR GCPtrCurPage = (GCPtrPage & ~(RTGCPTR)(GST_PT_MASK << GST_PT_SHIFT))
2123 | ((offPTSrc + iPTDst) << GUEST_PAGE_SHIFT);
2124 NOREF(GCPtrCurPage);
2125 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, *pPteSrc, pShwPage, iPTDst);
2126 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx%s\n",
2127 GCPtrCurPage, pPteSrc->u & X86_PTE_P,
2128 !!(pPteSrc->u & PdeSrc.u & X86_PTE_RW),
2129 !!(pPteSrc->u & PdeSrc.u & X86_PTE_US),
2130 (uint64_t)pPteSrc->u,
2131 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2132 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2133 }
2134 }
2135 }
2136 else
2137# endif /* PGM_SYNC_N_PAGES */
2138 {
2139 const unsigned iPTSrc = (GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK;
2140 GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2141 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2142 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2143 Log2(("SyncPage: 4K %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx} PteDst=%08llx %s\n",
2144 GCPtrPage, PteSrc.u & X86_PTE_P,
2145 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2146 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2147 (uint64_t)PteSrc.u,
2148 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2149 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2150 }
2151 }
2152 else /* MMIO or invalid page: emulated in #PF handler. */
2153 {
2154 LogFlow(("PGM_GCPHYS_2_PTR %RGp failed with %Rrc\n", GCPhys, rc));
2155 Assert(!SHW_PTE_IS_P(pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK]));
2156 }
2157 }
2158 else
2159 {
2160 /*
2161 * 4/2MB page - lazy syncing shadow 4K pages.
2162 * (There are many causes of getting here, it's no longer only CSAM.)
2163 */
2164 /* Calculate the GC physical address of this 4KB shadow page. */
2165 GCPhys = PGM_A20_APPLY(pVCpu, GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc) | (GCPtrPage & GST_BIG_PAGE_OFFSET_MASK));
2166 /* Find ram range. */
2167 PPGMPAGE pPage;
2168 int rc = pgmPhysGetPageEx(pVM, GCPhys, &pPage);
2169 if (RT_SUCCESS(rc))
2170 {
2171 AssertFatalMsg(!PGM_PAGE_IS_BALLOONED(pPage), ("Unexpected ballooned page at %RGp\n", GCPhys));
2172
2173# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2174 /* Try to make the page writable if necessary. */
2175 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2176 && ( PGM_PAGE_IS_ZERO(pPage)
2177 || ( (PdeSrc.u & X86_PDE_RW)
2178 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2179# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2180 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2181# endif
2182# ifdef VBOX_WITH_PAGE_SHARING
2183 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2184# endif
2185 )
2186 )
2187 )
2188 {
2189 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2190 AssertRC(rc);
2191 }
2192# endif
2193
2194 /*
2195 * Make shadow PTE entry.
2196 */
2197 SHWPTE PteDst;
2198 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2199 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc), &PteDst);
2200 else
2201 SHW_PTE_SET(PteDst, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc) | PGM_PAGE_GET_HCPHYS(pPage));
2202
2203 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2204 if ( SHW_PTE_IS_P(PteDst)
2205 && !SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2206 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2207
2208 /* Make sure only allocated pages are mapped writable. */
2209 if ( SHW_PTE_IS_P_RW(PteDst)
2210 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2211 {
2212 /* Still applies to shared pages. */
2213 Assert(!PGM_PAGE_IS_ZERO(pPage));
2214 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2215 Log3(("SyncPage: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, GCPtrPage));
2216 }
2217
2218 SHW_PTE_ATOMIC_SET2(pPTDst->a[iPTDst], PteDst);
2219
2220 /*
2221 * If the page is not flagged as dirty and is writable, then make it read-only
2222 * at PD level, so we can set the dirty bit when the page is modified.
2223 *
2224 * ASSUMES that page access handlers are implemented on page table entry level.
2225 * Thus we will first catch the dirty access and set PDE.D and restart. If
2226 * there is an access handler, we'll trap again and let it work on the problem.
2227 */
2228 /** @todo r=bird: figure out why we need this here, SyncPT should've taken care of this already.
2229 * As for invlpg, it simply frees the whole shadow PT.
2230 * ...It's possibly because the guest clears it and the guest doesn't really tell us... */
2231 if ((PdeSrc.u & (X86_PDE4M_D | X86_PDE_RW)) == X86_PDE_RW)
2232 {
2233 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2234 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2235 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2236 }
2237 else
2238 {
2239 PdeDst.u &= ~(SHWUINT)(PGM_PDFLAGS_TRACK_DIRTY | X86_PDE_RW);
2240 PdeDst.u |= PdeSrc.u & X86_PDE_RW;
2241 }
2242 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2243 Log2(("SyncPage: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} GCPhys=%RGp%s\n",
2244 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US),
2245 (uint64_t)PdeSrc.u, GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2246 }
2247 else
2248 {
2249 LogFlow(("PGM_GCPHYS_2_PTR %RGp (big) failed with %Rrc\n", GCPhys, rc));
2250 /** @todo must wipe the shadow page table entry in this
2251 * case. */
2252 }
2253 }
2254 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2255 return VINF_SUCCESS;
2256 }
2257
2258 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDNAs));
2259 }
2260 else if (fPdeValid)
2261 {
2262 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPagePDOutOfSync));
2263 Log2(("SyncPage: Out-Of-Sync PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2264 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2265 }
2266 else
2267 {
2268/// @todo STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_MID_Z(Stat,SyncPagePDOutOfSyncAndInvalid));
2269 Log2(("SyncPage: Bad PDE at %RGp PdeSrc=%RX64 PdeDst=%RX64 (GCPhys %RGp vs %RGp)\n",
2270 GCPtrPage, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u, pShwPage->GCPhys, GCPhys));
2271 }
2272
2273 /*
2274 * Mark the PDE not present. Restart the instruction and let #PF call SyncPT.
2275 * Yea, I'm lazy.
2276 */
2277 pgmPoolFreeByPage(pPool, pShwPage, pShwPde->idx, iPDDst);
2278 SHW_PDE_ATOMIC_SET(*pPdeDst, 0);
2279
2280 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2281 PGM_INVL_VCPU_TLBS(pVCpu);
2282 return VINF_PGM_SYNCPAGE_MODIFIED_PDE;
2283
2284
2285# elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
2286 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
2287 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT)
2288 NOREF(PdeSrc);
2289
2290# ifdef PGM_SYNC_N_PAGES
2291 /*
2292 * Get the shadow PDE, find the shadow page table in the pool.
2293 */
2294# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2295 X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
2296
2297# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2298 X86PDEPAE PdeDst = pgmShwGetPaePDE(pVCpu, GCPtrPage);
2299
2300# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2301 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2302 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64; NOREF(iPdpt);
2303 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2304 X86PDEPAE PdeDst;
2305 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2306
2307 int rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2308 AssertRCSuccessReturn(rc, rc);
2309 Assert(pPDDst && pPdptDst);
2310 PdeDst = pPDDst->a[iPDDst];
2311
2312# elif PGM_SHW_TYPE == PGM_TYPE_EPT
2313 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
2314 PEPTPD pPDDst;
2315 EPTPDE PdeDst;
2316
2317 int rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, NULL, &pPDDst);
2318 if (rc != VINF_SUCCESS)
2319 {
2320 AssertRC(rc);
2321 return rc;
2322 }
2323 Assert(pPDDst);
2324 PdeDst = pPDDst->a[iPDDst];
2325# endif
2326 /* In the guest SMP case we could have blocked while another VCPU reused this page table. */
2327 if (!SHW_PDE_IS_P(PdeDst))
2328 {
2329 AssertMsg(pVM->cCpus > 1, ("Unexpected missing PDE %RX64\n", (uint64_t)PdeDst.u));
2330 Log(("CPU%d: SyncPage: Pde at %RGv changed behind our back!\n", pVCpu->idCpu, GCPtrPage));
2331 return VINF_SUCCESS; /* force the instruction to be executed again. */
2332 }
2333
2334 /* Can happen in the guest SMP case; other VCPU activated this PDE while we were blocking to handle the page fault. */
2335 if (SHW_PDE_IS_BIG(PdeDst))
2336 {
2337 Assert(pVM->pgm.s.fNestedPaging);
2338 Log(("CPU%d: SyncPage: Pde (big:%RX64) at %RGv changed behind our back!\n", pVCpu->idCpu, PdeDst.u, GCPtrPage));
2339 return VINF_SUCCESS;
2340 }
2341
2342 /* Mask away the page offset. */
2343 GCPtrPage &= ~((RTGCPTR)0xfff);
2344
2345 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, PdeDst.u & SHW_PDE_PG_MASK);
2346 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2347
2348 Assert(cPages == 1 || !(uErr & X86_TRAP_PF_P));
2349 if ( cPages > 1
2350 && !(uErr & X86_TRAP_PF_P)
2351 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2352 {
2353 /*
2354 * This code path is currently only taken when the caller is PGMTrap0eHandler
2355 * for non-present pages!
2356 *
2357 * We're setting PGM_SYNC_NR_PAGES pages around the faulting page to sync it and
2358 * deal with locality.
2359 */
2360 unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2361 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2362 if (iPTDst < PGM_SYNC_NR_PAGES / 2)
2363 iPTDst = 0;
2364 else
2365 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2366 for (; iPTDst < iPTDstEnd; iPTDst++)
2367 {
2368 if (!SHW_PTE_IS_P(pPTDst->a[iPTDst]))
2369 {
2370 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2371 | (iPTDst << GUEST_PAGE_SHIFT));
2372
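 /* Without guest paging the guest physical address is simply the (A20-masked)
    linear address, so the page is mapped 1:1. */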
2373 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2374 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2375 GCPtrCurPage,
2376 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2377 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2378
2379 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
2380 break;
2381 }
2382 else
2383 Log4(("%RGv iPTDst=%x pPTDst->a[iPTDst] %RX64\n",
2384 (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT)) | (iPTDst << GUEST_PAGE_SHIFT), iPTDst, SHW_PTE_LOG64(pPTDst->a[iPTDst]) ));
2385 }
2386 }
2387 else
2388# endif /* PGM_SYNC_N_PAGES */
2389 {
2390 const unsigned iPTDst = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2391 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
2392 | (iPTDst << GUEST_PAGE_SHIFT));
2393
2394 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
2395
2396 Log2(("SyncPage: 4K %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
2397 GCPtrPage,
2398 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
2399 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
2400 }
2401 return VINF_SUCCESS;
2402
2403# else
2404 NOREF(PdeSrc);
2405 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_GST_TYPE, PGM_SHW_TYPE));
2406 return VERR_PGM_NOT_USED_IN_MODE;
2407# endif
2408}
2409
2410#endif /* PGM_SHW_TYPE != PGM_TYPE_NONE */
2411#if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
2412
2413/**
2414 * Handle dirty bit tracking faults.
2415 *
2416 * @returns VBox status code.
2417 * @param pVCpu The cross context virtual CPU structure.
2418 * @param uErr Page fault error code.
2419 * @param pPdeSrc Guest page directory entry.
2420 * @param pPdeDst Shadow page directory entry.
2421 * @param GCPtrPage Guest context page address.
2422 */
2423static int PGM_BTH_NAME(CheckDirtyPageFault)(PVMCPUCC pVCpu, uint32_t uErr, PSHWPDE pPdeDst, GSTPDE const *pPdeSrc,
2424 RTGCPTR GCPtrPage)
2425{
2426 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2427 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
2428 NOREF(uErr);
2429
2430 PGM_LOCK_ASSERT_OWNER(pVM);
2431
2432 /*
2433 * Handle big page.
2434 */
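 /* PGM_PDFLAGS_TRACK_DIRTY on the shadow PDE means the big page was mapped
    read-only solely to observe the first write; now that it has happened,
    restore write access, set the accessed bit and restart the instruction. */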
2435 if ((pPdeSrc->u & X86_PDE_PS) && GST_IS_PSE_ACTIVE(pVCpu))
2436 {
2437 if ((pPdeDst->u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY))
2438 {
2439 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2440 Assert(pPdeSrc->u & X86_PDE_RW);
2441
2442 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB entry will not harm; write access will simply
2443 * fault again and take this path to only invalidate the entry (see below). */
2444 SHWPDE PdeDst = *pPdeDst;
2445 PdeDst.u &= ~(SHWUINT)PGM_PDFLAGS_TRACK_DIRTY;
2446 PdeDst.u |= X86_PDE_RW | X86_PDE_A;
2447 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2448 PGM_INVL_BIG_PG(pVCpu, GCPtrPage);
2449 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2450 }
2451
2452# ifdef IN_RING0
2453 /* Check for stale TLB entry; only applies to the SMP guest case. */
2454 if ( pVM->cCpus > 1
2455 && (pPdeDst->u & (X86_PDE_P | X86_PDE_RW | X86_PDE_A)) == (X86_PDE_P | X86_PDE_RW | X86_PDE_A))
2456 {
2457 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2458 if (pShwPage)
2459 {
2460 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2461 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2462 if (SHW_PTE_IS_P_RW(*pPteDst))
2463 {
2464 /* Stale TLB entry. */
2465 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2466 PGM_INVL_PG(pVCpu, GCPtrPage);
2467 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2468 }
2469 }
2470 }
2471# endif /* IN_RING0 */
2472 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2473 }
2474
2475 /*
2476 * Map the guest page table.
2477 */
2478 PGSTPT pPTSrc;
2479 int rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, GST_GET_PDE_GCPHYS(*pPdeSrc), &pPTSrc);
2480 AssertRCReturn(rc, rc);
2481
2482 if (SHW_PDE_IS_P(*pPdeDst))
2483 {
2484 GSTPTE const *pPteSrc = &pPTSrc->a[(GCPtrPage >> GST_PT_SHIFT) & GST_PT_MASK];
2485 const GSTPTE PteSrc = *pPteSrc;
2486
2487 /*
2488 * Map shadow page table.
2489 */
2490 PPGMPOOLPAGE pShwPage = pgmPoolGetPage(pPool, pPdeDst->u & SHW_PDE_PG_MASK);
2491 if (pShwPage)
2492 {
2493 PSHWPT pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2494 PSHWPTE pPteDst = &pPTDst->a[(GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK];
2495 if (SHW_PTE_IS_P(*pPteDst)) /** @todo Optimize accessed bit emulation? */
2496 {
2497 if (SHW_PTE_IS_TRACK_DIRTY(*pPteDst))
2498 {
2499 PPGMPAGE pPage = pgmPhysGetPage(pVM, GST_GET_PTE_GCPHYS(PteSrc));
2500 SHWPTE PteDst = *pPteDst;
2501
2502 LogFlow(("DIRTY page trap addr=%RGv\n", GCPtrPage));
2503 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageTrap));
2504
2505 Assert(PteSrc.u & X86_PTE_RW);
2506
2507 /* Note: No need to invalidate this entry on other VCPUs as a stale TLB
2508 * entry will not harm; write access will simply fault again and
2509 * take this path to only invalidate the entry.
2510 */
2511 if (RT_LIKELY(pPage))
2512 {
2513 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2514 {
2515 //AssertMsgFailed(("%R[pgmpage] - we don't set PGM_PTFLAGS_TRACK_DIRTY for these pages\n", pPage));
2516 Assert(!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPage));
2517 /* Assuming write handlers here as the PTE is present (otherwise we wouldn't be here). */
2518 SHW_PTE_SET_RO(PteDst);
2519 }
2520 else
2521 {
2522 if ( PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_WRITE_MONITORED
2523 && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM)
2524 {
2525 rc = pgmPhysPageMakeWritable(pVM, pPage, GST_GET_PTE_GCPHYS(PteSrc));
2526 AssertRC(rc);
2527 }
2528 if (PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED)
2529 SHW_PTE_SET_RW(PteDst);
2530 else
2531 {
2532 /* Still applies to shared pages. */
2533 Assert(!PGM_PAGE_IS_ZERO(pPage));
2534 SHW_PTE_SET_RO(PteDst);
2535 }
2536 }
2537 }
2538 else
2539 SHW_PTE_SET_RW(PteDst); /** @todo r=bird: This doesn't make sense to me. */
2540
2541 SHW_PTE_SET(PteDst, (SHW_PTE_GET_U(PteDst) | X86_PTE_D | X86_PTE_A) & ~(uint64_t)PGM_PTFLAGS_TRACK_DIRTY);
2542 SHW_PTE_ATOMIC_SET2(*pPteDst, PteDst);
2543 PGM_INVL_PG(pVCpu, GCPtrPage);
2544 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2545 }
2546
2547# ifdef IN_RING0
2548 /* Check for stale TLB entry; only applies to the SMP guest case. */
2549 if ( pVM->cCpus > 1
2550 && SHW_PTE_IS_RW(*pPteDst)
2551 && SHW_PTE_IS_A(*pPteDst))
2552 {
2553 /* Stale TLB entry. */
2554 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageStale));
2555 PGM_INVL_PG(pVCpu, GCPtrPage);
2556 return VINF_PGM_HANDLED_DIRTY_BIT_FAULT; /* restarts the instruction. */
2557 }
2558# endif
2559 }
2560 }
2561 else
2562 AssertMsgFailed(("pgmPoolGetPageByHCPhys %RGp failed!\n", pPdeDst->u & SHW_PDE_PG_MASK));
2563 }
2564
2565 return VINF_PGM_NO_DIRTY_BIT_TRACKING;
2566}
2567
2568#endif /* PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
2569
2570/**
2571 * Sync a shadow page table.
2572 *
2573 * The shadow page table is not present in the shadow PDE.
2574 *
2575 * Handles mapping conflicts.
2576 *
2577 * This is called by VerifyAccessSyncPage, PrefetchPage, InvalidatePage (on
2578 * conflict), and Trap0eHandler.
2579 *
2580 * A precondition for this method is that the shadow PDE is not present. The
2581 * caller must take the PGM lock before checking this and continue to hold it
2582 * when calling this method.
2583 *
2584 * @returns VBox status code.
2585 * @param pVCpu The cross context virtual CPU structure.
2586 * @param iPDSrc Page directory index.
2587 * @param pPDSrc Source page directory (i.e. Guest OS page directory).
2588 * Assume this is a temporary mapping.
2589 * @param GCPtrPage GC pointer to the page that caused the fault.
2590 */
2591static int PGM_BTH_NAME(SyncPT)(PVMCPUCC pVCpu, unsigned iPDSrc, PGSTPD pPDSrc, RTGCPTR GCPtrPage)
2592{
2593 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2594 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
2595
2596#if 0 /* rarely useful; leave for debugging. */
2597 STAM_COUNTER_INC(&pVCpu->pgm.s.StatSyncPtPD[iPDSrc]);
2598#endif
2599 LogFlow(("SyncPT: GCPtrPage=%RGv\n", GCPtrPage)); RT_NOREF_PV(GCPtrPage);
2600
2601 PGM_LOCK_ASSERT_OWNER(pVM);
2602
2603#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
2604 || PGM_GST_TYPE == PGM_TYPE_PAE \
2605 || PGM_GST_TYPE == PGM_TYPE_AMD64) \
2606 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
2607 && PGM_SHW_TYPE != PGM_TYPE_NONE
2608 int rc = VINF_SUCCESS;
2609
2610 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
2611
2612 /*
2613 * Some input validation first.
2614 */
2615 AssertMsg(iPDSrc == ((GCPtrPage >> GST_PD_SHIFT) & GST_PD_MASK), ("iPDSrc=%x GCPtrPage=%RGv\n", iPDSrc, GCPtrPage));
2616
2617 /*
2618 * Get the relevant shadow PDE entry.
2619 */
2620# if PGM_SHW_TYPE == PGM_TYPE_32BIT
2621 const unsigned iPDDst = GCPtrPage >> SHW_PD_SHIFT;
2622 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
2623
2624 /* Fetch the pgm pool shadow descriptor. */
2625 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
2626 Assert(pShwPde);
2627
2628# elif PGM_SHW_TYPE == PGM_TYPE_PAE
2629 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2630 PPGMPOOLPAGE pShwPde = NULL;
2631 PX86PDPAE pPDDst;
2632 PSHWPDE pPdeDst;
2633
2634 /* Fetch the pgm pool shadow descriptor. */
2635 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
2636 AssertRCSuccessReturn(rc, rc);
2637 Assert(pShwPde);
2638
2639 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
2640 pPdeDst = &pPDDst->a[iPDDst];
2641
2642# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
2643 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
2644 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
2645 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
2646 PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
2647 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
2648 AssertRCSuccessReturn(rc, rc);
2649 Assert(pPDDst);
2650 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
2651
2652# endif
2653 SHWPDE PdeDst = *pPdeDst;
2654
2655# if PGM_GST_TYPE == PGM_TYPE_AMD64
2656 /* Fetch the pgm pool shadow descriptor. */
2657 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
2658 Assert(pShwPde);
2659# endif
2660
2661 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P.*/
2662
2663 /*
2664 * Sync the page directory entry.
2665 */
2666 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
2667 const bool fPageTable = !(PdeSrc.u & X86_PDE_PS) || !GST_IS_PSE_ACTIVE(pVCpu);
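 /* fPageTable: the guest PDE references a 4KB page table; otherwise it maps a
    2/4MB page and the shadow page table below is synthesised from the big PDE
    itself. */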
2668 if ( (PdeSrc.u & X86_PDE_P)
2669 && (fPageTable ? GST_IS_PDE_VALID(pVCpu, PdeSrc) : GST_IS_BIG_PDE_VALID(pVCpu, PdeSrc)) )
2670 {
2671 /*
2672 * Allocate & map the page table.
2673 */
2674 PSHWPT pPTDst;
2675 PPGMPOOLPAGE pShwPage;
2676 RTGCPHYS GCPhys;
2677 if (fPageTable)
2678 {
2679 GCPhys = GST_GET_PDE_GCPHYS(PdeSrc);
2680# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2681 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2682 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
2683# endif
2684 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE, PGM_A20_IS_ENABLED(pVCpu),
2685 pShwPde->idx, iPDDst, false /*fLockPage*/,
2686 &pShwPage);
2687 }
2688 else
2689 {
2690 PGMPOOLACCESS enmAccess;
2691# if PGM_WITH_NX(PGM_GST_TYPE, PGM_SHW_TYPE)
2692 const bool fNoExecute = (PdeSrc.u & X86_PDE_PAE_NX) && GST_IS_NX_ACTIVE(pVCpu);
2693# else
2694 const bool fNoExecute = false;
2695# endif
2696
2697 GCPhys = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
2698# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2699 /* Select the right PDE as we're emulating a 4MB page directory with two 2 MB shadow PDEs.*/
2700 GCPhys = PGM_A20_APPLY(pVCpu, GCPhys | (GCPtrPage & (1 << X86_PD_PAE_SHIFT)));
2701# endif
2702 /* Determine the right kind of large page to avoid incorrect cached entry reuse. */
2703 if (PdeSrc.u & X86_PDE_US)
2704 {
2705 if (PdeSrc.u & X86_PDE_RW)
2706 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_RW_NX : PGMPOOLACCESS_USER_RW;
2707 else
2708 enmAccess = (fNoExecute) ? PGMPOOLACCESS_USER_R_NX : PGMPOOLACCESS_USER_R;
2709 }
2710 else
2711 {
2712 if (PdeSrc.u & X86_PDE_RW)
2713 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_RW_NX : PGMPOOLACCESS_SUPERVISOR_RW;
2714 else
2715 enmAccess = (fNoExecute) ? PGMPOOLACCESS_SUPERVISOR_R_NX : PGMPOOLACCESS_SUPERVISOR_R;
2716 }
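 /* Note: the access/NX variant becomes part of the pool page identity so that a
    cached shadow PT built for one permission set isn't reused for a big page
    mapped with different permissions (see the comment above). */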
2717 rc = pgmPoolAlloc(pVM, GCPhys, BTH_PGMPOOLKIND_PT_FOR_BIG, enmAccess, PGM_A20_IS_ENABLED(pVCpu),
2718 pShwPde->idx, iPDDst, false /*fLockPage*/,
2719 &pShwPage);
2720 }
2721 if (rc == VINF_SUCCESS)
2722 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
2723 else if (rc == VINF_PGM_CACHED_PAGE)
2724 {
2725 /*
2726 * The PT was cached, just hook it up.
2727 */
2728 if (fPageTable)
2729 PdeDst.u = pShwPage->Core.Key | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2730 else
2731 {
2732 PdeDst.u = pShwPage->Core.Key | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2733 /* (see explanation and assumptions further down.) */
2734 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2735 {
2736 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2737 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2738 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2739 }
2740 }
2741 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2742 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2743 return VINF_SUCCESS;
2744 }
2745 else
2746 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
2747 /** @todo Why do we bother preserving X86_PDE_AVL_MASK here?
2748 * Both PGM_PDFLAGS_MAPPING and PGM_PDFLAGS_TRACK_DIRTY should be
2749 * irrelevant at this point. */
2750 PdeDst.u &= X86_PDE_AVL_MASK;
2751 PdeDst.u |= pShwPage->Core.Key;
2752
2753 /*
2754 * Page directory has been accessed (this is a fault situation, remember).
2755 */
2756 /** @todo
2757 * Well, when the caller is PrefetchPage or InvalidatePage it isn't a
2758 * fault situation. What's more, the Trap0eHandler has already set the
2759 * accessed bit. So, it's actually just VerifyAccessSyncPage which
2760 * might need setting the accessed flag.
2761 *
2762 * The best idea is to leave this change to the caller and add an
2763 * assertion that it's set already. */
2764 pPDSrc->a[iPDSrc].u |= X86_PDE_A;
2765 if (fPageTable)
2766 {
2767 /*
2768 * Page table - 4KB.
2769 *
2770 * Sync all or just a few entries depending on PGM_SYNC_N_PAGES.
2771 */
2772 Log2(("SyncPT: 4K %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx}\n",
2773 GCPtrPage, PdeSrc.u & X86_PTE_P, !!(PdeSrc.u & X86_PTE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u));
2774 PGSTPT pPTSrc;
2775 rc = PGM_GCPHYS_2_PTR(pVM, GST_GET_PDE_GCPHYS(PdeSrc), &pPTSrc);
2776 if (RT_SUCCESS(rc))
2777 {
2778 /*
2779 * Start by syncing the page directory entry so CSAM's TLB trick works.
2780 */
2781 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | X86_PDE_AVL_MASK))
2782 | GST_GET_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2783 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2784 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2785
2786 /*
2787 * Directory/page user or supervisor privilege: (same goes for read/write)
2788 *
2789 * Directory Page Combined
2790 * U/S U/S U/S
2791 * 0 0 0
2792 * 0 1 0
2793 * 1 0 0
2794 * 1 1 1
2795 *
2796 * Simple AND operation. Table listed for completeness.
2797 *
2798 */
2799 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4K));
2800# ifdef PGM_SYNC_N_PAGES
2801 unsigned iPTBase = (GCPtrPage >> SHW_PT_SHIFT) & SHW_PT_MASK;
2802 unsigned iPTDst = iPTBase;
2803 const unsigned iPTDstEnd = RT_MIN(iPTDst + PGM_SYNC_NR_PAGES / 2, RT_ELEMENTS(pPTDst->a));
2804 if (iPTDst <= PGM_SYNC_NR_PAGES / 2)
2805 iPTDst = 0;
2806 else
2807 iPTDst -= PGM_SYNC_NR_PAGES / 2;
2808# else /* !PGM_SYNC_N_PAGES */
2809 unsigned iPTDst = 0;
2810 const unsigned iPTDstEnd = RT_ELEMENTS(pPTDst->a);
2811# endif /* !PGM_SYNC_N_PAGES */
2812 RTGCPTR GCPtrCur = (GCPtrPage & ~(RTGCPTR)((1 << SHW_PD_SHIFT) - 1))
2813 | ((RTGCPTR)iPTDst << GUEST_PAGE_SHIFT);
2814# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
2815 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
2816 const unsigned offPTSrc = ((GCPtrPage >> SHW_PD_SHIFT) & 1) * 512;
2817# else
2818 const unsigned offPTSrc = 0;
2819# endif
2820 for (; iPTDst < iPTDstEnd; iPTDst++, GCPtrCur += GUEST_PAGE_SIZE)
2821 {
2822 const unsigned iPTSrc = iPTDst + offPTSrc;
2823 const GSTPTE PteSrc = pPTSrc->a[iPTSrc];
2824 if (PteSrc.u & X86_PTE_P)
2825 {
2826 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], PdeSrc, PteSrc, pShwPage, iPTDst);
2827 Log2(("SyncPT: 4K+ %RGv PteSrc:{P=%d RW=%d U=%d raw=%08llx}%s dst.raw=%08llx iPTSrc=%x PdeSrc.u=%x physpte=%RGp\n",
2828 GCPtrCur,
2829 PteSrc.u & X86_PTE_P,
2830 !!(PteSrc.u & PdeSrc.u & X86_PTE_RW),
2831 !!(PteSrc.u & PdeSrc.u & X86_PTE_US),
2832 (uint64_t)PteSrc.u,
2833 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : "", SHW_PTE_LOG64(pPTDst->a[iPTDst]), iPTSrc, PdeSrc.au32[0],
2834 (RTGCPHYS)(GST_GET_PDE_GCPHYS(PdeSrc) + iPTSrc*sizeof(PteSrc)) ));
2835 }
2836 /* else: the page table was cleared by the pool */
2837 } /* for PTEs */
2838 }
2839 }
2840 else
2841 {
2842 /*
2843 * Big page - 2/4MB.
2844 *
2845 * We'll walk the ram range list in parallel and optimize lookups.
2846 * We will only sync one shadow page table at a time.
2847 */
2848 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT4M));
2849
2850 /**
2851 * @todo It might be more efficient to sync only a part of the 4MB
2852 * page (similar to what we do for 4KB PDs).
2853 */
2854
2855 /*
2856 * Start by syncing the page directory entry.
2857 */
2858 PdeDst.u = (PdeDst.u & (SHW_PDE_PG_MASK | (X86_PDE_AVL_MASK & ~PGM_PDFLAGS_TRACK_DIRTY)))
2859 | GST_GET_BIG_PDE_SHW_FLAGS(pVCpu, PdeSrc);
2860
2861 /*
2862 * If the page is not flagged as dirty and is writable, then make it read-only
2863 * at PD level, so we can set the dirty bit when the page is modified.
2864 *
2865 * ASSUMES that page access handlers are implemented on page table entry level.
2866 * Thus we will first catch the dirty access and set PDE.D and restart. If
2867 * there is an access handler, we'll trap again and let it work on the problem.
2868 */
2869 /** @todo move the above stuff to a section in the PGM documentation. */
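        /* Rough sketch of the resulting flow (see the CheckDirtyPageFault use further
         * down): the first guest write to the 2/4 MB page faults on the now read-only
         * shadow PDE, the dirty-page fault path spots PGM_PDFLAGS_TRACK_DIRTY, sets the
         * guest PDE.D, makes the shadow PDE writable again and the instruction is
         * restarted. */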
2870 Assert(!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY));
2871 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
2872 {
2873 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,DirtyPageBig));
2874 PdeDst.u |= PGM_PDFLAGS_TRACK_DIRTY;
2875 PdeDst.u &= ~(SHWUINT)X86_PDE_RW;
2876 }
2877 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
2878 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
2879
2880 /*
2881 * Fill the shadow page table.
2882 */
2883 /* Get address and flags from the source PDE. */
2884 SHWPTE PteDstBase;
2885 SHW_PTE_SET(PteDstBase, GST_GET_BIG_PDE_SHW_FLAGS_4_PTE(pVCpu, PdeSrc));
2886
2887 /* Loop thru the entries in the shadow PT. */
2888 const RTGCPTR GCPtr = (GCPtrPage >> SHW_PD_SHIFT) << SHW_PD_SHIFT; NOREF(GCPtr);
2889 Log2(("SyncPT: BIG %RGv PdeSrc:{P=%d RW=%d U=%d raw=%08llx} Shw=%RGv GCPhys=%RGp %s\n",
2890 GCPtrPage, PdeSrc.u & X86_PDE_P, !!(PdeSrc.u & X86_PDE_RW), !!(PdeSrc.u & X86_PDE_US), (uint64_t)PdeSrc.u, GCPtr,
2891 GCPhys, PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY ? " Track-Dirty" : ""));
2892 PPGMRAMRANGE pRam = pgmPhysGetRangeAtOrAbove(pVM, GCPhys);
2893 unsigned iPTDst = 0;
2894 while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2895 && !VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2896 {
2897 if (pRam && GCPhys >= pRam->GCPhys)
2898 {
2899# ifndef PGM_WITH_A20
2900 unsigned iHCPage = (GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT;
2901# endif
2902 do
2903 {
2904 /* Make shadow PTE. */
2905# ifdef PGM_WITH_A20
2906 PPGMPAGE pPage = &pRam->aPages[(GCPhys - pRam->GCPhys) >> GUEST_PAGE_SHIFT];
2907# else
2908 PPGMPAGE pPage = &pRam->aPages[iHCPage];
2909# endif
2910 SHWPTE PteDst;
2911
2912# ifndef VBOX_WITH_NEW_LAZY_PAGE_ALLOC
2913 /* Try to make the page writable if necessary. */
2914 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
2915 && ( PGM_PAGE_IS_ZERO(pPage)
2916 || ( SHW_PTE_IS_RW(PteDstBase)
2917 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED
2918# ifdef VBOX_WITH_REAL_WRITE_MONITORED_PAGES
2919 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_WRITE_MONITORED
2920# endif
2921# ifdef VBOX_WITH_PAGE_SHARING
2922 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_SHARED
2923# endif
2924 && !PGM_PAGE_IS_BALLOONED(pPage))
2925 )
2926 )
2927 {
2928 rc = pgmPhysPageMakeWritable(pVM, pPage, GCPhys);
2929 AssertRCReturn(rc, rc);
2930 if (VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY))
2931 break;
2932 }
2933# endif
2934
2935 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPage))
2936 PGM_BTH_NAME(SyncHandlerPte)(pVM, pPage, SHW_PTE_GET_U(PteDstBase), &PteDst);
2937 else if (PGM_PAGE_IS_BALLOONED(pPage))
2938 SHW_PTE_SET(PteDst, 0); /* Handle ballooned pages at #PF time. */
2939 else
2940 SHW_PTE_SET(PteDst, PGM_PAGE_GET_HCPHYS(pPage) | SHW_PTE_GET_U(PteDstBase));
2941
2942 /* Only map writable pages writable. */
2943 if ( SHW_PTE_IS_P_RW(PteDst)
2944 && PGM_PAGE_GET_STATE(pPage) != PGM_PAGE_STATE_ALLOCATED)
2945 {
2946 /* Still applies to shared pages. */
2947 Assert(!PGM_PAGE_IS_ZERO(pPage));
2948 SHW_PTE_SET_RO(PteDst); /** @todo this isn't quite working yet... */
2949 Log3(("SyncPT: write-protecting %RGp pPage=%R[pgmpage] at %RGv\n", GCPhys, pPage, (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT))));
2950 }
2951
2952 if (SHW_PTE_IS_P(PteDst))
2953 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPage, PGM_PAGE_GET_TRACKING(pPage), pPage, iPTDst);
2954
2955 /* commit it (not atomic, new table) */
2956 pPTDst->a[iPTDst] = PteDst;
2957 Log4(("SyncPT: BIG %RGv PteDst:{P=%d RW=%d U=%d raw=%08llx}%s\n",
2958 (RTGCPTR)(GCPtr | (iPTDst << SHW_PT_SHIFT)), SHW_PTE_IS_P(PteDst), SHW_PTE_IS_RW(PteDst), SHW_PTE_IS_US(PteDst), SHW_PTE_LOG64(PteDst),
2959 SHW_PTE_IS_TRACK_DIRTY(PteDst) ? " Track-Dirty" : ""));
2960
2961 /* advance */
2962 GCPhys += GUEST_PAGE_SIZE;
2963 PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2964# ifndef PGM_WITH_A20
2965 iHCPage++;
2966# endif
2967 iPTDst++;
2968 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2969 && GCPhys <= pRam->GCPhysLast);
2970
2971 /* Advance ram range list. */
2972 while (pRam && GCPhys > pRam->GCPhysLast)
2973 pRam = pRam->CTX_SUFF(pNext);
2974 }
2975 else if (pRam)
2976 {
2977 Log(("Invalid pages at %RGp\n", GCPhys));
2978 do
2979 {
2980                    SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle it manually. */
2981 GCPhys += GUEST_PAGE_SIZE;
2982 iPTDst++;
2983 } while ( iPTDst < RT_ELEMENTS(pPTDst->a)
2984 && GCPhys < pRam->GCPhys);
2985                PGM_A20_APPLY_TO_VAR(pVCpu, GCPhys);
2986 }
2987 else
2988 {
2989 Log(("Invalid pages at %RGp (2)\n", GCPhys));
2990 for ( ; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
2991                    SHW_PTE_SET(pPTDst->a[iPTDst], 0); /* Invalid page, we must handle it manually. */
2992 }
2993 } /* while more PTEs */
2994 } /* 4KB / 4MB */
2995 }
2996 else
2997 AssertRelease(!SHW_PDE_IS_P(PdeDst));
2998
2999 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3000 if (RT_FAILURE(rc))
3001 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3002 return rc;
3003
3004#elif (PGM_GST_TYPE == PGM_TYPE_REAL || PGM_GST_TYPE == PGM_TYPE_PROT) \
3005 && !PGM_TYPE_IS_NESTED(PGM_SHW_TYPE) \
3006 && (PGM_SHW_TYPE != PGM_TYPE_EPT || PGM_GST_TYPE == PGM_TYPE_PROT) \
3007 && PGM_SHW_TYPE != PGM_TYPE_NONE
3008 NOREF(iPDSrc); NOREF(pPDSrc);
3009
3010 STAM_PROFILE_START(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3011
3012 /*
3013 * Validate input a little bit.
3014 */
3015 int rc = VINF_SUCCESS;
3016# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3017 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3018 PSHWPDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3019
3020 /* Fetch the pgm pool shadow descriptor. */
3021 PPGMPOOLPAGE pShwPde = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
3022 Assert(pShwPde);
3023
3024# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3025 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3026 PPGMPOOLPAGE pShwPde = NULL; /* initialized to shut up gcc */
3027 PX86PDPAE pPDDst;
3028 PSHWPDE pPdeDst;
3029
3030 /* Fetch the pgm pool shadow descriptor. */
3031 rc = pgmShwGetPaePoolPagePD(pVCpu, GCPtrPage, &pShwPde);
3032 AssertRCSuccessReturn(rc, rc);
3033 Assert(pShwPde);
3034
3035 pPDDst = (PX86PDPAE)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPde);
3036 pPdeDst = &pPDDst->a[iPDDst];
3037
3038# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3039 const unsigned iPdpt = (GCPtrPage >> X86_PDPT_SHIFT) & X86_PDPT_MASK_AMD64;
3040 const unsigned iPDDst = (GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK;
3041 PX86PDPAE pPDDst = NULL; /* initialized to shut up gcc */
3042    PX86PDPT pPdptDst = NULL; /* initialized to shut up gcc */
3043 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtrPage, NULL, &pPdptDst, &pPDDst);
3044 AssertRCSuccessReturn(rc, rc);
3045 Assert(pPDDst);
3046 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3047
3048 /* Fetch the pgm pool shadow descriptor. */
3049 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & X86_PDPE_PG_MASK);
3050 Assert(pShwPde);
3051
3052# elif PGM_SHW_TYPE == PGM_TYPE_EPT
3053 const unsigned iPdpt = (GCPtrPage >> EPT_PDPT_SHIFT) & EPT_PDPT_MASK;
3054 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3055 PEPTPD pPDDst;
3056 PEPTPDPT pPdptDst;
3057
3058 rc = pgmShwGetEPTPDPtr(pVCpu, GCPtrPage, &pPdptDst, &pPDDst);
3059 if (rc != VINF_SUCCESS)
3060 {
3061 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3062 AssertRC(rc);
3063 return rc;
3064 }
3065 Assert(pPDDst);
3066 PSHWPDE pPdeDst = &pPDDst->a[iPDDst];
3067
3068 /* Fetch the pgm pool shadow descriptor. */
3069 /** @todo r=bird: didn't pgmShwGetEPTPDPtr just do this lookup already? */
3070 PPGMPOOLPAGE pShwPde = pgmPoolGetPage(pPool, pPdptDst->a[iPdpt].u & EPT_PDPTE_PG_MASK);
3071 Assert(pShwPde);
3072# endif
3073 SHWPDE PdeDst = *pPdeDst;
3074
3075 Assert(!SHW_PDE_IS_P(PdeDst)); /* We're only supposed to call SyncPT on PDE!P and conflicts.*/
3076
3077# if defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE
3078 if (BTH_IS_NP_ACTIVE(pVM))
3079 {
3080 Assert(!VM_IS_NEM_ENABLED(pVM));
3081
3082        /* Check if we've already allocated a big page for this 2 MB range. */
3083 PPGMPAGE pPage;
3084 rc = pgmPhysGetPageEx(pVM, PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PDE2M_PAE_PG_MASK), &pPage);
3085 if (RT_SUCCESS(rc))
3086 {
3087 RTHCPHYS HCPhys = NIL_RTHCPHYS;
3088 if (PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE)
3089 {
3090 if (PGM_A20_IS_ENABLED(pVCpu))
3091 {
3092 STAM_REL_COUNTER_INC(&pVM->pgm.s.StatLargePageReused);
3093 AssertRelease(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3094 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3095 }
3096 else
3097 {
3098 PGM_PAGE_SET_PDE_TYPE(pVM, pPage, PGM_PAGE_PDE_TYPE_PDE_DISABLED);
3099 pVM->pgm.s.cLargePagesDisabled++;
3100 }
3101 }
3102 else if ( PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE_DISABLED
3103 && PGM_A20_IS_ENABLED(pVCpu))
3104 {
3105 /* Recheck the entire 2 MB range to see if we can use it again as a large page. */
3106 rc = pgmPhysRecheckLargePage(pVM, GCPtrPage, pPage);
3107 if (RT_SUCCESS(rc))
3108 {
3109 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3110 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3111 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3112 }
3113 }
3114 else if ( PGMIsUsingLargePages(pVM)
3115 && PGM_A20_IS_ENABLED(pVCpu))
3116 {
3117 rc = pgmPhysAllocLargePage(pVM, GCPtrPage);
3118 if (RT_SUCCESS(rc))
3119 {
3120 Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
3121 Assert(PGM_PAGE_GET_PDE_TYPE(pPage) == PGM_PAGE_PDE_TYPE_PDE);
3122 HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
3123 }
3124 else
3125 LogFlow(("pgmPhysAllocLargePage failed with %Rrc\n", rc));
3126 }
3127
3128 if (HCPhys != NIL_RTHCPHYS)
3129 {
3130# if PGM_SHW_TYPE == PGM_TYPE_EPT
3131 PdeDst.u = HCPhys | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE | EPT_E_LEAF | EPT_E_IGNORE_PAT | EPT_E_MEMTYPE_WB
3132 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo do we need this? */;
3133# else
3134 PdeDst.u = HCPhys | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_PS
3135 | (PdeDst.u & X86_PDE_AVL_MASK) /** @todo PGM_PD_FLAGS? */;
3136# endif
3137 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3138
3139 Log(("SyncPT: Use large page at %RGp PDE=%RX64\n", GCPtrPage, PdeDst.u));
3140 /* Add a reference to the first page only. */
3141 PGM_BTH_NAME(SyncPageWorkerTrackAddref)(pVCpu, pShwPde, PGM_PAGE_GET_TRACKING(pPage), pPage, iPDDst);
3142
3143 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3144 return VINF_SUCCESS;
3145 }
3146 }
3147 }
3148# endif /* defined(PGM_WITH_LARGE_PAGES) && PGM_SHW_TYPE != PGM_TYPE_32BIT && PGM_SHW_TYPE != PGM_TYPE_PAE */
3149
3150 /*
3151 * Allocate & map the page table.
3152 */
3153 PSHWPT pPTDst;
3154 PPGMPOOLPAGE pShwPage;
3155 RTGCPHYS GCPhys;
3156
3157 /* Virtual address = physical address */
3158 GCPhys = PGM_A20_APPLY(pVCpu, GCPtrPage & X86_PAGE_4K_BASE_MASK);
3159 rc = pgmPoolAlloc(pVM, GCPhys & ~(RT_BIT_64(SHW_PD_SHIFT) - 1), BTH_PGMPOOLKIND_PT_FOR_PT, PGMPOOLACCESS_DONTCARE,
3160 PGM_A20_IS_ENABLED(pVCpu), pShwPde->idx, iPDDst, false /*fLockPage*/,
3161 &pShwPage);
3162 if ( rc == VINF_SUCCESS
3163 || rc == VINF_PGM_CACHED_PAGE)
3164 pPTDst = (PSHWPT)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pShwPage);
3165 else
3166 {
3167 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3168 AssertMsgFailedReturn(("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_IPE_UNEXPECTED_INFO_STATUS);
3169 }
3170
3171 if (rc == VINF_SUCCESS)
3172 {
3173 /* New page table; fully set it up. */
3174 Assert(pPTDst);
3175
3176 /* Mask away the page offset. */
3177 GCPtrPage &= ~(RTGCPTR)GUEST_PAGE_OFFSET_MASK;
3178
3179 for (unsigned iPTDst = 0; iPTDst < RT_ELEMENTS(pPTDst->a); iPTDst++)
3180 {
3181 RTGCPTR GCPtrCurPage = PGM_A20_APPLY(pVCpu, (GCPtrPage & ~(RTGCPTR)(SHW_PT_MASK << SHW_PT_SHIFT))
3182 | (iPTDst << GUEST_PAGE_SHIFT));
3183
3184 PGM_BTH_NAME(SyncPageWorker)(pVCpu, &pPTDst->a[iPTDst], GCPtrCurPage, pShwPage, iPTDst);
3185 Log2(("SyncPage: 4K+ %RGv PteSrc:{P=1 RW=1 U=1} PteDst=%08llx%s\n",
3186 GCPtrCurPage,
3187 SHW_PTE_LOG64(pPTDst->a[iPTDst]),
3188 SHW_PTE_IS_TRACK_DIRTY(pPTDst->a[iPTDst]) ? " Track-Dirty" : ""));
3189
3190 if (RT_UNLIKELY(VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)))
3191 break;
3192 }
3193 }
3194 else
3195 rc = VINF_SUCCESS; /* Cached entry; assume it's still fully valid. */
3196
3197 /* Save the new PDE. */
3198# if PGM_SHW_TYPE == PGM_TYPE_EPT
3199 PdeDst.u = pShwPage->Core.Key | EPT_E_READ | EPT_E_WRITE | EPT_E_EXECUTE
3200 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo do we really need this? */);
3201# else
3202 PdeDst.u = pShwPage->Core.Key | X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A
3203 | (PdeDst.u & X86_PDE_AVL_MASK /** @todo use a PGM_PD_FLAGS define */);
3204# endif
3205 SHW_PDE_ATOMIC_SET2(*pPdeDst, PdeDst);
3206
3207 STAM_PROFILE_STOP(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPT), a);
3208 if (RT_FAILURE(rc))
3209 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,SyncPTFailed));
3210 return rc;
3211
3212#else
3213 NOREF(iPDSrc); NOREF(pPDSrc);
3214 AssertReleaseMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3215 return VERR_PGM_NOT_USED_IN_MODE;
3216#endif
3217}
3218
3219
3220
3221/**
3222 * Prefetch a page/set of pages.
3223 *
3224 * Typically used to sync commonly used pages before entering raw mode
3225 * after a CR3 reload.
3226 *
3227 * @returns VBox status code.
3228 * @param pVCpu The cross context virtual CPU structure.
3229 * @param GCPtrPage Page to prefetch.
3230 */
3231PGM_BTH_DECL(int, PrefetchPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage)
3232{
3233#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3234 || PGM_GST_TYPE == PGM_TYPE_REAL \
3235 || PGM_GST_TYPE == PGM_TYPE_PROT \
3236 || PGM_GST_TYPE == PGM_TYPE_PAE \
3237 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3238 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3239 && PGM_SHW_TYPE != PGM_TYPE_NONE
3240 /*
3241 * Check that all Guest levels thru the PDE are present, getting the
3242 * PD and PDE in the processes.
3243     * PD and PDE in the process.
3244 int rc = VINF_SUCCESS;
3245# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3246# if PGM_GST_TYPE == PGM_TYPE_32BIT
3247 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3248 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3249# elif PGM_GST_TYPE == PGM_TYPE_PAE
3250 unsigned iPDSrc;
3251 X86PDPE PdpeSrc;
3252 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3253 if (!pPDSrc)
3254 return VINF_SUCCESS; /* not present */
3255# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3256 unsigned iPDSrc;
3257 PX86PML4E pPml4eSrc;
3258 X86PDPE PdpeSrc;
3259 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3260 if (!pPDSrc)
3261 return VINF_SUCCESS; /* not present */
3262# endif
3263 const GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3264# else
3265 PGSTPD pPDSrc = NULL;
3266 const unsigned iPDSrc = 0;
3267 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3268# endif
3269
3270 if ((PdeSrc.u & (X86_PDE_P | X86_PDE_A)) == (X86_PDE_P | X86_PDE_A))
3271 {
3272 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3273 PGM_LOCK_VOID(pVM);
3274
3275# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3276 const X86PDE PdeDst = pgmShwGet32BitPDE(pVCpu, GCPtrPage);
3277# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3278 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3279 PX86PDPAE pPDDst;
3280 X86PDEPAE PdeDst;
3281# if PGM_GST_TYPE != PGM_TYPE_PAE
3282 X86PDPE PdpeSrc;
3283
3284 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3285 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3286# endif
3287 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3288 if (rc != VINF_SUCCESS)
3289 {
3290 PGM_UNLOCK(pVM);
3291 AssertRC(rc);
3292 return rc;
3293 }
3294 Assert(pPDDst);
3295 PdeDst = pPDDst->a[iPDDst];
3296
3297# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3298 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3299 PX86PDPAE pPDDst;
3300 X86PDEPAE PdeDst;
3301
3302# if PGM_GST_TYPE == PGM_TYPE_PROT
3303 /* AMD-V nested paging */
3304 X86PML4E Pml4eSrc;
3305 X86PDPE PdpeSrc;
3306 PX86PML4E pPml4eSrc = &Pml4eSrc;
3307
3308 /* Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3309 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3310 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3311# endif
3312
3313 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3314 if (rc != VINF_SUCCESS)
3315 {
3316 PGM_UNLOCK(pVM);
3317 AssertRC(rc);
3318 return rc;
3319 }
3320 Assert(pPDDst);
3321 PdeDst = pPDDst->a[iPDDst];
3322# endif
3323 if (!(PdeDst.u & X86_PDE_P))
3324 {
3325 /** @todo r=bird: This guy will set the A bit on the PDE,
3326 * probably harmless. */
3327 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3328 }
3329 else
3330 {
3331 /* Note! We used to sync PGM_SYNC_NR_PAGES pages, which triggered assertions in CSAM, because
3332 * R/W attributes of nearby pages were reset. Not sure how that could happen. Anyway, it
3333 * makes no sense to prefetch more than one page.
3334 */
3335 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3336 if (RT_SUCCESS(rc))
3337 rc = VINF_SUCCESS;
3338 }
3339 PGM_UNLOCK(pVM);
3340 }
3341 return rc;
3342
3343#elif PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3344 NOREF(pVCpu); NOREF(GCPtrPage);
3345 return VINF_SUCCESS; /* ignore */
3346#else
3347 AssertCompile(0);
3348#endif
3349}
3350
3351
3352
3353
3354/**
3355 * Syncs a page during a PGMVerifyAccess() call.
3356 *
3357 * @returns VBox status code (informational status codes included).
3358 * @param pVCpu The cross context virtual CPU structure.
3359 * @param GCPtrPage The address of the page to sync.
3360 * @param fPage The effective guest page flags.
3361 * @param uErr The trap error code.
3362 * @remarks This will normally never be called on invalid guest page
3363 * translation entries.
3364 */
3365PGM_BTH_DECL(int, VerifyAccessSyncPage)(PVMCPUCC pVCpu, RTGCPTR GCPtrPage, unsigned fPage, unsigned uErr)
3366{
3367 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3368
3369 LogFlow(("VerifyAccessSyncPage: GCPtrPage=%RGv fPage=%#x uErr=%#x\n", GCPtrPage, fPage, uErr));
3370 RT_NOREF_PV(GCPtrPage); RT_NOREF_PV(fPage); RT_NOREF_PV(uErr);
3371
3372 Assert(!pVM->pgm.s.fNestedPaging);
3373#if ( PGM_GST_TYPE == PGM_TYPE_32BIT \
3374 || PGM_GST_TYPE == PGM_TYPE_REAL \
3375 || PGM_GST_TYPE == PGM_TYPE_PROT \
3376 || PGM_GST_TYPE == PGM_TYPE_PAE \
3377 || PGM_GST_TYPE == PGM_TYPE_AMD64 ) \
3378 && !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) \
3379 && PGM_SHW_TYPE != PGM_TYPE_NONE
3380
3381 /*
3382 * Get guest PD and index.
3383 */
3384    /** @todo Performance: We did all this a jiffy ago in the
3385 * PGMGstGetPage call. */
3386# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3387# if PGM_GST_TYPE == PGM_TYPE_32BIT
3388 const unsigned iPDSrc = (uint32_t)GCPtrPage >> GST_PD_SHIFT;
3389 PGSTPD pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3390
3391# elif PGM_GST_TYPE == PGM_TYPE_PAE
3392 unsigned iPDSrc = 0;
3393 X86PDPE PdpeSrc;
3394 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtrPage, &iPDSrc, &PdpeSrc);
3395 if (RT_UNLIKELY(!pPDSrc))
3396 {
3397 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3398 return VINF_EM_RAW_GUEST_TRAP;
3399 }
3400
3401# elif PGM_GST_TYPE == PGM_TYPE_AMD64
3402 unsigned iPDSrc = 0; /* shut up gcc */
3403 PX86PML4E pPml4eSrc = NULL; /* ditto */
3404 X86PDPE PdpeSrc;
3405 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtrPage, &pPml4eSrc, &PdpeSrc, &iPDSrc);
3406 if (RT_UNLIKELY(!pPDSrc))
3407 {
3408 Log(("PGMVerifyAccess: access violation for %RGv due to non-present PDPTR\n", GCPtrPage));
3409 return VINF_EM_RAW_GUEST_TRAP;
3410 }
3411# endif
3412
3413# else /* !PGM_WITH_PAGING */
3414 PGSTPD pPDSrc = NULL;
3415 const unsigned iPDSrc = 0;
3416# endif /* !PGM_WITH_PAGING */
3417 int rc = VINF_SUCCESS;
3418
3419 PGM_LOCK_VOID(pVM);
3420
3421 /*
3422 * First check if the shadow pd is present.
3423 */
3424# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3425 PX86PDE pPdeDst = pgmShwGet32BitPDEPtr(pVCpu, GCPtrPage);
3426
3427# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3428 PX86PDEPAE pPdeDst;
3429 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3430 PX86PDPAE pPDDst;
3431# if PGM_GST_TYPE != PGM_TYPE_PAE
3432 /* Fake PDPT entry; access control handled on the page table level, so allow everything. */
3433 X86PDPE PdpeSrc;
3434 PdpeSrc.u = X86_PDPE_P; /* rw/us are reserved for PAE pdpte's; accessed bit causes invalid VT-x guest state errors */
3435# endif
3436 rc = pgmShwSyncPaePDPtr(pVCpu, GCPtrPage, PdpeSrc.u, &pPDDst);
3437 if (rc != VINF_SUCCESS)
3438 {
3439 PGM_UNLOCK(pVM);
3440 AssertRC(rc);
3441 return rc;
3442 }
3443 Assert(pPDDst);
3444 pPdeDst = &pPDDst->a[iPDDst];
3445
3446# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3447 const unsigned iPDDst = ((GCPtrPage >> SHW_PD_SHIFT) & SHW_PD_MASK);
3448 PX86PDPAE pPDDst;
3449 PX86PDEPAE pPdeDst;
3450
3451# if PGM_GST_TYPE == PGM_TYPE_PROT
3452 /* AMD-V nested paging: Fake PML4 & PDPT entry; access control handled on the page table level, so allow everything. */
3453 X86PML4E Pml4eSrc;
3454 X86PDPE PdpeSrc;
3455 PX86PML4E pPml4eSrc = &Pml4eSrc;
3456 Pml4eSrc.u = X86_PML4E_P | X86_PML4E_RW | X86_PML4E_US | X86_PML4E_A;
3457 PdpeSrc.u = X86_PDPE_P | X86_PDPE_RW | X86_PDPE_US | X86_PDPE_A;
3458# endif
3459
3460 rc = pgmShwSyncLongModePDPtr(pVCpu, GCPtrPage, pPml4eSrc->u, PdpeSrc.u, &pPDDst);
3461 if (rc != VINF_SUCCESS)
3462 {
3463 PGM_UNLOCK(pVM);
3464 AssertRC(rc);
3465 return rc;
3466 }
3467 Assert(pPDDst);
3468 pPdeDst = &pPDDst->a[iPDDst];
3469# endif
3470
3471 if (!(pPdeDst->u & X86_PDE_P))
3472 {
3473 rc = PGM_BTH_NAME(SyncPT)(pVCpu, iPDSrc, pPDSrc, GCPtrPage);
3474 if (rc != VINF_SUCCESS)
3475 {
3476 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3477 PGM_UNLOCK(pVM);
3478 AssertRC(rc);
3479 return rc;
3480 }
3481 }
3482
3483# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3484 /* Check for dirty bit fault */
3485 rc = PGM_BTH_NAME(CheckDirtyPageFault)(pVCpu, uErr, pPdeDst, &pPDSrc->a[iPDSrc], GCPtrPage);
3486 if (rc == VINF_PGM_HANDLED_DIRTY_BIT_FAULT)
3487 Log(("PGMVerifyAccess: success (dirty)\n"));
3488 else
3489# endif
3490 {
3491# if PGM_WITH_PAGING(PGM_GST_TYPE, PGM_SHW_TYPE)
3492 GSTPDE PdeSrc = pPDSrc->a[iPDSrc];
3493# else
3494 GSTPDE const PdeSrc = { X86_PDE_P | X86_PDE_RW | X86_PDE_US | X86_PDE_A }; /* faked so we don't have to #ifdef everything */
3495# endif
3496
3497 Assert(rc != VINF_EM_RAW_GUEST_TRAP);
3498 if (uErr & X86_TRAP_PF_US)
3499 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncUser));
3500 else /* supervisor */
3501 STAM_COUNTER_INC(&pVCpu->pgm.s.Stats.CTX_MID_Z(Stat,PageOutOfSyncSupervisor));
3502
3503 rc = PGM_BTH_NAME(SyncPage)(pVCpu, PdeSrc, GCPtrPage, 1, 0);
3504 if (RT_SUCCESS(rc))
3505 {
3506 /* Page was successfully synced */
3507 Log2(("PGMVerifyAccess: success (sync)\n"));
3508 rc = VINF_SUCCESS;
3509 }
3510 else
3511 {
3512 Log(("PGMVerifyAccess: access violation for %RGv rc=%Rrc\n", GCPtrPage, rc));
3513 rc = VINF_EM_RAW_GUEST_TRAP;
3514 }
3515 }
3516 PGM_DYNMAP_UNUSED_HINT(pVCpu, pPdeDst);
3517 PGM_UNLOCK(pVM);
3518 return rc;
3519
3520#else /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3521
3522    AssertLogRelMsgFailed(("Shw=%d Gst=%d is not implemented!\n", PGM_SHW_TYPE, PGM_GST_TYPE));
3523 return VERR_PGM_NOT_USED_IN_MODE;
3524#endif /* PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) */
3525}
3526
3527
3528/**
3529 * Syncs the paging hierarchy starting at CR3.
3530 *
3531 * @returns VBox status code, R0/RC may return VINF_PGM_SYNC_CR3, no other
3532 * informational status codes.
3533 * @retval VERR_PGM_NO_HYPERVISOR_ADDRESS in raw-mode when we're unable to map
3534 * the VMM into guest context.
3535 * @param pVCpu The cross context virtual CPU structure.
3536 * @param cr0 Guest context CR0 register.
3537 * @param cr3 Guest context CR3 register. Not subjected to the A20
3538 * mask.
3539 * @param cr4 Guest context CR4 register.
3540 * @param fGlobal Including global page directories or not
3541 */
3542PGM_BTH_DECL(int, SyncCR3)(PVMCPUCC pVCpu, uint64_t cr0, uint64_t cr3, uint64_t cr4, bool fGlobal)
3543{
3544 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
3545 NOREF(cr0); NOREF(cr3); NOREF(cr4); NOREF(fGlobal);
3546
3547 LogFlow(("SyncCR3 FF=%d fGlobal=%d\n", !!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3), fGlobal));
3548
3549#if !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE
3550# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
3551 PGM_LOCK_VOID(pVM);
3552 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
3553 if (pPool->cDirtyPages)
3554 pgmPoolResetDirtyPages(pVM);
3555 PGM_UNLOCK(pVM);
3556# endif
3557#endif /* !NESTED && !EPT */
3558
3559#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3560 /*
3561 * Nested / EPT / None - No work.
3562 */
3563 return VINF_SUCCESS;
3564
3565#elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3566 /*
3567 * AMD64 (Shw & Gst) - No need to check all paging levels; we zero
3568 * out the shadow parts when the guest modifies its tables.
3569 */
3570 return VINF_SUCCESS;
3571
3572#else /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3573
3574 return VINF_SUCCESS;
3575#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_AMD64 */
3576}
3577
3578
3579
3580
3581#ifdef VBOX_STRICT
3582
3583/**
3584 * Checks that the shadow page table is in sync with the guest one.
3585 *
3586 * @returns The number of errors.
3587 * @param pVCpu The cross context virtual CPU structure.
3588 * @param cr3 Guest context CR3 register.
3589 * @param cr4 Guest context CR4 register.
3590 * @param GCPtr Where to start. Defaults to 0.
3591 * @param cb How much to check. Defaults to everything.
3592 */
3593PGM_BTH_DECL(unsigned, AssertCR3)(PVMCPUCC pVCpu, uint64_t cr3, uint64_t cr4, RTGCPTR GCPtr, RTGCPTR cb)
3594{
3595 NOREF(pVCpu); NOREF(cr3); NOREF(cr4); NOREF(GCPtr); NOREF(cb);
3596#if PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) || PGM_SHW_TYPE == PGM_TYPE_NONE
3597 return 0;
3598#else
3599 unsigned cErrors = 0;
3600 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3601 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool); NOREF(pPool);
3602
3603# if PGM_GST_TYPE == PGM_TYPE_PAE
3604 /** @todo currently broken; crashes below somewhere */
3605 AssertFailed();
3606# endif
3607
3608# if PGM_GST_TYPE == PGM_TYPE_32BIT \
3609 || PGM_GST_TYPE == PGM_TYPE_PAE \
3610 || PGM_GST_TYPE == PGM_TYPE_AMD64
3611
3612 bool fBigPagesSupported = GST_IS_PSE_ACTIVE(pVCpu);
3613 PPGMCPU pPGM = &pVCpu->pgm.s;
3614 RTGCPHYS GCPhysGst; /* page address derived from the guest page tables. */
3615 RTHCPHYS HCPhysShw; /* page address derived from the shadow page tables. */
3616# ifndef IN_RING0
3617 RTHCPHYS HCPhys; /* general usage. */
3618# endif
3619 int rc;
3620
3621 /*
3622 * Check that the Guest CR3 and all its mappings are correct.
3623 */
3624 AssertMsgReturn(pPGM->GCPhysCR3 == PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK),
3625 ("Invalid GCPhysCR3=%RGp cr3=%RGp\n", pPGM->GCPhysCR3, (RTGCPHYS)cr3),
3626 false);
3627# if !defined(IN_RING0) && PGM_GST_TYPE != PGM_TYPE_AMD64
3628# if 0
3629# if PGM_GST_TYPE == PGM_TYPE_32BIT
3630 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGst32BitPdRC, NULL, &HCPhysShw);
3631# else
3632 rc = PGMShwGetPage(pVCpu, (RTRCUINTPTR)pPGM->pGstPaePdptRC, NULL, &HCPhysShw);
3633# endif
3634 AssertRCReturn(rc, 1);
3635 HCPhys = NIL_RTHCPHYS;
3636 rc = pgmRamGCPhys2HCPhys(pVM, PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK), &HCPhys);
3637    AssertMsgReturn(HCPhys == HCPhysShw, ("HCPhys=%RHp HCPhysShw=%RHp (cr3)\n", HCPhys, HCPhysShw), false);
3638# endif
3639# if PGM_GST_TYPE == PGM_TYPE_32BIT && defined(IN_RING3)
3640 pgmGstGet32bitPDPtr(pVCpu);
3641 RTGCPHYS GCPhys;
3642 rc = PGMR3DbgR3Ptr2GCPhys(pVM->pUVM, pPGM->pGst32BitPdR3, &GCPhys);
3643 AssertRCReturn(rc, 1);
3644 AssertMsgReturn(PGM_A20_APPLY(pVCpu, cr3 & GST_CR3_PAGE_MASK) == GCPhys, ("GCPhys=%RGp cr3=%RGp\n", GCPhys, (RTGCPHYS)cr3), false);
3645# endif
3646# endif /* !IN_RING0 */
3647
3648 /*
3649 * Get and check the Shadow CR3.
3650 */
3651# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3652 unsigned cPDEs = X86_PG_ENTRIES;
3653 unsigned cIncrement = X86_PG_ENTRIES * GUEST_PAGE_SIZE;
3654# elif PGM_SHW_TYPE == PGM_TYPE_PAE
3655# if PGM_GST_TYPE == PGM_TYPE_32BIT
3656 unsigned cPDEs = X86_PG_PAE_ENTRIES * 4; /* treat it as a 2048 entry table. */
3657# else
3658 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3659# endif
3660 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3661# elif PGM_SHW_TYPE == PGM_TYPE_AMD64
3662 unsigned cPDEs = X86_PG_PAE_ENTRIES;
3663 unsigned cIncrement = X86_PG_PAE_ENTRIES * GUEST_PAGE_SIZE;
3664# endif
3665 if (cb != ~(RTGCPTR)0)
3666 cPDEs = RT_MIN(cb >> SHW_PD_SHIFT, 1);
3667
3668/** @todo call the other two PGMAssert*() functions. */
3669
3670# if PGM_GST_TYPE == PGM_TYPE_AMD64
3671 unsigned iPml4 = (GCPtr >> X86_PML4_SHIFT) & X86_PML4_MASK;
3672
3673 for (; iPml4 < X86_PG_PAE_ENTRIES; iPml4++)
3674 {
3675 PPGMPOOLPAGE pShwPdpt = NULL;
3676 PX86PML4E pPml4eSrc;
3677 PX86PML4E pPml4eDst;
3678 RTGCPHYS GCPhysPdptSrc;
3679
3680 pPml4eSrc = pgmGstGetLongModePML4EPtr(pVCpu, iPml4);
3681 pPml4eDst = pgmShwGetLongModePML4EPtr(pVCpu, iPml4);
3682
3683 /* Fetch the pgm pool shadow descriptor if the shadow pml4e is present. */
3684 if (!(pPml4eDst->u & X86_PML4E_P))
3685 {
3686 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3687 continue;
3688 }
3689
3690 pShwPdpt = pgmPoolGetPage(pPool, pPml4eDst->u & X86_PML4E_PG_MASK);
3691 GCPhysPdptSrc = PGM_A20_APPLY(pVCpu, pPml4eSrc->u & X86_PML4E_PG_MASK);
3692
3693 if ((pPml4eSrc->u & X86_PML4E_P) != (pPml4eDst->u & X86_PML4E_P))
3694 {
3695 AssertMsgFailed(("Present bit doesn't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3696 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3697 cErrors++;
3698 continue;
3699 }
3700
3701 if (GCPhysPdptSrc != pShwPdpt->GCPhys)
3702 {
3703 AssertMsgFailed(("Physical address doesn't match! iPml4 %d pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, pPml4eDst->u, pPml4eSrc->u, pShwPdpt->GCPhys, GCPhysPdptSrc));
3704 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3705 cErrors++;
3706 continue;
3707 }
3708
3709 if ( (pPml4eDst->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX))
3710 != (pPml4eSrc->u & (X86_PML4E_US | X86_PML4E_RW | X86_PML4E_NX)))
3711 {
3712 AssertMsgFailed(("User/Write/NoExec bits don't match! pPml4eDst.u=%#RX64 pPml4eSrc.u=%RX64\n", pPml4eDst->u, pPml4eSrc->u));
3713 GCPtr += _2M * UINT64_C(512) * UINT64_C(512);
3714 cErrors++;
3715 continue;
3716 }
3717# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3718 {
3719# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 */
3720
3721# if PGM_GST_TYPE == PGM_TYPE_AMD64 || PGM_GST_TYPE == PGM_TYPE_PAE
3722 /*
3723 * Check the PDPTEs too.
3724 */
3725 unsigned iPdpt = (GCPtr >> SHW_PDPT_SHIFT) & SHW_PDPT_MASK;
3726
3727        for (; iPdpt <= SHW_PDPT_MASK; iPdpt++)
3728 {
3729 unsigned iPDSrc = 0; /* initialized to shut up gcc */
3730 PPGMPOOLPAGE pShwPde = NULL;
3731 PX86PDPE pPdpeDst;
3732 RTGCPHYS GCPhysPdeSrc;
3733 X86PDPE PdpeSrc;
3734 PdpeSrc.u = 0; /* initialized to shut up gcc 4.5 */
3735# if PGM_GST_TYPE == PGM_TYPE_PAE
3736 PGSTPD pPDSrc = pgmGstGetPaePDPtr(pVCpu, GCPtr, &iPDSrc, &PdpeSrc);
3737 PX86PDPT pPdptDst = pgmShwGetPaePDPTPtr(pVCpu);
3738# else
3739 PX86PML4E pPml4eSrcIgn;
3740 PX86PDPT pPdptDst;
3741 PX86PDPAE pPDDst;
3742 PGSTPD pPDSrc = pgmGstGetLongModePDPtr(pVCpu, GCPtr, &pPml4eSrcIgn, &PdpeSrc, &iPDSrc);
3743
3744 rc = pgmShwGetLongModePDPtr(pVCpu, GCPtr, NULL, &pPdptDst, &pPDDst);
3745 if (rc != VINF_SUCCESS)
3746 {
3747 AssertMsg(rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT, ("Unexpected rc=%Rrc\n", rc));
3748 GCPtr += 512 * _2M;
3749 continue; /* next PDPTE */
3750 }
3751 Assert(pPDDst);
3752# endif
3753 Assert(iPDSrc == 0);
3754
3755 pPdpeDst = &pPdptDst->a[iPdpt];
3756
3757 if (!(pPdpeDst->u & X86_PDPE_P))
3758 {
3759 GCPtr += 512 * _2M;
3760 continue; /* next PDPTE */
3761 }
3762
3763 pShwPde = pgmPoolGetPage(pPool, pPdpeDst->u & X86_PDPE_PG_MASK);
3764 GCPhysPdeSrc = PGM_A20_APPLY(pVCpu, PdpeSrc.u & X86_PDPE_PG_MASK);
3765
3766 if ((pPdpeDst->u & X86_PDPE_P) != (PdpeSrc.u & X86_PDPE_P))
3767 {
3768 AssertMsgFailed(("Present bit doesn't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3769 GCPtr += 512 * _2M;
3770 cErrors++;
3771 continue;
3772 }
3773
3774 if (GCPhysPdeSrc != pShwPde->GCPhys)
3775 {
3776# if PGM_GST_TYPE == PGM_TYPE_AMD64
3777 AssertMsgFailed(("Physical address doesn't match! iPml4 %d iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPml4, iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3778# else
3779 AssertMsgFailed(("Physical address doesn't match! iPdpt %d pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64 Phys %RX64 vs %RX64\n", iPdpt, pPdpeDst->u, PdpeSrc.u, pShwPde->GCPhys, GCPhysPdeSrc));
3780# endif
3781 GCPtr += 512 * _2M;
3782 cErrors++;
3783 continue;
3784 }
3785
3786# if PGM_GST_TYPE == PGM_TYPE_AMD64
3787 if ( (pPdpeDst->u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX))
3788 != (PdpeSrc.u & (X86_PDPE_US | X86_PDPE_RW | X86_PDPE_LM_NX)))
3789 {
3790 AssertMsgFailed(("User/Write/NoExec bits don't match! pPdpeDst.u=%#RX64 pPdpeSrc.u=%RX64\n", pPdpeDst->u, PdpeSrc.u));
3791 GCPtr += 512 * _2M;
3792 cErrors++;
3793 continue;
3794 }
3795# endif
3796
3797# else /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3798 {
3799# endif /* PGM_GST_TYPE != PGM_TYPE_AMD64 && PGM_GST_TYPE != PGM_TYPE_PAE */
3800# if PGM_GST_TYPE == PGM_TYPE_32BIT
3801 GSTPD const *pPDSrc = pgmGstGet32bitPDPtr(pVCpu);
3802# if PGM_SHW_TYPE == PGM_TYPE_32BIT
3803 PCX86PD pPDDst = pgmShwGet32BitPDPtr(pVCpu);
3804# endif
3805# endif /* PGM_GST_TYPE == PGM_TYPE_32BIT */
3806 /*
3807 * Iterate the shadow page directory.
3808 */
3809 GCPtr = (GCPtr >> SHW_PD_SHIFT) << SHW_PD_SHIFT;
3810 unsigned iPDDst = (GCPtr >> SHW_PD_SHIFT) & SHW_PD_MASK;
3811
3812 for (;
3813 iPDDst < cPDEs;
3814 iPDDst++, GCPtr += cIncrement)
3815 {
3816# if PGM_SHW_TYPE == PGM_TYPE_PAE
3817 const SHWPDE PdeDst = *pgmShwGetPaePDEPtr(pVCpu, GCPtr);
3818# else
3819 const SHWPDE PdeDst = pPDDst->a[iPDDst];
3820# endif
3821 if ( (PdeDst.u & X86_PDE_P)
3822 || ((PdeDst.u & (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) == (X86_PDE_P | PGM_PDFLAGS_TRACK_DIRTY)) )
3823 {
3824 HCPhysShw = PdeDst.u & SHW_PDE_PG_MASK;
3825 PPGMPOOLPAGE pPoolPage = pgmPoolGetPage(pPool, HCPhysShw);
3826 if (!pPoolPage)
3827 {
3828 AssertMsgFailed(("Invalid page table address %RHp at %RGv! PdeDst=%#RX64\n",
3829 HCPhysShw, GCPtr, (uint64_t)PdeDst.u));
3830 cErrors++;
3831 continue;
3832 }
3833 const SHWPT *pPTDst = (const SHWPT *)PGMPOOL_PAGE_2_PTR_V2(pVM, pVCpu, pPoolPage);
3834
3835 if (PdeDst.u & (X86_PDE4M_PWT | X86_PDE4M_PCD))
3836 {
3837 AssertMsgFailed(("PDE flags PWT and/or PCD is set at %RGv! These flags are not virtualized! PdeDst=%#RX64\n",
3838 GCPtr, (uint64_t)PdeDst.u));
3839 cErrors++;
3840 }
3841
3842 if (PdeDst.u & (X86_PDE4M_G | X86_PDE4M_D))
3843 {
3844 AssertMsgFailed(("4K PDE reserved flags at %RGv! PdeDst=%#RX64\n",
3845 GCPtr, (uint64_t)PdeDst.u));
3846 cErrors++;
3847 }
3848
3849 const GSTPDE PdeSrc = pPDSrc->a[(iPDDst >> (GST_PD_SHIFT - SHW_PD_SHIFT)) & GST_PD_MASK];
3850 if (!(PdeSrc.u & X86_PDE_P))
3851 {
3852 AssertMsgFailed(("Guest PDE at %RGv is not present! PdeDst=%#RX64 PdeSrc=%#RX64\n",
3853 GCPtr, (uint64_t)PdeDst.u, (uint64_t)PdeSrc.u));
3854 cErrors++;
3855 continue;
3856 }
3857
3858 if ( !(PdeSrc.u & X86_PDE_PS)
3859 || !fBigPagesSupported)
3860 {
3861 GCPhysGst = GST_GET_PDE_GCPHYS(PdeSrc);
3862# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3863 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | ((iPDDst & 1) * (GUEST_PAGE_SIZE / 2)));
3864# endif
3865 }
3866 else
3867 {
3868# if PGM_GST_TYPE == PGM_TYPE_32BIT
3869 if (PdeSrc.u & X86_PDE4M_PG_HIGH_MASK)
3870 {
3871 AssertMsgFailed(("Guest PDE at %RGv is using PSE36 or similar! PdeSrc=%#RX64\n",
3872 GCPtr, (uint64_t)PdeSrc.u));
3873 cErrors++;
3874 continue;
3875 }
3876# endif
3877 GCPhysGst = GST_GET_BIG_PDE_GCPHYS(pVM, PdeSrc);
3878# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3879 GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst | (GCPtr & RT_BIT(X86_PAGE_2M_SHIFT)));
3880# endif
3881 }
3882
3883 if ( pPoolPage->enmKind
3884 != (!(PdeSrc.u & X86_PDE_PS) || !fBigPagesSupported ? BTH_PGMPOOLKIND_PT_FOR_PT : BTH_PGMPOOLKIND_PT_FOR_BIG))
3885 {
3886 AssertMsgFailed(("Invalid shadow page table kind %d at %RGv! PdeSrc=%#RX64\n",
3887 pPoolPage->enmKind, GCPtr, (uint64_t)PdeSrc.u));
3888 cErrors++;
3889 }
3890
3891 PPGMPAGE pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
3892 if (!pPhysPage)
3893 {
3894 AssertMsgFailed(("Cannot find guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3895 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3896 cErrors++;
3897 continue;
3898 }
3899
3900 if (GCPhysGst != pPoolPage->GCPhys)
3901 {
3902 AssertMsgFailed(("GCPhysGst=%RGp != pPage->GCPhys=%RGp at %RGv\n",
3903 GCPhysGst, pPoolPage->GCPhys, GCPtr));
3904 cErrors++;
3905 continue;
3906 }
3907
3908 if ( !(PdeSrc.u & X86_PDE_PS)
3909 || !fBigPagesSupported)
3910 {
3911 /*
3912 * Page Table.
3913 */
3914 const GSTPT *pPTSrc;
3915 rc = PGM_GCPHYS_2_PTR_V2(pVM, pVCpu, PGM_A20_APPLY(pVCpu, GCPhysGst & ~(RTGCPHYS)(GUEST_PAGE_SIZE - 1)),
3916 &pPTSrc);
3917 if (RT_FAILURE(rc))
3918 {
3919 AssertMsgFailed(("Cannot map/convert guest physical address %RGp in the PDE at %RGv! PdeSrc=%#RX64\n",
3920 GCPhysGst, GCPtr, (uint64_t)PdeSrc.u));
3921 cErrors++;
3922 continue;
3923 }
3924 if ( (PdeSrc.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/))
3925 != (PdeDst.u & (X86_PDE_P | X86_PDE_US | X86_PDE_RW/* | X86_PDE_A*/)))
3926 {
3927 /// @todo We get here a lot on out-of-sync CR3 entries. The access handler should zap them to avoid false alarms here!
3928 // (This problem will go away when/if we shadow multiple CR3s.)
3929 AssertMsgFailed(("4K PDE flags mismatch at %RGv! PdeSrc=%#RX64 PdeDst=%#RX64\n",
3930 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
3931 cErrors++;
3932 continue;
3933 }
3934 if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
3935 {
3936 AssertMsgFailed(("4K PDEs cannot have PGM_PDFLAGS_TRACK_DIRTY set! GCPtr=%RGv PdeDst=%#RX64\n",
3937 GCPtr, (uint64_t)PdeDst.u));
3938 cErrors++;
3939 continue;
3940 }
3941
3942 /* iterate the page table. */
3943# if PGM_SHW_TYPE == PGM_TYPE_PAE && PGM_GST_TYPE == PGM_TYPE_32BIT
3944 /* Select the right PDE as we're emulating a 4kb page table with 2 shadow page tables. */
3945 const unsigned offPTSrc = ((GCPtr >> SHW_PD_SHIFT) & 1) * 512;
3946# else
3947 const unsigned offPTSrc = 0;
3948# endif
3949 for (unsigned iPT = 0, off = 0;
3950 iPT < RT_ELEMENTS(pPTDst->a);
3951 iPT++, off += GUEST_PAGE_SIZE)
3952 {
3953 const SHWPTE PteDst = pPTDst->a[iPT];
3954
3955 /* skip not-present and dirty tracked entries. */
3956 if (!(SHW_PTE_GET_U(PteDst) & (X86_PTE_P | PGM_PTFLAGS_TRACK_DIRTY))) /** @todo deal with ALL handlers and CSAM !P pages! */
3957 continue;
3958 Assert(SHW_PTE_IS_P(PteDst));
3959
3960 const GSTPTE PteSrc = pPTSrc->a[iPT + offPTSrc];
3961 if (!(PteSrc.u & X86_PTE_P))
3962 {
3963# ifdef IN_RING3
3964 PGMAssertHandlerAndFlagsInSync(pVM);
3965 DBGFR3PagingDumpEx(pVM->pUVM, pVCpu->idCpu, DBGFPGDMP_FLAGS_CURRENT_CR3 | DBGFPGDMP_FLAGS_CURRENT_MODE
3966 | DBGFPGDMP_FLAGS_GUEST | DBGFPGDMP_FLAGS_HEADER | DBGFPGDMP_FLAGS_PRINT_CR3,
3967 0, 0, UINT64_MAX, 99, NULL);
3968# endif
3969 AssertMsgFailed(("Out of sync (!P) PTE at %RGv! PteSrc=%#RX64 PteDst=%#RX64 pPTSrc=%RGv iPTSrc=%x PdeSrc=%x physpte=%RGp\n",
3970 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst), pPTSrc, iPT + offPTSrc, PdeSrc.au32[0],
3971 (uint64_t)GST_GET_PDE_GCPHYS(PdeSrc) + (iPT + offPTSrc) * sizeof(PteSrc)));
3972 cErrors++;
3973 continue;
3974 }
3975
3976 uint64_t fIgnoreFlags = GST_PTE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_G | X86_PTE_D | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT;
3977# if 1 /** @todo sync accessed bit properly... */
3978 fIgnoreFlags |= X86_PTE_A;
3979# endif
3980
3981 /* match the physical addresses */
3982 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
3983 GCPhysGst = GST_GET_PTE_GCPHYS(PteSrc);
3984
3985# ifdef IN_RING3
3986 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
3987 if (RT_FAILURE(rc))
3988 {
3989# if 0
3990 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
3991 {
3992 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
3993 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
3994 cErrors++;
3995 continue;
3996 }
3997# endif
3998 }
3999 else if (HCPhysShw != (HCPhys & SHW_PTE_PG_MASK))
4000 {
4001 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4002 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4003 cErrors++;
4004 continue;
4005 }
4006# endif
4007
4008 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4009 if (!pPhysPage)
4010 {
4011# if 0
4012 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4013 {
4014 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PteSrc=%#RX64 PteDst=%#RX64\n",
4015 GCPhysGst, GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4016 cErrors++;
4017 continue;
4018 }
4019# endif
4020 if (SHW_PTE_IS_RW(PteDst))
4021 {
4022 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4023 GCPtr + off, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4024 cErrors++;
4025 }
4026 fIgnoreFlags |= X86_PTE_RW;
4027 }
4028 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4029 {
4030 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage:%R[pgmpage] GCPhysGst=%RGp PteSrc=%#RX64 PteDst=%#RX64\n",
4031 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4032 cErrors++;
4033 continue;
4034 }
4035
4036 /* flags */
4037 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4038 {
4039 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4040 {
4041 if (SHW_PTE_IS_RW(PteDst))
4042 {
4043 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4044 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4045 cErrors++;
4046 continue;
4047 }
4048 fIgnoreFlags |= X86_PTE_RW;
4049 }
4050 else
4051 {
4052 if ( SHW_PTE_IS_P(PteDst)
4053# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4054 && !PGM_PAGE_IS_MMIO(pPhysPage)
4055# endif
4056 )
4057 {
4058 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PteSrc=%#RX64 PteDst=%#RX64\n",
4059 GCPtr + off, pPhysPage, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4060 cErrors++;
4061 continue;
4062 }
4063 fIgnoreFlags |= X86_PTE_P;
4064 }
4065 }
4066 else
4067 {
4068 if ((PteSrc.u & (X86_PTE_RW | X86_PTE_D)) == X86_PTE_RW)
4069 {
4070 if (SHW_PTE_IS_RW(PteDst))
4071 {
4072 AssertMsgFailed(("!DIRTY page at %RGv is writable! PteSrc=%#RX64 PteDst=%#RX64\n",
4073 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4074 cErrors++;
4075 continue;
4076 }
4077 if (!SHW_PTE_IS_TRACK_DIRTY(PteDst))
4078 {
4079 AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4080 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4081 cErrors++;
4082 continue;
4083 }
4084 if (SHW_PTE_IS_D(PteDst))
4085 {
4086 AssertMsgFailed(("!DIRTY page at %RGv is marked DIRTY! PteSrc=%#RX64 PteDst=%#RX64\n",
4087 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4088 cErrors++;
4089 }
4090# if 0 /** @todo sync access bit properly... */
4091 if (PteDst.n.u1Accessed != PteSrc.n.u1Accessed)
4092 {
4093                            AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PteSrc=%#RX64 PteDst=%#RX64\n",
4094 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4095 cErrors++;
4096 }
4097 fIgnoreFlags |= X86_PTE_RW;
4098# else
4099 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4100# endif
4101 }
4102 else if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4103 {
4104 /* access bit emulation (not implemented). */
4105 if ((PteSrc.u & X86_PTE_A) || SHW_PTE_IS_P(PteDst))
4106 {
4107 AssertMsgFailed(("PGM_PTFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PteSrc=%#RX64 PteDst=%#RX64\n",
4108 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4109 cErrors++;
4110 continue;
4111 }
4112 if (!SHW_PTE_IS_A(PteDst))
4113 {
4114                            AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PteSrc=%#RX64 PteDst=%#RX64\n",
4115 GCPtr + off, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4116 cErrors++;
4117 }
4118 fIgnoreFlags |= X86_PTE_P;
4119 }
4120# ifdef DEBUG_sandervl
4121 fIgnoreFlags |= X86_PTE_D | X86_PTE_A;
4122# endif
4123 }
4124
4125 if ( (PteSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4126 && (PteSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4127 )
4128 {
4129 AssertMsgFailed(("Flags mismatch at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PteSrc=%#RX64 PteDst=%#RX64\n",
4130 GCPtr + off, (uint64_t)PteSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4131 fIgnoreFlags, (uint64_t)PteSrc.u, SHW_PTE_LOG64(PteDst)));
4132 cErrors++;
4133 continue;
4134 }
4135 } /* foreach PTE */
4136 }
4137 else
4138 {
4139 /*
4140 * Big Page.
4141 */
4142 uint64_t fIgnoreFlags = X86_PDE_AVL_MASK | GST_PDE_PG_MASK | X86_PDE4M_G | X86_PDE4M_D | X86_PDE4M_PS | X86_PDE4M_PWT | X86_PDE4M_PCD;
4143 if ((PdeSrc.u & (X86_PDE_RW | X86_PDE4M_D)) == X86_PDE_RW)
4144 {
4145 if (PdeDst.u & X86_PDE_RW)
4146 {
4147 AssertMsgFailed(("!DIRTY page at %RGv is writable! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4148 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4149 cErrors++;
4150 continue;
4151 }
4152 if (!(PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY))
4153 {
4154                        AssertMsgFailed(("!DIRTY page at %RGv is not marked TRACK_DIRTY! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4155 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4156 cErrors++;
4157 continue;
4158 }
4159# if 0 /** @todo sync access bit properly... */
4160 if (PdeDst.n.u1Accessed != PdeSrc.b.u1Accessed)
4161 {
4162                        AssertMsgFailed(("!DIRTY page at %RGv has a mismatching accessed bit! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4163 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4164 cErrors++;
4165 }
4166 fIgnoreFlags |= X86_PTE_RW;
4167# else
4168 fIgnoreFlags |= X86_PTE_RW | X86_PTE_A;
4169# endif
4170 }
4171 else if (PdeDst.u & PGM_PDFLAGS_TRACK_DIRTY)
4172 {
4173 /* access bit emulation (not implemented). */
4174 if ((PdeSrc.u & X86_PDE_A) || SHW_PDE_IS_P(PdeDst))
4175 {
4176 AssertMsgFailed(("PGM_PDFLAGS_TRACK_DIRTY set at %RGv but no accessed bit emulation! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4177 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4178 cErrors++;
4179 continue;
4180 }
4181 if (!SHW_PDE_IS_A(PdeDst))
4182 {
4183                        AssertMsgFailed(("!ACCESSED page at %RGv has the accessed bit set! PdeSrc=%#RX64 PdeDst=%#RX64\n",
4184 GCPtr, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4185 cErrors++;
4186 }
4187 fIgnoreFlags |= X86_PTE_P;
4188 }
4189
4190 if ((PdeSrc.u & ~fIgnoreFlags) != (PdeDst.u & ~fIgnoreFlags))
4191 {
4192 AssertMsgFailed(("Flags mismatch (B) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PdeDst=%#RX64\n",
4193 GCPtr, (uint64_t)PdeSrc.u & ~fIgnoreFlags, (uint64_t)PdeDst.u & ~fIgnoreFlags,
4194 fIgnoreFlags, (uint64_t)PdeSrc.u, (uint64_t)PdeDst.u));
4195 cErrors++;
4196 }
4197
4198 /* iterate the page table. */
4199 for (unsigned iPT = 0, off = 0;
4200 iPT < RT_ELEMENTS(pPTDst->a);
4201 iPT++, off += GUEST_PAGE_SIZE, GCPhysGst = PGM_A20_APPLY(pVCpu, GCPhysGst + GUEST_PAGE_SIZE))
4202 {
4203 const SHWPTE PteDst = pPTDst->a[iPT];
4204
4205 if (SHW_PTE_IS_TRACK_DIRTY(PteDst))
4206 {
4207 AssertMsgFailed(("The PTE at %RGv emulating a 2/4M page is marked TRACK_DIRTY! PdeSrc=%#RX64 PteDst=%#RX64\n",
4208 GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4209 cErrors++;
4210 }
4211
4212 /* skip not-present entries. */
4213 if (!SHW_PTE_IS_P(PteDst)) /** @todo deal with ALL handlers and CSAM !P pages! */
4214 continue;
4215
4216 fIgnoreFlags = X86_PTE_PAE_PG_MASK | X86_PTE_AVL_MASK | X86_PTE_PWT | X86_PTE_PCD | X86_PTE_PAT | X86_PTE_D | X86_PTE_A | X86_PTE_G | X86_PTE_PAE_NX;
4217
4218 /* match the physical addresses */
4219 HCPhysShw = SHW_PTE_GET_HCPHYS(PteDst);
4220
4221# ifdef IN_RING3
4222 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysGst, &HCPhys);
4223 if (RT_FAILURE(rc))
4224 {
4225# if 0
4226 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4227 {
4228 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4229 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4230 cErrors++;
4231 }
4232# endif
4233 }
4234 else if (HCPhysShw != (HCPhys & X86_PTE_PAE_PG_MASK))
4235 {
4236 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp HCPhys=%RHp GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4237 GCPtr + off, HCPhysShw, HCPhys, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4238 cErrors++;
4239 continue;
4240 }
4241# endif
4242 pPhysPage = pgmPhysGetPage(pVM, GCPhysGst);
4243 if (!pPhysPage)
4244 {
4245# if 0 /** @todo make MMR3PageDummyHCPhys an 'All' function! */
4246 if (HCPhysShw != MMR3PageDummyHCPhys(pVM)) /** @todo this is wrong. */
4247 {
4248 AssertMsgFailed(("Cannot find guest physical address %RGp at %RGv! PdeSrc=%#RX64 PteDst=%#RX64\n",
4249 GCPhysGst, GCPtr + off, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4250 cErrors++;
4251 continue;
4252 }
4253# endif
4254 if (SHW_PTE_IS_RW(PteDst))
4255 {
4256 AssertMsgFailed(("Invalid guest page at %RGv is writable! GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4257 GCPtr + off, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4258 cErrors++;
4259 }
4260 fIgnoreFlags |= X86_PTE_RW;
4261 }
4262 else if (HCPhysShw != PGM_PAGE_GET_HCPHYS(pPhysPage))
4263 {
4264 AssertMsgFailed(("Out of sync (phys) at %RGv! HCPhysShw=%RHp pPhysPage=%R[pgmpage] GCPhysGst=%RGp PdeSrc=%#RX64 PteDst=%#RX64\n",
4265 GCPtr + off, HCPhysShw, pPhysPage, GCPhysGst, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4266 cErrors++;
4267 continue;
4268 }
4269
4270 /* flags */
4271 if (PGM_PAGE_HAS_ACTIVE_HANDLERS(pPhysPage))
4272 {
4273 if (!PGM_PAGE_HAS_ACTIVE_ALL_HANDLERS(pPhysPage))
4274 {
4275 if (PGM_PAGE_GET_HNDL_PHYS_STATE(pPhysPage) != PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
4276 {
4277 if (SHW_PTE_IS_RW(PteDst))
4278 {
4279 AssertMsgFailed(("WRITE access flagged at %RGv but the page is writable! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4280 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4281 cErrors++;
4282 continue;
4283 }
4284 fIgnoreFlags |= X86_PTE_RW;
4285 }
4286 }
4287 else
4288 {
4289 if ( SHW_PTE_IS_P(PteDst)
4290# if PGM_SHW_TYPE == PGM_TYPE_EPT || PGM_SHW_TYPE == PGM_TYPE_PAE || PGM_SHW_TYPE == PGM_TYPE_AMD64
4291 && !PGM_PAGE_IS_MMIO(pPhysPage)
4292# endif
4293 )
4294 {
4295 AssertMsgFailed(("ALL access flagged at %RGv but the page is present! pPhysPage=%R[pgmpage] PdeSrc=%#RX64 PteDst=%#RX64\n",
4296 GCPtr + off, pPhysPage, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4297 cErrors++;
4298 continue;
4299 }
4300 fIgnoreFlags |= X86_PTE_P;
4301 }
4302 }
4303
4304 if ( (PdeSrc.u & ~fIgnoreFlags) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags)
4305 && (PdeSrc.u & ~(fIgnoreFlags | X86_PTE_RW)) != (SHW_PTE_GET_U(PteDst) & ~fIgnoreFlags) /* lazy phys handler dereg. */
4306 )
4307 {
4308 AssertMsgFailed(("Flags mismatch (BT) at %RGv! %#RX64 != %#RX64 fIgnoreFlags=%#RX64 PdeSrc=%#RX64 PteDst=%#RX64\n",
4309 GCPtr + off, (uint64_t)PdeSrc.u & ~fIgnoreFlags, SHW_PTE_LOG64(PteDst) & ~fIgnoreFlags,
4310 fIgnoreFlags, (uint64_t)PdeSrc.u, SHW_PTE_LOG64(PteDst)));
4311 cErrors++;
4312 continue;
4313 }
4314 } /* for each PTE */
4315 }
4316 }
4317 /* not present */
4318
4319 } /* for each PDE */
4320
4321 } /* for each PDPTE */
4322
4323 } /* for each PML4E */
4324
4325# ifdef DEBUG
4326 if (cErrors)
4327 LogFlow(("AssertCR3: cErrors=%d\n", cErrors));
4328# endif
4329# endif /* GST is in {32BIT, PAE, AMD64} */
4330 return cErrors;
4331#endif /* !PGM_TYPE_IS_NESTED_OR_EPT(PGM_SHW_TYPE) && PGM_SHW_TYPE != PGM_TYPE_NONE */
4332}
4333#endif /* VBOX_STRICT */
4334
4335
4336/**
4337 * Sets up the CR3 for shadow paging.
4338 *
4339 * @returns Strict VBox status code.
4340 * @retval VINF_SUCCESS.
4341 *
4342 * @param pVCpu The cross context virtual CPU structure.
4343 * @param GCPhysCR3 The physical address in the CR3 register. (A20 mask
4344 * already applied.)
4345 */
4346PGM_BTH_DECL(int, MapCR3)(PVMCPUCC pVCpu, RTGCPHYS GCPhysCR3)
4347{
4348 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4349 int rc = VINF_SUCCESS;
4350
4351 /* Update guest paging info. */
4352#if PGM_GST_TYPE == PGM_TYPE_32BIT \
4353 || PGM_GST_TYPE == PGM_TYPE_PAE \
4354 || PGM_GST_TYPE == PGM_TYPE_AMD64
4355
4356 LogFlow(("MapCR3: %RGp\n", GCPhysCR3));
4357 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4358
4359# if PGM_GST_TYPE == PGM_TYPE_PAE
4360 if ( !pVCpu->pgm.s.CTX_SUFF(fPaePdpesAndCr3Mapped)
4361 || pVCpu->pgm.s.GCPhysPaeCR3 != GCPhysCR3)
4362# endif
4363 {
4364 /*
4365 * Map the page CR3 points at.
4366 */
4367 RTHCPTR HCPtrGuestCR3;
4368 rc = pgmGstMapCr3(pVCpu, GCPhysCR3, &HCPtrGuestCR3);
4369 if (RT_SUCCESS(rc))
4370 {
4371# if PGM_GST_TYPE == PGM_TYPE_32BIT
4372# ifdef IN_RING3
4373 pVCpu->pgm.s.pGst32BitPdR3 = (PX86PD)HCPtrGuestCR3;
4374 pVCpu->pgm.s.pGst32BitPdR0 = NIL_RTR0PTR;
4375# else
4376 pVCpu->pgm.s.pGst32BitPdR3 = NIL_RTR3PTR;
4377 pVCpu->pgm.s.pGst32BitPdR0 = (PX86PD)HCPtrGuestCR3;
4378# endif
4379
4380# elif PGM_GST_TYPE == PGM_TYPE_PAE
4381# ifdef IN_RING3
4382 pVCpu->pgm.s.pGstPaePdptR3 = (PX86PDPT)HCPtrGuestCR3;
4383 pVCpu->pgm.s.pGstPaePdptR0 = NIL_RTR0PTR;
4384# else
4385 pVCpu->pgm.s.pGstPaePdptR3 = NIL_RTR3PTR;
4386 pVCpu->pgm.s.pGstPaePdptR0 = (PX86PDPT)HCPtrGuestCR3;
4387# endif
4388
4389 /*
4390 * Update CPUM and map the 4 PDs too.
4391 */
4392 X86PDPE aGstPaePdpes[X86_PG_PAE_PDPE_ENTRIES];
4393 memcpy(&aGstPaePdpes, HCPtrGuestCR3, sizeof(aGstPaePdpes));
4394 CPUMSetGuestPaePdpes(pVCpu, &aGstPaePdpes[0]);
4395 PGMGstMapPaePdpes(pVCpu, &aGstPaePdpes[0]);
4396
4397 pVCpu->pgm.s.GCPhysPaeCR3 = GCPhysCR3;
4398# ifdef IN_RING3
4399 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = true;
4400 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4401# else
4402 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4403 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = true;
4404# endif
4405
4406# elif PGM_GST_TYPE == PGM_TYPE_AMD64
4407# ifdef IN_RING3
4408 pVCpu->pgm.s.pGstAmd64Pml4R3 = (PX86PML4)HCPtrGuestCR3;
4409 pVCpu->pgm.s.pGstAmd64Pml4R0 = NIL_RTR0PTR;
4410# else
4411 pVCpu->pgm.s.pGstAmd64Pml4R3 = NIL_RTR3PTR;
4412 pVCpu->pgm.s.pGstAmd64Pml4R0 = (PX86PML4)HCPtrGuestCR3;
4413# endif
4414# endif
4415 }
4416 else
4417 AssertMsgFailed(("rc=%Rrc GCPhysGuestPD=%RGp\n", rc, GCPhysCR3));
4418 }
4419#endif
4420
4421 /*
4422 * Update shadow paging info for guest modes with paging (32-bit, PAE, AMD64).
4423 */
4424# if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4425 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4426 || PGM_SHW_TYPE == PGM_TYPE_AMD64) \
4427 && ( PGM_GST_TYPE != PGM_TYPE_REAL \
4428 && PGM_GST_TYPE != PGM_TYPE_PROT))
4429
4430 Assert(!pVM->pgm.s.fNestedPaging);
4431 PGM_A20_ASSERT_MASKED(pVCpu, GCPhysCR3);
4432
4433 /*
4434 * Update the shadow root page as well since that's not fixed.
4435 */
4436 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4437 PPGMPOOLPAGE pOldShwPageCR3 = pVCpu->pgm.s.CTX_SUFF(pShwPageCR3);
4438 PPGMPOOLPAGE pNewShwPageCR3;
4439
4440 PGM_LOCK_VOID(pVM);
4441
4442# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4443 if (pPool->cDirtyPages)
4444 pgmPoolResetDirtyPages(pVM);
4445# endif
4446
4447 Assert(!(GCPhysCR3 >> (GUEST_PAGE_SHIFT + 32))); /** @todo what is this for? */
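    /* Allocate the new shadow root page from the pool for this guest CR3, locking
       it so the pool cannot reclaim it while it is the active root. */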
4448 int const rc2 = pgmPoolAlloc(pVM, GCPhysCR3 & GST_CR3_PAGE_MASK, BTH_PGMPOOLKIND_ROOT, PGMPOOLACCESS_DONTCARE,
4449 PGM_A20_IS_ENABLED(pVCpu), NIL_PGMPOOL_IDX, UINT32_MAX, true /*fLockPage*/, &pNewShwPageCR3);
4450 AssertFatalRC(rc2);
4451
4452 pVCpu->pgm.s.pShwPageCR3R3 = pgmPoolConvertPageToR3(pPool, pNewShwPageCR3);
4453 pVCpu->pgm.s.pShwPageCR3R0 = pgmPoolConvertPageToR0(pPool, pNewShwPageCR3);
4454
4455 /* Set the current hypervisor CR3. */
4456 CPUMSetHyperCR3(pVCpu, PGMGetHyperCR3(pVCpu));
4457
4458 /* Clean up the old CR3 root. */
4459 if ( pOldShwPageCR3
4460 && pOldShwPageCR3 != pNewShwPageCR3 /* @todo can happen due to incorrect syncing between REM & PGM; find the real cause */)
4461 {
4462 Assert(pOldShwPageCR3->enmKind != PGMPOOLKIND_FREE);
4463
4464 /* Mark the page as unlocked; allow flushing again. */
4465 pgmPoolUnlockPage(pPool, pOldShwPageCR3);
4466
4467 pgmPoolFreeByPage(pPool, pOldShwPageCR3, NIL_PGMPOOL_IDX, UINT32_MAX);
4468 }
4469 PGM_UNLOCK(pVM);
4470# else
4471 NOREF(GCPhysCR3);
4472# endif
4473
4474 return rc;
4475}
4476
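/*
 * Illustrative sketch only (not part of the template): a caller loading a new guest
 * CR3 would be expected to mask the value before handing it to MapCR3, roughly:
 *
 *     RTGCPHYS GCPhysCR3 = PGM_A20_APPLY(pVCpu, CPUMGetGuestCR3(pVCpu) & GST_CR3_PAGE_MASK);
 *     int rc = PGM_BTH_NAME(MapCR3)(pVCpu, GCPhysCR3);
 *     AssertRCReturn(rc, rc);
 *
 * The masking details and the call site shown here are assumptions for illustration;
 * the real flow is driven by the PGM mode-switch code, not by this template file.
 */
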
4477/**
4478 * Unmaps the shadow CR3.
4479 *
4480 * @returns VBox status code; no special status codes are returned.
4481 * @param pVCpu The cross context virtual CPU structure.
4482 */
4483PGM_BTH_DECL(int, UnmapCR3)(PVMCPUCC pVCpu)
4484{
4485 LogFlow(("UnmapCR3\n"));
4486
4487 int rc = VINF_SUCCESS;
4488 PVMCC pVM = pVCpu->CTX_SUFF(pVM); NOREF(pVM);
4489
4490 /*
4491 * Update guest paging info.
4492 */
4493#if PGM_GST_TYPE == PGM_TYPE_32BIT
4494 pVCpu->pgm.s.pGst32BitPdR3 = 0;
4495 pVCpu->pgm.s.pGst32BitPdR0 = 0;
4496
4497#elif PGM_GST_TYPE == PGM_TYPE_PAE
4498 pVCpu->pgm.s.pGstPaePdptR3 = 0;
4499 pVCpu->pgm.s.pGstPaePdptR0 = 0;
4500 for (unsigned i = 0; i < X86_PG_PAE_PDPE_ENTRIES; i++)
4501 {
4502 pVCpu->pgm.s.apGstPaePDsR3[i] = 0;
4503 pVCpu->pgm.s.apGstPaePDsR0[i] = 0;
4504 pVCpu->pgm.s.aGCPhysGstPaePDs[i] = NIL_RTGCPHYS;
4505 }
4506
4507#elif PGM_GST_TYPE == PGM_TYPE_AMD64
4508 pVCpu->pgm.s.pGstAmd64Pml4R3 = 0;
4509 pVCpu->pgm.s.pGstAmd64Pml4R0 = 0;
4510
4511#else /* prot/real mode stub */
4512 /* nothing to do */
4513#endif
4514
4515 /*
4516 * Update second-level address translation info.
4517 */
4518#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
4519 pVCpu->pgm.s.pGstEptPml4R3 = 0;
4520 pVCpu->pgm.s.pGstEptPml4R0 = 0;
4521#endif
4522
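    /* Forget the cached PAE CR3/PDPE mapping state so the next MapCR3 call redoes
       the mapping instead of taking the "already mapped" shortcut. */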
4523 pVCpu->pgm.s.fPaePdpesAndCr3MappedR3 = false;
4524 pVCpu->pgm.s.fPaePdpesAndCr3MappedR0 = false;
4525 pVCpu->pgm.s.GCPhysPaeCR3 = NIL_RTGCPHYS;
4526
4527 /*
4528 * Update shadow paging info.
4529 */
4530#if ( ( PGM_SHW_TYPE == PGM_TYPE_32BIT \
4531 || PGM_SHW_TYPE == PGM_TYPE_PAE \
4532 || PGM_SHW_TYPE == PGM_TYPE_AMD64))
4533# if PGM_GST_TYPE != PGM_TYPE_REAL
4534 Assert(!pVM->pgm.s.fNestedPaging);
4535# endif
4536 PGM_LOCK_VOID(pVM);
4537
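    /* If a shadow root page is currently active, unlock it and return it to the pool. */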
4538 if (pVCpu->pgm.s.CTX_SUFF(pShwPageCR3))
4539 {
4540 PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
4541
4542# ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
4543 if (pPool->cDirtyPages)
4544 pgmPoolResetDirtyPages(pVM);
4545# endif
4546
4547 /* Mark the page as unlocked; allow flushing again. */
4548 pgmPoolUnlockPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3));
4549
4550 pgmPoolFreeByPage(pPool, pVCpu->pgm.s.CTX_SUFF(pShwPageCR3), NIL_PGMPOOL_IDX, UINT32_MAX);
4551 pVCpu->pgm.s.pShwPageCR3R3 = 0;
4552 pVCpu->pgm.s.pShwPageCR3R0 = 0;
4553 }
4554
4555 PGM_UNLOCK(pVM);
4556#endif
4557
4558 return rc;
4559}
4560