VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/PGMR0.cpp

Last change on this file was 106061, checked in by vboxsync, 3 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 58.7 KB
1/* $Id: PGMR0.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * PGM - Page Manager and Monitor, Ring-0.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PGM
33#define VBOX_WITHOUT_PAGING_BIT_FIELDS /* 64-bit bitfields are just asking for trouble. See @bugref{9841} and others. */
34#include <VBox/rawpci.h>
35#include <VBox/vmm/pgm.h>
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/gmm.h>
38#include "PGMInternal.h"
39#include <VBox/vmm/pdmdev.h>
40#include <VBox/vmm/vmcc.h>
41#include <VBox/vmm/gvm.h>
42#include "PGMInline.h"
43#include <VBox/log.h>
44#include <VBox/err.h>
45#include <iprt/assert.h>
46#include <iprt/mem.h>
47#include <iprt/memobj.h>
48#include <iprt/process.h>
49#include <iprt/rand.h>
50#include <iprt/string.h>
51#include <iprt/time.h>
52
53
54/*
55 * Instantiate the ring-0 header/code templates.
56 */
57/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
58#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
59#include "PGMR0Bth.h"
60#undef PGM_BTH_NAME
61
62#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
63#include "PGMR0Bth.h"
64#undef PGM_BTH_NAME
65
66#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
67#include "PGMR0Bth.h"
68#undef PGM_BTH_NAME
69
70#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
71#include "PGMR0Bth.h"
72#undef PGM_BTH_NAME
73
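/* Each inclusion above instantiates the PGMR0Bth.h template for one paging
 * setup (protected-mode guest paired with 32-bit, PAE, AMD64 or EPT shadow
 * paging), e.g. PGM_BTH_NAME_32BIT_PROT(Trap0eHandler), which the
 * enmShwPagingMode switch in PGMR0Trap0eHandlerNestedPaging further down
 * dispatches on. */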
74
75/**
76 * Initializes the per-VM data for the PGM.
77 *
78 * This is called from under the GVMM lock, so it should only initialize the
79 * data so PGMR0CleanupVM and others will work smoothly.
80 *
81 * @returns VBox status code.
82 * @param pGVM Pointer to the global VM structure.
83 * @param hMemObj Handle to the memory object backing pGVM.
84 */
85VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM, RTR0MEMOBJ hMemObj)
86{
87 AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
88 AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
89 AssertCompile(sizeof(pGVM->aCpus[0].pgm.s) <= sizeof(pGVM->aCpus[0].pgm.padding));
90 AssertCompile(sizeof(pGVM->aCpus[0].pgmr0.s) <= sizeof(pGVM->aCpus[0].pgmr0.padding));
91
92 /* Set the RAM range memory handles to NIL. */
93 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.acRamRangePages) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
94 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
95 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRamRanges));
96 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs); i++)
97 {
98 pGVM->pgmr0.s.ahRamRangeMemObjs[i] = NIL_RTR0MEMOBJ;
99 pGVM->pgmr0.s.ahRamRangeMapObjs[i] = NIL_RTR0MEMOBJ;
100 }
101 Assert(pGVM->pgmr0.s.idRamRangeMax == 0); /* the structure is ZERO'ed */
102
103 /* Set the MMIO2 range memory handles to NIL. */
104 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apMmio2RamRanges));
105 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apMmio2RamRanges));
106 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs); i++)
107 {
108 pGVM->pgmr0.s.ahMmio2MemObjs[i] = NIL_RTR0MEMOBJ;
109 pGVM->pgmr0.s.ahMmio2MapObjs[i] = NIL_RTR0MEMOBJ;
110 }
111
112 /* Set the ROM range memory handles to NIL. */
113 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRomRanges));
114 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMapObjs) == RT_ELEMENTS(pGVM->pgmr0.s.apRomRanges));
115 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs); i++)
116 {
117 pGVM->pgmr0.s.ahRomRangeMemObjs[i] = NIL_RTR0MEMOBJ;
118 pGVM->pgmr0.s.ahRomRangeMapObjs[i] = NIL_RTR0MEMOBJ;
119 }
120
121 /* Set the pool and physical handler related memory handles to NIL. */
122 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
123 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
124 {
125 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
126 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
127 }
128 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
129 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
130
131 /*
132 * Initialize the handler type table with return to ring-3 callbacks so we
133 * don't have to do anything special for ring-3 only registrations.
134 *
135 * Note! The random bits of the hType value are mainly there to prevent
136 * trouble with zero initialized handles w/o needing to sacrifice handle zero.
137 */
138 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes); i++)
139 {
140 pGVM->pgmr0.s.aPhysHandlerTypes[i].hType = i | (RTRandU64() & ~(uint64_t)PGMPHYSHANDLERTYPE_IDX_MASK);
141 pGVM->pgmr0.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
142 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
143 pGVM->pgmr0.s.aPhysHandlerTypes[i].pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
144
145 pGVM->pgm.s.aPhysHandlerTypes[i].hType = pGVM->pgmr0.s.aPhysHandlerTypes[i].hType;
146 pGVM->pgm.s.aPhysHandlerTypes[i].enmKind = PGMPHYSHANDLERKIND_INVALID;
147 }
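 /* Since RT_ELEMENTS(aPhysHandlerTypes) == PGMPHYSHANDLERTYPE_IDX_MASK + 1
    (see the AssertCompile in PGMR0HandlerPhysicalTypeSetUpContext), the table
    index can always be recovered as (hType & PGMPHYSHANDLERTYPE_IDX_MASK),
    while the random upper bits make stale or zero handles fail the hType
    equality checks. */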
148
149 /*
150 * Get the physical address of the ZERO and MMIO-dummy pages.
151 */
152 AssertReturn(((uintptr_t)&pGVM->pgm.s.abZeroPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
153 pGVM->pgm.s.HCPhysZeroPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abZeroPg) >> HOST_PAGE_SHIFT);
154 AssertReturn(pGVM->pgm.s.HCPhysZeroPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
155
156 AssertReturn(((uintptr_t)&pGVM->pgm.s.abMmioPg[0] & HOST_PAGE_OFFSET_MASK) == 0, VERR_INTERNAL_ERROR_2);
157 pGVM->pgm.s.HCPhysMmioPg = RTR0MemObjGetPagePhysAddr(hMemObj, RT_UOFFSETOF_DYN(GVM, pgm.s.abMmioPg) >> HOST_PAGE_SHIFT);
158 AssertReturn(pGVM->pgm.s.HCPhysMmioPg != NIL_RTHCPHYS, VERR_INTERNAL_ERROR_3);
159
160 pGVM->pgm.s.HCPhysInvMmioPg = pGVM->pgm.s.HCPhysMmioPg;
161
162 return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
163}
164
165
166/**
167 * Initializes the per-VM PGM for ring-0.
168 *
169 * @returns VBox status code.
170 * @param pGVM Pointer to the global VM structure.
171 */
172VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
173{
174 /*
175 * Set up the ring-0 context for our access handlers.
176 */
177 int rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, 0 /*fFlags*/,
178 pgmPhysRomWriteHandler, pgmPhysRomWritePfHandler,
179 "ROM write protection", pGVM->pgm.s.hRomPhysHandlerType);
180 AssertLogRelRCReturn(rc, rc);
181
182 /*
183 * Register the physical access handler doing dirty MMIO2 tracing.
184 */
185 rc = PGMR0HandlerPhysicalTypeSetUpContext(pGVM, PGMPHYSHANDLERKIND_WRITE, PGMPHYSHANDLER_F_KEEP_PGM_LOCK,
186 pgmPhysMmio2WriteHandler, pgmPhysMmio2WritePfHandler,
187 "MMIO2 dirty page tracing", pGVM->pgm.s.hMmio2DirtyPhysHandlerType);
188 AssertLogRelRCReturn(rc, rc);
189
190 /*
191 * The page pool.
192 */
193 return pgmR0PoolInitVM(pGVM);
194}
195
196
197/**
198 * Called at the end of the ring-0 initialization to seal access handler types.
199 *
200 * @param pGVM Pointer to the global VM structure.
201 */
202VMMR0_INT_DECL(void) PGMR0DoneInitVM(PGVM pGVM)
203{
204 /*
205 * Seal all the access handler types. Does both ring-3 and ring-0.
206 *
207 * Note! Since this is a void function and we don't have any ring-0 state
208 * machinery for marking the VM as bogus, this code will just
209 * override corrupted values as best as it can.
210 */
211 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
212 for (size_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes); i++)
213 {
214 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[i];
215 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[i];
216 PGMPHYSHANDLERKIND const enmKindR3 = pTypeR3->enmKind;
217 PGMPHYSHANDLERKIND const enmKindR0 = pTypeR0->enmKind;
218 AssertLogRelMsgStmt(pTypeR0->hType == pTypeR3->hType,
219 ("i=%u %#RX64 vs %#RX64 %s\n", i, pTypeR0->hType, pTypeR3->hType, pTypeR0->pszDesc),
220 pTypeR3->hType = pTypeR0->hType);
221 switch (enmKindR3)
222 {
223 case PGMPHYSHANDLERKIND_ALL:
224 case PGMPHYSHANDLERKIND_MMIO:
225 if ( enmKindR0 == enmKindR3
226 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
227 {
228 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
229 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
230 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_ALL;
231 continue;
232 }
233 break;
234
235 case PGMPHYSHANDLERKIND_WRITE:
236 if ( enmKindR0 == enmKindR3
237 || enmKindR0 == PGMPHYSHANDLERKIND_INVALID)
238 {
239 pTypeR3->fRing0Enabled = enmKindR0 == enmKindR3;
240 pTypeR0->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
241 pTypeR3->uState = PGM_PAGE_HNDL_PHYS_STATE_WRITE;
242 continue;
243 }
244 break;
245
246 default:
247 AssertLogRelMsgFailed(("i=%u enmKindR3=%d\n", i, enmKindR3));
248 RT_FALL_THROUGH();
249 case PGMPHYSHANDLERKIND_INVALID:
250 AssertLogRelMsg(enmKindR0 == PGMPHYSHANDLERKIND_INVALID,
251 ("i=%u enmKind=%d %s\n", i, enmKindR0, pTypeR0->pszDesc));
252 AssertLogRelMsg(pTypeR0->pfnHandler == pgmR0HandlerPhysicalHandlerToRing3,
253 ("i=%u pfnHandler=%p %s\n", i, pTypeR0->pfnHandler, pTypeR0->pszDesc));
254 AssertLogRelMsg(pTypeR0->pfnPfHandler == pgmR0HandlerPhysicalPfHandlerToRing3,
255 ("i=%u pfnPfHandler=%p %s\n", i, pTypeR0->pfnPfHandler, pTypeR0->pszDesc));
256
257 /* Unused or bad ring-3 entry, make it and the ring-0 one harmless. */
258 pTypeR3->enmKind = PGMPHYSHANDLERKIND_END;
259 pTypeR3->fRing0DevInsIdx = false;
260 pTypeR3->fKeepPgmLock = false;
261 pTypeR3->uState = 0;
262 break;
263 }
264 pTypeR3->fRing0Enabled = false;
265
266 /* Make sure the entry is harmless and goes to ring-3. */
267 pTypeR0->enmKind = PGMPHYSHANDLERKIND_END;
268 pTypeR0->pfnHandler = pgmR0HandlerPhysicalHandlerToRing3;
269 pTypeR0->pfnPfHandler = pgmR0HandlerPhysicalPfHandlerToRing3;
270 pTypeR0->fRing0DevInsIdx = false;
271 pTypeR0->fKeepPgmLock = false;
272 pTypeR0->uState = 0;
273 pTypeR0->pszDesc = "invalid";
274 }
275}
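/* Net effect: fRing0Enabled ends up true only for types whose ring-0 kind was
   set up (via PGMR0HandlerPhysicalTypeSetUpContext) to match the ring-3
   registration; everything else keeps, or is reset to, the harmless
   return-to-ring-3 callbacks. */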
276
277
278/**
279 * Cleans up any loose ends before the GVM structure is destroyed.
280 */
281VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
282{
283 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
284 {
285 if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
286 {
287 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
288 AssertRC(rc);
289 pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
290 }
291
292 if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
293 {
294 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
295 AssertRC(rc);
296 pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
297 }
298 }
299
300 if (pGVM->pgmr0.s.hPhysHandlerMapObj != NIL_RTR0MEMOBJ)
301 {
302 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMapObj, true /*fFreeMappings*/);
303 AssertRC(rc);
304 pGVM->pgmr0.s.hPhysHandlerMapObj = NIL_RTR0MEMOBJ;
305 }
306
307 if (pGVM->pgmr0.s.hPhysHandlerMemObj != NIL_RTR0MEMOBJ)
308 {
309 int rc = RTR0MemObjFree(pGVM->pgmr0.s.hPhysHandlerMemObj, true /*fFreeMappings*/);
310 AssertRC(rc);
311 pGVM->pgmr0.s.hPhysHandlerMemObj = NIL_RTR0MEMOBJ;
312 }
313
314 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahRomRangeMemObjs); i++)
315 {
316 if (pGVM->pgmr0.s.ahRomRangeMapObjs[i] != NIL_RTR0MEMOBJ)
317 {
318 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRomRangeMapObjs[i], true /*fFreeMappings*/);
319 AssertRC(rc);
320 pGVM->pgmr0.s.ahRomRangeMapObjs[i] = NIL_RTR0MEMOBJ;
321 }
322
323 if (pGVM->pgmr0.s.ahRomRangeMemObjs[i] != NIL_RTR0MEMOBJ)
324 {
325 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRomRangeMemObjs[i], true /*fFreeMappings*/);
326 AssertRC(rc);
327 pGVM->pgmr0.s.ahRomRangeMemObjs[i] = NIL_RTR0MEMOBJ;
328 }
329 }
330
331 for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahMmio2MemObjs); i++)
332 {
333 if (pGVM->pgmr0.s.ahMmio2MapObjs[i] != NIL_RTR0MEMOBJ)
334 {
335 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahMmio2MapObjs[i], true /*fFreeMappings*/);
336 AssertRC(rc);
337 pGVM->pgmr0.s.ahMmio2MapObjs[i] = NIL_RTR0MEMOBJ;
338 }
339
340 if (pGVM->pgmr0.s.ahMmio2MemObjs[i] != NIL_RTR0MEMOBJ)
341 {
342 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahMmio2MemObjs[i], true /*fFreeMappings*/);
343 AssertRC(rc);
344 pGVM->pgmr0.s.ahMmio2MemObjs[i] = NIL_RTR0MEMOBJ;
345 }
346 }
347
348 uint32_t const cRangesMax = RT_MIN(pGVM->pgmr0.s.idRamRangeMax, RT_ELEMENTS(pGVM->pgmr0.s.ahRamRangeMemObjs) - 1U) + 1U;
349 for (uint32_t i = 0; i < cRangesMax; i++)
350 {
351 if (pGVM->pgmr0.s.ahRamRangeMapObjs[i] != NIL_RTR0MEMOBJ)
352 {
353 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRamRangeMapObjs[i], true /*fFreeMappings*/);
354 AssertRC(rc);
355 pGVM->pgmr0.s.ahRamRangeMapObjs[i] = NIL_RTR0MEMOBJ;
356 }
357
358 if (pGVM->pgmr0.s.ahRamRangeMemObjs[i] != NIL_RTR0MEMOBJ)
359 {
360 int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahRamRangeMemObjs[i], true /*fFreeMappings*/);
361 AssertRC(rc);
362 pGVM->pgmr0.s.ahRamRangeMemObjs[i] = NIL_RTR0MEMOBJ;
363 }
364 }
365
366 if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
367 RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
368}
369
370
371/**
372 * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
373 *
374 * @returns The following VBox status codes.
375 * @retval VINF_SUCCESS on success. FF cleared.
376 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
377 *
378 * @param pGVM The global (ring-0) VM structure.
379 * @param idCpu The ID of the calling EMT.
380 * @param fRing3 Set if the caller is ring-3. Determines whether to
381 * return VINF_EM_NO_MEMORY or not.
382 *
383 * @thread EMT(idCpu)
384 *
385 * @remarks Must be called from within the PGM critical section. The caller
386 * must clear the new pages.
387 */
388int pgmR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu, bool fRing3)
389{
390 /*
391 * Validate inputs.
392 */
393 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
394 Assert(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf());
395 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
396
397 /*
398 * Check for error injection.
399 */
400 if (RT_LIKELY(!pGVM->pgm.s.fErrInjHandyPages))
401 { /* likely */ }
402 else
403 return VERR_NO_MEMORY;
404
405 /*
406 * Try allocate a full set of handy pages.
407 */
408 uint32_t const iFirst = pGVM->pgm.s.cHandyPages;
409 AssertMsgReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), ("%#x\n", iFirst), VERR_PGM_HANDY_PAGE_IPE);
410
411 uint32_t const cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
412 if (!cPages)
413 return VINF_SUCCESS;
414
415 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
416 if (RT_SUCCESS(rc))
417 {
418 uint32_t const cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages); /** @todo allow allocating less... */
419 pGVM->pgm.s.cHandyPages = cHandyPages;
420 VM_FF_CLEAR(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
421 VM_FF_CLEAR(pGVM, VM_FF_PGM_NO_MEMORY);
422
423#ifdef VBOX_STRICT
424 for (uint32_t i = 0; i < cHandyPages; i++)
425 {
426 Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
427 Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
428 Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
429 Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_GMMPAGEDESC_PHYS);
430 Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
431 }
432#endif
433
434 /*
435 * Clear the pages.
436 */
437 for (uint32_t iPage = iFirst; iPage < cHandyPages; iPage++)
438 {
439 PGMMPAGEDESC pPage = &pGVM->pgm.s.aHandyPages[iPage];
440 if (!pPage->fZeroed)
441 {
442 void *pv = NULL;
443#ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
444 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
445#else
446 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
447#endif
448 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
449
450 RT_BZERO(pv, GUEST_PAGE_SIZE);
451 pPage->fZeroed = true;
452 }
453#ifdef VBOX_STRICT
454 else
455 {
456 void *pv = NULL;
457# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
458 rc = SUPR0HCPhysToVirt(pPage->HCPhysGCPhys, &pv);
459# else
460 rc = GMMR0PageIdToVirt(pGVM, pPage->idPage, &pv);
461# endif
462 AssertMsgRCReturn(rc, ("idPage=%#x HCPhys=%RHp rc=%Rrc\n", pPage->idPage, pPage->HCPhysGCPhys, rc), rc);
463 AssertReturn(ASMMemIsZero(pv, GUEST_PAGE_SIZE), VERR_PGM_HANDY_PAGE_IPE);
464 }
465#endif
466 Log3(("PGMR0PhysAllocateHandyPages: idPage=%#x HCPhys=%RGp\n", pPage->idPage, pPage->HCPhysGCPhys));
467 }
468 }
469 else
470 {
471 /*
472 * We should never get here unless there is a genuine shortage of
473 * memory (or some internal error). Flag the error so the VM can be
474 * suspended ASAP and the user informed. If we're totally out of
475 * handy pages we will return failure.
476 */
477 /* Report the failure. */
478 LogRel(("PGM: Failed to procure handy pages; rc=%Rrc cHandyPages=%#x\n"
479 " cAllPages=%#x cPrivatePages=%#x cSharedPages=%#x cZeroPages=%#x\n",
480 rc, pGVM->pgm.s.cHandyPages,
481 pGVM->pgm.s.cAllPages, pGVM->pgm.s.cPrivatePages, pGVM->pgm.s.cSharedPages, pGVM->pgm.s.cZeroPages));
482
483 GMMMEMSTATSREQ Stats = { { SUPVMMR0REQHDR_MAGIC, sizeof(Stats) }, 0, 0, 0, 0, 0 };
484 if (RT_SUCCESS(GMMR0QueryMemoryStatsReq(pGVM, idCpu, &Stats)))
485 LogRel(("GMM: Statistics:\n"
486 " Allocated pages: %RX64\n"
487 " Free pages: %RX64\n"
488 " Shared pages: %RX64\n"
489 " Maximum pages: %RX64\n"
490 " Ballooned pages: %RX64\n",
491 Stats.cAllocPages, Stats.cFreePages, Stats.cSharedPages, Stats.cMaxPages, Stats.cBalloonedPages));
492
493 if ( rc != VERR_NO_MEMORY
494 && rc != VERR_NO_PHYS_MEMORY
495 && rc != VERR_LOCK_FAILED)
496 for (uint32_t iPage = 0; iPage < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); iPage++)
497 LogRel(("PGM: aHandyPages[#%#04x] = {.HCPhysGCPhys=%RHp, .idPage=%#08x, .idSharedPage=%#08x}\n",
498 iPage, pGVM->pgm.s.aHandyPages[iPage].HCPhysGCPhys, pGVM->pgm.s.aHandyPages[iPage].idPage,
499 pGVM->pgm.s.aHandyPages[iPage].idSharedPage));
500
501 /* Set the FFs and adjust rc. */
502 VM_FF_SET(pGVM, VM_FF_PGM_NEED_HANDY_PAGES);
503 VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
504 if (!fRing3)
505 if ( rc == VERR_NO_MEMORY
506 || rc == VERR_NO_PHYS_MEMORY
507 || rc == VERR_LOCK_FAILED
508 || rc == VERR_MAP_FAILED)
509 rc = VINF_EM_NO_MEMORY;
510 }
511
512 LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
513 return rc;
514}
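/* Ring-0 callers (e.g. pgmPhysEnsureHandyPage, see the comment above) pass
   fRing3=false so that a genuine memory shortage is translated into
   VINF_EM_NO_MEMORY and forces a trip to ring-3, whereas the ring-3 request
   path below (PGMR0PhysAllocateHandyPages) passes fRing3=true and keeps the
   raw VERR_* status. */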
515
516
517/**
518 * Worker function for PGMR3PhysAllocateHandyPages / VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES.
519 *
520 * @returns The following VBox status codes.
521 * @retval VINF_SUCCESS on success. FF cleared.
522 * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
523 *
524 * @param pGVM The global (ring-0) VM structure.
525 * @param idCpu The ID of the calling EMT.
526 *
527 * @thread EMT(idCpu)
528 *
529 * @remarks Must be called from within the PGM critical section. The caller
530 * must clear the new pages.
531 */
532VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
533{
534 /*
535 * Validate inputs.
536 */
537 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
538 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
539
540 /*
541 * Enter the PGM lock and call the worker.
542 */
543 int rc = PGM_LOCK(pGVM);
544 if (RT_SUCCESS(rc))
545 {
546 rc = pgmR0PhysAllocateHandyPages(pGVM, idCpu, true /*fRing3*/);
547 PGM_UNLOCK(pGVM);
548 }
549 return rc;
550}
551
552
553/**
554 * Flushes any changes pending in the handy page array.
555 *
556 * It is very important that this gets done when page sharing is enabled.
557 *
558 * @returns The following VBox status codes.
559 * @retval VINF_SUCCESS on success. FF cleared.
560 *
561 * @param pGVM The global (ring-0) VM structure.
562 * @param idCpu The ID of the calling EMT.
563 *
564 * @thread EMT(idCpu)
565 *
566 * @remarks Must be called from within the PGM critical section.
567 */
568VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
569{
570 /*
571 * Validate inputs.
572 */
573 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
574 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
575 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
576
577 /*
578 * Try allocate a full set of handy pages.
579 */
580 uint32_t iFirst = pGVM->pgm.s.cHandyPages;
581 AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
582 uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
583 if (!cPages)
584 return VINF_SUCCESS;
585 int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
586
587 LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
588 return rc;
589}
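/* Unlike pgmR0PhysAllocateHandyPages above, this passes 0 as the second of the
   two page counts to GMMR0AllocateHandyPages, which (as the name of this
   function suggests) presumably just pushes the pending descriptor changes to
   GMM without allocating anything new. */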
590
591
592/**
593 * Allocate a large page at @a GCPhys.
594 *
595 * @returns The following VBox status codes.
596 * @retval VINF_SUCCESS on success.
597 * @retval VINF_EM_NO_MEMORY if we're out of memory.
598 *
599 * @param pGVM The global (ring-0) VM structure.
600 * @param idCpu The ID of the calling EMT.
601 * @param GCPhys The guest physical address of the page.
602 *
603 * @thread EMT(idCpu)
604 *
605 * @remarks Must be called from within the PGM critical section. The caller
606 * must clear the new pages.
607 */
608int pgmR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
609{
610 STAM_PROFILE_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, a);
611 PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
612
613 /*
614 * Allocate a large page.
615 */
616 RTHCPHYS HCPhys = NIL_GMMPAGEDESC_PHYS;
617 uint32_t idPage = NIL_GMM_PAGEID;
618
619 if (true) /** @todo pre-allocate 2-3 pages on the allocation thread. */
620 {
621 uint64_t const nsAllocStart = RTTimeNanoTS();
622 if (nsAllocStart < pGVM->pgm.s.nsLargePageRetry)
623 {
624 LogFlowFunc(("returns VERR_TRY_AGAIN - %RU64 ns left of hold off period\n", pGVM->pgm.s.nsLargePageRetry - nsAllocStart));
625 return VERR_TRY_AGAIN;
626 }
627
628 int const rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M, &idPage, &HCPhys);
629
630 uint64_t const nsAllocEnd = RTTimeNanoTS();
631 uint64_t const cNsElapsed = nsAllocEnd - nsAllocStart;
632 STAM_REL_PROFILE_ADD_PERIOD(&pGVM->pgm.s.StatLargePageAlloc, cNsElapsed);
633 if (cNsElapsed < RT_NS_100MS)
634 pGVM->pgm.s.cLargePageLongAllocRepeats = 0;
635 else
636 {
637 /* If a large page allocation takes more than 100ms, back off for a
638 while so the host OS can reshuffle memory and make some more large
639 pages available. However, if it took over a second, just disable it. */
640 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageOverflow);
641 pGVM->pgm.s.cLargePageLongAllocRepeats++;
642 if (cNsElapsed > RT_NS_1SEC)
643 {
644 LogRel(("PGMR0PhysAllocateLargePage: Disabling large pages after %'RU64 ns allocation time.\n", cNsElapsed));
645 PGMSetLargePageUsage(pGVM, false);
646 }
647 else
648 {
649 Log(("PGMR0PhysAllocateLargePage: Suspending large page allocations for %u sec after %'RU64 ns allocation time.\n",
650 30 * pGVM->pgm.s.cLargePageLongAllocRepeats, cNsElapsed));
651 pGVM->pgm.s.nsLargePageRetry = nsAllocEnd + RT_NS_30SEC * pGVM->pgm.s.cLargePageLongAllocRepeats;
652 }
653 }
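 /* I.e. the hold-off grows linearly: the first slow (>100ms) allocation defers
    the next attempt by 30s, the second by 60s, and so on, until a single
    allocation taking more than a second disables large pages altogether. */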
654
655 if (RT_FAILURE(rc))
656 {
657 Log(("PGMR0PhysAllocateLargePage: Failed: %Rrc\n", rc));
658 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageAllocFailed);
659 if (rc == VERR_NOT_SUPPORTED)
660 {
661 LogRel(("PGM: Disabling large pages because of VERR_NOT_SUPPORTED status.\n"));
662 PGMSetLargePageUsage(pGVM, false);
663 }
664 return rc;
665 }
666 }
667
668 STAM_PROFILE_STOP_START(&pGVM->pgm.s.Stats.StatLargePageAlloc2, &pGVM->pgm.s.Stats.StatLargePageSetup, a);
669
670 /*
671 * Enter the pages into PGM.
672 */
673 bool fFlushTLBs = false;
674 VBOXSTRICTRC rc = VINF_SUCCESS;
675 unsigned cLeft = _2M / GUEST_PAGE_SIZE;
676 while (cLeft-- > 0)
677 {
678 PPGMPAGE const pPage = pgmPhysGetPage(pGVM, GCPhys);
679 AssertReturn(pPage && PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM && PGM_PAGE_IS_ZERO(pPage), VERR_PGM_UNEXPECTED_PAGE_STATE);
680
681 /* Make sure there are no zero mappings. */
682 uint16_t const u16Tracking = PGM_PAGE_GET_TRACKING(pPage);
683 if (u16Tracking == 0)
684 Assert(PGM_PAGE_GET_PTE_INDEX(pPage) == 0);
685 else
686 {
687 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageZeroEvict);
688 VBOXSTRICTRC rc3 = pgmPoolTrackUpdateGCPhys(pGVM, GCPhys, pPage, true /*fFlushPTEs*/, &fFlushTLBs);
689 Log(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: tracking=%#x rc3=%Rrc\n", GCPhys, u16Tracking, VBOXSTRICTRC_VAL(rc3)));
690 if (rc3 != VINF_SUCCESS && rc == VINF_SUCCESS)
691 rc = rc3; /** @todo not perfect... */
692 PGM_PAGE_SET_PTE_INDEX(pGVM, pPage, 0);
693 PGM_PAGE_SET_TRACKING(pGVM, pPage, 0);
694 }
695
696 /* Setup the new page. */
697 PGM_PAGE_SET_HCPHYS(pGVM, pPage, HCPhys);
698 PGM_PAGE_SET_STATE(pGVM, pPage, PGM_PAGE_STATE_ALLOCATED);
699 PGM_PAGE_SET_PDE_TYPE(pGVM, pPage, PGM_PAGE_PDE_TYPE_PDE);
700 PGM_PAGE_SET_PAGEID(pGVM, pPage, idPage);
701 Log3(("PGMR0PhysAllocateLargePage: GCPhys=%RGp: idPage=%#x HCPhys=%RGp (old tracking=%#x)\n",
702 GCPhys, idPage, HCPhys, u16Tracking));
703
704 /* advance */
705 idPage++;
706 HCPhys += GUEST_PAGE_SIZE;
707 GCPhys += GUEST_PAGE_SIZE;
708 }
709
710 STAM_COUNTER_ADD(&pGVM->pgm.s.Stats.StatRZPageReplaceZero, _2M / GUEST_PAGE_SIZE);
711 pGVM->pgm.s.cZeroPages -= _2M / GUEST_PAGE_SIZE;
712 pGVM->pgm.s.cPrivatePages += _2M / GUEST_PAGE_SIZE;
713
714 /*
715 * Flush all TLBs.
716 */
717 if (!fFlushTLBs)
718 { /* likely as we shouldn't normally map zero pages */ }
719 else
720 {
721 STAM_REL_COUNTER_INC(&pGVM->pgm.s.StatLargePageTlbFlush);
722 PGM_INVL_ALL_VCPU_TLBS(pGVM);
723 }
724 /** @todo this is a little expensive (~3000 ticks) since we'll have to
725 * invalidate everything. Add a version to the TLB? */
726 pgmPhysInvalidatePageMapTLB(pGVM, false /*fInRendezvous*/);
727 IEMTlbInvalidateAllPhysicalAllCpus(pGVM, idCpu, IEMTLBPHYSFLUSHREASON_ALLOCATED_LARGE);
728
729 STAM_PROFILE_STOP(&pGVM->pgm.s.Stats.StatLargePageSetup, a);
730#if 0 /** @todo returning info statuses here might not be a great idea... */
731 LogFlow(("PGMR0PhysAllocateLargePage: returns %Rrc\n", VBOXSTRICTRC_VAL(rc) ));
732 return VBOXSTRICTRC_TODO(rc);
733#else
734 LogFlow(("PGMR0PhysAllocateLargePage: returns VINF_SUCCESS (rc=%Rrc)\n", VBOXSTRICTRC_VAL(rc) ));
735 return VINF_SUCCESS;
736#endif
737}
738
739
740/**
741 * Allocate a large page at @a GCPhys.
742 *
743 * @returns The following VBox status codes.
744 * @retval VINF_SUCCESS on success.
745 * @retval VINF_EM_NO_MEMORY if we're out of memory.
746 *
747 * @param pGVM The global (ring-0) VM structure.
748 * @param idCpu The ID of the calling EMT.
749 * @param GCPhys The guest physical address of the page.
750 *
751 * @thread EMT(idCpu)
752 *
753 * @remarks Must be called from within the PGM critical section. The caller
754 * must clear the new pages.
755 */
756VMMR0_INT_DECL(int) PGMR0PhysAllocateLargePage(PGVM pGVM, VMCPUID idCpu, RTGCPHYS GCPhys)
757{
758 /*
759 * Validate inputs.
760 */
761 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
762 AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
763
764 int rc = PGM_LOCK(pGVM);
765 AssertRCReturn(rc, rc);
766
767 /* The caller might have done this already, but since we're ring-3 callable we
768 need to make sure everything is fine before starting the allocation here. */
769 for (unsigned i = 0; i < _2M / GUEST_PAGE_SIZE; i++)
770 {
771 PPGMPAGE pPage;
772 rc = pgmPhysGetPageEx(pGVM, GCPhys + i * GUEST_PAGE_SIZE, &pPage);
773 AssertRCReturnStmt(rc, PGM_UNLOCK(pGVM), rc);
774 AssertReturnStmt(PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM, PGM_UNLOCK(pGVM), VERR_PGM_PHYS_NOT_RAM);
775 AssertReturnStmt(PGM_PAGE_IS_ZERO(pPage), PGM_UNLOCK(pGVM), VERR_PGM_UNEXPECTED_PAGE_STATE);
776 }
777
778 /*
779 * Call common code.
780 */
781 rc = pgmR0PhysAllocateLargePage(pGVM, idCpu, GCPhys);
782
783 PGM_UNLOCK(pGVM);
784 return rc;
785}
786
787
788/**
789 * Validates an MMIO2 handle and translates it into a range index.
790 *
791 * @returns The MMIO2 range index on success, negative VBox status code on failure.
792 * @param pGVM The global (ring-0) VM structure.
793 * @param pDevIns The device instance owning the region.
794 * @param hMmio2 Handle to look up.
795 */
796DECLINLINE(int32_t) pgmR0PhysMmio2ValidateHandle(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
797{
798 /*
799 * We use the lookup table here as list walking is tedious in ring-0 when using
800 * ring-3 pointers and this probably will require some kind of refactoring anyway.
801 */
802 AssertReturn(hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.aMmio2Ranges) && hMmio2 != 0, VERR_INVALID_HANDLE);
803 uint32_t const idx = hMmio2 - 1U;
804 AssertReturn(pGVM->pgm.s.aMmio2Ranges[idx].pDevInsR3 == pDevIns->pDevInsForR3, VERR_NOT_OWNER);
805 AssertReturn(pGVM->pgm.s.aMmio2Ranges[idx].idMmio2 == hMmio2, VERR_INVALID_HANDLE);
806 AssertReturn(pGVM->pgmr0.s.ahMmio2MapObjs[idx] != NIL_RTR0MEMOBJ, VERR_INVALID_HANDLE);
807 AssertReturn(pGVM->pgmr0.s.acMmio2RangePages[idx] != 0, VERR_INVALID_HANDLE);
808 return idx;
809}
810
811
812/**
813 * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
814 *
815 * @returns VBox status code.
816 * @param pGVM The global (ring-0) VM structure.
817 * @param pDevIns The device instance.
818 * @param hMmio2 The MMIO2 region to map into ring-0 address space.
819 * @param offSub The offset into the region.
820 * @param cbSub The size of the mapping, zero meaning all the rest.
821 * @param ppvMapping Where to return the ring-0 mapping address.
822 */
823VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
824 size_t offSub, size_t cbSub, void **ppvMapping)
825{
826 *ppvMapping = NULL;
827 AssertReturn(!(offSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
828 AssertReturn(!(cbSub & HOST_PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
829
830 /*
831 * Validate and translate hMmio2 into an MMIO2 index.
832 */
833 uint32_t const idxFirst = pgmR0PhysMmio2ValidateHandle(pGVM, pDevIns, hMmio2);
834 AssertReturn((int32_t)idxFirst >= 0, (int32_t)idxFirst);
835
836#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
837 uint8_t * const pbR0 = pGVM->pgmr0.s.apbMmio2Backing[idxFirst];
838#else
839 RTR0MEMOBJ const hMemObj = pGVM->pgmr0.s.ahMmio2MemObjs[idxFirst];
840#endif
841 RTGCPHYS const cbReal = (RTGCPHYS)pGVM->pgmr0.s.acMmio2RangePages[idxFirst] << GUEST_PAGE_SHIFT;
842 ASMCompilerBarrier();
843
844 AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
845 if (cbSub == 0)
846 cbSub = cbReal - offSub;
847 else
848 AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
849
850#ifndef VBOX_WITH_LINEAR_HOST_PHYS_MEM
851 /*
852 * Just return the address of the existing ring-0 mapping.
853 */
854 AssertPtrReturn(pbR0, VERR_INTERNAL_ERROR_4);
855 *ppvMapping = &pbR0[offSub];
856 return VINF_SUCCESS;
857#else
858 /*
859 * Call IPRT to do the mapping. Cleanup is done indirectly by telling
860 * RTR0MemObjFree to include mappings. It can only be done once, so no
861 * risk of excessive mapping leaks.
862 */
863 RTR0MEMOBJ hMapObj;
864 int rc = RTR0MemObjMapKernelEx(&hMapObj, hMemObj, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, offSub, cbSub);
865 if (RT_SUCCESS(rc))
866 *ppvMapping = RTR0MemObjAddress(hMapObj);
867 return rc;
868#endif
869}
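/* Minimal usage sketch (illustrative only; pDevIns and hMmio2 are assumed to
   come from the ring-3 MMIO2 registration), mapping the whole region:
        void *pvR0 = NULL;
        int rc = PGMR0PhysMMIO2MapKernel(pGVM, pDevIns, hMmio2,
                                         0 /*offSub*/, 0 /*cbSub: all the rest*/, &pvR0);
   Both offSub and cbSub must be host-page aligned, cf. the asserts above. */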
870
871
872/**
873 * This is called during PGMR3Init to init the physical access handler allocator
874 * and tree.
875 *
876 * @returns VBox status code.
877 * @param pGVM Pointer to the global VM structure.
878 * @param cEntries Desired number of physical access handlers to reserve
879 * space for (will be adjusted).
880 * @thread EMT(0)
881 */
882VMMR0_INT_DECL(int) PGMR0PhysHandlerInitReqHandler(PGVM pGVM, uint32_t cEntries)
883{
884 /*
885 * Validate the input and state.
886 */
887 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
888 AssertRCReturn(rc, rc);
889 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
890
891 AssertReturn(pGVM->pgmr0.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
892 AssertReturn(pGVM->pgm.s.PhysHandlerAllocator.m_paNodes == NULL, VERR_WRONG_ORDER);
893
894 AssertLogRelMsgReturn(cEntries <= _64K, ("%#x\n", cEntries), VERR_OUT_OF_RANGE);
895
896 /*
897 * Calculate the table size and allocate it.
898 */
899 uint32_t cbTreeAndBitmap = 0;
900 uint32_t const cbTotalAligned = pgmHandlerPhysicalCalcTableSizes(&cEntries, &cbTreeAndBitmap);
901 RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
902 rc = RTR0MemObjAllocPage(&hMemObj, cbTotalAligned, false);
903 if (RT_SUCCESS(rc))
904 {
905 RTR0MEMOBJ hMapObj = NIL_RTR0MEMOBJ;
906 rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
907 if (RT_SUCCESS(rc))
908 {
909 uint8_t *pb = (uint8_t *)RTR0MemObjAddress(hMemObj);
910 if (!RTR0MemObjWasZeroInitialized(hMemObj))
911 RT_BZERO(pb, cbTotalAligned);
912
913 pGVM->pgmr0.s.PhysHandlerAllocator.initSlabAllocator(cEntries, (PPGMPHYSHANDLER)&pb[cbTreeAndBitmap],
914 (uint64_t *)&pb[sizeof(PGMPHYSHANDLERTREE)]);
915 pGVM->pgmr0.s.pPhysHandlerTree = (PPGMPHYSHANDLERTREE)pb;
916 pGVM->pgmr0.s.pPhysHandlerTree->initWithAllocator(&pGVM->pgmr0.s.PhysHandlerAllocator);
917 pGVM->pgmr0.s.hPhysHandlerMemObj = hMemObj;
918 pGVM->pgmr0.s.hPhysHandlerMapObj = hMapObj;
919
920 AssertCompile(sizeof(pGVM->pgm.s.PhysHandlerAllocator) == sizeof(pGVM->pgmr0.s.PhysHandlerAllocator));
921 RTR3PTR R3Ptr = RTR0MemObjAddressR3(hMapObj);
922 pGVM->pgm.s.pPhysHandlerTree = R3Ptr;
923 pGVM->pgm.s.PhysHandlerAllocator.m_paNodes = R3Ptr + cbTreeAndBitmap;
924 pGVM->pgm.s.PhysHandlerAllocator.m_pbmAlloc = R3Ptr + sizeof(PGMPHYSHANDLERTREE);
925 pGVM->pgm.s.PhysHandlerAllocator.m_cNodes = cEntries;
926 pGVM->pgm.s.PhysHandlerAllocator.m_cErrors = 0;
927 pGVM->pgm.s.PhysHandlerAllocator.m_idxAllocHint = 0;
928 pGVM->pgm.s.PhysHandlerAllocator.m_uPadding = 0;
929 return VINF_SUCCESS;
930 }
931
932 RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
933 }
934 return rc;
935}
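/* Resulting layout of the single allocation, mapped 1:1 into ring-3 via
   hMapObj:
        offset 0                            PGMPHYSHANDLERTREE
        offset sizeof(PGMPHYSHANDLERTREE)   allocation bitmap
        offset cbTreeAndBitmap              cEntries PGMPHYSHANDLER nodes
   which is what the ring-3 allocator fields above are pointed at. */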
936
937
938/**
939 * Updates a physical access handler type with ring-0 callback functions.
940 *
941 * The handler type must first have been registered in ring-3.
942 *
943 * @returns VBox status code.
944 * @param pGVM The global (ring-0) VM structure.
945 * @param enmKind The kind of access handler.
946 * @param fFlags PGMPHYSHANDLER_F_XXX
947 * @param pfnHandler Pointer to the ring-0 handler callback.
948 * @param pfnPfHandler Pointer to the ring-0 \#PF handler callback.
949 * Can be NULL (not recommended though).
950 * @param pszDesc The type description.
951 * @param hType The handle to do ring-0 callback registrations for.
952 * @thread EMT(0)
953 */
954VMMR0_INT_DECL(int) PGMR0HandlerPhysicalTypeSetUpContext(PGVM pGVM, PGMPHYSHANDLERKIND enmKind, uint32_t fFlags,
955 PFNPGMPHYSHANDLER pfnHandler, PFNPGMRZPHYSPFHANDLER pfnPfHandler,
956 const char *pszDesc, PGMPHYSHANDLERTYPE hType)
957{
958 /*
959 * Validate input.
960 */
961 AssertPtrReturn(pfnHandler, VERR_INVALID_POINTER);
962 AssertPtrNullReturn(pfnPfHandler, VERR_INVALID_POINTER);
963
964 AssertPtrReturn(pszDesc, VERR_INVALID_POINTER);
965 AssertReturn( enmKind == PGMPHYSHANDLERKIND_WRITE
966 || enmKind == PGMPHYSHANDLERKIND_ALL
967 || enmKind == PGMPHYSHANDLERKIND_MMIO,
968 VERR_INVALID_PARAMETER);
969 AssertMsgReturn(!(fFlags & ~PGMPHYSHANDLER_F_VALID_MASK), ("%#x\n", fFlags), VERR_INVALID_FLAGS);
970
971 PPGMPHYSHANDLERTYPEINTR0 const pTypeR0 = &pGVM->pgmr0.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
972 AssertMsgReturn(hType == pTypeR0->hType, ("%#RX64, expected=%#RX64\n", hType, pTypeR0->hType), VERR_INVALID_HANDLE);
973 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == RT_ELEMENTS(pGVM->pgm.s.aPhysHandlerTypes));
974 AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.aPhysHandlerTypes) == PGMPHYSHANDLERTYPE_IDX_MASK + 1);
975 AssertReturn(pTypeR0->enmKind == PGMPHYSHANDLERKIND_INVALID, VERR_ALREADY_INITIALIZED);
976
977 int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
978 AssertRCReturn(rc, rc);
979 VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE); /** @todo ring-0 safe state check. */
980
981 PPGMPHYSHANDLERTYPEINTR3 const pTypeR3 = &pGVM->pgm.s.aPhysHandlerTypes[hType & PGMPHYSHANDLERTYPE_IDX_MASK];
982 AssertMsgReturn(pTypeR3->enmKind == enmKind,
983 ("%#x: %d, expected %d\n", hType, pTypeR3->enmKind, enmKind),
984 VERR_INVALID_HANDLE);
985 AssertMsgReturn(pTypeR3->fKeepPgmLock == RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK),
986 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fKeepPgmLock, fFlags),
987 VERR_INVALID_HANDLE);
988 AssertMsgReturn(pTypeR3->fRing0DevInsIdx == RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX),
989 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fRing0DevInsIdx, fFlags),
990 VERR_INVALID_HANDLE);
991 AssertMsgReturn(pTypeR3->fNotInHm == RT_BOOL(fFlags & PGMPHYSHANDLER_F_NOT_IN_HM),
992 ("%#x: %d, fFlags=%#x\n", hType, pTypeR3->fNotInHm, fFlags),
993 VERR_INVALID_HANDLE);
994
995 /*
996 * Update the entry.
997 */
998 pTypeR0->enmKind = enmKind;
999 pTypeR0->uState = enmKind == PGMPHYSHANDLERKIND_WRITE
1000 ? PGM_PAGE_HNDL_PHYS_STATE_WRITE : PGM_PAGE_HNDL_PHYS_STATE_ALL;
1001 pTypeR0->fKeepPgmLock = RT_BOOL(fFlags & PGMPHYSHANDLER_F_KEEP_PGM_LOCK);
1002 pTypeR0->fRing0DevInsIdx = RT_BOOL(fFlags & PGMPHYSHANDLER_F_R0_DEVINS_IDX);
1003 pTypeR0->fNotInHm = RT_BOOL(fFlags & PGMPHYSHANDLER_F_NOT_IN_HM);
1004 pTypeR0->pfnHandler = pfnHandler;
1005 pTypeR0->pfnPfHandler = pfnPfHandler;
1006 pTypeR0->pszDesc = pszDesc;
1007
1008 pTypeR3->fRing0Enabled = true;
1009
1010 LogFlow(("PGMR0HandlerPhysicalTypeRegister: hType=%#x: enmKind=%d fFlags=%#x pfnHandler=%p pfnPfHandler=%p pszDesc=%s\n",
1011 hType, enmKind, fFlags, pfnHandler, pfnPfHandler, pszDesc));
1012 return VINF_SUCCESS;
1013}
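/* For a usage example see PGMR0InitVM above, which supplies the ring-0 ROM
   write-protection and MMIO2 dirty-tracking callbacks for handler types that
   were registered in ring-3 (the hType values originate from that ring-3
   registration). */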
1014
1015
1016#ifdef VBOX_WITH_PCI_PASSTHROUGH
1017/* Interface sketch. The interface belongs to a global PCI pass-through
1018 manager. It shall use the global VM handle, not the user VM handle to
1019 store the per-VM info (domain) since that is all ring-0 stuff, thus
1020 passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
1021 we can discuss the PciRaw code re-organization when I'm back from
1022 vacation.
1023
1024 I've implemented the initial IOMMU set up below. For things to work
1025 reliably, we will probably need to add a whole bunch of checks and
1026 GPciRawR0GuestPageUpdate calls to the PGM code. For the present,
1027 assuming nested paging (enforced) and prealloc (enforced), no
1028 ballooning (check missing), page sharing (check missing) or live
1029 migration (check missing), it might work fine. At least if some
1030 VM power-off hook is present and can tear down the IOMMU page tables. */
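/* The intended calling sequence, as exercised by PGMR0PhysSetupIoMmu below:
        GPciRawR0GuestPageBeginAssignments(pGVM);
        ... GPciRawR0GuestPageAssign(pGVM, GCPhys, HCPhys) or
            GPciRawR0GuestPageUnassign(pGVM, GCPhys) for each page ...
        GPciRawR0GuestPageEndAssignments(pGVM);
   with GPciRawR0GuestPageUpdate() meant for changes after the initial setup. */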
1031
1032/**
1033 * Tells the global PCI pass-through manager that we are about to set up the
1034 * guest page to host page mappings for the specified VM.
1035 *
1036 * @returns VBox status code.
1037 *
1038 * @param pGVM The ring-0 VM structure.
1039 */
1040VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
1041{
1042 NOREF(pGVM);
1043 return VINF_SUCCESS;
1044}
1045
1046
1047/**
1048 * Assigns a host page mapping for a guest page.
1049 *
1050 * This is only used when setting up the mappings, i.e. between
1051 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
1052 *
1053 * @returns VBox status code.
1054 * @param pGVM The ring-0 VM structure.
1055 * @param GCPhys The address of the guest page (page aligned).
1056 * @param HCPhys The address of the host page (page aligned).
1057 */
1058VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1059{
1060 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1061 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1062
1063 if (pGVM->rawpci.s.pfnContigMemInfo)
1064 /** @todo what do we do on failure? */
1065 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_MAP);
1066
1067 return VINF_SUCCESS;
1068}
1069
1070
1071/**
1072 * Indicates that the specified guest page doesn't exist or doesn't have a host
1073 * page mapping we trust PCI pass-through with.
1074 *
1075 * This is only used when setting up the mappings, i.e. between
1076 * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
1077 *
1078 * @returns VBox status code.
1079 * @param pGVM The ring-0 VM structure.
1080 * @param GCPhys The address of the guest page (page aligned).
1082 */
1083VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
1084{
1085 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
1086
1087 if (pGVM->rawpci.s.pfnContigMemInfo)
1088 /** @todo what do we do on failure? */
1089 pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, HOST_PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
1090
1091 return VINF_SUCCESS;
1092}
1093
1094
1095/**
1096 * Tells the global PCI pass-through manager that we have completed setting up
1097 * the guest page to host page mappings for the specified VM.
1098 *
1099 * This complements GPciRawR0GuestPageBeginAssignments and will be called even
1100 * if some page assignment failed.
1101 *
1102 * @returns VBox status code.
1103 *
1104 * @param pGVM The ring-0 VM structure.
1105 */
1106VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
1107{
1108 NOREF(pGVM);
1109 return VINF_SUCCESS;
1110}
1111
1112
1113/**
1114 * Tells the global PCI pass-through manager that a guest page mapping has
1115 * changed after the initial setup.
1116 *
1117 * @returns VBox status code.
1118 * @param pGVM The ring-0 VM structure.
1119 * @param GCPhys The address of the guest page (page aligned).
1120 * @param HCPhys The new host page address or NIL_RTHCPHYS if
1121 * now unassigned.
1122 */
1123VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
1124{
1125 AssertReturn(!(GCPhys & HOST_PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
1126 AssertReturn(!(HCPhys & HOST_PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
1127 NOREF(pGVM);
1128 return VINF_SUCCESS;
1129}
1130
1131#endif /* VBOX_WITH_PCI_PASSTHROUGH */
1132
1133
1134/**
1135 * Sets up the IOMMU when a raw PCI device is enabled.
1136 *
1137 * @note This is a hack that will probably be remodelled and refined later!
1138 *
1139 * @returns VBox status code.
1140 *
1141 * @param pGVM The global (ring-0) VM structure.
1142 */
1143VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
1144{
1145 int rc = GVMMR0ValidateGVM(pGVM);
1146 if (RT_FAILURE(rc))
1147 return rc;
1148
1149#ifdef VBOX_WITH_PCI_PASSTHROUGH
1150# error fixme
1151 if (pGVM->pgm.s.fPciPassthrough)
1152 {
1153 /*
1154 * The Simplistic Approach - Enumerate all the pages and tell the
1155 * IOMMU about each of them.
1156 */
1157 PGM_LOCK_VOID(pGVM);
1158 rc = GPciRawR0GuestPageBeginAssignments(pGVM);
1159 if (RT_SUCCESS(rc))
1160 {
1161 for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
1162 {
1163 PPGMPAGE pPage = &pRam->aPages[0];
1164 RTGCPHYS GCPhys = pRam->GCPhys;
1165 uint32_t cLeft = pRam->cb >> GUEST_PAGE_SHIFT;
1166 while (cLeft-- > 0)
1167 {
1168 /* Only expose pages that are 100% safe for now. */
1169 if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
1170 && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
1171 && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
1172 rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
1173 else
1174 rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
1175
1176 /* next */
1177 pPage++;
1178 GCPhys += HOST_PAGE_SIZE;
1179 }
1180 }
1181
1182 int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
1183 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
1184 rc = rc2;
1185 }
1186 PGM_UNLOCK(pGVM);
1187 }
1188 else
1189#endif
1190 rc = VERR_NOT_SUPPORTED;
1191 return rc;
1192}
1193
1194
1195/**
1196 * \#PF Handler for nested paging.
1197 *
1198 * @returns VBox status code (appropriate for trap handling and GC return).
1199 * @param pGVM The global (ring-0) VM structure.
1200 * @param pGVCpu The global (ring-0) CPU structure of the calling
1201 * EMT.
1202 * @param enmShwPagingMode Paging mode for the nested page tables.
1203 * @param uErr The trap error code.
1204 * @param pCtx Pointer to the register context for the CPU.
1205 * @param GCPhysFault The fault address.
1206 */
1207VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1208 PCPUMCTX pCtx, RTGCPHYS GCPhysFault)
1209{
1210 int rc;
1211
1212 LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pCtx->rip));
1213 STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
1214 STAM_STATS({ pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = NULL; } );
1215
1216 /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
1217 AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
1218 || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
1219 ("enmShwPagingMode=%d\n", enmShwPagingMode));
1220
1221 /* Reserved shouldn't end up here. */
1222 Assert(!(uErr & X86_TRAP_PF_RSVD));
1223
1224#ifdef VBOX_WITH_STATISTICS
1225 /*
1226 * Error code stats.
1227 */
1228 if (uErr & X86_TRAP_PF_US)
1229 {
1230 if (!(uErr & X86_TRAP_PF_P))
1231 {
1232 if (uErr & X86_TRAP_PF_RW)
1233 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentWrite);
1234 else
1235 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNotPresentRead);
1236 }
1237 else if (uErr & X86_TRAP_PF_RW)
1238 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSWrite);
1239 else if (uErr & X86_TRAP_PF_RSVD)
1240 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSReserved);
1241 else if (uErr & X86_TRAP_PF_ID)
1242 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSNXE);
1243 else
1244 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eUSRead);
1245 }
1246 else
1247 { /* Supervisor */
1248 if (!(uErr & X86_TRAP_PF_P))
1249 {
1250 if (uErr & X86_TRAP_PF_RW)
1251 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentWrite);
1252 else
1253 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVNotPresentRead);
1254 }
1255 else if (uErr & X86_TRAP_PF_RW)
1256 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVWrite);
1257 else if (uErr & X86_TRAP_PF_ID)
1258 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSNXE);
1259 else if (uErr & X86_TRAP_PF_RSVD)
1260 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatRZTrap0eSVReserved);
1261 }
1262#endif
1263
1264 /*
1265 * Call the worker.
1266 *
1267 * Note! We pretend the guest is in protected mode without paging, so we
1268 * can use existing code to build the nested page tables.
1269 */
1270/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
1271 bool fLockTaken = false;
1272 switch (enmShwPagingMode)
1273 {
1274 case PGMMODE_32_BIT:
1275 rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1276 break;
1277 case PGMMODE_PAE:
1278 case PGMMODE_PAE_NX:
1279 rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1280 break;
1281 case PGMMODE_AMD64:
1282 case PGMMODE_AMD64_NX:
1283 rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1284 break;
1285 case PGMMODE_EPT:
1286 rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pCtx, GCPhysFault, &fLockTaken);
1287 break;
1288 default:
1289 AssertFailed();
1290 rc = VERR_INVALID_PARAMETER;
1291 break;
1292 }
1293 if (fLockTaken)
1294 {
1295 PGM_LOCK_ASSERT_OWNER(pGVM);
1296 PGM_UNLOCK(pGVM);
1297 }
1298
1299 if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
1300 rc = VINF_SUCCESS;
1301 /*
1302 * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
1303 * via its page tables, see @bugref{6043}.
1304 */
1305 else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
1306 || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
1307 || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
1308 || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
1309 {
1310 Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pCtx->rip));
1311 /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
1312 single VCPU VMs though. */
1313 rc = VINF_SUCCESS;
1314 }
1315
1316 STAM_STATS({ if (!pGVCpu->pgmr0.s.pStatTrap0eAttributionR0)
1317 pGVCpu->pgmr0.s.pStatTrap0eAttributionR0 = &pGVCpu->pgm.s.Stats.StatRZTrap0eTime2Misc; });
1318 STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.Stats.StatRZTrap0e, pGVCpu->pgmr0.s.pStatTrap0eAttributionR0, a);
1319 return rc;
1320}
1321
1322
1323#ifdef VBOX_WITH_NESTED_HWVIRT_VMX_EPT
1324/**
1325 * Nested \#PF Handler for nested-guest execution using nested paging.
1326 *
1327 * @returns Strict VBox status code (appropriate for trap handling and GC return).
1328 * @param pGVCpu The global (ring-0) CPU structure of the calling
1329 * EMT.
1330 * @param enmShwPagingMode Paging mode for the nested page tables (must be PGMMODE_EPT).
1331 * @param uErr The trap error code.
1332 * @param pCtx Pointer to the register context for the CPU.
1333 * @param GCPhysNestedFault The nested-guest physical address causing the fault.
1334 * @param fIsLinearAddrValid Whether translation of a nested-guest linear address
1335 * caused this fault. If @c false, GCPtrNestedFault
1336 * must be 0.
1337 * @param GCPtrNestedFault The nested-guest linear address that caused this
1338 * fault.
1339 * @param pWalk Where to store the SLAT walk result.
1340 */
1341VMMR0DECL(VBOXSTRICTRC) PGMR0NestedTrap0eHandlerNestedPaging(PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
1342 PCPUMCTX pCtx, RTGCPHYS GCPhysNestedFault,
1343 bool fIsLinearAddrValid, RTGCPTR GCPtrNestedFault, PPGMPTWALK pWalk)
1344{
1345 Assert(enmShwPagingMode == PGMMODE_EPT);
1346 NOREF(enmShwPagingMode);
1347
1348 bool fLockTaken;
1349 VBOXSTRICTRC rcStrict = PGM_BTH_NAME_EPT_PROT(NestedTrap0eHandler)(pGVCpu, uErr, pCtx, GCPhysNestedFault,
1350 fIsLinearAddrValid, GCPtrNestedFault, pWalk, &fLockTaken);
1351 if (fLockTaken)
1352 {
1353 PGM_LOCK_ASSERT_OWNER(pGVCpu->CTX_SUFF(pVM));
1354 PGM_UNLOCK(pGVCpu->CTX_SUFF(pVM));
1355 }
1356 Assert(rcStrict != VINF_PGM_SYNCPAGE_MODIFIED_PDE); /* This rc isn't used with Nested Paging and nested-EPT. */
1357 return rcStrict;
1358}
1359#endif /* VBOX_WITH_NESTED_HWVIRT_VMX_EPT */
1360
1361
1362/**
1363 * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
1364 * employed for MMIO pages.
1365 *
1366 * @returns VBox status code (appropriate for trap handling and GC return).
1367 * @param pGVM The global (ring-0) VM structure.
1368 * @param pGVCpu The global (ring-0) CPU structure of the calling
1369 * EMT.
1370 * @param enmShwPagingMode Paging mode for the nested page tables.
1371 * @param pCtx Pointer to the register context for the CPU.
1372 * @param GCPhysFault The fault address.
1373 * @param uErr The error code, UINT32_MAX if not available
1374 * (VT-x).
1375 */
1376VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
1377 PCPUMCTX pCtx, RTGCPHYS GCPhysFault, uint32_t uErr)
1378{
1379#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
1380 STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
1381 VBOXSTRICTRC rc;
1382
1383 /*
1384 * Try lookup the all access physical handler for the address.
1385 */
1386 PGM_LOCK_VOID(pGVM);
1387 PPGMPHYSHANDLER pHandler;
1388 rc = pgmHandlerPhysicalLookup(pGVM, GCPhysFault, &pHandler);
1389 if (RT_SUCCESS(rc))
1390 {
1391 PCPGMPHYSHANDLERTYPEINT pHandlerType = PGMPHYSHANDLER_GET_TYPE_NO_NULL(pGVM, pHandler);
1392 if (RT_LIKELY( pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE
1393 && !pHandlerType->fNotInHm /*paranoia*/ ))
1394 {
1395 /*
1396 * If the handler has aliased pages or pages that have been temporarily
1397 * disabled, we'll have to take a detour to make sure we resync them
1398 * to avoid lots of unnecessary exits.
1399 */
1400 PPGMPAGE pPage;
1401 if ( ( pHandler->cAliasedPages
1402 || pHandler->cTmpOffPages)
1403 && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
1404 || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
1405 )
1406 {
1407 Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
1408 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1409 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1410 PGM_UNLOCK(pGVM);
1411 }
1412 else
1413 {
1414 if (pHandlerType->pfnPfHandler)
1415 {
1416 uint64_t const uUser = !pHandlerType->fRing0DevInsIdx ? pHandler->uUser
1417 : (uintptr_t)PDMDeviceRing0IdxToInstance(pGVM, pHandler->uUser);
1418 STAM_PROFILE_START(&pHandler->Stat, h);
1419 PGM_UNLOCK(pGVM);
1420
1421 Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->pfnPfHandler, uErr, GCPhysFault, uUser));
1422 rc = pHandlerType->pfnPfHandler(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pCtx,
1423 GCPhysFault, GCPhysFault, uUser);
1424
1425 STAM_PROFILE_STOP(&pHandler->Stat, h); /* no locking needed, entry is unlikely reused before we get here. */
1426 }
1427 else
1428 {
1429 PGM_UNLOCK(pGVM);
1430 Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
1431 rc = VINF_EM_RAW_EMULATE_INSTR;
1432 }
1433 }
1434 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1435 return rc;
1436 }
1437 }
1438 else
1439 AssertMsgReturn(rc == VERR_NOT_FOUND, ("%Rrc GCPhysFault=%RGp\n", VBOXSTRICTRC_VAL(rc), GCPhysFault), rc);
1440
1441 /*
1442 * Must be out of sync, so do a SyncPage and restart the instruction.
1443 *
1444 * ASSUMES that ALL handlers are page aligned and covers whole pages
1445 * (assumption asserted in PGMHandlerPhysicalRegisterEx).
1446 */
1447 Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
1448 STAM_COUNTER_INC(&pGVCpu->pgm.s.Stats.StatR0NpMiscfgSyncPage);
1449 rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
1450 PGM_UNLOCK(pGVM);
1451
1452 STAM_PROFILE_STOP(&pGVCpu->pgm.s.Stats.StatR0NpMiscfg, a);
1453 return rc;
1454
1455#else
1456 AssertLogRelFailed();
1457 return VERR_PGM_NOT_USED_IN_MODE;
1458#endif
1459}
1460