VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@91014

Last change on this file since 91014 was 91014, checked in by vboxsync, 4 years ago

VMM: Made VBOX_WITH_RAM_IN_KERNEL non-optional, removing all the tests for it. bugref:9627

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 109.8 KB
1/* $Id: GVMMR0.cpp 91014 2021-08-31 01:03:39Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** @def GVMM_CHECK_SMAP_SETUP
101 * SMAP check setup. */
102/** @def GVMM_CHECK_SMAP_CHECK
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
104 * it will be logged and @a a_BadExpr is executed. */
105/** @def GVMM_CHECK_SMAP_CHECK2
106 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
107 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
108 * executed. */
109#define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
110#define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
111#define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
112
113/** Special value that GVMMR0DeregisterVCpu sets. */
114#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
115AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
116
117
118/*********************************************************************************************************************************
119* Structures and Typedefs *
120*********************************************************************************************************************************/
121
122/**
123 * Global VM handle.
124 */
125typedef struct GVMHANDLE
126{
127 /** The index of the next handle in the list (free or used). (0 is nil.) */
128 uint16_t volatile iNext;
129 /** Our own index / handle value. */
130 uint16_t iSelf;
131 /** The process ID of the handle owner.
132 * This is used for access checks. */
133 RTPROCESS ProcId;
134 /** The pointer to the ring-0 only (aka global) VM structure. */
135 PGVM pGVM;
136 /** The virtual machine object. */
137 void *pvObj;
138 /** The session this VM is associated with. */
139 PSUPDRVSESSION pSession;
140 /** The ring-0 handle of the EMT0 thread.
141 * This is used for ownership checks as well as for looking up a VM handle by
142 * thread in places like assertions. */
143 RTNATIVETHREAD hEMT0;
144} GVMHANDLE;
145/** Pointer to a global VM handle. */
146typedef GVMHANDLE *PGVMHANDLE;
147
148/** Number of GVM handles (including the NIL handle). */
149#if HC_ARCH_BITS == 64
150# define GVMM_MAX_HANDLES 8192
151#else
152# define GVMM_MAX_HANDLES 128
153#endif
154
155/**
156 * Per host CPU GVMM data.
157 */
158typedef struct GVMMHOSTCPU
159{
160 /** Magic number (GVMMHOSTCPU_MAGIC). */
161 uint32_t volatile u32Magic;
162 /** The CPU ID. */
163 RTCPUID idCpu;
164 /** The CPU set index. */
165 uint32_t idxCpuSet;
166
167#ifdef GVMM_SCHED_WITH_PPT
168 /** Periodic preemption timer data. */
169 struct
170 {
171 /** The handle to the periodic preemption timer. */
172 PRTTIMER pTimer;
173 /** Spinlock protecting the data below. */
174 RTSPINLOCK hSpinlock;
175 /** The smallest Hz that we need to care about. (static) */
176 uint32_t uMinHz;
177 /** The number of ticks between each historization. */
178 uint32_t cTicksHistoriziationInterval;
179 /** The current historization tick (counting up to
180 * cTicksHistoriziationInterval and then resetting). */
181 uint32_t iTickHistorization;
182 /** The current timer interval. This is set to 0 when inactive. */
183 uint32_t cNsInterval;
184 /** The current timer frequency. This is set to 0 when inactive. */
185 uint32_t uTimerHz;
186 /** The current max frequency reported by the EMTs.
187 * This gets historicized and reset by the timer callback. This is
188 * read without holding the spinlock, so needs atomic updating. */
189 uint32_t volatile uDesiredHz;
190 /** Whether the timer was started or not. */
191 bool volatile fStarted;
192 /** Set if we're starting the timer. */
193 bool volatile fStarting;
194 /** The index of the next history entry (mod it). */
195 uint32_t iHzHistory;
196 /** Historicized uDesiredHz values. The array wraps around, new entries
197 * are added at iHzHistory. This is updated approximately every
198 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
199 uint32_t aHzHistory[8];
200 /** Statistics counter for recording the number of interval changes. */
201 uint32_t cChanges;
202 /** Statistics counter for recording the number of timer starts. */
203 uint32_t cStarts;
204 } Ppt;
205#endif /* GVMM_SCHED_WITH_PPT */
206
207} GVMMHOSTCPU;
208/** Pointer to the per host CPU GVMM data. */
209typedef GVMMHOSTCPU *PGVMMHOSTCPU;
210/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
211#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
212/** The interval one history entry should cover (approximately), given in
213 * nanoseconds. */
214#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
215
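/*
 * Editorial illustration (not part of the original file): a minimal sketch of
 * how the "max frequency over the last ~160 ms" rule from the PPT section at
 * the top maps onto the GVMMHOSTCPU::Ppt fields above.  The helper name is
 * hypothetical; the real logic lives in gvmmR0SchedPeriodicPreemptionTimerCallback()
 * and GVMMR0SchedUpdatePeriodicPreemptionTimer(), not here.
 */
#if 0 /* example only, not compiled into the file */
# ifdef GVMM_SCHED_WITH_PPT
static uint32_t gvmmR0ExamplePickTimerHz(PGVMMHOSTCPU pCpu)
{
    /* Start with the latest value reported by the EMTs (atomic, see uDesiredHz). */
    uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);

    /* Fold in the historicized values: 8 entries * 20 ms = the ~160 ms window. */
    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);

    /* Never go below the static minimum we care about. */
    return RT_MAX(uHz, pCpu->Ppt.uMinHz);
}
# endif
#endif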
216
217/**
218 * The GVMM instance data.
219 */
220typedef struct GVMM
221{
222 /** Eyecatcher / magic. */
223 uint32_t u32Magic;
224 /** The index of the head of the free handle chain. (0 is nil.) */
225 uint16_t volatile iFreeHead;
226 /** The index of the head of the active handle chain. (0 is nil.) */
227 uint16_t volatile iUsedHead;
228 /** The number of VMs. */
229 uint16_t volatile cVMs;
230 /** Alignment padding. */
231 uint16_t u16Reserved;
232 /** The number of EMTs. */
233 uint32_t volatile cEMTs;
234 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
235 uint32_t volatile cHaltedEMTs;
236 /** Mini lock for restricting early wake-ups to one thread. */
237 bool volatile fDoingEarlyWakeUps;
238 bool afPadding[3]; /**< explicit alignment padding. */
239 /** When the next halted or sleeping EMT will wake up.
240 * This is set to 0 when it needs recalculating and to UINT64_MAX when
241 * there are no halted or sleeping EMTs in the GVMM. */
242 uint64_t uNsNextEmtWakeup;
243 /** The lock used to serialize VM creation, destruction and associated events that
244 * aren't performance critical. Owners may acquire the list lock. */
245 RTCRITSECT CreateDestroyLock;
246 /** The lock used to serialize used list updates and accesses.
247 * This indirectly includes scheduling since the scheduler will have to walk the
248 * used list to examine running VMs. Owners may not acquire any other locks. */
249 RTCRITSECTRW UsedLock;
250 /** The handle array.
251 * The size of this array defines the maximum number of currently running VMs.
252 * The first entry is unused as it represents the NIL handle. */
253 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
254
255 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
256 * The number of EMTs that means we no longer consider ourselves alone on a
257 * CPU/Core.
258 */
259 uint32_t cEMTsMeansCompany;
260 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
261 * The minimum sleep time for when we're alone, in nanoseconds.
262 */
263 uint32_t nsMinSleepAlone;
264 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
265 * The minimum sleep time for when we've got company, in nanoseconds.
266 */
267 uint32_t nsMinSleepCompany;
268 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
269 * The limit for the first round of early wake-ups, given in nanoseconds.
270 */
271 uint32_t nsEarlyWakeUp1;
272 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
273 * The limit for the second round of early wake-ups, given in nanoseconds.
274 */
275 uint32_t nsEarlyWakeUp2;
276
277 /** Set if we're doing early wake-ups.
278 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
279 bool volatile fDoEarlyWakeUps;
280
281 /** The number of entries in the host CPU array (aHostCpus). */
282 uint32_t cHostCpus;
283 /** Per host CPU data (variable length). */
284 GVMMHOSTCPU aHostCpus[1];
285} GVMM;
286AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
287AssertCompileMemberAlignment(GVMM, UsedLock, 8);
288AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
289/** Pointer to the GVMM instance data. */
290typedef GVMM *PGVMM;
291
292/** The GVMM::u32Magic value (Charlie Haden). */
293#define GVMM_MAGIC UINT32_C(0x19370806)
294
295
296
297/*********************************************************************************************************************************
298* Global Variables *
299*********************************************************************************************************************************/
300/** Pointer to the GVMM instance data.
301 * (Just my general dislike for global variables.) */
302static PGVMM g_pGVMM = NULL;
303
304/** Macro for obtaining and validating the g_pGVMM pointer.
305 * On failure it will return from the invoking function with the specified return value.
306 *
307 * @param pGVMM The name of the pGVMM variable.
308 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
309 * status codes.
310 */
311#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
312 do { \
313 (pGVMM) = g_pGVMM;\
314 AssertPtrReturn((pGVMM), (rc)); \
315 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
316 } while (0)
317
318/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
319 * On failure it will return from the invoking function.
320 *
321 * @param pGVMM The name of the pGVMM variable.
322 */
323#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
324 do { \
325 (pGVMM) = g_pGVMM;\
326 AssertPtrReturnVoid((pGVMM)); \
327 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
328 } while (0)
329
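/*
 * Editorial illustration (not part of the original file): the typical way the
 * two instance-validation macros above are used by the ring-0 entry points in
 * this file (compare GVMMR0SetConfig() below).  The function name is
 * hypothetical.
 */
#if 0 /* example only, not compiled into the file */
GVMMR0DECL(int) GVMMR0ExampleEntryPoint(void)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); /* returns VERR_GVMM_INSTANCE on a missing/corrupt instance */
    /* ... safe to dereference pGVMM from here on ... */
    return VINF_SUCCESS;
}
#endif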
330
331/*********************************************************************************************************************************
332* Internal Functions *
333*********************************************************************************************************************************/
334static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
335static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
336static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
337static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
338
339#ifdef GVMM_SCHED_WITH_PPT
340static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
341#endif
342
343
344/**
345 * Initializes the GVMM.
346 *
347 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
348 *
349 * @returns VBox status code.
350 */
351GVMMR0DECL(int) GVMMR0Init(void)
352{
353 LogFlow(("GVMMR0Init:\n"));
354
355 /*
356 * Allocate and initialize the instance data.
357 */
358 uint32_t cHostCpus = RTMpGetArraySize();
359 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
360
361 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
362 if (!pGVMM)
363 return VERR_NO_MEMORY;
364 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
365 "GVMM-CreateDestroyLock");
366 if (RT_SUCCESS(rc))
367 {
368 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
369 if (RT_SUCCESS(rc))
370 {
371 pGVMM->u32Magic = GVMM_MAGIC;
372 pGVMM->iUsedHead = 0;
373 pGVMM->iFreeHead = 1;
374
375 /* the nil handle */
376 pGVMM->aHandles[0].iSelf = 0;
377 pGVMM->aHandles[0].iNext = 0;
378
379 /* the tail */
380 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
381 pGVMM->aHandles[i].iSelf = i;
382 pGVMM->aHandles[i].iNext = 0; /* nil */
383
384 /* the rest */
385 while (i-- > 1)
386 {
387 pGVMM->aHandles[i].iSelf = i;
388 pGVMM->aHandles[i].iNext = i + 1;
389 }
390
391 /* The default configuration values. */
392 uint32_t cNsResolution = RTSemEventMultiGetResolution();
393 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
394 if (cNsResolution >= 5*RT_NS_100US)
395 {
396 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
397 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
398 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
399 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
400 }
401 else if (cNsResolution > RT_NS_100US)
402 {
403 pGVMM->nsMinSleepAlone = cNsResolution / 2;
404 pGVMM->nsMinSleepCompany = cNsResolution / 4;
405 pGVMM->nsEarlyWakeUp1 = 0;
406 pGVMM->nsEarlyWakeUp2 = 0;
407 }
408 else
409 {
410 pGVMM->nsMinSleepAlone = 2000;
411 pGVMM->nsMinSleepCompany = 2000;
412 pGVMM->nsEarlyWakeUp1 = 0;
413 pGVMM->nsEarlyWakeUp2 = 0;
414 }
415 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
416
417 /* The host CPU data. */
418 pGVMM->cHostCpus = cHostCpus;
419 uint32_t iCpu = cHostCpus;
420 RTCPUSET PossibleSet;
421 RTMpGetSet(&PossibleSet);
422 while (iCpu-- > 0)
423 {
424 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
425#ifdef GVMM_SCHED_WITH_PPT
426 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
427 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
428 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
429 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
430 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
431 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
432 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
433 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
434 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
435 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
436 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
437 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
438#endif
439
440 if (RTCpuSetIsMember(&PossibleSet, iCpu))
441 {
442 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
443 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
444
445#ifdef GVMM_SCHED_WITH_PPT
446 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
447 50*1000*1000 /* whatever */,
448 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
449 gvmmR0SchedPeriodicPreemptionTimerCallback,
450 &pGVMM->aHostCpus[iCpu]);
451 if (RT_SUCCESS(rc))
452 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
453 if (RT_FAILURE(rc))
454 {
455 while (iCpu < cHostCpus)
456 {
457 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
458 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
459 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
460 iCpu++;
461 }
462 break;
463 }
464#endif
465 }
466 else
467 {
468 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
469 pGVMM->aHostCpus[iCpu].u32Magic = 0;
470 }
471 }
472 if (RT_SUCCESS(rc))
473 {
474 g_pGVMM = pGVMM;
475 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
476 return VINF_SUCCESS;
477 }
478
479 /* bail out. */
480 RTCritSectRwDelete(&pGVMM->UsedLock);
481 }
482 RTCritSectDelete(&pGVMM->CreateDestroyLock);
483 }
484
485 RTMemFree(pGVMM);
486 return rc;
487}
488
489
490/**
491 * Terminates the GVMM.
492 *
493 * This is called while owning the loader semaphore (see supdrvLdrFree()).
494 * And unless something is wrong, there should be absolutely no VMs
495 * registered at this point.
496 */
497GVMMR0DECL(void) GVMMR0Term(void)
498{
499 LogFlow(("GVMMR0Term:\n"));
500
501 PGVMM pGVMM = g_pGVMM;
502 g_pGVMM = NULL;
503 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
504 {
505 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
506 return;
507 }
508
509 /*
510 * First of all, stop all active timers.
511 */
512 uint32_t cActiveTimers = 0;
513 uint32_t iCpu = pGVMM->cHostCpus;
514 while (iCpu-- > 0)
515 {
516 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
517#ifdef GVMM_SCHED_WITH_PPT
518 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
519 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
520 cActiveTimers++;
521#endif
522 }
523 if (cActiveTimers)
524 RTThreadSleep(1); /* fudge */
525
526 /*
527 * Invalidate the instance data and free resources.
528 */
529 pGVMM->u32Magic = ~GVMM_MAGIC;
530 RTCritSectRwDelete(&pGVMM->UsedLock);
531 RTCritSectDelete(&pGVMM->CreateDestroyLock);
532
533 pGVMM->iFreeHead = 0;
534 if (pGVMM->iUsedHead)
535 {
536 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
537 pGVMM->iUsedHead = 0;
538 }
539
540#ifdef GVMM_SCHED_WITH_PPT
541 iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
545 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
546 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
547 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
548 }
549#endif
550
551 RTMemFree(pGVMM);
552}
553
554
555/**
556 * A quick hack for setting global config values.
557 *
558 * @returns VBox status code.
559 *
560 * @param pSession The session handle. Used for authentication.
561 * @param pszName The variable name.
562 * @param u64Value The new value.
563 */
564GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
565{
566 /*
567 * Validate input.
568 */
569 PGVMM pGVMM;
570 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
571 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
572 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
573
574 /*
575 * String switch time!
576 */
577 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
578 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
579 int rc = VINF_SUCCESS;
580 pszName += sizeof("/GVMM/") - 1;
581 if (!strcmp(pszName, "cEMTsMeansCompany"))
582 {
583 if (u64Value <= UINT32_MAX)
584 pGVMM->cEMTsMeansCompany = u64Value;
585 else
586 rc = VERR_OUT_OF_RANGE;
587 }
588 else if (!strcmp(pszName, "MinSleepAlone"))
589 {
590 if (u64Value <= RT_NS_100MS)
591 pGVMM->nsMinSleepAlone = u64Value;
592 else
593 rc = VERR_OUT_OF_RANGE;
594 }
595 else if (!strcmp(pszName, "MinSleepCompany"))
596 {
597 if (u64Value <= RT_NS_100MS)
598 pGVMM->nsMinSleepCompany = u64Value;
599 else
600 rc = VERR_OUT_OF_RANGE;
601 }
602 else if (!strcmp(pszName, "EarlyWakeUp1"))
603 {
604 if (u64Value <= RT_NS_100MS)
605 {
606 pGVMM->nsEarlyWakeUp1 = u64Value;
607 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
608 }
609 else
610 rc = VERR_OUT_OF_RANGE;
611 }
612 else if (!strcmp(pszName, "EarlyWakeUp2"))
613 {
614 if (u64Value <= RT_NS_100MS)
615 {
616 pGVMM->nsEarlyWakeUp2 = u64Value;
617 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
618 }
619 else
620 rc = VERR_OUT_OF_RANGE;
621 }
622 else
623 rc = VERR_CFGM_VALUE_NOT_FOUND;
624 return rc;
625}
626
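/*
 * Editorial illustration (not part of the original file): a hedged sketch of
 * calling the quick-hack setter above.  It assumes a valid PSUPDRVSESSION
 * obtained elsewhere; values outside the documented ranges yield
 * VERR_OUT_OF_RANGE and unknown names VERR_CFGM_VALUE_NOT_FOUND.
 */
#if 0 /* example only, not compiled into the file */
    /* Raise the alone-on-a-core minimum sleep time to 0.5 ms. */
    int rcCfg = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns */);
    AssertRC(rcCfg);
#endif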
627
628/**
629 * A quick hack for getting global config values.
630 *
631 * @returns VBox status code.
632 *
633 * @param pSession The session handle. Used for authentication.
634 * @param pszName The variable name.
635 * @param pu64Value Where to return the value.
636 */
637GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
638{
639 /*
640 * Validate input.
641 */
642 PGVMM pGVMM;
643 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
644 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
645 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
646 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
647
648 /*
649 * String switch time!
650 */
651 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
652 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
653 int rc = VINF_SUCCESS;
654 pszName += sizeof("/GVMM/") - 1;
655 if (!strcmp(pszName, "cEMTsMeansCompany"))
656 *pu64Value = pGVMM->cEMTsMeansCompany;
657 else if (!strcmp(pszName, "MinSleepAlone"))
658 *pu64Value = pGVMM->nsMinSleepAlone;
659 else if (!strcmp(pszName, "MinSleepCompany"))
660 *pu64Value = pGVMM->nsMinSleepCompany;
661 else if (!strcmp(pszName, "EarlyWakeUp1"))
662 *pu64Value = pGVMM->nsEarlyWakeUp1;
663 else if (!strcmp(pszName, "EarlyWakeUp2"))
664 *pu64Value = pGVMM->nsEarlyWakeUp2;
665 else
666 rc = VERR_CFGM_VALUE_NOT_FOUND;
667 return rc;
668}
669
670
671/**
672 * Acquire the 'used' lock in shared mode.
673 *
674 * This prevents destruction of the VM while we're in ring-0.
675 *
676 * @returns IPRT status code, see RTSemFastMutexRequest.
677 * @param a_pGVMM The GVMM instance data.
678 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
679 */
680#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
681
682/**
683 * Release the 'used' lock when owning it in shared mode.
684 *
685 * @returns IPRT status code, see RTSemFastMutexRequest.
686 * @param a_pGVMM The GVMM instance data.
687 * @sa GVMMR0_USED_SHARED_LOCK
688 */
689#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
690
691/**
692 * Acquire the 'used' lock in exclusive mode.
693 *
694 * Only use this function when making changes to the used list.
695 *
696 * @returns IPRT status code, see RTSemFastMutexRequest.
697 * @param a_pGVMM The GVMM instance data.
698 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
699 */
700#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
701
702/**
703 * Release the 'used' lock when owning it in exclusive mode.
704 *
705 * @returns IPRT status code, see RTSemFastMutexRelease.
706 * @param a_pGVMM The GVMM instance data.
707 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
708 */
709#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
710
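/*
 * Editorial illustration (not part of the original file): how the used list is
 * typically walked under the shared 'used' lock; the scheduler code later in
 * this file follows this pattern.  Assumes a validated pGVMM pointer.
 */
#if 0 /* example only, not compiled into the file */
    int rcLock = GVMMR0_USED_SHARED_LOCK(pGVMM);
    AssertRC(rcLock);
    for (uint16_t i = pGVMM->iUsedHead; i != 0 && i < RT_ELEMENTS(pGVMM->aHandles); i = pGVMM->aHandles[i].iNext)
    {
        PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
        /* ... examine pOtherGVM (may still be NULL while a VM is being created) ... */
        RT_NOREF(pOtherGVM);
    }
    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
#endif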
711
712/**
713 * Try acquire the 'create & destroy' lock.
714 *
715 * @returns IPRT status code, see RTSemFastMutexRequest.
716 * @param pGVMM The GVMM instance data.
717 */
718DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
719{
720 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
721 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
722 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
723 return rc;
724}
725
726
727/**
728 * Release the 'create & destroy' lock.
729 *
730 * @returns IPRT status code, see RTSemFastMutexRequest.
731 * @param pGVMM The GVMM instance data.
732 */
733DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
734{
735 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
736 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
737 AssertRC(rc);
738 return rc;
739}
740
741
742/**
743 * Request wrapper for the GVMMR0CreateVM API.
744 *
745 * @returns VBox status code.
746 * @param pReq The request buffer.
747 * @param pSession The session handle. The VM will be associated with this.
748 */
749GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
750{
751 /*
752 * Validate the request.
753 */
754 if (!RT_VALID_PTR(pReq))
755 return VERR_INVALID_POINTER;
756 if (pReq->Hdr.cbReq != sizeof(*pReq))
757 return VERR_INVALID_PARAMETER;
758 if (pReq->pSession != pSession)
759 return VERR_INVALID_POINTER;
760
761 /*
762 * Execute it.
763 */
764 PGVM pGVM;
765 pReq->pVMR0 = NULL;
766 pReq->pVMR3 = NIL_RTR3PTR;
767 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
768 if (RT_SUCCESS(rc))
769 {
770 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
771 pReq->pVMR3 = pGVM->pVMR3;
772 }
773 return rc;
774}
775
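/*
 * Editorial illustration (not part of the original file): a hedged sketch of
 * how ring-3 could fill in the request buffer consumed by GVMMR0CreateVMReq()
 * above.  Only the fields the validation code references (Hdr.cbReq, pSession,
 * cCpus, pVMR0, pVMR3) are shown; any further header initialization required by
 * the SUPVMMR0REQHDR conventions is assumed to be done as well.
 */
#if 0 /* example only, not compiled into the file */
    GVMMCREATEVMREQ CreateReq;
    RT_ZERO(CreateReq);
    CreateReq.Hdr.cbReq = sizeof(CreateReq);   /* checked against sizeof(*pReq) above */
    CreateReq.pSession  = pSession;            /* must match the session the request arrives on */
    CreateReq.cCpus     = 2;                   /* 1..VMM_MAX_CPU_COUNT */
    CreateReq.pVMR0     = NULL;                /* out */
    CreateReq.pVMR3     = NIL_RTR3PTR;         /* out */
    /* ... submit via the usual VMMR0 request path (not shown here) ... */
#endif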
776
777/**
778 * Allocates the VM structure and registers it with GVMM.
779 *
780 * The caller will become the VM owner and thereby the EMT.
781 *
782 * @returns VBox status code.
783 * @param pSession The support driver session.
784 * @param cCpus Number of virtual CPUs for the new VM.
785 * @param ppGVM Where to store the pointer to the VM structure.
786 *
787 * @thread EMT.
788 */
789GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
790{
791 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
792 PGVMM pGVMM;
793 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
794
795 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
796 *ppGVM = NULL;
797
798 if ( cCpus == 0
799 || cCpus > VMM_MAX_CPU_COUNT)
800 return VERR_INVALID_PARAMETER;
801
802 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
803 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
804 RTPROCESS ProcId = RTProcSelf();
805 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
806
807 /*
808 * The whole allocation process is protected by the lock.
809 */
810 int rc = gvmmR0CreateDestroyLock(pGVMM);
811 AssertRCReturn(rc, rc);
812
813 /*
814 * Only one VM per session.
815 */
816 if (SUPR0GetSessionVM(pSession) != NULL)
817 {
818 gvmmR0CreateDestroyUnlock(pGVMM);
819 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
820 return VERR_ALREADY_EXISTS;
821 }
822
823 /*
824 * Allocate a handle first so we don't waste resources unnecessarily.
825 */
826 uint16_t iHandle = pGVMM->iFreeHead;
827 if (iHandle)
828 {
829 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
830
831 /* consistency checks, a bit paranoid as always. */
832 if ( !pHandle->pGVM
833 && !pHandle->pvObj
834 && pHandle->iSelf == iHandle)
835 {
836 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
837 if (pHandle->pvObj)
838 {
839 /*
840 * Move the handle from the free to used list and perform permission checks.
841 */
842 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
843 AssertRC(rc);
844
845 pGVMM->iFreeHead = pHandle->iNext;
846 pHandle->iNext = pGVMM->iUsedHead;
847 pGVMM->iUsedHead = iHandle;
848 pGVMM->cVMs++;
849
850 pHandle->pGVM = NULL;
851 pHandle->pSession = pSession;
852 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
853 pHandle->ProcId = NIL_RTPROCESS;
854
855 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
856
857 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
858 if (RT_SUCCESS(rc))
859 {
860 /*
861 * Allocate memory for the VM structure (combined VM + GVM).
862 */
863 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
864 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
865 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
866 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
867 if (RT_SUCCESS(rc))
868 {
869 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
870 AssertPtr(pGVM);
871
872 /*
873 * Initialise the structure.
874 */
875 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
876 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
877 pGVM->gvmm.s.VMMemObj = hVMMemObj;
878 rc = GMMR0InitPerVMData(pGVM);
879 int rc2 = PGMR0InitPerVMData(pGVM);
880 int rc3 = VMMR0InitPerVMData(pGVM);
881 DBGFR0InitPerVMData(pGVM);
882 PDMR0InitPerVMData(pGVM);
883 IOMR0InitPerVMData(pGVM);
884 TMR0InitPerVMData(pGVM);
885 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
886 {
887 /*
888 * Allocate page array.
889 * This currently has to be made available to ring-3, but this should change eventually.
890 */
891 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
892 if (RT_SUCCESS(rc))
893 {
894 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
895 for (uint32_t iPage = 0; iPage < cPages; iPage++)
896 {
897 paPages[iPage].uReserved = 0;
898 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
899 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
900 }
901
902 /*
903 * Map the page array, VM and VMCPU structures into ring-3.
904 */
905 AssertCompileSizeAlignment(VM, PAGE_SIZE);
906 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
907 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
908 0 /*offSub*/, sizeof(VM));
909 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
910 {
911 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
912 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
913 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
914 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
915 }
916 if (RT_SUCCESS(rc))
917 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
918 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
919 NIL_RTR0PROCESS);
920 if (RT_SUCCESS(rc))
921 {
922 /*
923 * Initialize all the VM pointers.
924 */
925 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
926 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
927
928 for (VMCPUID i = 0; i < cCpus; i++)
929 {
930 pGVM->aCpus[i].pVMR0 = pGVM;
931 pGVM->aCpus[i].pVMR3 = pVMR3;
932 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
933 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
934 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
935 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
936 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
937 }
938
939 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
940 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
941 ("%p\n", pGVM->paVMPagesR3));
942
943 /*
944 * Complete the handle - take the UsedLock sem just to be careful.
945 */
946 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
947 AssertRC(rc);
948
949 pHandle->pGVM = pGVM;
950 pHandle->hEMT0 = hEMT0;
951 pHandle->ProcId = ProcId;
952 pGVM->pVMR3 = pVMR3;
953 pGVM->pVMR3Unsafe = pVMR3;
954 pGVM->aCpus[0].hEMT = hEMT0;
955 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
956 pGVM->aCpus[0].cEmtHashCollisions = 0;
957 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
958 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
959 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
960 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
961 pGVMM->cEMTs += cCpus;
962
963 /* Associate it with the session and create the context hook for EMT0. */
964 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
965 if (RT_SUCCESS(rc))
966 {
967 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
968 if (RT_SUCCESS(rc))
969 {
970 /*
971 * Done!
972 */
973 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
974
975 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
976 gvmmR0CreateDestroyUnlock(pGVMM);
977
978 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
979
980 *ppGVM = pGVM;
981 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
982 return VINF_SUCCESS;
983 }
984
985 SUPR0SetSessionVM(pSession, NULL, NULL);
986 }
987 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
988 }
989
990 /* Cleanup mappings. */
991 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
992 {
993 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
994 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
995 }
996 for (VMCPUID i = 0; i < cCpus; i++)
997 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
998 {
999 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1000 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1001 }
1002 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1003 {
1004 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1005 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1006 }
1007 }
1008 }
1009 else
1010 {
1011 if (RT_SUCCESS_NP(rc))
1012 rc = rc2;
1013 if (RT_SUCCESS_NP(rc))
1014 rc = rc3;
1015 }
1016 }
1017 }
1018 /* else: The user wasn't permitted to create this VM. */
1019
1020 /*
1021 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1022 * object reference here. A little extra mess because of the non-recursive lock.
1023 */
1024 void *pvObj = pHandle->pvObj;
1025 pHandle->pvObj = NULL;
1026 gvmmR0CreateDestroyUnlock(pGVMM);
1027
1028 SUPR0ObjRelease(pvObj, pSession);
1029
1030 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1031 return rc;
1032 }
1033
1034 rc = VERR_NO_MEMORY;
1035 }
1036 else
1037 rc = VERR_GVMM_IPE_1;
1038 }
1039 else
1040 rc = VERR_GVM_TOO_MANY_VMS;
1041
1042 gvmmR0CreateDestroyUnlock(pGVMM);
1043 return rc;
1044}
1045
1046
1047/**
1048 * Initializes the per VM data belonging to GVMM.
1049 *
1050 * @param pGVM Pointer to the global VM structure.
1051 * @param hSelf The handle.
1052 * @param cCpus The CPU count.
1053 * @param pSession The session this VM is associated with.
1054 */
1055static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1056{
1057 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1058 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1059 AssertCompileMemberAlignment(VM, cpum, 64);
1060 AssertCompileMemberAlignment(VM, tm, 64);
1061
1062 /* GVM: */
1063 pGVM->u32Magic = GVM_MAGIC;
1064 pGVM->hSelf = hSelf;
1065 pGVM->cCpus = cCpus;
1066 pGVM->pSession = pSession;
1067 pGVM->pSelf = pGVM;
1068
1069 /* VM: */
1070 pGVM->enmVMState = VMSTATE_CREATING;
1071 pGVM->hSelfUnsafe = hSelf;
1072 pGVM->pSessionUnsafe = pSession;
1073 pGVM->pVMR0ForCall = pGVM;
1074 pGVM->cCpusUnsafe = cCpus;
1075 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1076 pGVM->uStructVersion = 1;
1077 pGVM->cbSelf = sizeof(VM);
1078 pGVM->cbVCpu = sizeof(VMCPU);
1079
1080 /* GVMM: */
1081 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1082 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1083 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1084 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1085 pGVM->gvmm.s.fDoneVMMR0Init = false;
1086 pGVM->gvmm.s.fDoneVMMR0Term = false;
1087 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1088 {
1089 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1090 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1091 }
1092
1093 /*
1094 * Per virtual CPU.
1095 */
1096 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1097 {
1098 pGVM->aCpus[i].idCpu = i;
1099 pGVM->aCpus[i].idCpuUnsafe = i;
1100 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1101 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1102 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1103 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1104 pGVM->aCpus[i].pGVM = pGVM;
1105 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1106 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1107 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1108 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1109 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1110 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1111 }
1112}
1113
1114
1115/**
1116 * Does the VM initialization.
1117 *
1118 * @returns VBox status code.
1119 * @param pGVM The global (ring-0) VM structure.
1120 */
1121GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1122{
1123 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1124
1125 int rc = VERR_INTERNAL_ERROR_3;
1126 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1127 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1128 {
1129 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1130 {
1131 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1132 if (RT_FAILURE(rc))
1133 {
1134 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1135 break;
1136 }
1137 }
1138 }
1139 else
1140 rc = VERR_WRONG_ORDER;
1141
1142 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1143 return rc;
1144}
1145
1146
1147/**
1148 * Indicates that we're done with the ring-0 initialization
1149 * of the VM.
1150 *
1151 * @param pGVM The global (ring-0) VM structure.
1152 * @thread EMT(0)
1153 */
1154GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1155{
1156 /* Set the indicator. */
1157 pGVM->gvmm.s.fDoneVMMR0Init = true;
1158}
1159
1160
1161/**
1162 * Indicates that we're doing the ring-0 termination of the VM.
1163 *
1164 * @returns true if termination hasn't been done already, false if it has.
1165 * @param pGVM Pointer to the global VM structure. Optional.
1166 * @thread EMT(0) or session cleanup thread.
1167 */
1168GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1169{
1170 /* Validate the VM structure, state and handle. */
1171 AssertPtrReturn(pGVM, false);
1172
1173 /* Set the indicator. */
1174 if (pGVM->gvmm.s.fDoneVMMR0Term)
1175 return false;
1176 pGVM->gvmm.s.fDoneVMMR0Term = true;
1177 return true;
1178}
1179
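/*
 * Editorial illustration (not part of the original file): the intended call
 * pattern for the once-only termination indicator above, e.g. from a ring-0
 * termination path.  Sketch only.
 */
#if 0 /* example only, not compiled into the file */
    if (GVMMR0DoingTermVM(pGVM))
    {
        /* First caller: perform the ring-0 termination work exactly once. */
    }
    /* else: somebody already did (or is doing) the termination. */
#endif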
1180
1181/**
1182 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1183 *
1184 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1185 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1186 * would've been nice if the caller was actually the EMT thread or if we somehow
1187 * could've associated the calling thread with the VM up front.
1188 *
1189 * @returns VBox status code.
1190 * @param pGVM The global (ring-0) VM structure.
1191 *
1192 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1193 */
1194GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1195{
1196 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1197 PGVMM pGVMM;
1198 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1199
1200 /*
1201 * Validate the VM structure, state and caller.
1202 */
1203 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1204 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1205 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1206 VERR_WRONG_ORDER);
1207
1208 uint32_t hGVM = pGVM->hSelf;
1209 ASMCompilerBarrier();
1210 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1211 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1212
1213 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1214 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1215
1216 RTPROCESS ProcId = RTProcSelf();
1217 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1218 AssertReturn( ( pHandle->hEMT0 == hSelf
1219 && pHandle->ProcId == ProcId)
1220 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1221
1222 /*
1223 * Lookup the handle and destroy the object.
1224 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1225 * object, we take some precautions against racing callers just in case...
1226 */
1227 int rc = gvmmR0CreateDestroyLock(pGVMM);
1228 AssertRC(rc);
1229
1230 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1231 if ( pHandle->pGVM == pGVM
1232 && ( ( pHandle->hEMT0 == hSelf
1233 && pHandle->ProcId == ProcId)
1234 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1235 && RT_VALID_PTR(pHandle->pvObj)
1236 && RT_VALID_PTR(pHandle->pSession)
1237 && RT_VALID_PTR(pHandle->pGVM)
1238 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1239 {
1240 /* Check that other EMTs have deregistered. */
1241 uint32_t cNotDeregistered = 0;
1242 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1243 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1244 if (cNotDeregistered == 0)
1245 {
1246 /* Grab the object pointer. */
1247 void *pvObj = pHandle->pvObj;
1248 pHandle->pvObj = NULL;
1249 gvmmR0CreateDestroyUnlock(pGVMM);
1250
1251 SUPR0ObjRelease(pvObj, pHandle->pSession);
1252 }
1253 else
1254 {
1255 gvmmR0CreateDestroyUnlock(pGVMM);
1256 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1257 }
1258 }
1259 else
1260 {
1261 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1262 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1263 gvmmR0CreateDestroyUnlock(pGVMM);
1264 rc = VERR_GVMM_IPE_2;
1265 }
1266
1267 return rc;
1268}
1269
1270
1271/**
1272 * Performs VM cleanup tasks as part of object destruction.
1273 *
1274 * @param pGVM The GVM pointer.
1275 */
1276static void gvmmR0CleanupVM(PGVM pGVM)
1277{
1278 if ( pGVM->gvmm.s.fDoneVMMR0Init
1279 && !pGVM->gvmm.s.fDoneVMMR0Term)
1280 {
1281 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1282 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1283 {
1284 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1285 VMMR0TermVM(pGVM, NIL_VMCPUID);
1286 }
1287 else
1288 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1289 }
1290
1291 GMMR0CleanupVM(pGVM);
1292#ifdef VBOX_WITH_NEM_R0
1293 NEMR0CleanupVM(pGVM);
1294#endif
1295 PDMR0CleanupVM(pGVM);
1296 IOMR0CleanupVM(pGVM);
1297 DBGFR0CleanupVM(pGVM);
1298 PGMR0CleanupVM(pGVM);
1299 TMR0CleanupVM(pGVM);
1300 VMMR0CleanupVM(pGVM);
1301}
1302
1303
1304/**
1305 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1306 *
1307 * pvUser1 is the GVM instance pointer.
1308 * pvUser2 is the handle pointer.
1309 */
1310static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1311{
1312 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1313
1314 NOREF(pvObj);
1315
1316 /*
1317 * Some quick, paranoid, input validation.
1318 */
1319 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1320 AssertPtr(pHandle);
1321 PGVMM pGVMM = (PGVMM)pvUser1;
1322 Assert(pGVMM == g_pGVMM);
1323 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1324 if ( !iHandle
1325 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1326 || iHandle != pHandle->iSelf)
1327 {
1328 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1329 return;
1330 }
1331
1332 int rc = gvmmR0CreateDestroyLock(pGVMM);
1333 AssertRC(rc);
1334 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1335 AssertRC(rc);
1336
1337 /*
1338 * This is a tad slow but a doubly linked list is too much hassle.
1339 */
1340 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1341 {
1342 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1343 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1344 gvmmR0CreateDestroyUnlock(pGVMM);
1345 return;
1346 }
1347
1348 if (pGVMM->iUsedHead == iHandle)
1349 pGVMM->iUsedHead = pHandle->iNext;
1350 else
1351 {
1352 uint16_t iPrev = pGVMM->iUsedHead;
1353 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1354 while (iPrev)
1355 {
1356 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1357 {
1358 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1359 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1360 gvmmR0CreateDestroyUnlock(pGVMM);
1361 return;
1362 }
1363 if (RT_UNLIKELY(c-- <= 0))
1364 {
1365 iPrev = 0;
1366 break;
1367 }
1368
1369 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1370 break;
1371 iPrev = pGVMM->aHandles[iPrev].iNext;
1372 }
1373 if (!iPrev)
1374 {
1375 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1376 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1377 gvmmR0CreateDestroyUnlock(pGVMM);
1378 return;
1379 }
1380
1381 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1382 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1383 }
1384 pHandle->iNext = 0;
1385 pGVMM->cVMs--;
1386
1387 /*
1388 * Do the global cleanup round.
1389 */
1390 PGVM pGVM = pHandle->pGVM;
1391 if ( RT_VALID_PTR(pGVM)
1392 && pGVM->u32Magic == GVM_MAGIC)
1393 {
1394 pGVMM->cEMTs -= pGVM->cCpus;
1395
1396 if (pGVM->pSession)
1397 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1398
1399 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1400
1401 gvmmR0CleanupVM(pGVM);
1402
1403 /*
1404 * Do the GVMM cleanup - must be done last.
1405 */
1406 /* The VM and VM pages mappings/allocations. */
1407 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1408 {
1409 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1410 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1411 }
1412
1413 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1414 {
1415 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1416 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1417 }
1418
1419 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1420 {
1421 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1422 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1423 }
1424
1425 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1426 {
1427 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1428 {
1429 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1430 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1431 }
1432 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1433 {
1434 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1435 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1436 }
1437 }
1438
1439 /* the GVM structure itself. */
1440 pGVM->u32Magic |= UINT32_C(0x80000000);
1441 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1442 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1443 pGVM = NULL;
1444
1445 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1446 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1447 AssertRC(rc);
1448 }
1449 /* else: GVMMR0CreateVM cleanup. */
1450
1451 /*
1452 * Free the handle.
1453 */
1454 pHandle->iNext = pGVMM->iFreeHead;
1455 pGVMM->iFreeHead = iHandle;
1456 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1457 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1458 ASMAtomicWriteNullPtr(&pHandle->pSession);
1459 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1460 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1461
1462 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1463 gvmmR0CreateDestroyUnlock(pGVMM);
1464 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1465}
1466
1467
1468/**
1469 * Registers the calling thread as the EMT of a Virtual CPU.
1470 *
1471 * Note that VCPU 0 is automatically registered during VM creation.
1472 *
1473 * @returns VBox status code
1474 * @param pGVM The global (ring-0) VM structure.
1475 * @param idCpu VCPU id to register the current thread as.
1476 */
1477GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1478{
1479 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1480
1481 /*
1482 * Validate the VM structure, state and handle.
1483 */
1484 PGVMM pGVMM;
1485 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1486 if (RT_SUCCESS(rc))
1487 {
1488 if (idCpu < pGVM->cCpus)
1489 {
1490 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1491
1492 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1493
1494 /* Check that the EMT isn't already assigned to a thread. */
1495 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1496 {
1497 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1498
1499 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1500 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1501 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1502 if (RT_SUCCESS(rc))
1503 {
1504 /*
1505 * Do the assignment, then try to set up the hook. Undo if that fails.
1506 */
1507 unsigned cCollisions = 0;
1508 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1509 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1510 {
1511 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1512 do
1513 {
1514 cCollisions++;
1515 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1516 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1517 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1518 }
1519 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1520 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1521 pGVM->aCpus[idCpu].hNativeThreadR0 = hNativeSelf;
1522 pGVM->aCpus[idCpu].hEMT = hNativeSelf;
1523 pGVM->aCpus[idCpu].cEmtHashCollisions = (uint8_t)cCollisions;
1524 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1525
1526 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1527 if (RT_SUCCESS(rc))
1528 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1529 else
1530 {
1531 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1532 pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1533 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1534 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1535 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = UINT16_MAX;
1536 }
1537 }
1538 }
1539 else
1540 rc = VERR_ACCESS_DENIED;
1541
1542 gvmmR0CreateDestroyUnlock(pGVMM);
1543 }
1544 else
1545 rc = VERR_INVALID_CPU_ID;
1546 }
1547 return rc;
1548}
1549
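/*
 * Editorial illustration (not part of the original file): the lookup side of
 * the open-addressing EMT hash populated above, sketched with the same
 * GVMM_EMT_HASH_1/GVMM_EMT_HASH_2 double hashing and a bounded probe.  The
 * helper name is hypothetical; the real lookup helpers live elsewhere in GVMM.
 */
#if 0 /* example only, not compiled into the file */
static VMCPUID gvmmR0ExampleEmtHashLookup(PGVM pGVM, RTNATIVETHREAD hNativeSelf)
{
    uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
    if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
        return pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;

    /* Probe with the secondary hash, at most GVMM_EMT_HASH_SIZE - 1 times. */
    uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
    for (uint32_t cProbes = 1; cProbes < GVMM_EMT_HASH_SIZE; cProbes++)
    {
        idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
        if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
            return pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
    }
    return NIL_VMCPUID; /* calling thread is not a registered EMT */
}
#endif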
1550
1551/**
1552 * Deregisters the calling thread as the EMT of a Virtual CPU.
1553 *
1554 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1555 *
1556 * @returns VBox status code
1557 * @param pGVM The global (ring-0) VM structure.
1558 * @param idCpu VCPU id to deregister the current thread as.
1559 */
1560GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1561{
1562 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1563
1564 /*
1565 * Validate the VM structure, state and handle.
1566 */
1567 PGVMM pGVMM;
1568 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1569 if (RT_SUCCESS(rc))
1570 {
1571 /*
1572 * Take the destruction lock and recheck the handle state to
1573 * prevent racing GVMMR0DestroyVM.
1574 */
1575 gvmmR0CreateDestroyLock(pGVMM);
1576
1577 uint32_t hSelf = pGVM->hSelf;
1578 ASMCompilerBarrier();
1579 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1580 && pGVMM->aHandles[hSelf].pvObj != NULL
1581 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1582 {
1583 /*
1584 * Do per-EMT cleanups.
1585 */
1586 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1587
1588 /*
1589 * Invalidate hEMT. We don't use NIL here as that would allow
1590 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1591 */
1592 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1593 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1594
1595 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1596 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1597 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1598 }
1599
1600 gvmmR0CreateDestroyUnlock(pGVMM);
1601 }
1602 return rc;
1603}
1604
1605
1606/**
1607 * Lookup a GVM structure by its handle.
1608 *
1609 * @returns The GVM pointer on success, NULL on failure.
1610 * @param hGVM The global VM handle. Asserts on bad handle.
1611 */
1612GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1613{
1614 PGVMM pGVMM;
1615 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1616
1617 /*
1618 * Validate.
1619 */
1620 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1621 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1622
1623 /*
1624 * Look it up.
1625 */
1626 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1627 AssertPtrReturn(pHandle->pvObj, NULL);
1628 PGVM pGVM = pHandle->pGVM;
1629 AssertPtrReturn(pGVM, NULL);
1630
1631 return pGVM;
1632}
1633
1634
1635/**
1636 * Check that the given GVM and VM structures match up.
1637 *
1638 * The calling thread must be in the same process as the VM. All current lookups
1639 * are by threads inside the same process, so this will not be an issue.
1640 *
1641 * @returns VBox status code.
1642 * @param pGVM The global (ring-0) VM structure.
1643 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1644 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1645 * shared mode when requested.
1646 *
1647 * Be very careful if not taking the lock as it's
1648 * possible that the VM will disappear then!
1649 *
1650 * @remark This will not assert on an invalid pGVM but try to return silently.
1651 */
1652static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1653{
1654 /*
1655 * Check the pointers.
1656 */
1657 int rc;
1658 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1659 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1660 {
1661 /*
1662 * Get the pGVMM instance and check the VM handle.
1663 */
1664 PGVMM pGVMM;
1665 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1666
1667 uint16_t hGVM = pGVM->hSelf;
1668 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1669 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1670 {
1671 RTPROCESS const pidSelf = RTProcSelf();
1672 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1673 if (fTakeUsedLock)
1674 {
1675 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1676 AssertRCReturn(rc, rc);
1677 }
1678
1679 if (RT_LIKELY( pHandle->pGVM == pGVM
1680 && pHandle->ProcId == pidSelf
1681 && RT_VALID_PTR(pHandle->pvObj)))
1682 {
1683 /*
1684 * Some more VM data consistency checks.
1685 */
1686 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1687 && pGVM->hSelfUnsafe == hGVM
1688 && pGVM->pSelf == pGVM))
1689 {
1690 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1691 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1692 {
1693 *ppGVMM = pGVMM;
1694 return VINF_SUCCESS;
1695 }
1696 rc = VERR_INCONSISTENT_VM_HANDLE;
1697 }
1698 else
1699 rc = VERR_INCONSISTENT_VM_HANDLE;
1700 }
1701 else
1702 rc = VERR_INVALID_VM_HANDLE;
1703
1704 if (fTakeUsedLock)
1705 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1706 }
1707 else
1708 rc = VERR_INVALID_VM_HANDLE;
1709 }
1710 else
1711 rc = VERR_INVALID_POINTER;
1712 return rc;
1713}
1714
1715
1716/**
1717 * Validates a GVM/VM pair.
1718 *
1719 * @returns VBox status code.
1720 * @param pGVM The global (ring-0) VM structure.
1721 */
1722GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1723{
1724 PGVMM pGVMM;
1725 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1726}
1727
1728
1729/**
1730 * Check that the given GVM and VM structures match up.
1731 *
1732 * The calling thread must be in the same process as the VM. All current lookups
1733 * are by threads inside the same process, so this will not be an issue.
1734 *
1735 * @returns VBox status code.
1736 * @param pGVM The global (ring-0) VM structure.
1737 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1738 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1739 * @thread EMT
1740 *
1741 * @remarks This will assert in all failure paths.
1742 */
1743static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1744{
1745 /*
1746 * Check the pointers.
1747 */
1748 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1749 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1750
1751 /*
1752 * Get the pGVMM instance and check the VM handle.
1753 */
1754 PGVMM pGVMM;
1755 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1756
1757 uint16_t hGVM = pGVM->hSelf;
1758 ASMCompilerBarrier();
1759 AssertReturn( hGVM != NIL_GVM_HANDLE
1760 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1761
1762 RTPROCESS const pidSelf = RTProcSelf();
1763 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1764 AssertReturn( pHandle->pGVM == pGVM
1765 && pHandle->ProcId == pidSelf
1766 && RT_VALID_PTR(pHandle->pvObj),
1767 VERR_INVALID_HANDLE);
1768
1769 /*
1770 * Check the EMT claim.
1771 */
1772 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1773 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1774 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1775
1776 /*
1777 * Some more VM data consistency checks.
1778 */
1779 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1780 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1781 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1782 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1783
1784 *ppGVMM = pGVMM;
1785 return VINF_SUCCESS;
1786}
1787
1788
1789/**
1790 * Validates a GVM/EMT pair.
1791 *
1792 * @returns VBox status code.
1793 * @param pGVM The global (ring-0) VM structure.
1794 * @param idCpu The Virtual CPU ID of the calling EMT.
1795 * @thread EMT(idCpu)
1796 */
1797GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1798{
1799 PGVMM pGVMM;
1800 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1801}
1802
1803
1804/**
1805 * Looks up the VM belonging to the specified EMT thread.
1806 *
1807 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1808 * unnecessary kernel panics when the EMT thread hits an assertion. The
1809 * caller may or may not be an EMT thread.
1810 *
1811 * @returns Pointer to the VM on success, NULL on failure.
1812 * @param hEMT The native thread handle of the EMT.
1813 * NIL_RTNATIVETHREAD means the current thread
1814 */
1815GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1816{
1817 /*
1818 * No assertions here as we're usually called in an AssertMsgN or
1819 * RTAssert* context.
1820 */
1821 PGVMM pGVMM = g_pGVMM;
1822 if ( !RT_VALID_PTR(pGVMM)
1823 || pGVMM->u32Magic != GVMM_MAGIC)
1824 return NULL;
1825
1826 if (hEMT == NIL_RTNATIVETHREAD)
1827 hEMT = RTThreadNativeSelf();
1828 RTPROCESS ProcId = RTProcSelf();
1829
1830 /*
1831 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1832 */
1833/** @todo introduce some pid hash table here, please. */
1834 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1835 {
1836 if ( pGVMM->aHandles[i].iSelf == i
1837 && pGVMM->aHandles[i].ProcId == ProcId
1838 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1839 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1840 {
1841 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1842 return pGVMM->aHandles[i].pGVM;
1843
1844 /* This is fairly safe with the current process-per-VM approach. */
1845 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1846 VMCPUID const cCpus = pGVM->cCpus;
1847 ASMCompilerBarrier();
1848 if ( cCpus < 1
1849 || cCpus > VMM_MAX_CPU_COUNT)
1850 continue;
1851 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1852 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1853 return pGVMM->aHandles[i].pGVM;
1854 }
1855 }
1856 return NULL;
1857}
1858
1859
1860/**
1861 * Looks up the GVMCPU belonging to the specified EMT thread.
1862 *
1863 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1864 * unnecessary kernel panics when the EMT thread hits an assertion. The
1865 * caller may or may not be an EMT thread.
1866 *
1867 * @returns Pointer to the GVMCPU on success, NULL on failure.
1868 * @param hEMT The native thread handle of the EMT.
1869 * NIL_RTNATIVETHREAD means the current thread
1870 */
1871GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1872{
1873 /*
1874 * No assertions here as we're usually called in AssertMsgN,
1875 * RTAssert*, Log and LogRel contexts.
1876 */
1877 PGVMM pGVMM = g_pGVMM;
1878 if ( !RT_VALID_PTR(pGVMM)
1879 || pGVMM->u32Magic != GVMM_MAGIC)
1880 return NULL;
1881
1882 if (hEMT == NIL_RTNATIVETHREAD)
1883 hEMT = RTThreadNativeSelf();
1884 RTPROCESS ProcId = RTProcSelf();
1885
1886 /*
1887 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1888 */
1889/** @todo introduce some pid hash table here, please. */
1890 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1891 {
1892 if ( pGVMM->aHandles[i].iSelf == i
1893 && pGVMM->aHandles[i].ProcId == ProcId
1894 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1895 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1896 {
1897 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1898 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1899 return &pGVM->aCpus[0];
1900
1901 /* This is fairly safe with the current process-per-VM approach. */
1902 VMCPUID const cCpus = pGVM->cCpus;
1903 ASMCompilerBarrier();
1904 ASMCompilerBarrier();
1905 if ( cCpus < 1
1906 || cCpus > VMM_MAX_CPU_COUNT)
1907 continue;
1908 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1909 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1910 return &pGVM->aCpus[idCpu];
1911 }
1912 }
1913 return NULL;
1914}
1915
1916
1917/**
1918 * Get the GVMCPU structure for the given EMT.
1919 *
1920 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
1921 * @param pGVM The global (ring-0) VM structure.
1922 * @param hEMT The native thread handle of the EMT.
1923 * NIL_RTNATIVETHREAD means the current thread
1924 */
1925GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
1926{
1927 /*
1928 * Validate & adjust input.
1929 */
1930 AssertPtr(pGVM);
1931 Assert(pGVM->u32Magic == GVM_MAGIC);
1932 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
1933 {
1934 hEMT = RTThreadNativeSelf();
1935 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
1936 }
1937
1938 /*
1939 * Find the matching hash table entry.
1940 */
1941 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
1942 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
1943 { /* likely */ }
1944 else
1945 {
1946#ifdef VBOX_STRICT
1947 unsigned cCollisions = 0;
1948#endif
1949 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
1950 for (;;)
1951 {
1952 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
1953 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1954 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
1955 break;
1956 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
1957 {
1958#ifdef VBOX_STRICT
1959 uint32_t idxCpu = pGVM->cCpus;
1960 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
1961 while (idxCpu-- > 0)
1962 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
1963#endif
1964 return NULL;
1965 }
1966 }
1967 }
1968
1969 /*
1970 * Validate the VCpu number and translate it into a pointer.
1971 */
1972 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
1973 AssertReturn(idCpu < pGVM->cCpus, NULL);
1974 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
1975 Assert(pGVCpu->hNativeThreadR0 == hEMT);
1976 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
1977 return pGVCpu;
1978}
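/*
 * Illustration only (not part of the build): the lookup above walks an open
 * addressed hash table with double hashing.  The standalone sketch below uses
 * a plain modulo in place of the real GVMM_EMT_HASH_1/GVMM_EMT_HASH_2 macros
 * and assumes cEntries >= 2; it only models the probe sequence and the
 * "empty slot ends the chain" rule.
 */
#if 0 /* example sketch */
static int32_t exampleEmtHashLookup(RTNATIVETHREAD const *pahTable, uint32_t cEntries, RTNATIVETHREAD hEmt)
{
    uint32_t       idx  = (uint32_t)((uintptr_t)hEmt % cEntries);           /* primary hash */
    uint32_t const step = (uint32_t)((uintptr_t)hEmt % (cEntries - 1)) + 1; /* secondary hash, 1..cEntries-1 */
    for (uint32_t cProbes = 0; cProbes < cEntries; cProbes++)
    {
        if (pahTable[idx] == hEmt)
            return (int32_t)idx;                /* hit */
        if (pahTable[idx] == NIL_RTNATIVETHREAD)
            return -1;                          /* empty slot terminates the probe chain */
        idx = (idx + step) % cEntries;          /* double hashing probe step */
    }
    return -1;                                  /* table exhausted, not found */
}
#endif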
1979
1980
1981/**
1982 * This will wake up expired and soon-to-be expired VMs.
1983 *
1984 * @returns Number of VMs that have been woken up.
1985 * @param pGVMM Pointer to the GVMM instance data.
1986 * @param u64Now The current time.
1987 */
1988static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1989{
1990 /*
1991 * Skip this if we've been disabled, either because of high resolution wakeups or by
1992 * the user.
1993 */
1994 if (!pGVMM->fDoEarlyWakeUps)
1995 return 0;
1996
1997/** @todo Rewrite this algorithm. See performance defect XYZ. */
1998
1999 /*
2000 * A cheap optimization to stop wasting so much time here on big setups.
2001 */
2002 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2003 if ( pGVMM->cHaltedEMTs == 0
2004 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2005 return 0;
2006
2007 /*
2008 * Only one thread doing this at a time.
2009 */
2010 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2011 return 0;
2012
2013 /*
2014 * The first pass will wake up VMs which have actually expired
2015 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2016 */
2017 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2018 uint64_t u64Min = UINT64_MAX;
2019 unsigned cWoken = 0;
2020 unsigned cHalted = 0;
2021 unsigned cTodo2nd = 0;
2022 unsigned cTodo3rd = 0;
2023 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2024 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2025 i = pGVMM->aHandles[i].iNext)
2026 {
2027 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2028 if ( RT_VALID_PTR(pCurGVM)
2029 && pCurGVM->u32Magic == GVM_MAGIC)
2030 {
2031 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2032 {
2033 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2034 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2035 if (u64)
2036 {
2037 if (u64 <= u64Now)
2038 {
2039 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2040 {
2041 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2042 AssertRC(rc);
2043 cWoken++;
2044 }
2045 }
2046 else
2047 {
2048 cHalted++;
2049 if (u64 <= uNsEarlyWakeUp1)
2050 cTodo2nd++;
2051 else if (u64 <= uNsEarlyWakeUp2)
2052 cTodo3rd++;
2053 else if (u64 < u64Min)
2054                                u64Min = u64; /* remember the earliest pending wakeup */
2055 }
2056 }
2057 }
2058 }
2059 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2060 }
2061
2062 if (cTodo2nd)
2063 {
2064 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2065 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2066 i = pGVMM->aHandles[i].iNext)
2067 {
2068 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2069 if ( RT_VALID_PTR(pCurGVM)
2070 && pCurGVM->u32Magic == GVM_MAGIC)
2071 {
2072 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2073 {
2074 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2075 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2076 if ( u64
2077 && u64 <= uNsEarlyWakeUp1)
2078 {
2079 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2080 {
2081 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2082 AssertRC(rc);
2083 cWoken++;
2084 }
2085 }
2086 }
2087 }
2088 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2089 }
2090 }
2091
2092 if (cTodo3rd)
2093 {
2094 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2095 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2096 i = pGVMM->aHandles[i].iNext)
2097 {
2098 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2099 if ( RT_VALID_PTR(pCurGVM)
2100 && pCurGVM->u32Magic == GVM_MAGIC)
2101 {
2102 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2103 {
2104 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2105 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2106 if ( u64
2107 && u64 <= uNsEarlyWakeUp2)
2108 {
2109 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2110 {
2111 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2112 AssertRC(rc);
2113 cWoken++;
2114 }
2115 }
2116 }
2117 }
2118 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2119 }
2120 }
2121
2122 /*
2123 * Set the minimum value.
2124 */
2125 pGVMM->uNsNextEmtWakeup = u64Min;
2126
2127 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2128 return cWoken;
2129}
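/*
 * Illustration only (the nanosecond values are assumed, not the configured
 * defaults): with u64Now = 1 000 000 000, nsEarlyWakeUp1 = 25 000 and
 * nsEarlyWakeUp2 = 50 000, a halted EMT whose u64HaltExpire is
 *     <= 1 000 000 000  is signalled in the first pass (already expired),
 *     <= 1 000 025 000  is signalled in the second pass,
 *     <= 1 000 050 000  is signalled in the third pass,
 * while any later expiry is left sleeping and only competes for the
 * uNsNextEmtWakeup minimum used by the cheap early-out at the top.
 */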
2130
2131
2132/**
2133 * Halt the EMT thread.
2134 *
2135 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2136 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2137 * @param pGVM The global (ring-0) VM structure.
2138 * @param pGVCpu The global (ring-0) CPU structure of the calling
2139 * EMT.
2140 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2141 * @thread EMT(pGVCpu).
2142 */
2143GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2144{
2145 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2146 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2147 GVMM_CHECK_SMAP_SETUP();
2148 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2149
2150 PGVMM pGVMM;
2151 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2152
2153 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2154 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2155
2156 /*
2157 * If we're doing early wake-ups, we must take the UsedList lock before we
2158 * start querying the current time.
2159 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2160 */
2161 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2162 if (fDoEarlyWakeUps)
2163 {
2164 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2165 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2166 }
2167
2168 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2169
2170 /* GIP hack: We might frequently be sleeping for short intervals where the
2171 difference between GIP and system time matters on systems with high resolution
2172 system time. So, convert the input from GIP to System time in that case. */
2173 Assert(ASMGetFlags() & X86_EFL_IF);
2174 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2175 const uint64_t u64NowGip = RTTimeNanoTS();
2176 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2177
2178 if (fDoEarlyWakeUps)
2179 {
2180 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2181 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2182 }
2183
2184 /*
2185 * Go to sleep if we must...
2186 * Cap the sleep time to 1 second to be on the safe side.
2187 */
2188 int rc;
2189 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2190 if ( u64NowGip < u64ExpireGipTime
2191 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2192 ? pGVMM->nsMinSleepCompany
2193 : pGVMM->nsMinSleepAlone))
2194 {
2195 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2196 if (cNsInterval > RT_NS_1SEC)
2197 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2198 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2199 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2200 if (fDoEarlyWakeUps)
2201 {
2202 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2203 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2204 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2205 }
2206 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2207
2208 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2209 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2210 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2211 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2212
2213 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2214 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2215
2216 /* Reset the semaphore to try to prevent a few false wake-ups. */
2217 if (rc == VINF_SUCCESS)
2218 {
2219 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2220 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2221 }
2222 else if (rc == VERR_TIMEOUT)
2223 {
2224 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2225 rc = VINF_SUCCESS;
2226 }
2227 }
2228 else
2229 {
2230 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2231 if (fDoEarlyWakeUps)
2232 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2233 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2234 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2235 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2236 rc = VINF_SUCCESS;
2237 }
2238
2239 return rc;
2240}
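/*
 * Illustration only (numbers assumed, not the configured defaults): with
 * nsMinSleepAlone = 750 000 ns and an expiry only 500 000 ns away, the EMT
 * above does not block at all and merely resets its semaphore; with an
 * expiry 2 500 000 000 ns away, the deadline is first capped to
 * u64NowGip + RT_NS_1SEC before the interruptible absolute wait is armed.
 */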
2241
2242
2243/**
2244 * Halt the EMT thread.
2245 *
2246 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2247 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2248 * @param pGVM The global (ring-0) VM structure.
2249 * @param idCpu The Virtual CPU ID of the calling EMT.
2250 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2251 * @thread EMT(idCpu).
2252 */
2253GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2254{
2255 GVMM_CHECK_SMAP_SETUP();
2256 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2257 PGVMM pGVMM;
2258 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2259 if (RT_SUCCESS(rc))
2260 {
2261 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2262 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2263 }
2264 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2265 return rc;
2266}
2267
2268
2269
2270/**
2271 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2272 * a sleeping EMT.
2273 *
2274 * @retval VINF_SUCCESS if successfully woken up.
2275 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2276 *
2277 * @param pGVM The global (ring-0) VM structure.
2278 * @param pGVCpu The global (ring-0) VCPU structure.
2279 */
2280DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2281{
2282 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2283
2284 /*
2285 * Signal the semaphore regardless of whether it's currently blocked on it.
2286 *
2287 * The reason for this is that there is absolutely no way we can be 100%
2288 * certain that it isn't *about* to go to sleep on it and just got
2289 * delayed a bit en route. So, we will always signal the semaphore when
2290 * it is flagged as halted in the VMM.
2291 */
2292/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2293 int rc;
2294 if (pGVCpu->gvmm.s.u64HaltExpire)
2295 {
2296 rc = VINF_SUCCESS;
2297 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2298 }
2299 else
2300 {
2301 rc = VINF_GVM_NOT_BLOCKED;
2302 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2303 }
2304
2305 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2306 AssertRC(rc2);
2307
2308 return rc;
2309}
2310
2311
2312/**
2313 * Wakes up the halted EMT thread so it can service a pending request.
2314 *
2315 * @returns VBox status code.
2316 * @retval VINF_SUCCESS if successfully woken up.
2317 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2318 *
2319 * @param pGVM The global (ring-0) VM structure.
2320 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2321 * @param fTakeUsedLock Take the used lock or not
2322 * @thread Any but EMT(idCpu).
2323 */
2324GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2325{
2326 GVMM_CHECK_SMAP_SETUP();
2327 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2328
2329 /*
2330 * Validate input and take the UsedLock.
2331 */
2332 PGVMM pGVMM;
2333 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2334 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2335 if (RT_SUCCESS(rc))
2336 {
2337 if (idCpu < pGVM->cCpus)
2338 {
2339 /*
2340 * Do the actual job.
2341 */
2342 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2343 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2344
2345 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2346 {
2347 /*
2348 * While we're here, do a round of scheduling.
2349 */
2350 Assert(ASMGetFlags() & X86_EFL_IF);
2351 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2352 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2353 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2354 }
2355 }
2356 else
2357 rc = VERR_INVALID_CPU_ID;
2358
2359 if (fTakeUsedLock)
2360 {
2361 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2362 AssertRC(rc2);
2363 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2364 }
2365 }
2366
2367 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2368 return rc;
2369}
2370
2371
2372/**
2373 * Wakes up the halted EMT thread so it can service a pending request.
2374 *
2375 * @returns VBox status code.
2376 * @retval VINF_SUCCESS if successfully woken up.
2377 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2378 *
2379 * @param pGVM The global (ring-0) VM structure.
2380 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2381 * @thread Any but EMT(idCpu).
2382 */
2383GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2384{
2385 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2386}
2387
2388
2389/**
2390 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2391 * parameter and no used locking.
2392 *
2393 * @returns VBox status code.
2394 * @retval VINF_SUCCESS if successfully woken up.
2395 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2396 *
2397 * @param pGVM The global (ring-0) VM structure.
2398 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2399 * @thread Any but EMT(idCpu).
2400 * @deprecated Don't use in new code if possible! Use the GVM variant.
2401 */
2402GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2403{
2404 GVMM_CHECK_SMAP_SETUP();
2405 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2406 PGVMM pGVMM;
2407 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2408 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2409 if (RT_SUCCESS(rc))
2410 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2411 return rc;
2412}
2413
2414
2415/**
2416 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2417 * the Virtual CPU if it's still busy executing guest code.
2418 *
2419 * @returns VBox status code.
2420 * @retval VINF_SUCCESS if poked successfully.
2421 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2422 *
2423 * @param pGVM The global (ring-0) VM structure.
2424 * @param pVCpu The cross context virtual CPU structure.
2425 */
2426DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2427{
2428 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2429
2430 RTCPUID idHostCpu = pVCpu->idHostCpu;
2431 if ( idHostCpu == NIL_RTCPUID
2432 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2433 {
2434 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2435 return VINF_GVM_NOT_BUSY_IN_GC;
2436 }
2437
2438 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2439 RTMpPokeCpu(idHostCpu);
2440 return VINF_SUCCESS;
2441}
2442
2443
2444/**
2445 * Pokes an EMT if it's still busy running guest code.
2446 *
2447 * @returns VBox status code.
2448 * @retval VINF_SUCCESS if poked successfully.
2449 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2450 *
2451 * @param pGVM The global (ring-0) VM structure.
2452 * @param idCpu The ID of the virtual CPU to poke.
2453 * @param fTakeUsedLock Take the used lock or not
2454 */
2455GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2456{
2457 /*
2458 * Validate input and take the UsedLock.
2459 */
2460 PGVMM pGVMM;
2461 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2462 if (RT_SUCCESS(rc))
2463 {
2464 if (idCpu < pGVM->cCpus)
2465 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2466 else
2467 rc = VERR_INVALID_CPU_ID;
2468
2469 if (fTakeUsedLock)
2470 {
2471 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2472 AssertRC(rc2);
2473 }
2474 }
2475
2476 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2477 return rc;
2478}
2479
2480
2481/**
2482 * Pokes an EMT if it's still busy running guest code.
2483 *
2484 * @returns VBox status code.
2485 * @retval VINF_SUCCESS if poked successfully.
2486 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2487 *
2488 * @param pGVM The global (ring-0) VM structure.
2489 * @param idCpu The ID of the virtual CPU to poke.
2490 */
2491GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2492{
2493 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2494}
2495
2496
2497/**
2498 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2499 * used locking.
2500 *
2501 * @returns VBox status code.
2502 * @retval VINF_SUCCESS if poked successfully.
2503 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2504 *
2505 * @param pGVM The global (ring-0) VM structure.
2506 * @param idCpu The ID of the virtual CPU to poke.
2507 *
2508 * @deprecated Don't use in new code if possible! Use the GVM variant.
2509 */
2510GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2511{
2512 PGVMM pGVMM;
2513 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2514 if (RT_SUCCESS(rc))
2515 {
2516 if (idCpu < pGVM->cCpus)
2517 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2518 else
2519 rc = VERR_INVALID_CPU_ID;
2520 }
2521 return rc;
2522}
2523
2524
2525/**
2526 * Wakes up a set of halted EMT threads so they can service pending requests.
2527 *
2528 * @returns VBox status code, no informational stuff.
2529 *
2530 * @param pGVM The global (ring-0) VM structure.
2531 * @param pSleepSet The set of sleepers to wake up.
2532 * @param pPokeSet The set of CPUs to poke.
2533 */
2534GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2535{
2536 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2537 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2538 GVMM_CHECK_SMAP_SETUP();
2539 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2540 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2541
2542 /*
2543 * Validate input and take the UsedLock.
2544 */
2545 PGVMM pGVMM;
2546 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2547 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2548 if (RT_SUCCESS(rc))
2549 {
2550 rc = VINF_SUCCESS;
2551 VMCPUID idCpu = pGVM->cCpus;
2552 while (idCpu-- > 0)
2553 {
2554 /* Don't try to poke or wake up ourselves. */
2555 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2556 continue;
2557
2558 /* just ignore errors for now. */
2559 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2560 {
2561 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2562 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2563 }
2564 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2565 {
2566 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2567 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2568 }
2569 }
2570
2571 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2572 AssertRC(rc2);
2573 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2574 }
2575
2576 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2577 return rc;
2578}
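/*
 * Illustration only, a hypothetical caller sketch (not part of the build).
 * It assumes the VMCPUSET_EMPTY/VMCPUSET_ADD macros from VBox/vmm/vmcpuset.h
 * and a valid pGVM with at least three vCPUs.
 */
#if 0 /* example sketch */
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    VMCPUSET_ADD(&SleepSet, 1);     /* wake vCPU 1 if it is halted...                  */
    VMCPUSET_ADD(&PokeSet,  2);     /* ...and poke vCPU 2 if it is running guest code. */
    int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
    AssertRC(rc);
}
#endif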
2579
2580
2581/**
2582 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2583 *
2584 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2585 * @param pGVM The global (ring-0) VM structure.
2586 * @param pReq Pointer to the request packet.
2587 */
2588GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2589{
2590 /*
2591 * Validate input and pass it on.
2592 */
2593 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2594 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2595
2596 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2597}
2598
2599
2600
2601/**
2602 * Poll the schedule to see if someone else should get a chance to run.
2603 *
2604 * This is a bit hackish and will not work too well if the machine is
2605 * under heavy load from non-VM processes.
2606 *
2607 * @returns VINF_SUCCESS if not yielded.
2608 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2609 * @param pGVM The global (ring-0) VM structure.
2610 * @param idCpu The Virtual CPU ID of the calling EMT.
2611 * @param fYield Whether to yield or not.
2612 * This is for when we're spinning in the halt loop.
2613 * @thread EMT(idCpu).
2614 */
2615GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2616{
2617 /*
2618 * Validate input.
2619 */
2620 PGVMM pGVMM;
2621 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2622 if (RT_SUCCESS(rc))
2623 {
2624 /*
2625 * We currently only implement helping doing wakeups (fYield = false), so don't
2626 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2627 */
2628 if (!fYield && pGVMM->fDoEarlyWakeUps)
2629 {
2630 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2631 pGVM->gvmm.s.StatsSched.cPollCalls++;
2632
2633 Assert(ASMGetFlags() & X86_EFL_IF);
2634 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2635
2636 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2637
2638 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2639 }
2640 /*
2641 * Not quite sure what we could do here...
2642 */
2643 else if (fYield)
2644 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2645 else
2646 rc = VINF_SUCCESS;
2647 }
2648
2649 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2650 return rc;
2651}
2652
2653
2654#ifdef GVMM_SCHED_WITH_PPT
2655/**
2656 * Timer callback for the periodic preemption timer.
2657 *
2658 * @param pTimer The timer handle.
2659 * @param pvUser Pointer to the per cpu structure.
2660 * @param iTick The current tick.
2661 */
2662static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2663{
2664 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2665 NOREF(pTimer); NOREF(iTick);
2666
2667 /*
2668 * Termination check
2669 */
2670 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2671 return;
2672
2673 /*
2674 * Do the housekeeping.
2675 */
2676 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2677
2678 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2679 {
2680 /*
2681 * Historicize the max frequency.
2682 */
2683 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2684 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2685 pCpu->Ppt.iTickHistorization = 0;
2686 pCpu->Ppt.uDesiredHz = 0;
2687
2688 /*
2689 * Check whether the current timer frequency is still the right one.
2690 */
2691 uint32_t uHistMaxHz = 0;
2692 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2693 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2694 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2695 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2696 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2697 else if (uHistMaxHz)
2698 {
2699 /*
2700 * Reprogram it.
2701 */
2702 pCpu->Ppt.cChanges++;
2703 pCpu->Ppt.iTickHistorization = 0;
2704 pCpu->Ppt.uTimerHz = uHistMaxHz;
2705 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2706 pCpu->Ppt.cNsInterval = cNsInterval;
2707 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2708 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2709 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2710 / cNsInterval;
2711 else
2712 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2713 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2714
2715 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2716 RTTimerChangeInterval(pTimer, cNsInterval);
2717 }
2718 else
2719 {
2720 /*
2721 * Stop it.
2722 */
2723 pCpu->Ppt.fStarted = false;
2724 pCpu->Ppt.uTimerHz = 0;
2725 pCpu->Ppt.cNsInterval = 0;
2726 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2727
2728 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2729 RTTimerStop(pTimer);
2730 }
2731 }
2732 else
2733 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2734}
2735#endif /* GVMM_SCHED_WITH_PPT */
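/*
 * Illustration only (the 20 ms figure is assumed for the example, not quoted
 * from the header): if the recent maximum is uHistMaxHz = 2048, the timer is
 * reprogrammed to cNsInterval = RT_NS_1SEC / 2048 = 488 281 ns, and with a
 * hypothetical GVMMHOSTCPU_PPT_HIST_INTERVAL_NS of 20 000 000 ns the
 * historization counter becomes (20 000 000 + 10 000 000 - 1) / 488 281 = 61
 * ticks until the next round of historicizing and possible reprogramming.
 */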
2736
2737
2738/**
2739 * Updates the periodic preemption timer for the calling CPU.
2740 *
2741 * The caller must have disabled preemption!
2742 * The caller must check that the host can do high resolution timers.
2743 *
2744 * @param pGVM The global (ring-0) VM structure.
2745 * @param idHostCpu The current host CPU id.
2746 * @param uHz The desired frequency.
2747 */
2748GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2749{
2750 NOREF(pGVM);
2751#ifdef GVMM_SCHED_WITH_PPT
2752 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2753 Assert(RTTimerCanDoHighResolution());
2754
2755 /*
2756 * Resolve the per CPU data.
2757 */
2758 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2759 PGVMM pGVMM = g_pGVMM;
2760 if ( !RT_VALID_PTR(pGVMM)
2761 || pGVMM->u32Magic != GVMM_MAGIC)
2762 return;
2763 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2764 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2765 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2766 && pCpu->idCpu == idHostCpu,
2767 ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2768
2769 /*
2770 * Check whether we need to do anything about the timer.
2771 * We have to be a little bit careful since we might race the timer
2772 * callback here.
2773 */
2774 if (uHz > 16384)
2775 uHz = 16384; /** @todo add a query method for this! */
2776 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2777 && uHz >= pCpu->Ppt.uMinHz
2778 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2779 {
2780 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2781
2782 pCpu->Ppt.uDesiredHz = uHz;
2783 uint32_t cNsInterval = 0;
2784 if (!pCpu->Ppt.fStarted)
2785 {
2786 pCpu->Ppt.cStarts++;
2787 pCpu->Ppt.fStarted = true;
2788 pCpu->Ppt.fStarting = true;
2789 pCpu->Ppt.iTickHistorization = 0;
2790 pCpu->Ppt.uTimerHz = uHz;
2791 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2792 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2793 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2794 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2795 / cNsInterval;
2796 else
2797 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2798 }
2799
2800 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2801
2802 if (cNsInterval)
2803 {
2804 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2805 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2806 AssertRC(rc);
2807
2808 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2809 if (RT_FAILURE(rc))
2810 pCpu->Ppt.fStarted = false;
2811 pCpu->Ppt.fStarting = false;
2812 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2813 }
2814 }
2815#else /* !GVMM_SCHED_WITH_PPT */
2816 NOREF(idHostCpu); NOREF(uHz);
2817#endif /* !GVMM_SCHED_WITH_PPT */
2818}
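/*
 * Illustration only, a hypothetical call site sketch (not part of the build):
 * the contract above requires preemption to be disabled and high resolution
 * timers to be available; pGVM and uDesiredHz are assumed locals.
 */
#if 0 /* example sketch */
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);
    if (RTTimerCanDoHighResolution())
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uDesiredHz);
    RTThreadPreemptEnable(&PreemptState);
}
#endif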
2819
2820
2821/**
2822 * Calls @a pfnCallback for each VM in the system.
2823 *
2824 * This will enumerate the VMs while holding the global VM used list lock in
2825 * shared mode, so it is only suitable for simple work. If more expensive work
2826 * needs doing, a different approach must be taken as using this API would
2827 * otherwise block VM creation and destruction.
2828 *
2829 * @returns VBox status code.
2830 * @param pfnCallback The callback function.
2831 * @param pvUser User argument to the callback.
2832 */
2833GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2834{
2835 PGVMM pGVMM;
2836 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2837
2838 int rc = VINF_SUCCESS;
2839 GVMMR0_USED_SHARED_LOCK(pGVMM);
2840 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2841 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2842 i = pGVMM->aHandles[i].iNext, cLoops++)
2843 {
2844 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2845 if ( RT_VALID_PTR(pGVM)
2846 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2847 && pGVM->u32Magic == GVM_MAGIC)
2848 {
2849 rc = pfnCallback(pGVM, pvUser);
2850 if (rc != VINF_SUCCESS)
2851 break;
2852 }
2853
2854 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2855 }
2856 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2857 return rc;
2858}
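/*
 * Illustration only, a hypothetical enumeration callback (not part of the
 * build): it sums the EMT counts of all registered VMs.  The pvUser argument
 * is assumed to point at a uint32_t accumulator owned by the caller.
 */
#if 0 /* example sketch */
static DECLCALLBACK(int) exampleCountEmtsCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;
    return VINF_SUCCESS;    /* returning any other status stops the enumeration */
}
/* ... uint32_t cEmts = 0; GVMMR0EnumVMs(exampleCountEmtsCallback, &cEmts); ... */
#endif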
2859
2860
2861/**
2862 * Retrieves the GVMM statistics visible to the caller.
2863 *
2864 * @returns VBox status code.
2865 *
2866 * @param pStats Where to put the statistics.
2867 * @param pSession The current session.
2868 * @param pGVM The GVM to obtain statistics for. Optional.
2869 */
2870GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2871{
2872 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2873
2874 /*
2875 * Validate input.
2876 */
2877 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2878 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2879 pStats->cVMs = 0; /* (crash before taking the sem...) */
2880
2881 /*
2882 * Take the lock and get the VM statistics.
2883 */
2884 PGVMM pGVMM;
2885 if (pGVM)
2886 {
2887 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2888 if (RT_FAILURE(rc))
2889 return rc;
2890 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2891 }
2892 else
2893 {
2894 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2895 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2896
2897 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2898 AssertRCReturn(rc, rc);
2899 }
2900
2901 /*
2902 * Enumerate the VMs and add up the statistics of the ones visible to the caller.
2903 */
2904 pStats->cVMs = 0;
2905 pStats->cEMTs = 0;
2906 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2907
2908 for (unsigned i = pGVMM->iUsedHead;
2909 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2910 i = pGVMM->aHandles[i].iNext)
2911 {
2912 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2913 void *pvObj = pGVMM->aHandles[i].pvObj;
2914 if ( RT_VALID_PTR(pvObj)
2915 && RT_VALID_PTR(pOtherGVM)
2916 && pOtherGVM->u32Magic == GVM_MAGIC
2917 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2918 {
2919 pStats->cVMs++;
2920 pStats->cEMTs += pOtherGVM->cCpus;
2921
2922 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2923 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2924 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2925 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2926 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2927
2928 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2929 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2930 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2931
2932 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2933 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2934
2935 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2936 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2937 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2938 }
2939 }
2940
2941 /*
2942 * Copy out the per host CPU statistics.
2943 */
2944 uint32_t iDstCpu = 0;
2945 uint32_t cSrcCpus = pGVMM->cHostCpus;
2946 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2947 {
2948 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2949 {
2950 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2951 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2952#ifdef GVMM_SCHED_WITH_PPT
2953 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2954 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2955 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2956 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2957#else
2958 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2959 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2960 pStats->aHostCpus[iDstCpu].cChanges = 0;
2961 pStats->aHostCpus[iDstCpu].cStarts = 0;
2962#endif
2963 iDstCpu++;
2964 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2965 break;
2966 }
2967 }
2968 pStats->cHostCpus = iDstCpu;
2969
2970 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2971
2972 return VINF_SUCCESS;
2973}
2974
2975
2976/**
2977 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2978 *
2979 * @returns see GVMMR0QueryStatistics.
2980 * @param pGVM The global (ring-0) VM structure. Optional.
2981 * @param pReq Pointer to the request packet.
2982 * @param pSession The current session.
2983 */
2984GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2985{
2986 /*
2987 * Validate input and pass it on.
2988 */
2989 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2990 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2991 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2992
2993 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2994}
2995
2996
2997/**
2998 * Resets the specified GVMM statistics.
2999 *
3000 * @returns VBox status code.
3001 *
3002 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3003 * @param pSession The current session.
3004 * @param pGVM The GVM to reset statistics for. Optional.
3005 */
3006GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3007{
3008 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3009
3010 /*
3011 * Validate input.
3012 */
3013 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3014 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3015
3016 /*
3017 * Take the lock and get the VM statistics.
3018 */
3019 PGVMM pGVMM;
3020 if (pGVM)
3021 {
3022 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3023 if (RT_FAILURE(rc))
3024 return rc;
3025# define MAYBE_RESET_FIELD(field) \
3026 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3027 MAYBE_RESET_FIELD(cHaltCalls);
3028 MAYBE_RESET_FIELD(cHaltBlocking);
3029 MAYBE_RESET_FIELD(cHaltTimeouts);
3030 MAYBE_RESET_FIELD(cHaltNotBlocking);
3031 MAYBE_RESET_FIELD(cHaltWakeUps);
3032 MAYBE_RESET_FIELD(cWakeUpCalls);
3033 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3034 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3035 MAYBE_RESET_FIELD(cPokeCalls);
3036 MAYBE_RESET_FIELD(cPokeNotBusy);
3037 MAYBE_RESET_FIELD(cPollCalls);
3038 MAYBE_RESET_FIELD(cPollHalts);
3039 MAYBE_RESET_FIELD(cPollWakeUps);
3040# undef MAYBE_RESET_FIELD
3041 }
3042 else
3043 {
3044 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3045
3046 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3047 AssertRCReturn(rc, rc);
3048 }
3049
3050 /*
3051 * Enumerate the VMs and reset the statistics of the ones visible to the caller.
3052 */
3053 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3054 {
3055 for (unsigned i = pGVMM->iUsedHead;
3056 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3057 i = pGVMM->aHandles[i].iNext)
3058 {
3059 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3060 void *pvObj = pGVMM->aHandles[i].pvObj;
3061 if ( RT_VALID_PTR(pvObj)
3062 && RT_VALID_PTR(pOtherGVM)
3063 && pOtherGVM->u32Magic == GVM_MAGIC
3064 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3065 {
3066# define MAYBE_RESET_FIELD(field) \
3067 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3068 MAYBE_RESET_FIELD(cHaltCalls);
3069 MAYBE_RESET_FIELD(cHaltBlocking);
3070 MAYBE_RESET_FIELD(cHaltTimeouts);
3071 MAYBE_RESET_FIELD(cHaltNotBlocking);
3072 MAYBE_RESET_FIELD(cHaltWakeUps);
3073 MAYBE_RESET_FIELD(cWakeUpCalls);
3074 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3075 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3076 MAYBE_RESET_FIELD(cPokeCalls);
3077 MAYBE_RESET_FIELD(cPokeNotBusy);
3078 MAYBE_RESET_FIELD(cPollCalls);
3079 MAYBE_RESET_FIELD(cPollHalts);
3080 MAYBE_RESET_FIELD(cPollWakeUps);
3081# undef MAYBE_RESET_FIELD
3082 }
3083 }
3084 }
3085
3086 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3087
3088 return VINF_SUCCESS;
3089}
3090
3091
3092/**
3093 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3094 *
3095 * @returns see GVMMR0ResetStatistics.
3096 * @param pGVM The global (ring-0) VM structure. Optional.
3097 * @param pReq Pointer to the request packet.
3098 * @param pSession The current session.
3099 */
3100GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3101{
3102 /*
3103 * Validate input and pass it on.
3104 */
3105 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3106 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3107 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3108
3109 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3110}
3111