VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@98103

Last change on this file since 98103 was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 122.4 KB
1/* $Id: GVMMR0.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * is both for session cleanup purposes and for having a point where it is
39 * possible to implement usage policies later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-CPU
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
48 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * The TMCalcHostTimerFrequency() part of things takes the max
52 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
53 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
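/*
 * Illustrative arithmetic behind the ~160 ms window mentioned above (a sketch
 * only, derived from the constants defined further down in this file):
 *
 *     RT_ELEMENTS(aHzHistory)          =  8 history slots
 *     GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 20 000 000 ns = 20 ms per slot
 *     => 8 * 20 ms = 160 ms of frequency history per host CPU.
 */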
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#include <iprt/asm-amd64-x86.h>
81#include <iprt/critsect.h>
82#include <iprt/mem.h>
83#include <iprt/semaphore.h>
84#include <iprt/time.h>
85#include <VBox/log.h>
86#include <iprt/thread.h>
87#include <iprt/process.h>
88#include <iprt/param.h>
89#include <iprt/string.h>
90#include <iprt/assert.h>
91#include <iprt/mem.h>
92#include <iprt/memobj.h>
93#include <iprt/mp.h>
94#include <iprt/cpuset.h>
95#include <iprt/spinlock.h>
96#include <iprt/timer.h>
97
98#include "dtrace/VBoxVMM.h"
99
100
101/*********************************************************************************************************************************
102* Defined Constants And Macros *
103*********************************************************************************************************************************/
104#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
105/** Define this to enable the periodic preemption timer. */
106# define GVMM_SCHED_WITH_PPT
107#endif
108
109#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
110/** Define this to enable the per-EMT high resolution wakeup timers. */
111# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
112#endif
113
114
115/** Special value that GVMMR0DeregisterVCpu sets. */
116#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
117AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
118
119
120/*********************************************************************************************************************************
121* Structures and Typedefs *
122*********************************************************************************************************************************/
123
124/**
125 * Global VM handle.
126 */
127typedef struct GVMHANDLE
128{
129 /** The index of the next handle in the list (free or used). (0 is nil.) */
130 uint16_t volatile iNext;
131 /** Our own index / handle value. */
132 uint16_t iSelf;
133 /** The process ID of the handle owner.
134 * This is used for access checks. */
135 RTPROCESS ProcId;
136 /** The pointer to the ring-0 only (aka global) VM structure. */
137 PGVM pGVM;
138 /** The virtual machine object. */
139 void *pvObj;
140 /** The session this VM is associated with. */
141 PSUPDRVSESSION pSession;
142 /** The ring-0 handle of the EMT0 thread.
143 * This is used for ownership checks as well as looking up a VM handle by thread
144 * at times like assertions. */
145 RTNATIVETHREAD hEMT0;
146} GVMHANDLE;
147/** Pointer to a global VM handle. */
148typedef GVMHANDLE *PGVMHANDLE;
149
150/** Number of GVM handles (including the NIL handle). */
151#if HC_ARCH_BITS == 64
152# define GVMM_MAX_HANDLES 8192
153#else
154# define GVMM_MAX_HANDLES 128
155#endif
156
157/**
158 * Per host CPU GVMM data.
159 */
160typedef struct GVMMHOSTCPU
161{
162 /** Magic number (GVMMHOSTCPU_MAGIC). */
163 uint32_t volatile u32Magic;
164 /** The CPU ID. */
165 RTCPUID idCpu;
166 /** The CPU set index. */
167 uint32_t idxCpuSet;
168
169#ifdef GVMM_SCHED_WITH_PPT
170 /** Periodic preemption timer data. */
171 struct
172 {
173 /** The handle to the periodic preemption timer. */
174 PRTTIMER pTimer;
175 /** Spinlock protecting the data below. */
176 RTSPINLOCK hSpinlock;
177 /** The smallest Hz that we need to care about. (static) */
178 uint32_t uMinHz;
179 /** The number of ticks between each historization. */
180 uint32_t cTicksHistoriziationInterval;
181 /** The current historization tick (counting up to
182 * cTicksHistoriziationInterval and then resetting). */
183 uint32_t iTickHistorization;
184 /** The current timer interval. This is set to 0 when inactive. */
185 uint32_t cNsInterval;
186 /** The current timer frequency. This is set to 0 when inactive. */
187 uint32_t uTimerHz;
188 /** The current max frequency reported by the EMTs.
189 * This gets historicized and reset by the timer callback. It is
190 * read without holding the spinlock, so it needs atomic updating. */
191 uint32_t volatile uDesiredHz;
192 /** Whether the timer was started or not. */
193 bool volatile fStarted;
194 /** Set if we're starting the timer. */
195 bool volatile fStarting;
196 /** The index of the next history entry (mod it). */
197 uint32_t iHzHistory;
198 /** Historicized uDesiredHz values. The array wraps around; new entries
199 * are added at iHzHistory. This is updated approximately every
200 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
201 uint32_t aHzHistory[8];
202 /** Statistics counter for recording the number of interval changes. */
203 uint32_t cChanges;
204 /** Statistics counter for recording the number of timer starts. */
205 uint32_t cStarts;
206 } Ppt;
207#endif /* GVMM_SCHED_WITH_PPT */
208
209} GVMMHOSTCPU;
210/** Pointer to the per host CPU GVMM data. */
211typedef GVMMHOSTCPU *PGVMMHOSTCPU;
212/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
213#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
214/** The interval one history entry should cover (approximately), given in
215 * nanoseconds. */
216#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
217
218
219/**
220 * The GVMM instance data.
221 */
222typedef struct GVMM
223{
224 /** Eyecatcher / magic. */
225 uint32_t u32Magic;
226 /** The index of the head of the free handle chain. (0 is nil.) */
227 uint16_t volatile iFreeHead;
228 /** The index of the head of the active handle chain. (0 is nil.) */
229 uint16_t volatile iUsedHead;
230 /** The number of VMs. */
231 uint16_t volatile cVMs;
232 /** Alignment padding. */
233 uint16_t u16Reserved;
234 /** The number of EMTs. */
235 uint32_t volatile cEMTs;
236 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
237 uint32_t volatile cHaltedEMTs;
238 /** Mini lock for restricting early wake-ups to one thread. */
239 bool volatile fDoingEarlyWakeUps;
240 bool afPadding[3]; /**< explicit alignment padding. */
241 /** When the next halted or sleeping EMT will wake up.
242 * This is set to 0 when it needs recalculating and to UINT64_MAX when
243 * there are no halted or sleeping EMTs in the GVMM. */
244 uint64_t uNsNextEmtWakeup;
245 /** The lock used to serialize VM creation, destruction and associated events that
246 * aren't performance critical. Owners may acquire the list lock. */
247 RTCRITSECT CreateDestroyLock;
248 /** The lock used to serialize used list updates and accesses.
249 * This indirectly includes scheduling since the scheduler will have to walk the
250 * used list to examine running VMs. Owners may not acquire any other locks. */
251 RTCRITSECTRW UsedLock;
252 /** The handle array.
253 * The size of this array defines the maximum number of currently running VMs.
254 * The first entry is unused as it represents the NIL handle. */
255 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
256
257 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
258 * The number of EMTs that means we no longer consider ourselves alone on a
259 * CPU/Core.
260 */
261 uint32_t cEMTsMeansCompany;
262 /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
263 * The minimum sleep time for when we're alone, in nanoseconds.
264 */
265 uint32_t nsMinSleepAlone;
266 /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
267 * The minimum sleep time for when we've got company, in nanoseconds.
268 */
269 uint32_t nsMinSleepCompany;
270#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
271 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp, 32-bit, 0, 100000000, 5000, ns}
272 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
273 * nanoseconds.
274 */
275 uint32_t nsMinSleepWithHrTimer;
276#endif
277 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
278 * The limit for the first round of early wake-ups, given in nanoseconds.
279 */
280 uint32_t nsEarlyWakeUp1;
281 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
282 * The limit for the second round of early wake-ups, given in nanoseconds.
283 */
284 uint32_t nsEarlyWakeUp2;
285
286 /** Set if we're doing early wake-ups.
287 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
288 bool volatile fDoEarlyWakeUps;
289
290 /** The number of entries in the host CPU array (aHostCpus). */
291 uint32_t cHostCpus;
292 /** Per host CPU data (variable length). */
293 GVMMHOSTCPU aHostCpus[1];
294} GVMM;
295AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
296AssertCompileMemberAlignment(GVMM, UsedLock, 8);
297AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
298/** Pointer to the GVMM instance data. */
299typedef GVMM *PGVMM;
300
301/** The GVMM::u32Magic value (Charlie Haden). */
302#define GVMM_MAGIC UINT32_C(0x19370806)
303
304
305
306/*********************************************************************************************************************************
307* Global Variables *
308*********************************************************************************************************************************/
309/** Pointer to the GVMM instance data.
310 * (Just my general dislike for global variables.) */
311static PGVMM g_pGVMM = NULL;
312
313/** Macro for obtaining and validating the g_pGVMM pointer.
314 * On failure it will return from the invoking function with the specified return value.
315 *
316 * @param pGVMM The name of the pGVMM variable.
317 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
318 * status codes.
319 */
320#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
321 do { \
322 (pGVMM) = g_pGVMM;\
323 AssertPtrReturn((pGVMM), (rc)); \
324 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
325 } while (0)
326
327/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
328 * On failure it will return from the invoking function.
329 *
330 * @param pGVMM The name of the pGVMM variable.
331 */
332#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
333 do { \
334 (pGVMM) = g_pGVMM;\
335 AssertPtrReturnVoid((pGVMM)); \
336 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
337 } while (0)
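/*
 * Typical usage sketch for the two instance getter macros (hypothetical helper,
 * not part of this file); on a missing or corrupt instance the macro returns
 * straight out of the calling function:
 *
 *     static void gvmmR0SomeHelper(void)
 *     {
 *         PGVMM pGVMM;
 *         GVMM_GET_VALID_INSTANCE_VOID(pGVMM);
 *         // pGVMM now points at validated GVMM instance data.
 *     }
 */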
338
339
340/*********************************************************************************************************************************
341* Internal Functions *
342*********************************************************************************************************************************/
343static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
344static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
345static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
346static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
347
348#ifdef GVMM_SCHED_WITH_PPT
349static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
350#endif
351#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
352static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354
355
356/**
357 * Initializes the GVMM.
358 *
359 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
360 *
361 * @returns VBox status code.
362 */
363GVMMR0DECL(int) GVMMR0Init(void)
364{
365 LogFlow(("GVMMR0Init:\n"));
366
367 /*
368 * Allocate and initialize the instance data.
369 */
370 uint32_t cHostCpus = RTMpGetArraySize();
371 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
372
373 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
374 if (!pGVMM)
375 return VERR_NO_MEMORY;
376 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
377 "GVMM-CreateDestroyLock");
378 if (RT_SUCCESS(rc))
379 {
380 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
381 if (RT_SUCCESS(rc))
382 {
383 pGVMM->u32Magic = GVMM_MAGIC;
384 pGVMM->iUsedHead = 0;
385 pGVMM->iFreeHead = 1;
386
387 /* the nil handle */
388 pGVMM->aHandles[0].iSelf = 0;
389 pGVMM->aHandles[0].iNext = 0;
390
391 /* the tail */
392 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
393 pGVMM->aHandles[i].iSelf = i;
394 pGVMM->aHandles[i].iNext = 0; /* nil */
395
396 /* the rest */
397 while (i-- > 1)
398 {
399 pGVMM->aHandles[i].iSelf = i;
400 pGVMM->aHandles[i].iNext = i + 1;
401 }
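 /* The free list thus ends up as 1 -> 2 -> ... -> GVMM_MAX_HANDLES - 1 -> 0 (nil),
 with entry 0 reserved for the NIL handle and iFreeHead pointing at entry 1. */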
402
403 /* The default configuration values. */
404 uint32_t cNsResolution = RTSemEventMultiGetResolution();
405 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
406 if (cNsResolution >= 5*RT_NS_100US)
407 {
408 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
409 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
410 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
411 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
412 }
413 else if (cNsResolution > RT_NS_100US)
414 {
415 pGVMM->nsMinSleepAlone = cNsResolution / 2;
416 pGVMM->nsMinSleepCompany = cNsResolution / 4;
417 pGVMM->nsEarlyWakeUp1 = 0;
418 pGVMM->nsEarlyWakeUp2 = 0;
419 }
420 else
421 {
422 pGVMM->nsMinSleepAlone = 2000;
423 pGVMM->nsMinSleepCompany = 2000;
424 pGVMM->nsEarlyWakeUp1 = 0;
425 pGVMM->nsEarlyWakeUp2 = 0;
426 }
427#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
428 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
429#endif
430 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
431
432 /* The host CPU data. */
433 pGVMM->cHostCpus = cHostCpus;
434 uint32_t iCpu = cHostCpus;
435 RTCPUSET PossibleSet;
436 RTMpGetSet(&PossibleSet);
437 while (iCpu-- > 0)
438 {
439 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
440#ifdef GVMM_SCHED_WITH_PPT
441 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
442 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
443 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
444 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
445 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
446 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
447 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
448 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
450 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
451 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
453#endif
454
455 if (RTCpuSetIsMember(&PossibleSet, iCpu))
456 {
457 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
458 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
459
460#ifdef GVMM_SCHED_WITH_PPT
461 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
462 50*1000*1000 /* whatever */,
463 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
464 gvmmR0SchedPeriodicPreemptionTimerCallback,
465 &pGVMM->aHostCpus[iCpu]);
466 if (RT_SUCCESS(rc))
467 {
468 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
469 if (RT_FAILURE(rc))
470 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
471 }
472 else
473 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
474 if (RT_FAILURE(rc))
475 {
476 while (iCpu < cHostCpus)
477 {
478 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
479 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
480 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
481 iCpu++;
482 }
483 break;
484 }
485#endif
486 }
487 else
488 {
489 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
490 pGVMM->aHostCpus[iCpu].u32Magic = 0;
491 }
492 }
493 if (RT_SUCCESS(rc))
494 {
495 g_pGVMM = pGVMM;
496 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
497 return VINF_SUCCESS;
498 }
499
500 /* bail out. */
501 RTCritSectRwDelete(&pGVMM->UsedLock);
502 }
503 else
504 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
505 RTCritSectDelete(&pGVMM->CreateDestroyLock);
506 }
507 else
508 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
509
510 RTMemFree(pGVMM);
511 return rc;
512}
513
514
515/**
516 * Terminates the GVMM.
517 *
518 * This is called while owning the loader semaphore (see supdrvLdrFree()).
519 * And unless something is wrong, there should be absolutely no VMs
520 * registered at this point.
521 */
522GVMMR0DECL(void) GVMMR0Term(void)
523{
524 LogFlow(("GVMMR0Term:\n"));
525
526 PGVMM pGVMM = g_pGVMM;
527 g_pGVMM = NULL;
528 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
529 {
530 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
531 return;
532 }
533
534 /*
535 * First of all, stop all active timers.
536 */
537 uint32_t cActiveTimers = 0;
538 uint32_t iCpu = pGVMM->cHostCpus;
539 while (iCpu-- > 0)
540 {
541 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
542#ifdef GVMM_SCHED_WITH_PPT
543 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
544 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
545 cActiveTimers++;
546#endif
547 }
548 if (cActiveTimers)
549 RTThreadSleep(1); /* fudge */
550
551 /*
552 * Invalidate the instance data and free the resources.
553 */
554 pGVMM->u32Magic = ~GVMM_MAGIC;
555 RTCritSectRwDelete(&pGVMM->UsedLock);
556 RTCritSectDelete(&pGVMM->CreateDestroyLock);
557
558 pGVMM->iFreeHead = 0;
559 if (pGVMM->iUsedHead)
560 {
561 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
562 pGVMM->iUsedHead = 0;
563 }
564
565#ifdef GVMM_SCHED_WITH_PPT
566 iCpu = pGVMM->cHostCpus;
567 while (iCpu-- > 0)
568 {
569 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
570 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
571 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
572 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
573 }
574#endif
575
576 RTMemFree(pGVMM);
577}
578
579
580/**
581 * A quick hack for setting global config values.
582 *
583 * @returns VBox status code.
584 *
585 * @param pSession The session handle. Used for authentication.
586 * @param pszName The variable name.
587 * @param u64Value The new value.
588 */
589GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
590{
591 /*
592 * Validate input.
593 */
594 PGVMM pGVMM;
595 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
596 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
597 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
598
599 /*
600 * String switch time!
601 */
602 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
603 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
604 int rc = VINF_SUCCESS;
605 pszName += sizeof("/GVMM/") - 1;
606 if (!strcmp(pszName, "cEMTsMeansCompany"))
607 {
608 if (u64Value <= UINT32_MAX)
609 pGVMM->cEMTsMeansCompany = u64Value;
610 else
611 rc = VERR_OUT_OF_RANGE;
612 }
613 else if (!strcmp(pszName, "MinSleepAlone"))
614 {
615 if (u64Value <= RT_NS_100MS)
616 pGVMM->nsMinSleepAlone = u64Value;
617 else
618 rc = VERR_OUT_OF_RANGE;
619 }
620 else if (!strcmp(pszName, "MinSleepCompany"))
621 {
622 if (u64Value <= RT_NS_100MS)
623 pGVMM->nsMinSleepCompany = u64Value;
624 else
625 rc = VERR_OUT_OF_RANGE;
626 }
627#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
628 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
629 {
630 if (u64Value <= RT_NS_100MS)
631 pGVMM->nsMinSleepWithHrTimer = u64Value;
632 else
633 rc = VERR_OUT_OF_RANGE;
634 }
635#endif
636 else if (!strcmp(pszName, "EarlyWakeUp1"))
637 {
638 if (u64Value <= RT_NS_100MS)
639 {
640 pGVMM->nsEarlyWakeUp1 = u64Value;
641 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
642 }
643 else
644 rc = VERR_OUT_OF_RANGE;
645 }
646 else if (!strcmp(pszName, "EarlyWakeUp2"))
647 {
648 if (u64Value <= RT_NS_100MS)
649 {
650 pGVMM->nsEarlyWakeUp2 = u64Value;
651 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
652 }
653 else
654 rc = VERR_OUT_OF_RANGE;
655 }
656 else
657 rc = VERR_CFGM_VALUE_NOT_FOUND;
658 return rc;
659}
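/*
 * Illustrative call of the setter above (hypothetical caller and value, shown
 * only as a sketch):
 *
 *     int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000);
 *     // 0.5 ms minimum sleep when alone; values above RT_NS_100MS are
 *     // rejected with VERR_OUT_OF_RANGE.
 */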
660
661
662/**
663 * A quick hack for getting global config values.
664 *
665 * @returns VBox status code.
666 *
667 * @param pSession The session handle. Used for authentication.
668 * @param pszName The variable name.
669 * @param pu64Value Where to return the value.
670 */
671GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
672{
673 /*
674 * Validate input.
675 */
676 PGVMM pGVMM;
677 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
678 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
679 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
680 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
681
682 /*
683 * String switch time!
684 */
685 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
686 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
687 int rc = VINF_SUCCESS;
688 pszName += sizeof("/GVMM/") - 1;
689 if (!strcmp(pszName, "cEMTsMeansCompany"))
690 *pu64Value = pGVMM->cEMTsMeansCompany;
691 else if (!strcmp(pszName, "MinSleepAlone"))
692 *pu64Value = pGVMM->nsMinSleepAlone;
693 else if (!strcmp(pszName, "MinSleepCompany"))
694 *pu64Value = pGVMM->nsMinSleepCompany;
695#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
696 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
697 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
698#endif
699 else if (!strcmp(pszName, "EarlyWakeUp1"))
700 *pu64Value = pGVMM->nsEarlyWakeUp1;
701 else if (!strcmp(pszName, "EarlyWakeUp2"))
702 *pu64Value = pGVMM->nsEarlyWakeUp2;
703 else
704 rc = VERR_CFGM_VALUE_NOT_FOUND;
705 return rc;
706}
707
708
709/**
710 * Acquire the 'used' lock in shared mode.
711 *
712 * This prevents destruction of the VM while we're in ring-0.
713 *
714 * @returns IPRT status code, see RTSemFastMutexRequest.
715 * @param a_pGVMM The GVMM instance data.
716 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
717 */
718#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
719
720/**
721 * Release the 'used' lock when owning it in shared mode.
722 *
723 * @returns IPRT status code, see RTSemFastMutexRequest.
724 * @param a_pGVMM The GVMM instance data.
725 * @sa GVMMR0_USED_SHARED_LOCK
726 */
727#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
728
729/**
730 * Acquire the 'used' lock in exclusive mode.
731 *
732 * Only use this function when making changes to the used list.
733 *
734 * @returns IPRT status code, see RTSemFastMutexRequest.
735 * @param a_pGVMM The GVMM instance data.
736 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
737 */
738#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
739
740/**
741 * Release the 'used' lock when owning it in exclusive mode.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRelease.
744 * @param a_pGVMM The GVMM instance data.
745 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
746 */
747#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
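/*
 * Sketch of the intended locking pattern (hypothetical reader, not part of
 * this file): code that only walks the used list takes the shared variant,
 * while code that links or unlinks handles takes the exclusive one:
 *
 *     GVMMR0_USED_SHARED_LOCK(pGVMM);
 *     for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
 *     {
 *         // inspect pGVMM->aHandles[i] ...
 *     }
 *     GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 */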
748
749
750/**
751 * Try acquire the 'create & destroy' lock.
752 *
753 * @returns IPRT status code, see RTSemFastMutexRequest.
754 * @param pGVMM The GVMM instance data.
755 */
756DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
757{
758 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
759 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
760 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
761 return rc;
762}
763
764
765/**
766 * Release the 'create & destroy' lock.
767 *
768 * @returns IPRT status code, see RTSemFastMutexRequest.
769 * @param pGVMM The GVMM instance data.
770 */
771DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
772{
773 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
774 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
775 AssertRC(rc);
776 return rc;
777}
778
779
780/**
781 * Request wrapper for the GVMMR0CreateVM API.
782 *
783 * @returns VBox status code.
784 * @param pReq The request buffer.
785 * @param pSession The session handle. The VM will be associated with this.
786 */
787GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
788{
789 /*
790 * Validate the request.
791 */
792 if (!RT_VALID_PTR(pReq))
793 return VERR_INVALID_POINTER;
794 if (pReq->Hdr.cbReq != sizeof(*pReq))
795 return VERR_INVALID_PARAMETER;
796 if (pReq->pSession != pSession)
797 return VERR_INVALID_POINTER;
798
799 /*
800 * Execute it.
801 */
802 PGVM pGVM;
803 pReq->pVMR0 = NULL;
804 pReq->pVMR3 = NIL_RTR3PTR;
805 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
806 if (RT_SUCCESS(rc))
807 {
808 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
809 pReq->pVMR3 = pGVM->pVMR3;
810 }
811 return rc;
812}
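/*
 * Rough sketch of how ring-3 is expected to fill in the request before it
 * reaches this wrapper (field names taken from the code above; the actual
 * ring-3 call path lives in VMR3Create and is not reproduced here):
 *
 *     GVMMCREATEVMREQ CreateVMReq;
 *     CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);
 *     CreateVMReq.pSession     = pSession;
 *     CreateVMReq.cCpus        = cCpus;
 *     CreateVMReq.pVMR3        = NIL_RTR3PTR;
 *     CreateVMReq.pVMR0        = NIL_RTR0PTR;
 *     // ... handed to ring-0 via the VMM request path ...
 */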
813
814
815/**
816 * Allocates the VM structure and registers it with GVM.
817 *
818 * The caller will become the VM owner and thereby the EMT.
819 *
820 * @returns VBox status code.
821 * @param pSession The support driver session.
822 * @param cCpus Number of virtual CPUs for the new VM.
823 * @param ppGVM Where to store the pointer to the VM structure.
824 *
825 * @thread EMT.
826 */
827GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
828{
829 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
830 PGVMM pGVMM;
831 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
832
833 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
834 *ppGVM = NULL;
835
836 if ( cCpus == 0
837 || cCpus > VMM_MAX_CPU_COUNT)
838 return VERR_INVALID_PARAMETER;
839
840 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
841 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
842 RTPROCESS ProcId = RTProcSelf();
843 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
844
845 /*
846 * The whole allocation process is protected by the lock.
847 */
848 int rc = gvmmR0CreateDestroyLock(pGVMM);
849 AssertRCReturn(rc, rc);
850
851 /*
852 * Only one VM per session.
853 */
854 if (SUPR0GetSessionVM(pSession) != NULL)
855 {
856 gvmmR0CreateDestroyUnlock(pGVMM);
857 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
858 return VERR_ALREADY_EXISTS;
859 }
860
861 /*
862 * Allocate a handle first so we don't waste resources unnecessarily.
863 */
864 uint16_t iHandle = pGVMM->iFreeHead;
865 if (iHandle)
866 {
867 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
868
869 /* consistency checks, a bit paranoid as always. */
870 if ( !pHandle->pGVM
871 && !pHandle->pvObj
872 && pHandle->iSelf == iHandle)
873 {
874 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
875 if (pHandle->pvObj)
876 {
877 /*
878 * Move the handle from the free to used list and perform permission checks.
879 */
880 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
881 AssertRC(rc);
882
883 pGVMM->iFreeHead = pHandle->iNext;
884 pHandle->iNext = pGVMM->iUsedHead;
885 pGVMM->iUsedHead = iHandle;
886 pGVMM->cVMs++;
887
888 pHandle->pGVM = NULL;
889 pHandle->pSession = pSession;
890 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
891 pHandle->ProcId = NIL_RTPROCESS;
892
893 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
894
895 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
896 if (RT_SUCCESS(rc))
897 {
898 /*
899 * Allocate memory for the VM structure (combined VM + GVM).
900 */
901 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
902 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
903 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
904 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
905 if (RT_SUCCESS(rc))
906 {
907 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
908 AssertPtr(pGVM);
909
910 /*
911 * Initialise the structure.
912 */
913 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
914 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
915 pGVM->gvmm.s.VMMemObj = hVMMemObj;
916 rc = GMMR0InitPerVMData(pGVM);
917 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
918 int rc3 = VMMR0InitPerVMData(pGVM);
919 CPUMR0InitPerVMData(pGVM);
920 DBGFR0InitPerVMData(pGVM);
921 PDMR0InitPerVMData(pGVM);
922 IOMR0InitPerVMData(pGVM);
923 TMR0InitPerVMData(pGVM);
924 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
925 {
926 /*
927 * Allocate page array.
928 * This currently has to be made available to ring-3, but this should change eventually.
929 */
930 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
931 if (RT_SUCCESS(rc))
932 {
933 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
934 for (uint32_t iPage = 0; iPage < cPages; iPage++)
935 {
936 paPages[iPage].uReserved = 0;
937 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
938 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
939 }
940
941 /*
942 * Map the page array, VM and VMCPU structures into ring-3.
943 */
944 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
945 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
946 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
947 0 /*offSub*/, sizeof(VM));
948 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
949 {
950 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
951 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
952 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
953 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
954 }
955 if (RT_SUCCESS(rc))
956 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
957 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
958 NIL_RTR0PROCESS);
959 if (RT_SUCCESS(rc))
960 {
961 /*
962 * Initialize all the VM pointers.
963 */
964 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
965 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
966
967 for (VMCPUID i = 0; i < cCpus; i++)
968 {
969 pGVM->aCpus[i].pVMR0 = pGVM;
970 pGVM->aCpus[i].pVMR3 = pVMR3;
971 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
972 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
973 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
974 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
975 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
976 }
977
978 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
979 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
980 ("%p\n", pGVM->paVMPagesR3));
981
982#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
983 /*
984 * Create the high resolution wake-up timer for EMT 0, ignore failures.
985 */
986 if (RTTimerCanDoHighResolution())
987 {
988 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
989 0 /*one-shot, no interval*/,
990 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
991 &pGVM->aCpus[0]);
992 if (RT_FAILURE(rc4))
993 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
994 }
995#endif
996
997 /*
998 * Complete the handle - take the UsedLock sem just to be careful.
999 */
1000 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1001 AssertRC(rc);
1002
1003 pHandle->pGVM = pGVM;
1004 pHandle->hEMT0 = hEMT0;
1005 pHandle->ProcId = ProcId;
1006 pGVM->pVMR3 = pVMR3;
1007 pGVM->pVMR3Unsafe = pVMR3;
1008 pGVM->aCpus[0].hEMT = hEMT0;
1009 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1010 pGVM->aCpus[0].cEmtHashCollisions = 0;
1011 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1012 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1013 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1014 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1015 pGVMM->cEMTs += cCpus;
1016
1017 /* Associate it with the session and create the context hook for EMT0. */
1018 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1019 if (RT_SUCCESS(rc))
1020 {
1021 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1022 if (RT_SUCCESS(rc))
1023 {
1024 /*
1025 * Done!
1026 */
1027 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1028
1029 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1030 gvmmR0CreateDestroyUnlock(pGVMM);
1031
1032 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1033
1034 *ppGVM = pGVM;
1035 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1036 return VINF_SUCCESS;
1037 }
1038
1039 SUPR0SetSessionVM(pSession, NULL, NULL);
1040 }
1041 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1042 }
1043
1044 /* Cleanup mappings. */
1045 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1046 {
1047 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1048 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1049 }
1050 for (VMCPUID i = 0; i < cCpus; i++)
1051 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1052 {
1053 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1054 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1055 }
1056 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1057 {
1058 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1059 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1060 }
1061 }
1062 }
1063 else
1064 {
1065 if (RT_SUCCESS_NP(rc))
1066 rc = rc2;
1067 if (RT_SUCCESS_NP(rc))
1068 rc = rc3;
1069 }
1070 }
1071 }
1072 /* else: The user wasn't permitted to create this VM. */
1073
1074 /*
1075 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1076 * object reference here. A little extra mess because of non-recursive lock.
1077 */
1078 void *pvObj = pHandle->pvObj;
1079 pHandle->pvObj = NULL;
1080 gvmmR0CreateDestroyUnlock(pGVMM);
1081
1082 SUPR0ObjRelease(pvObj, pSession);
1083
1084 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1085 return rc;
1086 }
1087
1088 rc = VERR_NO_MEMORY;
1089 }
1090 else
1091 rc = VERR_GVMM_IPE_1;
1092 }
1093 else
1094 rc = VERR_GVM_TOO_MANY_VMS;
1095
1096 gvmmR0CreateDestroyUnlock(pGVMM);
1097 return rc;
1098}
1099
1100
1101/**
1102 * Initializes the per VM data belonging to GVMM.
1103 *
1104 * @param pGVM Pointer to the global VM structure.
1105 * @param hSelf The handle.
1106 * @param cCpus The CPU count.
1107 * @param pSession The session this VM is associated with.
1108 */
1109static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1110{
1111 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1112 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1113 AssertCompileMemberAlignment(VM, cpum, 64);
1114 AssertCompileMemberAlignment(VM, tm, 64);
1115
1116 /* GVM: */
1117 pGVM->u32Magic = GVM_MAGIC;
1118 pGVM->hSelf = hSelf;
1119 pGVM->cCpus = cCpus;
1120 pGVM->pSession = pSession;
1121 pGVM->pSelf = pGVM;
1122
1123 /* VM: */
1124 pGVM->enmVMState = VMSTATE_CREATING;
1125 pGVM->hSelfUnsafe = hSelf;
1126 pGVM->pSessionUnsafe = pSession;
1127 pGVM->pVMR0ForCall = pGVM;
1128 pGVM->cCpusUnsafe = cCpus;
1129 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1130 pGVM->uStructVersion = 1;
1131 pGVM->cbSelf = sizeof(VM);
1132 pGVM->cbVCpu = sizeof(VMCPU);
1133
1134 /* GVMM: */
1135 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1136 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1137 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1138 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1139 pGVM->gvmm.s.fDoneVMMR0Init = false;
1140 pGVM->gvmm.s.fDoneVMMR0Term = false;
1141
1142 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1143 {
1144 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1145 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1146 }
1147 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1148
1149 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1150 {
1151 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1152 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1153 }
1154
1155 /*
1156 * Per virtual CPU.
1157 */
1158 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1159 {
1160 pGVM->aCpus[i].idCpu = i;
1161 pGVM->aCpus[i].idCpuUnsafe = i;
1162 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1163 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1164 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1165 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1166 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1167 pGVM->aCpus[i].pGVM = pGVM;
1168 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1169 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1170 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1171 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1172 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1173 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1174 }
1175}
1176
1177
1178/**
1179 * Does the VM initialization.
1180 *
1181 * @returns VBox status code.
1182 * @param pGVM The global (ring-0) VM structure.
1183 */
1184GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1185{
1186 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1187
1188 int rc = VERR_INTERNAL_ERROR_3;
1189 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1190 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1191 {
1192 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1193 {
1194 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1195 if (RT_FAILURE(rc))
1196 {
1197 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1198 break;
1199 }
1200 }
1201 }
1202 else
1203 rc = VERR_WRONG_ORDER;
1204
1205 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1206 return rc;
1207}
1208
1209
1210/**
1211 * Indicates that we're done with the ring-0 initialization
1212 * of the VM.
1213 *
1214 * @param pGVM The global (ring-0) VM structure.
1215 * @thread EMT(0)
1216 */
1217GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1218{
1219 /* Set the indicator. */
1220 pGVM->gvmm.s.fDoneVMMR0Init = true;
1221}
1222
1223
1224/**
1225 * Indicates that we're doing the ring-0 termination of the VM.
1226 *
1227 * @returns true if termination hasn't been done already, false if it has.
1228 * @param pGVM Pointer to the global VM structure. Optional.
1229 * @thread EMT(0) or session cleanup thread.
1230 */
1231GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1232{
1233 /* Validate the VM structure, state and handle. */
1234 AssertPtrReturn(pGVM, false);
1235
1236 /* Set the indicator. */
1237 if (pGVM->gvmm.s.fDoneVMMR0Term)
1238 return false;
1239 pGVM->gvmm.s.fDoneVMMR0Term = true;
1240 return true;
1241}
1242
1243
1244/**
1245 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1246 *
1247 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1248 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1249 * would've been nice if the caller were actually the EMT thread or if we somehow
1250 * could've associated the calling thread with the VM up front.
1251 *
1252 * @returns VBox status code.
1253 * @param pGVM The global (ring-0) VM structure.
1254 *
1255 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1256 */
1257GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1258{
1259 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1260 PGVMM pGVMM;
1261 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1262
1263 /*
1264 * Validate the VM structure, state and caller.
1265 */
1266 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1267 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1268 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1269 VERR_WRONG_ORDER);
1270
1271 uint32_t hGVM = pGVM->hSelf;
1272 ASMCompilerBarrier();
1273 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1274 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1275
1276 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1277 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1278
1279 RTPROCESS ProcId = RTProcSelf();
1280 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1281 AssertReturn( ( pHandle->hEMT0 == hSelf
1282 && pHandle->ProcId == ProcId)
1283 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1284
1285 /*
1286 * Lookup the handle and destroy the object.
1287 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1288 * object, we take some precautions against racing callers just in case...
1289 */
1290 int rc = gvmmR0CreateDestroyLock(pGVMM);
1291 AssertRC(rc);
1292
1293 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1294 if ( pHandle->pGVM == pGVM
1295 && ( ( pHandle->hEMT0 == hSelf
1296 && pHandle->ProcId == ProcId)
1297 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1298 && RT_VALID_PTR(pHandle->pvObj)
1299 && RT_VALID_PTR(pHandle->pSession)
1300 && RT_VALID_PTR(pHandle->pGVM)
1301 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1302 {
1303 /* Check that other EMTs have deregistered. */
1304 uint32_t cNotDeregistered = 0;
1305 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1306 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1307 if (cNotDeregistered == 0)
1308 {
1309 /* Grab the object pointer. */
1310 void *pvObj = pHandle->pvObj;
1311 pHandle->pvObj = NULL;
1312 gvmmR0CreateDestroyUnlock(pGVMM);
1313
1314 SUPR0ObjRelease(pvObj, pHandle->pSession);
1315 }
1316 else
1317 {
1318 gvmmR0CreateDestroyUnlock(pGVMM);
1319 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1320 }
1321 }
1322 else
1323 {
1324 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1325 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1326 gvmmR0CreateDestroyUnlock(pGVMM);
1327 rc = VERR_GVMM_IPE_2;
1328 }
1329
1330 return rc;
1331}
1332
1333
1334/**
1335 * Performs VM cleanup task as part of object destruction.
1336 *
1337 * @param pGVM The GVM pointer.
1338 */
1339static void gvmmR0CleanupVM(PGVM pGVM)
1340{
1341 if ( pGVM->gvmm.s.fDoneVMMR0Init
1342 && !pGVM->gvmm.s.fDoneVMMR0Term)
1343 {
1344 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1345 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1346 {
1347 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1348 VMMR0TermVM(pGVM, NIL_VMCPUID);
1349 }
1350 else
1351 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1352 }
1353
1354 GMMR0CleanupVM(pGVM);
1355#ifdef VBOX_WITH_NEM_R0
1356 NEMR0CleanupVM(pGVM);
1357#endif
1358 PDMR0CleanupVM(pGVM);
1359 IOMR0CleanupVM(pGVM);
1360 DBGFR0CleanupVM(pGVM);
1361 PGMR0CleanupVM(pGVM);
1362 TMR0CleanupVM(pGVM);
1363 VMMR0CleanupVM(pGVM);
1364}
1365
1366
1367/**
1368 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1369 *
1370 * pvUser1 is the GVM instance pointer.
1371 * pvUser2 is the handle pointer.
1372 */
1373static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1374{
1375 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1376
1377 NOREF(pvObj);
1378
1379 /*
1380 * Some quick, paranoid, input validation.
1381 */
1382 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1383 AssertPtr(pHandle);
1384 PGVMM pGVMM = (PGVMM)pvUser1;
1385 Assert(pGVMM == g_pGVMM);
1386 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1387 if ( !iHandle
1388 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1389 || iHandle != pHandle->iSelf)
1390 {
1391 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1392 return;
1393 }
1394
1395 int rc = gvmmR0CreateDestroyLock(pGVMM);
1396 AssertRC(rc);
1397 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1398 AssertRC(rc);
1399
1400 /*
1401 * This is a tad slow but a doubly linked list is too much hassle.
1402 */
1403 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1404 {
1405 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1406 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1407 gvmmR0CreateDestroyUnlock(pGVMM);
1408 return;
1409 }
1410
1411 if (pGVMM->iUsedHead == iHandle)
1412 pGVMM->iUsedHead = pHandle->iNext;
1413 else
1414 {
1415 uint16_t iPrev = pGVMM->iUsedHead;
1416 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1417 while (iPrev)
1418 {
1419 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1420 {
1421 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1422 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1423 gvmmR0CreateDestroyUnlock(pGVMM);
1424 return;
1425 }
1426 if (RT_UNLIKELY(c-- <= 0))
1427 {
1428 iPrev = 0;
1429 break;
1430 }
1431
1432 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1433 break;
1434 iPrev = pGVMM->aHandles[iPrev].iNext;
1435 }
1436 if (!iPrev)
1437 {
1438 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1439 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1440 gvmmR0CreateDestroyUnlock(pGVMM);
1441 return;
1442 }
1443
1444 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1445 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1446 }
1447 pHandle->iNext = 0;
1448 pGVMM->cVMs--;
1449
1450 /*
1451 * Do the global cleanup round.
1452 */
1453 PGVM pGVM = pHandle->pGVM;
1454 if ( RT_VALID_PTR(pGVM)
1455 && pGVM->u32Magic == GVM_MAGIC)
1456 {
1457 pGVMM->cEMTs -= pGVM->cCpus;
1458
1459 if (pGVM->pSession)
1460 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1461
1462 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1463
1464 gvmmR0CleanupVM(pGVM);
1465
1466 /*
1467 * Do the GVMM cleanup - must be done last.
1468 */
1469 /* The VM and VM pages mappings/allocations. */
1470 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1471 {
1472 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1473 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1474 }
1475
1476 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1477 {
1478 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1479 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1480 }
1481
1482 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1483 {
1484 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1485 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1486 }
1487
1488 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1489 {
1490 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1491 {
1492 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1493 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1494 }
1495 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1496 {
1497 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1498 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1499 }
1500#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1501 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1502 {
1503 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1504 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1505 }
1506#endif
1507 }
1508
1509 /* the GVM structure itself. */
1510 pGVM->u32Magic |= UINT32_C(0x80000000);
1511 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1512 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1513 pGVM = NULL;
1514
1515 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1516 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1517 AssertRC(rc);
1518 }
1519 /* else: GVMMR0CreateVM cleanup. */
1520
1521 /*
1522 * Free the handle.
1523 */
1524 pHandle->iNext = pGVMM->iFreeHead;
1525 pGVMM->iFreeHead = iHandle;
1526 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1527 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1528 ASMAtomicWriteNullPtr(&pHandle->pSession);
1529 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1530 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1531
1532 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1533 gvmmR0CreateDestroyUnlock(pGVMM);
1534 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1535}
1536
1537
1538/**
1539 * Registers the calling thread as the EMT of a Virtual CPU.
1540 *
1541 * Note that VCPU 0 is automatically registered during VM creation.
1542 *
1543 * @returns VBox status code
1544 * @param pGVM The global (ring-0) VM structure.
1545 * @param idCpu VCPU id to register the current thread as.
1546 */
1547GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1548{
1549 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1550
1551 /*
1552 * Validate the VM structure, state and handle.
1553 */
1554 PGVMM pGVMM;
1555 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1556 if (RT_SUCCESS(rc))
1557 {
1558 if (idCpu < pGVM->cCpus)
1559 {
1560 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1561 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1562
1563 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1564
1565 /* Check that the EMT isn't already assigned to a thread. */
1566 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1567 {
1568 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1569
1570 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1571 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1572 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1573 if (RT_SUCCESS(rc))
1574 {
1575 /*
1576 * Do the assignment, then try setup the hook. Undo if that fails.
1577 */
1578 unsigned cCollisions = 0;
1579 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1580 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1581 {
1582 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1583 do
1584 {
1585 cCollisions++;
1586 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1587 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1588 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1589 }
1590 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1591 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1592
1593 pGVCpu->hNativeThreadR0 = hNativeSelf;
1594 pGVCpu->hEMT = hNativeSelf;
1595 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1596 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1597
1598 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1599 if (RT_SUCCESS(rc))
1600 {
1601 CPUMR0RegisterVCpuThread(pGVCpu);
1602
1603#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1604 /*
1605 * Create the high resolution wake-up timer, ignore failures.
1606 */
1607 if (RTTimerCanDoHighResolution())
1608 {
1609 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1610 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1611 if (RT_FAILURE(rc2))
1612 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1613 }
1614#endif
1615 }
1616 else
1617 {
1618 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1619 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1620 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1621 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1622 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1623 }
1624 }
1625 }
1626 else
1627 rc = VERR_ACCESS_DENIED;
1628
1629 gvmmR0CreateDestroyUnlock(pGVMM);
1630 }
1631 else
1632 rc = VERR_INVALID_CPU_ID;
1633 }
1634 return rc;
1635}
1636
1637
1638/**
1639 * Deregisters the calling thread as the EMT of a Virtual CPU.
1640 *
1641 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1642 *
1643 * @returns VBox status code
1644 * @param pGVM The global (ring-0) VM structure.
1645 * @param idCpu VCPU id to deregister the current thread as.
1646 */
1647GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1648{
1649 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1650
1651 /*
1652 * Validate the VM structure, state and handle.
1653 */
1654 PGVMM pGVMM;
1655 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1656 if (RT_SUCCESS(rc))
1657 {
1658 /*
1659 * Take the destruction lock and recheck the handle state to
1660 * prevent racing GVMMR0DestroyVM.
1661 */
1662 gvmmR0CreateDestroyLock(pGVMM);
1663
1664 uint32_t hSelf = pGVM->hSelf;
1665 ASMCompilerBarrier();
1666 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1667 && pGVMM->aHandles[hSelf].pvObj != NULL
1668 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1669 {
1670 /*
1671 * Do per-EMT cleanups.
1672 */
1673 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1674
1675 /*
1676 * Invalidate hEMT. We don't use NIL here as that would allow
1677 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1678 */
1679 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1680 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1681
1682 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1683 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1684 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1685 }
1686
1687 gvmmR0CreateDestroyUnlock(pGVMM);
1688 }
1689 return rc;
1690}
1691
1692
1693/**
1694 * Registers the caller as a given worker thread.
1695 *
1696 * This enables the thread to operate on critical sections in ring-0.
1697 *
1698 * @returns VBox status code.
1699 * @param pGVM The global (ring-0) VM structure.
1700 * @param enmWorker The worker thread this is supposed to be.
1701 * @param hNativeSelfR3 The ring-3 native self of the caller.
1702 */
1703GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1704{
1705 /*
1706 * Validate input.
1707 */
1708 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1709 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1710 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1711 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1712 PGVMM pGVMM;
1713 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1714 AssertRCReturn(rc, rc);
1715 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1716
1717 /*
1718 * Grab the big lock and check the VM state again.
1719 */
1720 uint32_t const hSelf = pGVM->hSelf;
1721 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1722 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1723 && pGVMM->aHandles[hSelf].pvObj != NULL
1724 && pGVMM->aHandles[hSelf].pGVM == pGVM
1725 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1726 {
1727 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1728 {
1729 /*
1730 * Check that the thread isn't an EMT or serving in some other worker capacity.
1731 */
1732 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1733 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1734 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1735 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1736 rc = VERR_INVALID_PARAMETER);
1737 if (RT_SUCCESS(rc))
1738 {
1739 /*
1740 * Do the registration.
1741 */
1742 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1743 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1744 {
1745 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1746 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1747 rc = VINF_SUCCESS;
1748 }
1749 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1750 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1751 rc = VERR_ALREADY_EXISTS;
1752 else
1753 rc = VERR_RESOURCE_BUSY;
1754 }
1755 }
1756 else
1757 rc = VERR_VM_INVALID_VM_STATE;
1758 }
1759 else
1760 rc = VERR_INVALID_VM_HANDLE;
1761 gvmmR0CreateDestroyUnlock(pGVMM);
1762 return rc;
1763}
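
/* [Editorial addition] Registration sketch, not part of the original source.
 * 'enmWorker' stands for any valid GVMMWORKERTHREAD value strictly between
 * GVMMWORKERTHREAD_INVALID and GVMMWORKERTHREAD_END, and 'hNativeSelfR3' is
 * the ring-3 native thread handle passed down from ring-3.
 *
 *     int rc = GVMMR0RegisterWorkerThread(pGVM, enmWorker, hNativeSelfR3);
 *     if (rc == VERR_ALREADY_EXISTS) // same thread registering the same slot twice
 *         rc = VINF_SUCCESS;
 *     AssertLogRelRCReturn(rc, rc);
 */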
1764
1765
1766/**
1767 * Deregisters a worker thread (the caller).
1768 *
1769 * The worker thread cannot be re-created and re-registered, instead the given
1770 * @a enmWorker slot becomes invalid.
1771 *
1772 * @returns VBox status code.
1773 * @param pGVM The global (ring-0) VM structure.
1774 * @param enmWorker The worker thread this is supposed to be.
1775 */
1776GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1777{
1778 /*
1779 * Validate input.
1780 */
1781 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1782 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1783 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1784 PGVMM pGVMM;
1785 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1786 AssertRCReturn(rc, rc);
1787
1788 /*
1789 * Grab the big lock and check the VM state again.
1790 */
1791 uint32_t const hSelf = pGVM->hSelf;
1792 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1793 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1794 && pGVMM->aHandles[hSelf].pvObj != NULL
1795 && pGVMM->aHandles[hSelf].pGVM == pGVM
1796 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1797 {
1798 /*
1799 * Do the deregistration.
1800 * This will prevent any other thread from registering as this worker later.
1801 */
1802 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1803 {
1804 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1805 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1806 rc = VINF_SUCCESS;
1807 }
1808 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1809 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1810 rc = VINF_SUCCESS;
1811 else
1812 rc = VERR_NOT_OWNER;
1813 }
1814 else
1815 rc = VERR_INVALID_VM_HANDLE;
1816 gvmmR0CreateDestroyUnlock(pGVMM);
1817 return rc;
1818}
1819
1820
1821/**
1822 * Lookup a GVM structure by its handle.
1823 *
1824 * @returns The GVM pointer on success, NULL on failure.
1825 * @param hGVM The global VM handle. Asserts on bad handle.
1826 */
1827GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1828{
1829 PGVMM pGVMM;
1830 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1831
1832 /*
1833 * Validate.
1834 */
1835 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1836 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1837
1838 /*
1839 * Look it up.
1840 */
1841 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1842 AssertPtrReturn(pHandle->pvObj, NULL);
1843 PGVM pGVM = pHandle->pGVM;
1844 AssertPtrReturn(pGVM, NULL);
1845
1846 return pGVM;
1847}
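
/* [Editorial addition] Usage sketch, not part of the original source. Looks a
 * VM up by its global handle; NULL is returned for stale or invalid handles,
 * so the caller must check the result.
 *
 *     PGVM pGVM = GVMMR0ByHandle(hGVM);
 *     if (!pGVM)
 *         return VERR_INVALID_VM_HANDLE;
 */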
1848
1849
1850/**
1851 * Check that the given GVM and VM structures match up.
1852 *
1853 * The calling thread must be in the same process as the VM. All current lookups
1854 * are by threads inside the same process, so this will not be an issue.
1855 *
1856 * @returns VBox status code.
1857 * @param pGVM The global (ring-0) VM structure.
1858 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1859 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1860 * shared mode when requested.
1861 *
1862 * Be very careful if not taking the lock as it's
1863 * possible that the VM will disappear then!
1864 *
1865 * @remark This will not assert on an invalid pGVM but try to return silently.
1866 */
1867static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1868{
1869 /*
1870 * Check the pointers.
1871 */
1872 int rc;
1873 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1874 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1875 {
1876 /*
1877 * Get the pGVMM instance and check the VM handle.
1878 */
1879 PGVMM pGVMM;
1880 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1881
1882 uint16_t hGVM = pGVM->hSelf;
1883 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1884 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1885 {
1886 RTPROCESS const pidSelf = RTProcSelf();
1887 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1888 if (fTakeUsedLock)
1889 {
1890 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1891 AssertRCReturn(rc, rc);
1892 }
1893
1894 if (RT_LIKELY( pHandle->pGVM == pGVM
1895 && pHandle->ProcId == pidSelf
1896 && RT_VALID_PTR(pHandle->pvObj)))
1897 {
1898 /*
1899 * Some more VM data consistency checks.
1900 */
1901 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1902 && pGVM->hSelfUnsafe == hGVM
1903 && pGVM->pSelf == pGVM))
1904 {
1905 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1906 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1907 {
1908 *ppGVMM = pGVMM;
1909 return VINF_SUCCESS;
1910 }
1911 rc = VERR_INCONSISTENT_VM_HANDLE;
1912 }
1913 else
1914 rc = VERR_INCONSISTENT_VM_HANDLE;
1915 }
1916 else
1917 rc = VERR_INVALID_VM_HANDLE;
1918
1919 if (fTakeUsedLock)
1920 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1921 }
1922 else
1923 rc = VERR_INVALID_VM_HANDLE;
1924 }
1925 else
1926 rc = VERR_INVALID_POINTER;
1927 return rc;
1928}
1929
1930
1931/**
1932 * Validates a GVM/VM pair.
1933 *
1934 * @returns VBox status code.
1935 * @param pGVM The global (ring-0) VM structure.
1936 */
1937GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1938{
1939 PGVMM pGVMM;
1940 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1941}
1942
1943
1944/**
1945 * Check that the given GVM and VM structures match up.
1946 *
1947 * The calling thread must be in the same process as the VM. All current lookups
1948 * are by threads inside the same process, so this will not be an issue.
1949 *
1950 * @returns VBox status code.
1951 * @param pGVM The global (ring-0) VM structure.
1952 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1953 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1954 * @thread EMT
1955 *
1956 * @remarks This will assert in all failure paths.
1957 */
1958static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1959{
1960 /*
1961 * Check the pointers.
1962 */
1963 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1964 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1965
1966 /*
1967 * Get the pGVMM instance and check the VM handle.
1968 */
1969 PGVMM pGVMM;
1970 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1971
1972 uint16_t hGVM = pGVM->hSelf;
1973 ASMCompilerBarrier();
1974 AssertReturn( hGVM != NIL_GVM_HANDLE
1975 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1976
1977 RTPROCESS const pidSelf = RTProcSelf();
1978 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1979 AssertReturn( pHandle->pGVM == pGVM
1980 && pHandle->ProcId == pidSelf
1981 && RT_VALID_PTR(pHandle->pvObj),
1982 VERR_INVALID_HANDLE);
1983
1984 /*
1985 * Check the EMT claim.
1986 */
1987 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1988 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1989 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1990
1991 /*
1992 * Some more VM data consistency checks.
1993 */
1994 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1995 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1996 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1997 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1998
1999 *ppGVMM = pGVMM;
2000 return VINF_SUCCESS;
2001}
2002
2003
2004/**
2005 * Validates a GVM/EMT pair.
2006 *
2007 * @returns VBox status code.
2008 * @param pGVM The global (ring-0) VM structure.
2009 * @param idCpu The Virtual CPU ID of the calling EMT.
2010 * @thread EMT(idCpu)
2011 */
2012GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2013{
2014 PGVMM pGVMM;
2015 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2016}
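
/* [Editorial addition] Typical entry-point validation sketch, not part of the
 * original source. A ring-0 operation that must run on EMT(idCpu) would start
 * roughly like this before touching per-VCPU state:
 *
 *     int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
 *     if (RT_FAILURE(rc))
 *         return rc;
 *     PGVMCPU pGVCpu = &pGVM->aCpus[idCpu]; // safe: idCpu and the caller were just validated
 */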
2017
2018
2019/**
2020 * Looks up the VM belonging to the specified EMT thread.
2021 *
2022 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2023 * unnecessary kernel panics when the EMT thread hits an assertion. The
2024 * caller may or may not be an EMT thread.
2025 *
2026 * @returns Pointer to the VM on success, NULL on failure.
2027 * @param hEMT The native thread handle of the EMT.
2028 * NIL_RTNATIVETHREAD means the current thread
2029 */
2030GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2031{
2032 /*
2033 * No Assertions here as we're usually called in a AssertMsgN or
2034 * RTAssert* context.
2035 */
2036 PGVMM pGVMM = g_pGVMM;
2037 if ( !RT_VALID_PTR(pGVMM)
2038 || pGVMM->u32Magic != GVMM_MAGIC)
2039 return NULL;
2040
2041 if (hEMT == NIL_RTNATIVETHREAD)
2042 hEMT = RTThreadNativeSelf();
2043 RTPROCESS ProcId = RTProcSelf();
2044
2045 /*
2046 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2047 */
2048/** @todo introduce some pid hash table here, please. */
2049 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2050 {
2051 if ( pGVMM->aHandles[i].iSelf == i
2052 && pGVMM->aHandles[i].ProcId == ProcId
2053 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2054 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2055 {
2056 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2057 return pGVMM->aHandles[i].pGVM;
2058
2059 /* This is fairly safe with the current process per VM approach. */
2060 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2061 VMCPUID const cCpus = pGVM->cCpus;
2062 ASMCompilerBarrier();
2063 if ( cCpus < 1
2064 || cCpus > VMM_MAX_CPU_COUNT)
2065 continue;
2066 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2067 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2068 return pGVMM->aHandles[i].pGVM;
2069 }
2070 }
2071 return NULL;
2072}
2073
2074
2075/**
2076 * Looks up the GVMCPU belonging to the specified EMT thread.
2077 *
2078 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2079 * unnecessary kernel panics when the EMT thread hits an assertion. The
2080 * caller may or may not be an EMT thread.
2081 *
2082 * @returns Pointer to the VM on success, NULL on failure.
2083 * @param hEMT The native thread handle of the EMT.
2084 * NIL_RTNATIVETHREAD means the current thread
2085 */
2086GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2087{
2088 /*
2089 * No Assertions here as we're usually called in a AssertMsgN,
2090 * RTAssert*, Log and LogRel contexts.
2091 */
2092 PGVMM pGVMM = g_pGVMM;
2093 if ( !RT_VALID_PTR(pGVMM)
2094 || pGVMM->u32Magic != GVMM_MAGIC)
2095 return NULL;
2096
2097 if (hEMT == NIL_RTNATIVETHREAD)
2098 hEMT = RTThreadNativeSelf();
2099 RTPROCESS ProcId = RTProcSelf();
2100
2101 /*
2102 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2103 */
2104/** @todo introduce some pid hash table here, please. */
2105 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2106 {
2107 if ( pGVMM->aHandles[i].iSelf == i
2108 && pGVMM->aHandles[i].ProcId == ProcId
2109 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2110 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2111 {
2112 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2113 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2114 return &pGVM->aCpus[0];
2115
2116 /* This is fairly safe with the current process per VM approach. */
2117 VMCPUID const cCpus = pGVM->cCpus;
2118 ASMCompilerBarrier();
2119 ASMCompilerBarrier();
2120 if ( cCpus < 1
2121 || cCpus > VMM_MAX_CPU_COUNT)
2122 continue;
2123 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2124 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2125 return &pGVM->aCpus[idCpu];
2126 }
2127 }
2128 return NULL;
2129}
2130
2131
2132/**
2133 * Get the GVMCPU structure for the given EMT.
2134 *
2135 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2136 * @param pGVM The global (ring-0) VM structure.
2137 * @param hEMT The native thread handle of the EMT.
2138 * NIL_RTNATIVETHREAD means the current thread
2139 */
2140GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2141{
2142 /*
2143 * Validate & adjust input.
2144 */
2145 AssertPtr(pGVM);
2146 Assert(pGVM->u32Magic == GVM_MAGIC);
2147 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2148 {
2149 hEMT = RTThreadNativeSelf();
2150 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2151 }
2152
2153 /*
2154 * Find the matching hash table entry.
2155 * See similar code in GVMMR0GetRing3ThreadForSelf.
2156 */
2157 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2158 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2159 { /* likely */ }
2160 else
2161 {
2162#ifdef VBOX_STRICT
2163 unsigned cCollisions = 0;
2164#endif
2165 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2166 for (;;)
2167 {
2168 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2169 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2170 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2171 break;
2172 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2173 {
2174#ifdef VBOX_STRICT
2175 uint32_t idxCpu = pGVM->cCpus;
2176 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2177 while (idxCpu-- > 0)
2178 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2179#endif
2180 return NULL;
2181 }
2182 }
2183 }
2184
2185 /*
2186 * Validate the VCpu number and translate it into a pointer.
2187 */
2188 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2189 AssertReturn(idCpu < pGVM->cCpus, NULL);
2190 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2191 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2192 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2193 return pGVCpu;
2194}
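
/* [Editorial addition] Usage sketch, not part of the original source. Resolves
 * the calling thread's own GVMCPU via the EMT hash table; a NULL return means
 * the caller is not a registered EMT of this VM.
 *
 *     PGVMCPU pGVCpu = GVMMR0GetGVCpuByGVMandEMT(pGVM, NIL_RTNATIVETHREAD);
 *     AssertReturn(pGVCpu, VERR_NOT_OWNER);
 */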
2195
2196
2197/**
2198 * Get the native ring-3 thread handle for the caller.
2199 *
2200 * This works for EMTs and registered workers.
2201 *
2202 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2203 * @param pGVM The global (ring-0) VM structure.
2204 */
2205GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2206{
2207 /*
2208 * Validate input.
2209 */
2210 AssertPtr(pGVM);
2211 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2212 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2213 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2214
2215 /*
2216 * Find the matching hash table entry.
2217 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2218 */
2219 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2220 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2221 { /* likely */ }
2222 else
2223 {
2224#ifdef VBOX_STRICT
2225 unsigned cCollisions = 0;
2226#endif
2227 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2228 for (;;)
2229 {
2230 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2231 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2232 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2233 break;
2234 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2235 {
2236#ifdef VBOX_STRICT
2237 uint32_t idxCpu = pGVM->cCpus;
2238 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2239 while (idxCpu-- > 0)
2240 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2241#endif
2242
2243 /*
2244 * Not an EMT, so see if it's a worker thread.
2245 */
2246 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2247 while (--idx > GVMMWORKERTHREAD_INVALID)
2248 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2249 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2250
2251 return NIL_RTNATIVETHREAD;
2252 }
2253 }
2254 }
2255
2256 /*
2257 * Validate the VCpu number and translate it into a pointer.
2258 */
2259 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2260 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2261 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2262 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2263 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2264 return pGVCpu->hNativeThread;
2265}
2266
2267
2268/**
2269 * Converts a pointer with the GVM structure to a host physical address.
2270 *
2271 * @returns Host physical address.
2272 * @param pGVM The global (ring-0) VM structure.
2273 * @param pv The address to convert.
2274 * @thread EMT
2275 */
2276GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2277{
2278 AssertPtr(pGVM);
2279 Assert(pGVM->u32Magic == GVM_MAGIC);
2280 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2281 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2282 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2283}
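
/* [Editorial addition] Conversion sketch, not part of the original source. The
 * pointer must lie within the GVM/GVMCPU allocation backing pGVM, e.g. a
 * per-VCPU member:
 *
 *     RTHCPHYS const HCPhys = GVMMR0ConvertGVMPtr2HCPhys(pGVM, &pGVM->aCpus[idCpu]);
 */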
2284
2285
2286/**
2287 * This will wake up expired and soon-to-be-expired VMs.
2288 *
2289 * @returns Number of VMs that have been woken up.
2290 * @param pGVMM Pointer to the GVMM instance data.
2291 * @param u64Now The current time.
2292 */
2293static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2294{
2295 /*
2296 * Skip this if it has been disabled because of high resolution wake-ups or
2297 * by the user.
2298 */
2299 if (!pGVMM->fDoEarlyWakeUps)
2300 return 0;
2301
2302/** @todo Rewrite this algorithm. See performance defect XYZ. */
2303
2304 /*
2305 * A cheap optimization to stop wasting so much time here on big setups.
2306 */
2307 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2308 if ( pGVMM->cHaltedEMTs == 0
2309 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2310 return 0;
2311
2312 /*
2313 * Only one thread doing this at a time.
2314 */
2315 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2316 return 0;
2317
2318 /*
2319 * The first pass will wake up VMs which have actually expired
2320 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2321 */
2322 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2323 uint64_t u64Min = UINT64_MAX;
2324 unsigned cWoken = 0;
2325 unsigned cHalted = 0;
2326 unsigned cTodo2nd = 0;
2327 unsigned cTodo3rd = 0;
2328 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2329 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2330 i = pGVMM->aHandles[i].iNext)
2331 {
2332 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2333 if ( RT_VALID_PTR(pCurGVM)
2334 && pCurGVM->u32Magic == GVM_MAGIC)
2335 {
2336 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2337 {
2338 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2339 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2340 if (u64)
2341 {
2342 if (u64 <= u64Now)
2343 {
2344 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2345 {
2346 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2347 AssertRC(rc);
2348 cWoken++;
2349 }
2350 }
2351 else
2352 {
2353 cHalted++;
2354 if (u64 <= uNsEarlyWakeUp1)
2355 cTodo2nd++;
2356 else if (u64 <= uNsEarlyWakeUp2)
2357 cTodo3rd++;
2358 else if (u64 < u64Min)
2359 u64Min = u64; /* track the earliest remaining expiry */
2360 }
2361 }
2362 }
2363 }
2364 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2365 }
2366
2367 if (cTodo2nd)
2368 {
2369 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2370 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2371 i = pGVMM->aHandles[i].iNext)
2372 {
2373 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2374 if ( RT_VALID_PTR(pCurGVM)
2375 && pCurGVM->u32Magic == GVM_MAGIC)
2376 {
2377 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2378 {
2379 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2380 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2381 if ( u64
2382 && u64 <= uNsEarlyWakeUp1)
2383 {
2384 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2385 {
2386 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2387 AssertRC(rc);
2388 cWoken++;
2389 }
2390 }
2391 }
2392 }
2393 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2394 }
2395 }
2396
2397 if (cTodo3rd)
2398 {
2399 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2400 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2401 i = pGVMM->aHandles[i].iNext)
2402 {
2403 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2404 if ( RT_VALID_PTR(pCurGVM)
2405 && pCurGVM->u32Magic == GVM_MAGIC)
2406 {
2407 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2408 {
2409 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2410 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2411 if ( u64
2412 && u64 <= uNsEarlyWakeUp2)
2413 {
2414 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2415 {
2416 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2417 AssertRC(rc);
2418 cWoken++;
2419 }
2420 }
2421 }
2422 }
2423 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2424 }
2425 }
2426
2427 /*
2428 * Set the minimum value.
2429 */
2430 pGVMM->uNsNextEmtWakeup = u64Min;
2431
2432 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2433 return cWoken;
2434}
2435
2436
2437#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2438/**
2439 * Timer callback for the EMT high-resolution wake-up timer.
2440 *
2441 * @param pTimer The timer handle.
2442 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2443 * @param iTick The current tick.
2444 */
2445static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2446{
2447 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2448 NOREF(pTimer); NOREF(iTick);
2449
2450 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2451 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2452 {
2453 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2454 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2455 }
2456 else
2457 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2458
2459 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2460 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2461}
2462#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2463
2464
2465/**
2466 * Halt the EMT thread.
2467 *
2468 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2469 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2470 * @param pGVM The global (ring-0) VM structure.
2471 * @param pGVCpu The global (ring-0) CPU structure of the calling
2472 * EMT.
2473 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2474 * @thread EMT(pGVCpu).
2475 */
2476GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2477{
2478 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2479 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2480 PGVMM pGVMM;
2481 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2482
2483 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2484 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2485
2486 /*
2487 * If we're doing early wake-ups, we must take the UsedList lock before we
2488 * start querying the current time.
2489 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2490 */
2491 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2492 if (fDoEarlyWakeUps)
2493 {
2494 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2495 }
2496
2497 /* GIP hack: We may frequently be sleeping for short intervals where the
2498 difference between GIP and system time matters on systems with high resolution
2499 system time. So, convert the input from GIP to System time in that case. */
2500 Assert(ASMGetFlags() & X86_EFL_IF);
2501 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2502 const uint64_t u64NowGip = RTTimeNanoTS();
2503
2504 if (fDoEarlyWakeUps)
2505 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2506
2507 /*
2508 * Go to sleep if we must...
2509 * Cap the sleep time to 1 second to be on the safe side.
2510 */
2511 int rc;
2512 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2513 if ( u64NowGip < u64ExpireGipTime
2514 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2515 ? pGVMM->nsMinSleepCompany
2516 : pGVMM->nsMinSleepAlone)
2517#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2518 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2519#endif
2520 )
2521 )
2522 {
2523 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2524 if (cNsInterval > RT_NS_1SEC)
2525 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2526 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2527 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2528 if (fDoEarlyWakeUps)
2529 {
2530 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2531 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2532 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2533 }
2534
2535#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2536 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2537 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2538 {
2539 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2540 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2541 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2542 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2543 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2544 }
2545#endif
2546
2547 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2548 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2549 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2550
2551 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2552 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2553
2554#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2555 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2556 { /* likely */ }
2557 else
2558 {
2559 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2560 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2561 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2562 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2563 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2564 }
2565#endif
2566
2567 /* Reset the semaphore to try prevent a few false wake-ups. */
2568 if (rc == VINF_SUCCESS)
2569 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2570 else if (rc == VERR_TIMEOUT)
2571 {
2572 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2573 rc = VINF_SUCCESS;
2574 }
2575 }
2576 else
2577 {
2578 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2579 if (fDoEarlyWakeUps)
2580 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2581 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2582 rc = VINF_SUCCESS;
2583 }
2584
2585 return rc;
2586}
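
/* [Editorial addition] Halt sketch, not part of the original source. The expiry
 * is an absolute GIP timestamp, so a relative timeout is expressed by adding it
 * to the current GIP time (interrupts must still be enabled at this point):
 *
 *     uint64_t const u64Expire = RTTimeNanoTS() + RT_NS_1MS; // block for at most ~1 ms
 *     int rc = GVMMR0SchedHalt(pGVM, pGVCpu, u64Expire);
 *     if (rc == VERR_INTERRUPTED)
 *     {
 *         // a signal is pending for the thread; let the EMT return to ring-3
 *     }
 */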
2587
2588
2589/**
2590 * Halt the EMT thread.
2591 *
2592 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2593 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2594 * @param pGVM The global (ring-0) VM structure.
2595 * @param idCpu The Virtual CPU ID of the calling EMT.
2596 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2597 * @thread EMT(idCpu).
2598 */
2599GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2600{
2601 PGVMM pGVMM;
2602 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2603 if (RT_SUCCESS(rc))
2604 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2605 return rc;
2606}
2607
2608
2609
2610/**
2611 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2612 * a sleeping EMT.
2613 *
2614 * @retval VINF_SUCCESS if successfully woken up.
2615 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2616 *
2617 * @param pGVM The global (ring-0) VM structure.
2618 * @param pGVCpu The global (ring-0) VCPU structure.
2619 */
2620DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2621{
2622 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2623
2624 /*
2625 * Signal the semaphore regardless of whether it's currently blocked on it.
2626 *
2627 * The reason for this is that there is absolutely no way we can be 100%
2628 * certain that it isn't *about* to go to sleep on it and just got
2629 * delayed a bit en route. So, we will always signal the semaphore when
2630 * it is flagged as halted in the VMM.
2631 */
2632/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2633 int rc;
2634 if (pGVCpu->gvmm.s.u64HaltExpire)
2635 {
2636 rc = VINF_SUCCESS;
2637 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2638 }
2639 else
2640 {
2641 rc = VINF_GVM_NOT_BLOCKED;
2642 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2643 }
2644
2645 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2646 AssertRC(rc2);
2647
2648 return rc;
2649}
2650
2651
2652/**
2653 * Wakes up the halted EMT thread so it can service a pending request.
2654 *
2655 * @returns VBox status code.
2656 * @retval VINF_SUCCESS if successfully woken up.
2657 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2658 *
2659 * @param pGVM The global (ring-0) VM structure.
2660 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2661 * @param fTakeUsedLock Take the used lock or not
2662 * @thread Any but EMT(idCpu).
2663 */
2664GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2665{
2666 /*
2667 * Validate input and take the UsedLock.
2668 */
2669 PGVMM pGVMM;
2670 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2671 if (RT_SUCCESS(rc))
2672 {
2673 if (idCpu < pGVM->cCpus)
2674 {
2675 /*
2676 * Do the actual job.
2677 */
2678 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2679
2680 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2681 {
2682 /*
2683 * While we're here, do a round of scheduling.
2684 */
2685 Assert(ASMGetFlags() & X86_EFL_IF);
2686 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2687 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2688 }
2689 }
2690 else
2691 rc = VERR_INVALID_CPU_ID;
2692
2693 if (fTakeUsedLock)
2694 {
2695 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2696 AssertRC(rc2);
2697 }
2698 }
2699
2700 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2701 return rc;
2702}
2703
2704
2705/**
2706 * Wakes up the halted EMT thread so it can service a pending request.
2707 *
2708 * @returns VBox status code.
2709 * @retval VINF_SUCCESS if successfully woken up.
2710 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2711 *
2712 * @param pGVM The global (ring-0) VM structure.
2713 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2714 * @thread Any but EMT(idCpu).
2715 */
2716GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2717{
2718 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2719}
2720
2721
2722/**
2723 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2724 * parameter and no used locking.
2725 *
2726 * @returns VBox status code.
2727 * @retval VINF_SUCCESS if successfully woken up.
2728 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2729 *
2730 * @param pGVM The global (ring-0) VM structure.
2731 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2732 * @thread Any but EMT(idCpu).
2733 * @deprecated Don't use in new code if possible! Use the GVM variant.
2734 */
2735GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2736{
2737 PGVMM pGVMM;
2738 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2739 if (RT_SUCCESS(rc))
2740 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2741 return rc;
2742}
2743
2744
2745/**
2746 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2747 * the Virtual CPU if it's still busy executing guest code.
2748 *
2749 * @returns VBox status code.
2750 * @retval VINF_SUCCESS if poked successfully.
2751 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2752 *
2753 * @param pGVM The global (ring-0) VM structure.
2754 * @param pVCpu The cross context virtual CPU structure.
2755 */
2756DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2757{
2758 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2759
2760 RTCPUID idHostCpu = pVCpu->idHostCpu;
2761 if ( idHostCpu == NIL_RTCPUID
2762 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2763 {
2764 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2765 return VINF_GVM_NOT_BUSY_IN_GC;
2766 }
2767
2768 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2769 RTMpPokeCpu(idHostCpu);
2770 return VINF_SUCCESS;
2771}
2772
2773
2774/**
2775 * Pokes an EMT if it's still busy running guest code.
2776 *
2777 * @returns VBox status code.
2778 * @retval VINF_SUCCESS if poked successfully.
2779 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2780 *
2781 * @param pGVM The global (ring-0) VM structure.
2782 * @param idCpu The ID of the virtual CPU to poke.
2783 * @param fTakeUsedLock Take the used lock or not
2784 */
2785GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2786{
2787 /*
2788 * Validate input and take the UsedLock.
2789 */
2790 PGVMM pGVMM;
2791 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2792 if (RT_SUCCESS(rc))
2793 {
2794 if (idCpu < pGVM->cCpus)
2795 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2796 else
2797 rc = VERR_INVALID_CPU_ID;
2798
2799 if (fTakeUsedLock)
2800 {
2801 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2802 AssertRC(rc2);
2803 }
2804 }
2805
2806 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2807 return rc;
2808}
2809
2810
2811/**
2812 * Pokes an EMT if it's still busy running guest code.
2813 *
2814 * @returns VBox status code.
2815 * @retval VINF_SUCCESS if poked successfully.
2816 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2817 *
2818 * @param pGVM The global (ring-0) VM structure.
2819 * @param idCpu The ID of the virtual CPU to poke.
2820 */
2821GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2822{
2823 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2824}
2825
2826
2827/**
2828 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2829 * used locking.
2830 *
2831 * @returns VBox status code.
2832 * @retval VINF_SUCCESS if poked successfully.
2833 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2834 *
2835 * @param pGVM The global (ring-0) VM structure.
2836 * @param idCpu The ID of the virtual CPU to poke.
2837 *
2838 * @deprecated Don't use in new code if possible! Use the GVM variant.
2839 */
2840GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2841{
2842 PGVMM pGVMM;
2843 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2844 if (RT_SUCCESS(rc))
2845 {
2846 if (idCpu < pGVM->cCpus)
2847 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2848 else
2849 rc = VERR_INVALID_CPU_ID;
2850 }
2851 return rc;
2852}
2853
2854
2855/**
2856 * Wakes up a set of halted EMT threads so they can service pending requests.
2857 *
2858 * @returns VBox status code, no informational stuff.
2859 *
2860 * @param pGVM The global (ring-0) VM structure.
2861 * @param pSleepSet The set of sleepers to wake up.
2862 * @param pPokeSet The set of CPUs to poke.
2863 */
2864GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2865{
2866 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2867 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2868 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2869
2870 /*
2871 * Validate input and take the UsedLock.
2872 */
2873 PGVMM pGVMM;
2874 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2875 if (RT_SUCCESS(rc))
2876 {
2877 rc = VINF_SUCCESS;
2878 VMCPUID idCpu = pGVM->cCpus;
2879 while (idCpu-- > 0)
2880 {
2881 /* Don't try to poke or wake up ourselves. */
2882 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2883 continue;
2884
2885 /* just ignore errors for now. */
2886 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2887 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2888 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2889 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2890 }
2891
2892 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2893 AssertRC(rc2);
2894 }
2895
2896 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2897 return rc;
2898}
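
/* [Editorial addition] Sketch of building the two CPU sets, not part of the
 * original source; assumes the usual VMCPUSET_EMPTY / VMCPUSET_ADD helpers
 * from VBox/vmm/vmcpuset.h.
 *
 *     VMCPUSET SleepSet, PokeSet;
 *     VMCPUSET_EMPTY(&SleepSet);
 *     VMCPUSET_EMPTY(&PokeSet);
 *     VMCPUSET_ADD(&SleepSet, 1); // wake VCPU 1 if it is halted
 *     VMCPUSET_ADD(&PokeSet, 2);  // poke VCPU 2 if it is executing guest code
 *     int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
 */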
2899
2900
2901/**
2902 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2903 *
2904 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2905 * @param pGVM The global (ring-0) VM structure.
2906 * @param pReq Pointer to the request packet.
2907 */
2908GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2909{
2910 /*
2911 * Validate input and pass it on.
2912 */
2913 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2914 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2915
2916 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2917}
2918
2919
2920
2921/**
2922 * Poll the schedule to see if someone else should get a chance to run.
2923 *
2924 * This is a bit hackish and will not work too well if the machine is
2925 * under heavy load from non-VM processes.
2926 *
2927 * @returns VINF_SUCCESS if not yielded.
2928 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2929 * @param pGVM The global (ring-0) VM structure.
2930 * @param idCpu The Virtual CPU ID of the calling EMT.
2931 * @param fYield Whether to yield or not.
2932 * This is for when we're spinning in the halt loop.
2933 * @thread EMT(idCpu).
2934 */
2935GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2936{
2937 /*
2938 * Validate input.
2939 */
2940 PGVMM pGVMM;
2941 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2942 if (RT_SUCCESS(rc))
2943 {
2944 /*
2945 * We currently only implement helping with wake-ups (fYield = false), so don't
2946 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2947 */
2948 if (!fYield && pGVMM->fDoEarlyWakeUps)
2949 {
2950 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2951 pGVM->gvmm.s.StatsSched.cPollCalls++;
2952
2953 Assert(ASMGetFlags() & X86_EFL_IF);
2954 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2955
2956 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2957
2958 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2959 }
2960 /*
2961 * Not quite sure what we could do here...
2962 */
2963 else if (fYield)
2964 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2965 else
2966 rc = VINF_SUCCESS;
2967 }
2968
2969 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2970 return rc;
2971}
2972
2973
2974#ifdef GVMM_SCHED_WITH_PPT
2975/**
2976 * Timer callback for the periodic preemption timer.
2977 *
2978 * @param pTimer The timer handle.
2979 * @param pvUser Pointer to the per cpu structure.
2980 * @param iTick The current tick.
2981 */
2982static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2983{
2984 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2985 NOREF(pTimer); NOREF(iTick);
2986
2987 /*
2988 * Termination check
2989 */
2990 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2991 return;
2992
2993 /*
2994 * Do the house keeping.
2995 */
2996 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2997
2998 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2999 {
3000 /*
3001 * Historicize the max frequency.
3002 */
3003 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3004 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3005 pCpu->Ppt.iTickHistorization = 0;
3006 pCpu->Ppt.uDesiredHz = 0;
3007
3008 /*
3009 * Check whether the timer frequency needs to be changed.
3010 */
3011 uint32_t uHistMaxHz = 0;
3012 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3013 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3014 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3015 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3016 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3017 else if (uHistMaxHz)
3018 {
3019 /*
3020 * Reprogram it.
3021 */
3022 pCpu->Ppt.cChanges++;
3023 pCpu->Ppt.iTickHistorization = 0;
3024 pCpu->Ppt.uTimerHz = uHistMaxHz;
3025 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3026 pCpu->Ppt.cNsInterval = cNsInterval;
3027 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3028 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3029 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3030 / cNsInterval;
3031 else
3032 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3033 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3034
3035 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3036 RTTimerChangeInterval(pTimer, cNsInterval);
3037 }
3038 else
3039 {
3040 /*
3041 * Stop it.
3042 */
3043 pCpu->Ppt.fStarted = false;
3044 pCpu->Ppt.uTimerHz = 0;
3045 pCpu->Ppt.cNsInterval = 0;
3046 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3047
3048 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3049 RTTimerStop(pTimer);
3050 }
3051 }
3052 else
3053 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3054}
3055#endif /* GVMM_SCHED_WITH_PPT */
3056
3057
3058/**
3059 * Updates the periodic preemption timer for the calling CPU.
3060 *
3061 * The caller must have disabled preemption!
3062 * The caller must check that the host can do high resolution timers.
3063 *
3064 * @param pGVM The global (ring-0) VM structure.
3065 * @param idHostCpu The current host CPU id.
3066 * @param uHz The desired frequency.
3067 */
3068GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3069{
3070 NOREF(pGVM);
3071#ifdef GVMM_SCHED_WITH_PPT
3072 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3073 Assert(RTTimerCanDoHighResolution());
3074
3075 /*
3076 * Resolve the per CPU data.
3077 */
3078 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3079 PGVMM pGVMM = g_pGVMM;
3080 if ( !RT_VALID_PTR(pGVMM)
3081 || pGVMM->u32Magic != GVMM_MAGIC)
3082 return;
3083 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3084 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3085 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3086 && pCpu->idCpu == idHostCpu,
3087 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3088
3089 /*
3090 * Check whether we need to do anything about the timer.
3091 * We have to be a little bit careful since we might race the timer
3092 * callback here.
3093 */
3094 if (uHz > 16384)
3095 uHz = 16384; /** @todo add a query method for this! */
3096 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3097 && uHz >= pCpu->Ppt.uMinHz
3098 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3099 {
3100 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3101
3102 pCpu->Ppt.uDesiredHz = uHz;
3103 uint32_t cNsInterval = 0;
3104 if (!pCpu->Ppt.fStarted)
3105 {
3106 pCpu->Ppt.cStarts++;
3107 pCpu->Ppt.fStarted = true;
3108 pCpu->Ppt.fStarting = true;
3109 pCpu->Ppt.iTickHistorization = 0;
3110 pCpu->Ppt.uTimerHz = uHz;
3111 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3112 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3113 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3114 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3115 / cNsInterval;
3116 else
3117 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3118 }
3119
3120 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3121
3122 if (cNsInterval)
3123 {
3124 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3125 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3126 AssertRC(rc);
3127
3128 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3129 if (RT_FAILURE(rc))
3130 pCpu->Ppt.fStarted = false;
3131 pCpu->Ppt.fStarting = false;
3132 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3133 }
3134 }
3135#else /* !GVMM_SCHED_WITH_PPT */
3136 NOREF(idHostCpu); NOREF(uHz);
3137#endif /* !GVMM_SCHED_WITH_PPT */
3138}
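
/* [Editorial addition] Call-site sketch, not part of the original source,
 * making the two documented preconditions explicit: preemption disabled and a
 * host with high resolution timer support ('uHz' is the desired frequency
 * computed elsewhere).
 *
 *     RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *     RTThreadPreemptDisable(&PreemptState);
 *     if (RTTimerCanDoHighResolution())
 *         GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uHz);
 *     RTThreadPreemptRestore(&PreemptState);
 */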
3139
3140
3141/**
3142 * Calls @a pfnCallback for each VM in the system.
3143 *
3144 * This will enumerate the VMs while holding the global VM used list lock in
3145 * shared mode. So, only suitable for simple work. If more expensive work
3146 * needs doing, a different approach must be taken as using this API would
3147 * otherwise block VM creation and destruction.
3148 *
3149 * @returns VBox status code.
3150 * @param pfnCallback The callback function.
3151 * @param pvUser User argument to the callback.
3152 */
3153GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3154{
3155 PGVMM pGVMM;
3156 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3157
3158 int rc = VINF_SUCCESS;
3159 GVMMR0_USED_SHARED_LOCK(pGVMM);
3160 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3161 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3162 i = pGVMM->aHandles[i].iNext, cLoops++)
3163 {
3164 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3165 if ( RT_VALID_PTR(pGVM)
3166 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3167 && pGVM->u32Magic == GVM_MAGIC)
3168 {
3169 rc = pfnCallback(pGVM, pvUser);
3170 if (rc != VINF_SUCCESS)
3171 break;
3172 }
3173
3174 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3175 }
3176 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3177 return rc;
3178}
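
/* [Editorial addition] Enumeration callback sketch, not part of the original
 * source; the callback shape (a PGVM plus the user argument, returning an int
 * status) is inferred from the invocation above, and 'myEnumCallback' is a
 * hypothetical name. Returning anything other than VINF_SUCCESS stops the
 * enumeration early.
 *
 *     static DECLCALLBACK(int) myEnumCallback(PGVM pGVM, void *pvUser)
 *     {
 *         uint32_t *pcVMs = (uint32_t *)pvUser;
 *         *pcVMs += 1;
 *         NOREF(pGVM);
 *         return VINF_SUCCESS;
 *     }
 *
 *     uint32_t cVMs = 0;
 *     int rc = GVMMR0EnumVMs(myEnumCallback, &cVMs);
 */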
3179
3180
3181/**
3182 * Retrieves the GVMM statistics visible to the caller.
3183 *
3184 * @returns VBox status code.
3185 *
3186 * @param pStats Where to put the statistics.
3187 * @param pSession The current session.
3188 * @param pGVM The GVM to obtain statistics for. Optional.
3189 */
3190GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3191{
3192 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3193
3194 /*
3195 * Validate input.
3196 */
3197 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3198 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3199 pStats->cVMs = 0; /* (crash before taking the sem...) */
3200
3201 /*
3202 * Take the lock and get the VM statistics.
3203 */
3204 PGVMM pGVMM;
3205 if (pGVM)
3206 {
3207 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3208 if (RT_FAILURE(rc))
3209 return rc;
3210 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3211
3212 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3213 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3214 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3215 while (iCpu-- > 0)
3216 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3217 }
3218 else
3219 {
3220 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3221 RT_ZERO(pStats->SchedVM);
3222 RT_ZERO(pStats->aVCpus);
3223
3224 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3225 AssertRCReturn(rc, rc);
3226 }
3227
3228 /*
3229 * Enumerate the VMs and add the ones visible to the statistics.
3230 */
3231 pStats->cVMs = 0;
3232 pStats->cEMTs = 0;
3233 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3234
3235 for (unsigned i = pGVMM->iUsedHead;
3236 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3237 i = pGVMM->aHandles[i].iNext)
3238 {
3239 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3240 void *pvObj = pGVMM->aHandles[i].pvObj;
3241 if ( RT_VALID_PTR(pvObj)
3242 && RT_VALID_PTR(pOtherGVM)
3243 && pOtherGVM->u32Magic == GVM_MAGIC
3244 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3245 {
3246 pStats->cVMs++;
3247 pStats->cEMTs += pOtherGVM->cCpus;
3248
3249 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3250 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3251 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3252 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3253 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3254
3255 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3256 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3257 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3258
3259 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3260 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3261
3262 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3263 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3264 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3265 }
3266 }
3267
3268 /*
3269 * Copy out the per host CPU statistics.
3270 */
3271 uint32_t iDstCpu = 0;
3272 uint32_t cSrcCpus = pGVMM->cHostCpus;
3273 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3274 {
3275 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3276 {
3277 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3278 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3279#ifdef GVMM_SCHED_WITH_PPT
3280 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3281 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3282 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3283 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3284#else
3285 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3286 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3287 pStats->aHostCpus[iDstCpu].cChanges = 0;
3288 pStats->aHostCpus[iDstCpu].cStarts = 0;
3289#endif
3290 iDstCpu++;
3291 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3292 break;
3293 }
3294 }
3295 pStats->cHostCpus = iDstCpu;
3296
3297 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3298
3299 return VINF_SUCCESS;
3300}
3301
3302
3303/**
3304 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3305 *
3306 * @returns see GVMMR0QueryStatistics.
3307 * @param pGVM The global (ring-0) VM structure. Optional.
3308 * @param pReq Pointer to the request packet.
3309 * @param pSession The current session.
3310 */
3311GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3312{
3313 /*
3314 * Validate input and pass it on.
3315 */
3316 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3317 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3318 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3319
3320 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3321}
3322
3323
3324/**
3325 * Resets the specified GVMM statistics.
3326 *
3327 * @returns VBox status code.
3328 *
3329 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
3330 * @param pSession The current session.
3331 * @param pGVM The GVM to reset statistics for. Optional.
3332 */
3333GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3334{
3335 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3336
3337 /*
3338 * Validate input.
3339 */
3340 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3341 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3342
3343 /*
3344 * Take the lock and get the VM statistics.
3345 */
3346 PGVMM pGVMM;
3347 if (pGVM)
3348 {
3349 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3350 if (RT_FAILURE(rc))
3351 return rc;
3352# define MAYBE_RESET_FIELD(field) \
3353 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3354 MAYBE_RESET_FIELD(cHaltCalls);
3355 MAYBE_RESET_FIELD(cHaltBlocking);
3356 MAYBE_RESET_FIELD(cHaltTimeouts);
3357 MAYBE_RESET_FIELD(cHaltNotBlocking);
3358 MAYBE_RESET_FIELD(cHaltWakeUps);
3359 MAYBE_RESET_FIELD(cWakeUpCalls);
3360 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3361 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3362 MAYBE_RESET_FIELD(cPokeCalls);
3363 MAYBE_RESET_FIELD(cPokeNotBusy);
3364 MAYBE_RESET_FIELD(cPollCalls);
3365 MAYBE_RESET_FIELD(cPollHalts);
3366 MAYBE_RESET_FIELD(cPollWakeUps);
3367# undef MAYBE_RESET_FIELD
3368 }
3369 else
3370 {
3371 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3372
3373 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3374 AssertRCReturn(rc, rc);
3375 }
3376
3377 /*
3378 * Enumerate the VMs and add the ones visible to the statistics.
3379 */
3380 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3381 {
3382 for (unsigned i = pGVMM->iUsedHead;
3383 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3384 i = pGVMM->aHandles[i].iNext)
3385 {
3386 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3387 void *pvObj = pGVMM->aHandles[i].pvObj;
3388 if ( RT_VALID_PTR(pvObj)
3389 && RT_VALID_PTR(pOtherGVM)
3390 && pOtherGVM->u32Magic == GVM_MAGIC
3391 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3392 {
3393# define MAYBE_RESET_FIELD(field) \
3394 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3395 MAYBE_RESET_FIELD(cHaltCalls);
3396 MAYBE_RESET_FIELD(cHaltBlocking);
3397 MAYBE_RESET_FIELD(cHaltTimeouts);
3398 MAYBE_RESET_FIELD(cHaltNotBlocking);
3399 MAYBE_RESET_FIELD(cHaltWakeUps);
3400 MAYBE_RESET_FIELD(cWakeUpCalls);
3401 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3402 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3403 MAYBE_RESET_FIELD(cPokeCalls);
3404 MAYBE_RESET_FIELD(cPokeNotBusy);
3405 MAYBE_RESET_FIELD(cPollCalls);
3406 MAYBE_RESET_FIELD(cPollHalts);
3407 MAYBE_RESET_FIELD(cPollWakeUps);
3408# undef MAYBE_RESET_FIELD
3409 }
3410 }
3411 }
3412
3413 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3414
3415 return VINF_SUCCESS;
3416}
3417
3418
3419/**
3420 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3421 *
3422 * @returns see GVMMR0ResetStatistics.
3423 * @param pGVM The global (ring-0) VM structure. Optional.
3424 * @param pReq Pointer to the request packet.
3425 * @param pSession The current session.
3426 */
3427GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3428{
3429 /*
3430 * Validate input and pass it on.
3431 */
3432 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3433 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3434 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3435
3436 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3437}
3438