VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp

Last change on this file was 107912, checked in by vboxsync, 37 hours ago

VMM/GVMMR0: doxgyen fix. jiraref:VBP-1470

1/* $Id: GVMMR0.cpp 107912 2025-01-23 14:20:31Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * is both for session cleanup purposes and for having a point where it is
39 * possible to implement usage policies later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-cpu
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
48 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * TMCalcHostTimerFrequency() takes the max TMTimerSetFrequencyHint() value and
52 * adjusts it by the current catch-up percent, warp drive percent and some fudge
53 * factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
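
/*
 * Illustrative sketch (not part of the build): the PPT frequency for a host
 * CPU is driven by the maximum of the currently reported uDesiredHz and the
 * historicized entries, i.e. roughly:
 *
 *     uint32_t uHz = pCpu->Ppt.uDesiredHz;
 *     for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
 *         if (pCpu->Ppt.aHzHistory[i] > uHz)
 *             uHz = pCpu->Ppt.aHzHistory[i];
 *
 * (pCpu being a PGVMMHOSTCPU; frequencies below Ppt.uMinHz are presumably not
 * worth running the timer for.)
 */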
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#ifdef RT_ARCH_AMD64
81# include <iprt/asm-amd64-x86.h>
82#endif
83#include <iprt/critsect.h>
84#include <iprt/mem.h>
85#include <iprt/semaphore.h>
86#include <iprt/time.h>
87#include <VBox/log.h>
88#include <iprt/thread.h>
89#include <iprt/process.h>
90#include <iprt/param.h>
91#include <iprt/string.h>
92#include <iprt/assert.h>
93#include <iprt/mem.h>
94#include <iprt/memobj.h>
95#include <iprt/mp.h>
96#include <iprt/cpuset.h>
97#include <iprt/spinlock.h>
98#include <iprt/timer.h>
99
100#include "dtrace/VBoxVMM.h"
101
102
103/*********************************************************************************************************************************
104* Defined Constants And Macros *
105*********************************************************************************************************************************/
106#if (defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)) \
107 && !defined(VBOX_WITH_MINIMAL_R0)
108/** Define this to enable the periodic preemption timer. */
109# define GVMM_SCHED_WITH_PPT
110#endif
111
112#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
113/** Define this to enable the per-EMT high resolution wakeup timers. */
114# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
115#endif
116
117
118/** Special value that GVMMR0DeregisterVCpu sets. */
119#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
120AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
121
122
123/*********************************************************************************************************************************
124* Structures and Typedefs *
125*********************************************************************************************************************************/
126
127/**
128 * Global VM handle.
129 */
130typedef struct GVMHANDLE
131{
132 /** The index of the next handle in the list (free or used). (0 is nil.) */
133 uint16_t volatile iNext;
134 /** Our own index / handle value. */
135 uint16_t iSelf;
136 /** The process ID of the handle owner.
137 * This is used for access checks. */
138 RTPROCESS ProcId;
139 /** The pointer to the ring-0 only (aka global) VM structure. */
140 PGVM pGVM;
141 /** The virtual machine object. */
142 void *pvObj;
143 /** The session this VM is associated with. */
144 PSUPDRVSESSION pSession;
145 /** The ring-0 handle of the EMT0 thread.
146 * This is used for ownership checks as well as looking up a VM handle by thread
147 * at times like assertions. */
148 RTNATIVETHREAD hEMT0;
149} GVMHANDLE;
150/** Pointer to a global VM handle. */
151typedef GVMHANDLE *PGVMHANDLE;
152
153/** Number of GVM handles (including the NIL handle). */
154#if HC_ARCH_BITS == 64
155# define GVMM_MAX_HANDLES 8192
156#else
157# define GVMM_MAX_HANDLES 128
158#endif
159
160/**
161 * Per host CPU GVMM data.
162 */
163typedef struct GVMMHOSTCPU
164{
165 /** Magic number (GVMMHOSTCPU_MAGIC). */
166 uint32_t volatile u32Magic;
167 /** The CPU ID. */
168 RTCPUID idCpu;
169 /** The CPU set index. */
170 uint32_t idxCpuSet;
171
172#ifdef GVMM_SCHED_WITH_PPT
173 /** Periodic preemption timer data. */
174 struct
175 {
176 /** The handle to the periodic preemption timer. */
177 PRTTIMER pTimer;
178 /** Spinlock protecting the data below. */
179 RTSPINLOCK hSpinlock;
180 /** The smallest Hz that we need to care about. (static) */
181 uint32_t uMinHz;
182 /** The number of ticks between each historization. */
183 uint32_t cTicksHistoriziationInterval;
184 /** The current historization tick (counting up to
185 * cTicksHistoriziationInterval and then resetting). */
186 uint32_t iTickHistorization;
187 /** The current timer interval. This is set to 0 when inactive. */
188 uint32_t cNsInterval;
189 /** The current timer frequency. This is set to 0 when inactive. */
190 uint32_t uTimerHz;
191 /** The current max frequency reported by the EMTs.
192 * This gets historicized and reset by the timer callback. This is
193 * read without holding the spinlock, so needs atomic updating. */
194 uint32_t volatile uDesiredHz;
195 /** Whether the timer was started or not. */
196 bool volatile fStarted;
197 /** Set if we're starting the timer. */
198 bool volatile fStarting;
199 /** The index of the next history entry (mod it). */
200 uint32_t iHzHistory;
201 /** Historicized uDesiredHz values. The array wraps around, new entries
202 * are added at iHzHistory. This is updated approximately every
203 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
204 uint32_t aHzHistory[8];
205 /** Statistics counter for recording the number of interval changes. */
206 uint32_t cChanges;
207 /** Statistics counter for recording the number of timer starts. */
208 uint32_t cStarts;
209 } Ppt;
210#endif /* GVMM_SCHED_WITH_PPT */
211
212} GVMMHOSTCPU;
213/** Pointer to the per host CPU GVMM data. */
214typedef GVMMHOSTCPU *PGVMMHOSTCPU;
215/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
216#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
217/** The interval one history entry should cover (approximately), given in
218 * nanoseconds. */
219#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
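/* Note: with the 8-entry aHzHistory above, this gives the roughly 160 ms
   (8 * 20 ms) lookback window referred to in the @page pg_gvmm docs. */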
220
221
222/**
223 * The GVMM instance data.
224 */
225typedef struct GVMM
226{
227 /** Eyecatcher / magic. */
228 uint32_t u32Magic;
229 /** The index of the head of the free handle chain. (0 is nil.) */
230 uint16_t volatile iFreeHead;
231 /** The index of the head of the active handle chain. (0 is nil.) */
232 uint16_t volatile iUsedHead;
233 /** The number of VMs. */
234 uint16_t volatile cVMs;
235 /** Alignment padding. */
236 uint16_t u16Reserved;
237 /** The number of EMTs. */
238 uint32_t volatile cEMTs;
239 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
240 uint32_t volatile cHaltedEMTs;
241 /** Mini lock for restricting early wake-ups to one thread. */
242 bool volatile fDoingEarlyWakeUps;
243 bool afPadding[3]; /**< explicit alignment padding. */
244 /** When the next halted or sleeping EMT will wake up.
245 * This is set to 0 when it needs recalculating and to UINT64_MAX when
246 * there are no halted or sleeping EMTs in the GVMM. */
247 uint64_t uNsNextEmtWakeup;
248 /** The lock used to serialize VM creation, destruction and associated events that
249 * isn't performance critical. Owners may acquire the list lock. */
250 RTCRITSECT CreateDestroyLock;
251 /** The lock used to serialize used list updates and accesses.
252 * This indirectly includes scheduling since the scheduler will have to walk the
253 * used list to examine running VMs. Owners may not acquire any other locks. */
254 RTCRITSECTRW UsedLock;
255 /** The handle array.
256 * The size of this array defines the maximum number of currently running VMs.
257 * The first entry is unused as it represents the NIL handle. */
258 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
259
260 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
261 * The number of EMTs that means we no longer consider ourselves alone on a
262 * CPU/Core.
263 */
264 uint32_t cEMTsMeansCompany;
265 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
266 * The minimum sleep time for when we're alone, in nanoseconds.
267 */
268 uint32_t nsMinSleepAlone;
269 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
270 * The minimum sleep time for when we've got company, in nanoseconds.
271 */
272 uint32_t nsMinSleepCompany;
273#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
274 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
275 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
276 * nanoseconds.
277 */
278 uint32_t nsMinSleepWithHrTimer;
279#endif
280 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
281 * The limit for the first round of early wake-ups, given in nanoseconds.
282 */
283 uint32_t nsEarlyWakeUp1;
284 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
285 * The limit for the second round of early wake-ups, given in nanoseconds.
286 */
287 uint32_t nsEarlyWakeUp2;
288
289 /** Set if we're doing early wake-ups.
290 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
291 bool volatile fDoEarlyWakeUps;
292
293 /** The number of entries in the host CPU array (aHostCpus). */
294 uint32_t cHostCpus;
295 /** Per host CPU data (variable length). */
296 GVMMHOSTCPU aHostCpus[1];
297} GVMM;
298AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
299AssertCompileMemberAlignment(GVMM, UsedLock, 8);
300AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
301/** Pointer to the GVMM instance data. */
302typedef GVMM *PGVMM;
303
304/** The GVMM::u32Magic value (Charlie Haden). */
305#define GVMM_MAGIC UINT32_C(0x19370806)
306
307
308
309/*********************************************************************************************************************************
310* Global Variables *
311*********************************************************************************************************************************/
312/** Pointer to the GVMM instance data.
313 * (Just my general dislike for global variables.) */
314static PGVMM g_pGVMM = NULL;
315
316/** Macro for obtaining and validating the g_pGVMM pointer.
317 * On failure it will return from the invoking function with the specified return value.
318 *
319 * @param pGVMM The name of the pGVMM variable.
320 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
321 * status codes.
322 */
323#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
324 do { \
325 (pGVMM) = g_pGVMM;\
326 AssertPtrReturn((pGVMM), (rc)); \
327 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
328 } while (0)
329
330/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
331 * On failure it will return from the invoking function.
332 *
333 * @param pGVMM The name of the pGVMM variable.
334 */
335#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
336 do { \
337 (pGVMM) = g_pGVMM;\
338 AssertPtrReturnVoid((pGVMM)); \
339 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
340 } while (0)
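
/*
 * Usage sketch (illustrative only; gvmmR0SomeEntryPoint is hypothetical, see
 * GVMMR0SetConfig below for a real user of the macro):
 *
 *     GVMMR0DECL(int) gvmmR0SomeEntryPoint(void)
 *     {
 *         PGVMM pGVMM;
 *         GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
 *         ...                             // pGVMM is now known to be valid
 *         return VINF_SUCCESS;
 *     }
 */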
341
342
343/*********************************************************************************************************************************
344* Internal Functions *
345*********************************************************************************************************************************/
346static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession);
347static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
348static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
349static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
350
351#ifdef GVMM_SCHED_WITH_PPT
352static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
355static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
356#endif
357
358
359/**
360 * Initializes the GVMM.
361 *
362 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
363 *
364 * @returns VBox status code.
365 */
366GVMMR0DECL(int) GVMMR0Init(void)
367{
368 LogFlow(("GVMMR0Init:\n"));
369
370 /*
371 * Allocate and initialize the instance data.
372 */
373 uint32_t cHostCpus = RTMpGetArraySize();
374 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
375
376 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
377 if (!pGVMM)
378 return VERR_NO_MEMORY;
379 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
380 "GVMM-CreateDestroyLock");
381 if (RT_SUCCESS(rc))
382 {
383 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
384 if (RT_SUCCESS(rc))
385 {
386 pGVMM->u32Magic = GVMM_MAGIC;
387 pGVMM->iUsedHead = 0;
388 pGVMM->iFreeHead = 1;
389
390 /* the nil handle */
391 pGVMM->aHandles[0].iSelf = 0;
392 pGVMM->aHandles[0].iNext = 0;
393
394 /* the tail */
395 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
396 pGVMM->aHandles[i].iSelf = i;
397 pGVMM->aHandles[i].iNext = 0; /* nil */
398
399 /* the rest */
400 while (i-- > 1)
401 {
402 pGVMM->aHandles[i].iSelf = i;
403 pGVMM->aHandles[i].iNext = i + 1;
404 }
405
406 /* The default configuration values. */
407 uint32_t cNsResolution = RTSemEventMultiGetResolution();
408 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
409 if (cNsResolution >= 5*RT_NS_100US)
410 {
411 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
412 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
413 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
414 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
415 }
416 else if (cNsResolution > RT_NS_100US)
417 {
418 pGVMM->nsMinSleepAlone = cNsResolution / 2;
419 pGVMM->nsMinSleepCompany = cNsResolution / 4;
420 pGVMM->nsEarlyWakeUp1 = 0;
421 pGVMM->nsEarlyWakeUp2 = 0;
422 }
423 else
424 {
425 pGVMM->nsMinSleepAlone = 2000;
426 pGVMM->nsMinSleepCompany = 2000;
427 pGVMM->nsEarlyWakeUp1 = 0;
428 pGVMM->nsEarlyWakeUp2 = 0;
429 }
430#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
431 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
432#endif
433 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
434
435 /* The host CPU data. */
436 pGVMM->cHostCpus = cHostCpus;
437 uint32_t iCpu = cHostCpus;
438 RTCPUSET PossibleSet;
439 RTMpGetSet(&PossibleSet);
440 while (iCpu-- > 0)
441 {
442 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
443#ifdef GVMM_SCHED_WITH_PPT
444 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
445 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
446 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
447 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
448 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
450 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
451 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
453 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
454 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
455 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
456#endif
457
458 if (RTCpuSetIsMember(&PossibleSet, iCpu))
459 {
460 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
461 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
462
463#ifdef GVMM_SCHED_WITH_PPT
464 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
465 50*1000*1000 /* whatever */,
466 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
467 gvmmR0SchedPeriodicPreemptionTimerCallback,
468 &pGVMM->aHostCpus[iCpu]);
469 if (RT_SUCCESS(rc))
470 {
471 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
472 if (RT_FAILURE(rc))
473 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
474 }
475 else
476 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
477 if (RT_FAILURE(rc))
478 {
479 while (iCpu < cHostCpus)
480 {
481 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
482 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
483 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
484 iCpu++;
485 }
486 break;
487 }
488#endif
489 }
490 else
491 {
492 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
493 pGVMM->aHostCpus[iCpu].u32Magic = 0;
494 }
495 }
496 if (RT_SUCCESS(rc))
497 {
498 g_pGVMM = pGVMM;
499 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
500 return VINF_SUCCESS;
501 }
502
503 /* bail out. */
504 RTCritSectRwDelete(&pGVMM->UsedLock);
505 }
506 else
507 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
508 RTCritSectDelete(&pGVMM->CreateDestroyLock);
509 }
510 else
511 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
631 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 pGVMM->nsMinSleepWithHrTimer = u64Value;
635 else
636 rc = VERR_OUT_OF_RANGE;
637 }
638#endif
639 else if (!strcmp(pszName, "EarlyWakeUp1"))
640 {
641 if (u64Value <= RT_NS_100MS)
642 {
643 pGVMM->nsEarlyWakeUp1 = u64Value;
644 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
645 }
646 else
647 rc = VERR_OUT_OF_RANGE;
648 }
649 else if (!strcmp(pszName, "EarlyWakeUp2"))
650 {
651 if (u64Value <= RT_NS_100MS)
652 {
653 pGVMM->nsEarlyWakeUp2 = u64Value;
654 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
655 }
656 else
657 rc = VERR_OUT_OF_RANGE;
658 }
659 else
660 rc = VERR_CFGM_VALUE_NOT_FOUND;
661 return rc;
662}
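
/*
 * Example (illustrative only): a ring-0 caller holding a valid session could
 * lower the alone-sleep threshold from the 750000 ns default like this:
 *
 *     int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000);
 *     AssertRC(rc);
 */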
663
664
665/**
666 * A quick hack for getting global config values.
667 *
668 * @returns VBox status code.
669 *
670 * @param pSession The session handle. Used for authentication.
671 * @param pszName The variable name.
672 * @param pu64Value Where to return the value.
673 */
674GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
675{
676 /*
677 * Validate input.
678 */
679 PGVMM pGVMM;
680 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
681 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
682 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
683 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
684
685 /*
686 * String switch time!
687 */
688 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
689 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
690 int rc = VINF_SUCCESS;
691 pszName += sizeof("/GVMM/") - 1;
692 if (!strcmp(pszName, "cEMTsMeansCompany"))
693 *pu64Value = pGVMM->cEMTsMeansCompany;
694 else if (!strcmp(pszName, "MinSleepAlone"))
695 *pu64Value = pGVMM->nsMinSleepAlone;
696 else if (!strcmp(pszName, "MinSleepCompany"))
697 *pu64Value = pGVMM->nsMinSleepCompany;
698#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
699 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
700 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
701#endif
702 else if (!strcmp(pszName, "EarlyWakeUp1"))
703 *pu64Value = pGVMM->nsEarlyWakeUp1;
704 else if (!strcmp(pszName, "EarlyWakeUp2"))
705 *pu64Value = pGVMM->nsEarlyWakeUp2;
706 else
707 rc = VERR_CFGM_VALUE_NOT_FOUND;
708 return rc;
709}
710
711
712/**
713 * Acquire the 'used' lock in shared mode.
714 *
715 * This prevents destruction of the VM while we're in ring-0.
716 *
717 * @returns IPRT status code, see RTSemFastMutexRequest.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
720 */
721#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Release the 'used' lock when owning it in shared mode.
725 *
726 * @returns IPRT status code, see RTSemFastMutexRequest.
727 * @param a_pGVMM The GVMM instance data.
728 * @sa GVMMR0_USED_SHARED_LOCK
729 */
730#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
731
732/**
733 * Acquire the 'used' lock in exclusive mode.
734 *
735 * Only use this function when making changes to the used list.
736 *
737 * @returns IPRT status code, see RTSemFastMutexRequest.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
742
743/**
744 * Release the 'used' lock when owning it in exclusive mode.
745 *
746 * @returns IPRT status code, see RTSemFastMutexRelease.
747 * @param a_pGVMM The GVMM instance data.
748 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
749 */
750#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
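
/*
 * Usage sketch (illustrative only): readers take the shared lock while walking
 * the used list and may not take any other locks while holding it:
 *
 *     GVMMR0_USED_SHARED_LOCK(pGVMM);
 *     for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
 *     {
 *         PGVM pGVM = pGVMM->aHandles[i].pGVM;
 *         ...                             // examine the running VM
 *     }
 *     GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 */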
751
752
753/**
754 * Try acquire the 'create & destroy' lock.
755 *
756 * @returns IPRT status code, see RTSemFastMutexRequest.
757 * @param pGVMM The GVMM instance data.
758 */
759DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
760{
761 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
762 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
763 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
764 return rc;
765}
766
767
768/**
769 * Release the 'create & destroy' lock.
770 *
771 * @returns IPRT status code, see RTSemFastMutexRequest.
772 * @param pGVMM The GVMM instance data.
773 */
774DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
775{
776 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
777 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
778 AssertRC(rc);
779 return rc;
780}
781
782
783/**
784 * Request wrapper for the GVMMR0CreateVM API.
785 *
786 * @returns VBox status code.
787 * @param pReq The request buffer.
788 * @param pSession The session handle. The VM will be associated with this.
789 */
790GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
791{
792 /*
793 * Validate the request.
794 */
795 if (!RT_VALID_PTR(pReq))
796 return VERR_INVALID_POINTER;
797 if (pReq->Hdr.cbReq != sizeof(*pReq))
798 return VERR_INVALID_PARAMETER;
799 if (pReq->pSession != pSession)
800 return VERR_INVALID_POINTER;
801
802 /* Check that VBoxVMM and VMMR0 are likely to have the same idea about the structures. */
803 if (pReq->cbVM != sizeof(VM))
804 {
805 LogRel(("GVMMR0CreateVMReq: cbVM=%#x, expected %#x\n", pReq->cbVM, sizeof(VM)));
806 return VINF_GVM_MISMATCH_VM_SIZE;
807 }
808 if (pReq->cbVCpu != sizeof(VMCPU))
809 {
810 LogRel(("GVMMR0CreateVMReq: cbVCpu=%#x, expected %#x\n", pReq->cbVCpu, sizeof(VMCPU)));
811 return VINF_GVM_MISMATCH_VMCPU_SIZE;
812 }
813 if (pReq->uStructVersion != VM_STRUCT_VERSION)
814 {
815 LogRel(("GVMMR0CreateVMReq: uStructVersion=%#x, expected %#x\n", pReq->uStructVersion, VM_STRUCT_VERSION));
816 return VINF_GVM_MISMATCH_VM_STRUCT_VER;
817 }
818 if (pReq->uSvnRevision != VMMGetSvnRev())
819 {
820 LogRel(("GVMMR0CreateVMReq: uSvnRevision=%u, expected %u\n", pReq->uSvnRevision, VMMGetSvnRev()));
821 return VINF_GVM_MISMATCH_VMCPU_SIZE;
822 }
823
824 /*
825 * Execute it.
826 */
827 PGVM pGVM;
828 pReq->pVMR0 = NULL;
829 pReq->pVMR3 = NIL_RTR3PTR;
830 int rc = GVMMR0CreateVM(pSession, pReq->enmTarget, pReq->cCpus, &pGVM);
831 if (RT_SUCCESS(rc))
832 {
833 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
834 pReq->pVMR3 = pGVM->pVMR3;
835 }
836 return rc;
837}
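
/*
 * Illustrative sketch of how ring-3 is expected to fill the request before
 * passing it down (field names taken from the validation above; the real
 * ring-3 code lives elsewhere):
 *
 *     GVMMCREATEVMREQ Req;
 *     Req.Hdr.cbReq      = sizeof(Req);
 *     Req.pSession       = pSession;
 *     Req.cbVM           = sizeof(VM);
 *     Req.cbVCpu         = sizeof(VMCPU);
 *     Req.uStructVersion = VM_STRUCT_VERSION;
 *     Req.uSvnRevision   = VMMGetSvnRev();
 *     Req.enmTarget      = VMTARGET_X86;          // or VMTARGET_ARMV8
 *     Req.cCpus          = cCpus;
 */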
838
839
840/**
841 * Allocates the VM structure and registers it with GVM.
842 *
843 * The caller will become the VM owner and thereby the EMT.
844 *
845 * @returns VBox status code.
846 * @param pSession The support driver session.
847 * @param enmTarget The target platform architecture of the VM.
848 * @param cCpus Number of virtual CPUs for the new VM.
849 * @param ppGVM Where to store the pointer to the VM structure.
850 *
851 * @thread EMT.
852 */
853GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, VMTARGET enmTarget, uint32_t cCpus, PGVM *ppGVM)
854{
855 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
856 PGVMM pGVMM;
857 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
858
859 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
860 *ppGVM = NULL;
861
862 if ( cCpus == 0
863 || cCpus > VMM_MAX_CPU_COUNT)
864 return VERR_INVALID_PARAMETER;
865 if ( enmTarget != VMTARGET_X86
866 && enmTarget != VMTARGET_ARMV8)
867 return VERR_INVALID_PARAMETER;
868
869 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
870 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
871 RTPROCESS ProcId = RTProcSelf();
872 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
873
874 /*
875 * The whole allocation process is protected by the lock.
876 */
877 int rc = gvmmR0CreateDestroyLock(pGVMM);
878 AssertRCReturn(rc, rc);
879
880 /*
881 * Only one VM per session.
882 */
883 if (SUPR0GetSessionVM(pSession) != NULL)
884 {
885 gvmmR0CreateDestroyUnlock(pGVMM);
886 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
887 return VERR_ALREADY_EXISTS;
888 }
889
890 /*
891 * Allocate a handle first so we don't waste resources unnecessarily.
892 */
893 uint16_t iHandle = pGVMM->iFreeHead;
894 if (iHandle)
895 {
896 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
897
898 /* consistency checks, a bit paranoid as always. */
899 if ( !pHandle->pGVM
900 && !pHandle->pvObj
901 && pHandle->iSelf == iHandle)
902 {
903 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
904 if (pHandle->pvObj)
905 {
906 /*
907 * Move the handle from the free to used list and perform permission checks.
908 */
909 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
910 AssertRC(rc);
911
912 pGVMM->iFreeHead = pHandle->iNext;
913 pHandle->iNext = pGVMM->iUsedHead;
914 pGVMM->iUsedHead = iHandle;
915 pGVMM->cVMs++;
916
917 pHandle->pGVM = NULL;
918 pHandle->pSession = pSession;
919 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
920 pHandle->ProcId = NIL_RTPROCESS;
921
922 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
923
924 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
925 if (RT_SUCCESS(rc))
926 {
927 /*
928 * Allocate memory for the VM structure (combined VM + GVM).
929 */
930 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
931 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
932 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
933 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
934 if (RT_SUCCESS(rc))
935 {
936 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
937 AssertPtr(pGVM);
938
939 /*
940 * Initialise the structure.
941 */
942 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
943 gvmmR0InitPerVMData(pGVM, iHandle, enmTarget, cCpus, pSession);
944 pGVM->gvmm.s.VMMemObj = hVMMemObj;
945#ifndef VBOX_WITH_MINIMAL_R0
946 rc = GMMR0InitPerVMData(pGVM);
947 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
948#else
949 int rc2 = VINF_SUCCESS;
950#endif
951 int rc3 = VMMR0InitPerVMData(pGVM);
952#ifndef VBOX_WITH_MINIMAL_R0
953 CPUMR0InitPerVMData(pGVM);
954 DBGFR0InitPerVMData(pGVM);
955 PDMR0InitPerVMData(pGVM);
956 IOMR0InitPerVMData(pGVM);
957 TMR0InitPerVMData(pGVM);
958#endif
959 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
960 {
961 /*
962 * Allocate page array.
963 * This currently has to be made available to ring-3, but this should change eventually.
964 */
965 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
966 if (RT_SUCCESS(rc))
967 {
968 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
969 for (uint32_t iPage = 0; iPage < cPages; iPage++)
970 {
971 paPages[iPage].uReserved = 0;
972 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
973 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
974 }
975
976 /*
977 * Map the page array, VM and VMCPU structures into ring-3.
978 */
979 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
980 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
981 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
982 0 /*offSub*/, sizeof(VM));
983 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
984 {
985 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
986 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
987 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
988 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
989 }
990 if (RT_SUCCESS(rc))
991 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
992 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
993 NIL_RTR0PROCESS);
994 if (RT_SUCCESS(rc))
995 {
996 /*
997 * Initialize all the VM pointers.
998 */
999 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1000 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
1001
1002 for (VMCPUID i = 0; i < cCpus; i++)
1003 {
1004 pGVM->aCpus[i].pVMR0 = pGVM;
1005 pGVM->aCpus[i].pVMR3 = pVMR3;
1006 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
1007 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
1008 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
1009 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
1010 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
1011 }
1012
1013 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1014 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
1015 ("%p\n", pGVM->paVMPagesR3));
1016
1017#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1018 /*
1019 * Create the high resolution wake-up timer for EMT 0, ignore failures.
1020 */
1021 if (RTTimerCanDoHighResolution())
1022 {
1023 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
1024 0 /*one-shot, no interval*/,
1025 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
1026 &pGVM->aCpus[0]);
1027 if (RT_FAILURE(rc4))
1028 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
1029 }
1030#endif
1031
1032 /*
1033 * Complete the handle - take the UsedLock sem just to be careful.
1034 */
1035 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1036 AssertRC(rc);
1037
1038 pHandle->pGVM = pGVM;
1039 pHandle->hEMT0 = hEMT0;
1040 pHandle->ProcId = ProcId;
1041 pGVM->pVMR3 = pVMR3;
1042 pGVM->pVMR3Unsafe = pVMR3;
1043 pGVM->aCpus[0].hEMT = hEMT0;
1044 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1045 pGVM->aCpus[0].cEmtHashCollisions = 0;
1046 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1047 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1048 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1049 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1050 pGVMM->cEMTs += cCpus;
1051
1052 /* Associate it with the session and create the context hook for EMT0. */
1053 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1054 if (RT_SUCCESS(rc))
1055 {
1056 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1057 if (RT_SUCCESS(rc))
1058 {
1059 /*
1060 * Done!
1061 */
1062 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1063
1064 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1065 gvmmR0CreateDestroyUnlock(pGVMM);
1066
1067#ifndef VBOX_WITH_MINIMAL_R0
1068 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1069#endif
1070
1071 *ppGVM = pGVM;
1072 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1073 return VINF_SUCCESS;
1074 }
1075
1076 SUPR0SetSessionVM(pSession, NULL, NULL);
1077 }
1078 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1079 }
1080
1081 /* Cleanup mappings. */
1082 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1083 {
1084 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1085 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1086 }
1087 for (VMCPUID i = 0; i < cCpus; i++)
1088 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1089 {
1090 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1091 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1092 }
1093 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1094 {
1095 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1096 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1097 }
1098 }
1099 }
1100 else
1101 {
1102 if (RT_SUCCESS_NP(rc))
1103 rc = rc2;
1104 if (RT_SUCCESS_NP(rc))
1105 rc = rc3;
1106 AssertStmt(RT_FAILURE_NP(rc), rc = VERR_IPE_UNEXPECTED_STATUS);
1107 }
1108 }
1109 }
1110 /* else: The user wasn't permitted to create this VM. */
1111
1112 /*
1113 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1114 * object reference here. A little extra mess because of non-recursive lock.
1115 */
1116 void *pvObj = pHandle->pvObj;
1117 pHandle->pvObj = NULL;
1118 gvmmR0CreateDestroyUnlock(pGVMM);
1119
1120 SUPR0ObjRelease(pvObj, pSession);
1121
1122 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1123 return rc;
1124 }
1125
1126 rc = VERR_NO_MEMORY;
1127 }
1128 else
1129 rc = VERR_GVMM_IPE_1;
1130 }
1131 else
1132 rc = VERR_GVM_TOO_MANY_VMS;
1133
1134 gvmmR0CreateDestroyUnlock(pGVMM);
1135 return rc;
1136}
1137
1138
1139/**
1140 * Initializes the per VM data belonging to GVMM.
1141 *
1142 * @param pGVM Pointer to the global VM structure.
1143 * @param hSelf The handle.
1144 * @param enmTarget The target platform architecture of the VM.
1145 * @param cCpus The CPU count.
1146 * @param pSession The session this VM is associated with.
1147 */
1148static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession)
1149{
1150 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1151 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1152 AssertCompileMemberAlignment(VM, cpum, 64);
1153 AssertCompileMemberAlignment(VM, tm, 64);
1154
1155 /* GVM: */
1156 pGVM->u32Magic = GVM_MAGIC;
1157 pGVM->hSelf = hSelf;
1158 pGVM->cCpus = cCpus;
1159 pGVM->enmTarget = enmTarget;
1160 pGVM->pSession = pSession;
1161 pGVM->pSelf = pGVM;
1162
1163 /* VM: */
1164 pGVM->enmVMState = VMSTATE_CREATING;
1165 pGVM->hSelfUnsafe = hSelf;
1166 pGVM->pSessionUnsafe = pSession;
1167 pGVM->pVMR0ForCall = pGVM;
1168 pGVM->cCpusUnsafe = cCpus;
1169 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1170 pGVM->uStructVersion = VM_STRUCT_VERSION;
1171 pGVM->cbSelf = sizeof(VM);
1172 pGVM->cbVCpu = sizeof(VMCPU);
1173 pGVM->enmTargetUnsafe = enmTarget;
1174
1175 /* GVMM: */
1176 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1177 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1178 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1179 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1180 pGVM->gvmm.s.fDoneVMMR0Init = false;
1181 pGVM->gvmm.s.fDoneVMMR0Term = false;
1182
1183 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1184 {
1185 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1186 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1187 }
1188 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1189
1190 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1191 {
1192 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1193 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1194 }
1195
1196 /*
1197 * Per virtual CPU.
1198 */
1199 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1200 {
1201 pGVM->aCpus[i].idCpu = i;
1202 pGVM->aCpus[i].idCpuUnsafe = i;
1203 pGVM->aCpus[i].enmTarget = enmTarget;
1204 pGVM->aCpus[i].enmTargetUnsafe = enmTarget;
1205 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1206 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1207 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1208 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1209 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1210 pGVM->aCpus[i].pGVM = pGVM;
1211 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1212 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1213 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1214 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1215 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1216 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1217 }
1218}
1219
1220
1221/**
1222 * Does the VM initialization.
1223 *
1224 * @returns VBox status code.
1225 * @param pGVM The global (ring-0) VM structure.
1226 */
1227GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1228{
1229 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1230
1231 int rc = VERR_INTERNAL_ERROR_3;
1232 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1233 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1234 {
1235 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1236 {
1237 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1238 if (RT_FAILURE(rc))
1239 {
1240 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1241 break;
1242 }
1243 }
1244 }
1245 else
1246 rc = VERR_WRONG_ORDER;
1247
1248 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1249 return rc;
1250}
1251
1252
1253/**
1254 * Indicates that we're done with the ring-0 initialization
1255 * of the VM.
1256 *
1257 * @param pGVM The global (ring-0) VM structure.
1258 * @thread EMT(0)
1259 */
1260GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1261{
1262 /* Set the indicator. */
1263 pGVM->gvmm.s.fDoneVMMR0Init = true;
1264}
1265
1266
1267/**
1268 * Indicates that we're doing the ring-0 termination of the VM.
1269 *
1270 * @returns true if termination hasn't been done already, false if it has.
1271 * @param pGVM Pointer to the global VM structure. Optional.
1272 * @thread EMT(0) or session cleanup thread.
1273 */
1274GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1275{
1276 /* Validate the VM structure, state and handle. */
1277 AssertPtrReturn(pGVM, false);
1278
1279 /* Set the indicator. */
1280 if (pGVM->gvmm.s.fDoneVMMR0Term)
1281 return false;
1282 pGVM->gvmm.s.fDoneVMMR0Term = true;
1283 return true;
1284}
1285
1286
1287/**
1288 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1289 *
1290 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1291 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1292 * would've been nice if the caller were actually the EMT thread or if we somehow
1293 * could've associated the calling thread with the VM up front.
1294 *
1295 * @returns VBox status code.
1296 * @param pGVM The global (ring-0) VM structure.
1297 *
1298 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1299 */
1300GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1301{
1302 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1303 PGVMM pGVMM;
1304 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1305
1306 /*
1307 * Validate the VM structure, state and caller.
1308 */
1309 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1310 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1311 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1312 VERR_WRONG_ORDER);
1313
1314 uint32_t hGVM = pGVM->hSelf;
1315 ASMCompilerBarrier();
1316 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1317 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1318
1319 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1320 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1321
1322 RTPROCESS ProcId = RTProcSelf();
1323 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1324 AssertReturn( ( pHandle->hEMT0 == hSelf
1325 && pHandle->ProcId == ProcId)
1326 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1327
1328 /*
1329 * Lookup the handle and destroy the object.
1330 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1331 * object, we take some precautions against racing callers just in case...
1332 */
1333 int rc = gvmmR0CreateDestroyLock(pGVMM);
1334 AssertRC(rc);
1335
1336 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1337 if ( pHandle->pGVM == pGVM
1338 && ( ( pHandle->hEMT0 == hSelf
1339 && pHandle->ProcId == ProcId)
1340 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1341 && RT_VALID_PTR(pHandle->pvObj)
1342 && RT_VALID_PTR(pHandle->pSession)
1343 && RT_VALID_PTR(pHandle->pGVM)
1344 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1345 {
1346 /* Check that other EMTs have deregistered. */
1347 uint32_t cNotDeregistered = 0;
1348 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1349 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1350 if (cNotDeregistered == 0)
1351 {
1352 /* Grab the object pointer. */
1353 void *pvObj = pHandle->pvObj;
1354 pHandle->pvObj = NULL;
1355 gvmmR0CreateDestroyUnlock(pGVMM);
1356
1357 SUPR0ObjRelease(pvObj, pHandle->pSession);
1358 }
1359 else
1360 {
1361 gvmmR0CreateDestroyUnlock(pGVMM);
1362 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1363 }
1364 }
1365 else
1366 {
1367 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1368 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1369 gvmmR0CreateDestroyUnlock(pGVMM);
1370 rc = VERR_GVMM_IPE_2;
1371 }
1372
1373 return rc;
1374}
1375
1376
1377/**
1378 * Performs VM cleanup task as part of object destruction.
1379 *
1380 * @param pGVM The GVM pointer.
1381 */
1382static void gvmmR0CleanupVM(PGVM pGVM)
1383{
1384 if ( pGVM->gvmm.s.fDoneVMMR0Init
1385 && !pGVM->gvmm.s.fDoneVMMR0Term)
1386 {
1387 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1388 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1389 {
1390 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1391 VMMR0TermVM(pGVM, NIL_VMCPUID);
1392 }
1393 else
1394 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1395 }
1396
1397#ifndef VBOX_WITH_MINIMAL_R0
1398 GMMR0CleanupVM(pGVM);
1399# ifdef VBOX_WITH_NEM_R0
1400 NEMR0CleanupVM(pGVM);
1401# endif
1402 PDMR0CleanupVM(pGVM);
1403 IOMR0CleanupVM(pGVM);
1404 DBGFR0CleanupVM(pGVM);
1405 PGMR0CleanupVM(pGVM);
1406 TMR0CleanupVM(pGVM);
1407#endif
1408 VMMR0CleanupVM(pGVM);
1409}
1410
1411
1412/**
1413 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1414 *
1415 * pvUser1 is the GVM instance pointer.
1416 * pvUser2 is the handle pointer.
1417 */
1418static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1419{
1420 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1421
1422 NOREF(pvObj);
1423
1424 /*
1425 * Some quick, paranoid, input validation.
1426 */
1427 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1428 AssertPtr(pHandle);
1429 PGVMM pGVMM = (PGVMM)pvUser1;
1430 Assert(pGVMM == g_pGVMM);
1431 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1432 if ( !iHandle
1433 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1434 || iHandle != pHandle->iSelf)
1435 {
1436 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1437 return;
1438 }
1439
1440 int rc = gvmmR0CreateDestroyLock(pGVMM);
1441 AssertRC(rc);
1442 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1443 AssertRC(rc);
1444
1445 /*
1446 * This is a tad slow but a doubly linked list is too much hassle.
1447 */
1448 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1449 {
1450 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1451 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1452 gvmmR0CreateDestroyUnlock(pGVMM);
1453 return;
1454 }
1455
1456 if (pGVMM->iUsedHead == iHandle)
1457 pGVMM->iUsedHead = pHandle->iNext;
1458 else
1459 {
1460 uint16_t iPrev = pGVMM->iUsedHead;
1461 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1462 while (iPrev)
1463 {
1464 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1465 {
1466 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1467 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1468 gvmmR0CreateDestroyUnlock(pGVMM);
1469 return;
1470 }
1471 if (RT_UNLIKELY(c-- <= 0))
1472 {
1473 iPrev = 0;
1474 break;
1475 }
1476
1477 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1478 break;
1479 iPrev = pGVMM->aHandles[iPrev].iNext;
1480 }
1481 if (!iPrev)
1482 {
1483 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1484 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1485 gvmmR0CreateDestroyUnlock(pGVMM);
1486 return;
1487 }
1488
1489 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1490 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1491 }
1492 pHandle->iNext = 0;
1493 pGVMM->cVMs--;
1494
1495 /*
1496 * Do the global cleanup round.
1497 */
1498 PGVM pGVM = pHandle->pGVM;
1499 if ( RT_VALID_PTR(pGVM)
1500 && pGVM->u32Magic == GVM_MAGIC)
1501 {
1502 pGVMM->cEMTs -= pGVM->cCpus;
1503
1504 if (pGVM->pSession)
1505 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1506
1507 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1508
1509 gvmmR0CleanupVM(pGVM);
1510
1511 /*
1512 * Do the GVMM cleanup - must be done last.
1513 */
1514 /* The VM and VM pages mappings/allocations. */
1515 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1516 {
1517 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1518 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1519 }
1520
1521 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1522 {
1523 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1524 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1525 }
1526
1527 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1528 {
1529 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1530 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1531 }
1532
1533 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1534 {
1535 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1536 {
1537 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1538 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1539 }
1540 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1541 {
1542 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1543 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1544 }
1545#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1546 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1547 {
1548 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1549 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1550 }
1551#endif
1552 }
1553
1554 /* the GVM structure itself. */
1555 pGVM->u32Magic |= UINT32_C(0x80000000);
1556 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1557 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1558 pGVM = NULL;
1559
1560 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1561 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1562 AssertRC(rc);
1563 }
1564 /* else: GVMMR0CreateVM cleanup. */
1565
1566 /*
1567 * Free the handle.
1568 */
1569 pHandle->iNext = pGVMM->iFreeHead;
1570 pGVMM->iFreeHead = iHandle;
1571 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1572 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1573 ASMAtomicWriteNullPtr(&pHandle->pSession);
1574 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1575 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1576
1577 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1578 gvmmR0CreateDestroyUnlock(pGVMM);
1579 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1580}
1581
1582
1583/**
1584 * Registers the calling thread as the EMT of a Virtual CPU.
1585 *
1586 * Note that VCPU 0 is automatically registered during VM creation.
1587 *
1588 * @returns VBox status code
1589 * @param pGVM The global (ring-0) VM structure.
1590 * @param idCpu VCPU id to register the current thread as.
1591 */
1592GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1593{
1594 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1595
1596 /*
1597 * Validate the VM structure, state and handle.
1598 */
1599 PGVMM pGVMM;
1600 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1601 if (RT_SUCCESS(rc))
1602 {
1603 if (idCpu < pGVM->cCpus)
1604 {
1605 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1606 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1607
1608 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1609
1610 /* Check that the EMT isn't already assigned to a thread. */
1611 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1612 {
1613 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1614
1615 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1616 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1617 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1618 if (RT_SUCCESS(rc))
1619 {
1620 /*
1621 * Do the assignment, then try setup the hook. Undo if that fails.
1622 */
1623 unsigned cCollisions = 0;
1624 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1625 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1626 {
1627 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1628 do
1629 {
1630 cCollisions++;
1631 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1632 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1633 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1634 }
1635 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1636 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1637
1638 pGVCpu->hNativeThreadR0 = hNativeSelf;
1639 pGVCpu->hEMT = hNativeSelf;
1640 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1641 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1642
1643 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1644 if (RT_SUCCESS(rc))
1645 {
1646#ifndef VBOX_WITH_MINIMAL_R0
1647 CPUMR0RegisterVCpuThread(pGVCpu);
1648#endif
1649
1650#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1651 /*
1652 * Create the high resolution wake-up timer, ignore failures.
1653 */
1654 if (RTTimerCanDoHighResolution())
1655 {
1656 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1657 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1658 if (RT_FAILURE(rc2))
1659 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1660 }
1661#endif
1662 }
1663 else
1664 {
1665 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1666 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1667 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1668 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1669 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1670 }
1671 }
1672 }
1673 else
1674 rc = VERR_ACCESS_DENIED;
1675
1676 gvmmR0CreateDestroyUnlock(pGVMM);
1677 }
1678 else
1679 rc = VERR_INVALID_CPU_ID;
1680 }
1681 return rc;
1682}
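
/*
 * Lookup sketch (illustrative, mirroring the insertion above): resolving the
 * calling thread to its vCPU probes the same open-addressed hash table with
 * the same double-hash step, roughly:
 *
 *     uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
 *     if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf)
 *     {
 *         uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
 *         do
 *             idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
 *         while (   pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != hNativeSelf
 *                && pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
 *     }
 *     VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
 */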
1683
1684
1685/**
1686 * Deregisters the calling thread as the EMT of a Virtual CPU.
1687 *
1688 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1689 *
1690 * @returns VBox status code
1691 * @param pGVM The global (ring-0) VM structure.
1692 * @param idCpu VCPU id the calling thread is registered as.
1693 */
1694GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1695{
1696 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1697
1698 /*
1699 * Validate the VM structure, state and handle.
1700 */
1701 PGVMM pGVMM;
1702 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1703 if (RT_SUCCESS(rc))
1704 {
1705 /*
1706 * Take the destruction lock and recheck the handle state to
1707 * prevent racing GVMMR0DestroyVM.
1708 */
1709 gvmmR0CreateDestroyLock(pGVMM);
1710
1711 uint32_t hSelf = pGVM->hSelf;
1712 ASMCompilerBarrier();
1713 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1714 && pGVMM->aHandles[hSelf].pvObj != NULL
1715 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1716 {
1717 /*
1718 * Do per-EMT cleanups.
1719 */
1720 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1721
1722 /*
1723 * Invalidate hEMT. We don't use NIL here as that would allow
1724 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1725 */
1726 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1727 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1728
1729 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1730 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1731 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1732 }
1733
1734 gvmmR0CreateDestroyUnlock(pGVMM);
1735 }
1736 return rc;
1737}
1738
1739
1740/**
1741 * Registers the caller as a given worker thread.
1742 *
1743 * This enables the thread to use critical sections in ring-0.
1744 *
1745 * @returns VBox status code.
1746 * @param pGVM The global (ring-0) VM structure.
1747 * @param enmWorker The worker thread this is supposed to be.
1748 * @param hNativeSelfR3 The ring-3 native self of the caller.
1749 */
1750GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1751{
1752 /*
1753 * Validate input.
1754 */
1755 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1756 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1757 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1758 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1759 PGVMM pGVMM;
1760 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1761 AssertRCReturn(rc, rc);
1762 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1763
1764 /*
1765 * Grab the big lock and check the VM state again.
1766 */
1767 uint32_t const hSelf = pGVM->hSelf;
1768 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1769 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1770 && pGVMM->aHandles[hSelf].pvObj != NULL
1771 && pGVMM->aHandles[hSelf].pGVM == pGVM
1772 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1773 {
1774 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1775 {
1776 /*
1777 * Check that the thread isn't an EMT or serving in some other worker capacity.
1778 */
1779 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1780 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1781 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1782 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1783 rc = VERR_INVALID_PARAMETER);
1784 if (RT_SUCCESS(rc))
1785 {
1786 /*
1787 * Do the registration.
1788 */
1789 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1790 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1791 {
1792 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1793 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1794 rc = VINF_SUCCESS;
1795 }
1796 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1797 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1798 rc = VERR_ALREADY_EXISTS;
1799 else
1800 rc = VERR_RESOURCE_BUSY;
1801 }
1802 }
1803 else
1804 rc = VERR_VM_INVALID_VM_STATE;
1805 }
1806 else
1807 rc = VERR_INVALID_VM_HANDLE;
1808 gvmmR0CreateDestroyUnlock(pGVMM);
1809 return rc;
1810}
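/* Minimal usage sketch (illustrative only): a ring-0 worker thread that knows
 * both its ring-0 context (pGVM) and its ring-3 native thread handle registers
 * itself as shown below; GVMMWORKERTHREAD_LOGFLUSHER is merely used as an
 * example member of the GVMMWORKERTHREAD enumeration.
 *
 *      int rc = GVMMR0RegisterWorkerThread(pGVM, GVMMWORKERTHREAD_LOGFLUSHER,
 *                                          hNativeSelfR3);
 *      // VERR_ALREADY_EXISTS: this thread already owns the slot;
 *      // VERR_RESOURCE_BUSY:  another thread owns it.
 */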
1811
1812
1813/**
1814 * Deregisters a worker thread (caller).
1815 *
1816 * The worker thread cannot be re-created and re-registered, instead the given
1817 * @a enmWorker slot becomes invalid.
1818 *
1819 * @returns VBox status code.
1820 * @param pGVM The global (ring-0) VM structure.
1821 * @param enmWorker The worker thread this is supposed to be.
1822 */
1823GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1824{
1825 /*
1826 * Validate input.
1827 */
1828 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1829 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1830 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1831 PGVMM pGVMM;
1832 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1833 AssertRCReturn(rc, rc);
1834
1835 /*
1836 * Grab the big lock and check the VM state again.
1837 */
1838 uint32_t const hSelf = pGVM->hSelf;
1839 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1840 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1841 && pGVMM->aHandles[hSelf].pvObj != NULL
1842 && pGVMM->aHandles[hSelf].pGVM == pGVM
1843 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1844 {
1845 /*
1846 * Do the deregistration.
1847 * This will prevent any other thread from registering as the worker later.
1848 */
1849 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1850 {
1851 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1852 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1853 rc = VINF_SUCCESS;
1854 }
1855 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1856 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1857 rc = VINF_SUCCESS;
1858 else
1859 rc = VERR_NOT_OWNER;
1860 }
1861 else
1862 rc = VERR_INVALID_VM_HANDLE;
1863 gvmmR0CreateDestroyUnlock(pGVMM);
1864 return rc;
1865}
1866
1867
1868/**
1869 * Lookup a GVM structure by its handle.
1870 *
1871 * @returns The GVM pointer on success, NULL on failure.
1872 * @param hGVM The global VM handle. Asserts on bad handle.
1873 */
1874GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1875{
1876 PGVMM pGVMM;
1877 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1878
1879 /*
1880 * Validate.
1881 */
1882 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1883 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1884
1885 /*
1886 * Look it up.
1887 */
1888 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1889 AssertPtrReturn(pHandle->pvObj, NULL);
1890 PGVM pGVM = pHandle->pGVM;
1891 AssertPtrReturn(pGVM, NULL);
1892
1893 return pGVM;
1894}
1895
1896
1897/**
1898 * Check that the given GVM and VM structures match up.
1899 *
1900 * The calling thread must be in the same process as the VM. All current lookups
1901 * are by threads inside the same process, so this will not be an issue.
1902 *
1903 * @returns VBox status code.
1904 * @param pGVM The global (ring-0) VM structure.
1905 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1906 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1907 * shared mode when requested.
1908 *
1909 * Be very careful if not taking the lock as it's
1910 * possible that the VM will disappear then!
1911 *
1912 * @remark This will not assert on an invalid pGVM but try to return silently.
1913 */
1914static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1915{
1916 /*
1917 * Check the pointers.
1918 */
1919 int rc;
1920 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1921 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1922 {
1923 /*
1924 * Get the pGVMM instance and check the VM handle.
1925 */
1926 PGVMM pGVMM;
1927 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1928
1929 uint16_t hGVM = pGVM->hSelf;
1930 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1931 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1932 {
1933 RTPROCESS const pidSelf = RTProcSelf();
1934 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1935 if (fTakeUsedLock)
1936 {
1937 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1938 AssertRCReturn(rc, rc);
1939 }
1940
1941 if (RT_LIKELY( pHandle->pGVM == pGVM
1942 && pHandle->ProcId == pidSelf
1943 && RT_VALID_PTR(pHandle->pvObj)))
1944 {
1945 /*
1946 * Some more VM data consistency checks.
1947 */
1948 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1949 && pGVM->hSelfUnsafe == hGVM
1950 && pGVM->pSelf == pGVM))
1951 {
1952 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1953 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1954 {
1955 *ppGVMM = pGVMM;
1956 return VINF_SUCCESS;
1957 }
1958 rc = VERR_INCONSISTENT_VM_HANDLE;
1959 }
1960 else
1961 rc = VERR_INCONSISTENT_VM_HANDLE;
1962 }
1963 else
1964 rc = VERR_INVALID_VM_HANDLE;
1965
1966 if (fTakeUsedLock)
1967 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1968 }
1969 else
1970 rc = VERR_INVALID_VM_HANDLE;
1971 }
1972 else
1973 rc = VERR_INVALID_POINTER;
1974 return rc;
1975}
1976
1977
1978/**
1979 * Validates a GVM/VM pair.
1980 *
1981 * @returns VBox status code.
1982 * @param pGVM The global (ring-0) VM structure.
1983 */
1984GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1985{
1986 PGVMM pGVMM;
1987 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1988}
1989
1990
1991/**
1992 * Check that the given GVM and VM structures match up.
1993 *
1994 * The calling thread must be in the same process as the VM. All current lookups
1995 * are by threads inside the same process, so this will not be an issue.
1996 *
1997 * @returns VBox status code.
1998 * @param pGVM The global (ring-0) VM structure.
1999 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2000 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2001 * @thread EMT
2002 *
2003 * @remarks This will assert in all failure paths.
2004 */
2005static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
2006{
2007 /*
2008 * Check the pointers.
2009 */
2010 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2011 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2012
2013 /*
2014 * Get the pGVMM instance and check the VM handle.
2015 */
2016 PGVMM pGVMM;
2017 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2018
2019 uint16_t hGVM = pGVM->hSelf;
2020 ASMCompilerBarrier();
2021 AssertReturn( hGVM != NIL_GVM_HANDLE
2022 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2023
2024 RTPROCESS const pidSelf = RTProcSelf();
2025 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2026 AssertReturn( pHandle->pGVM == pGVM
2027 && pHandle->ProcId == pidSelf
2028 && RT_VALID_PTR(pHandle->pvObj),
2029 VERR_INVALID_HANDLE);
2030
2031 /*
2032 * Check the EMT claim.
2033 */
2034 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2035 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2036 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2037
2038 /*
2039 * Some more VM data consistency checks.
2040 */
2041 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2042 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2043 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
2044 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2045
2046 *ppGVMM = pGVMM;
2047 return VINF_SUCCESS;
2048}
2049
2050
2051/**
2052 * Validates a GVM/EMT pair.
2053 *
2054 * @returns VBox status code.
2055 * @param pGVM The global (ring-0) VM structure.
2056 * @param idCpu The Virtual CPU ID of the calling EMT.
2057 * @thread EMT(idCpu)
2058 */
2059GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2060{
2061 PGVMM pGVMM;
2062 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2063}
2064
2065
2066/**
2067 * Looks up the VM belonging to the specified EMT thread.
2068 *
2069 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2070 * unnecessary kernel panics when the EMT thread hits an assertion. The
2071 * caller may or may not be an EMT thread.
2072 *
2073 * @returns Pointer to the VM on success, NULL on failure.
2074 * @param hEMT The native thread handle of the EMT.
2075 * NIL_RTNATIVETHREAD means the current thread
2076 */
2077GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2078{
2079 /*
2080 * No assertions here as we're usually called in an AssertMsgN or
2081 * RTAssert* context.
2082 */
2083 PGVMM pGVMM = g_pGVMM;
2084 if ( !RT_VALID_PTR(pGVMM)
2085 || pGVMM->u32Magic != GVMM_MAGIC)
2086 return NULL;
2087
2088 if (hEMT == NIL_RTNATIVETHREAD)
2089 hEMT = RTThreadNativeSelf();
2090 RTPROCESS ProcId = RTProcSelf();
2091
2092 /*
2093 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2094 */
2095/** @todo introduce some pid hash table here, please. */
2096 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2097 {
2098 if ( pGVMM->aHandles[i].iSelf == i
2099 && pGVMM->aHandles[i].ProcId == ProcId
2100 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2101 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2102 {
2103 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2104 return pGVMM->aHandles[i].pGVM;
2105
2106 /* This is fairly safe with the current process-per-VM approach. */
2107 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2108 VMCPUID const cCpus = pGVM->cCpus;
2109 ASMCompilerBarrier();
2110 if ( cCpus < 1
2111 || cCpus > VMM_MAX_CPU_COUNT)
2112 continue;
2113 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2114 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2115 return pGVMM->aHandles[i].pGVM;
2116 }
2117 }
2118 return NULL;
2119}
2120
2121
2122/**
2123 * Looks up the GVMCPU belonging to the specified EMT thread.
2124 *
2125 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2126 * unnecessary kernel panics when the EMT thread hits an assertion. The
2127 * caller may or may not be an EMT thread.
2128 *
2129 * @returns Pointer to the GVMCPU on success, NULL on failure.
2130 * @param hEMT The native thread handle of the EMT.
2131 * NIL_RTNATIVETHREAD means the current thread
2132 */
2133GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2134{
2135 /*
2136 * No assertions here as we're usually called in an AssertMsgN,
2137 * RTAssert*, Log and LogRel contexts.
2138 */
2139 PGVMM pGVMM = g_pGVMM;
2140 if ( !RT_VALID_PTR(pGVMM)
2141 || pGVMM->u32Magic != GVMM_MAGIC)
2142 return NULL;
2143
2144 if (hEMT == NIL_RTNATIVETHREAD)
2145 hEMT = RTThreadNativeSelf();
2146 RTPROCESS ProcId = RTProcSelf();
2147
2148 /*
2149 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2150 */
2151/** @todo introduce some pid hash table here, please. */
2152 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2153 {
2154 if ( pGVMM->aHandles[i].iSelf == i
2155 && pGVMM->aHandles[i].ProcId == ProcId
2156 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2157 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2158 {
2159 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2160 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2161 return &pGVM->aCpus[0];
2162
2163 /* This is fairly safe with the current process-per-VM approach. */
2164 VMCPUID const cCpus = pGVM->cCpus;
2165 ASMCompilerBarrier();
2167 if ( cCpus < 1
2168 || cCpus > VMM_MAX_CPU_COUNT)
2169 continue;
2170 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2171 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2172 return &pGVM->aCpus[idCpu];
2173 }
2174 }
2175 return NULL;
2176}
2177
2178
2179/**
2180 * Get the GVMCPU structure for the given EMT.
2181 *
2182 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2183 * @param pGVM The global (ring-0) VM structure.
2184 * @param hEMT The native thread handle of the EMT.
2185 * NIL_RTNATIVETHREAD means the current thread
2186 */
2187GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2188{
2189 /*
2190 * Validate & adjust input.
2191 */
2192 AssertPtr(pGVM);
2193 Assert(pGVM->u32Magic == GVM_MAGIC);
2194 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2195 {
2196 hEMT = RTThreadNativeSelf();
2197 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2198 }
2199
2200 /*
2201 * Find the matching hash table entry.
2202 * See similar code in GVMMR0GetRing3ThreadForSelf.
2203 */
2204 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2205 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2206 { /* likely */ }
2207 else
2208 {
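        /* The primary slot doesn't hold this thread, so walk the same double-hash
           probe sequence GVMMR0RegisterVCpu used for insertion; reaching a free
           slot means the caller is not an EMT of this VM. */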
2209#ifdef VBOX_STRICT
2210 unsigned cCollisions = 0;
2211#endif
2212 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2213 for (;;)
2214 {
2215 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2216 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2217 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2218 break;
2219 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2220 {
2221#ifdef VBOX_STRICT
2222 uint32_t idxCpu = pGVM->cCpus;
2223 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2224 while (idxCpu-- > 0)
2225 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2226#endif
2227 return NULL;
2228 }
2229 }
2230 }
2231
2232 /*
2233 * Validate the VCpu number and translate it into a pointer.
2234 */
2235 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2236 AssertReturn(idCpu < pGVM->cCpus, NULL);
2237 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2238 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2239 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2240 return pGVCpu;
2241}
2242
2243
2244/**
2245 * Get the native ring-3 thread handle for the caller.
2246 *
2247 * This works for EMTs and registered workers.
2248 *
2249 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2250 * @param pGVM The global (ring-0) VM structure.
2251 */
2252GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2253{
2254 /*
2255 * Validate input.
2256 */
2257 AssertPtr(pGVM);
2258 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2259 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2260 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2261
2262 /*
2263 * Find the matching hash table entry.
2264 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2265 */
2266 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2267 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2268 { /* likely */ }
2269 else
2270 {
2271#ifdef VBOX_STRICT
2272 unsigned cCollisions = 0;
2273#endif
2274 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2275 for (;;)
2276 {
2277 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2278 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2279 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2280 break;
2281 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2282 {
2283#ifdef VBOX_STRICT
2284 uint32_t idxCpu = pGVM->cCpus;
2285 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2286 while (idxCpu-- > 0)
2287 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2288#endif
2289
2290 /*
2291 * Not an EMT, so see if it's a worker thread.
2292 */
2293 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2294 while (--idx > GVMMWORKERTHREAD_INVALID)
2295 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2296 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2297
2298 return NIL_RTNATIVETHREAD;
2299 }
2300 }
2301 }
2302
2303 /*
2304 * Validate the VCpu number and translate it into a pointer.
2305 */
2306 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2307 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2308 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2309 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2310 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2311 return pGVCpu->hNativeThread;
2312}
2313
2314
2315/**
2316 * Converts a pointer with the GVM structure to a host physical address.
2317 *
2318 * @returns Host physical address.
2319 * @param pGVM The global (ring-0) VM structure.
2320 * @param pv The address to convert.
2321 * @thread EMT
2322 */
2323GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2324{
2325 AssertPtr(pGVM);
2326 Assert(pGVM->u32Magic == GVM_MAGIC);
2327 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2328 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
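    /* Look up the host physical address of the page backing pv within the VM
       memory object, then put the byte offset within that page back on. */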
2329 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2330}
2331
2332
2333/**
2334 * This will wake up expired and soon-to-be expired VMs.
2335 *
2336 * @returns Number of VMs that have been woken up.
2337 * @param pGVMM Pointer to the GVMM instance data.
2338 * @param u64Now The current time.
2339 */
2340static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2341{
2342 /*
2343 * Skip this if we've been disabled because of high resolution wakeups or by
2344 * the user.
2345 */
2346 if (!pGVMM->fDoEarlyWakeUps)
2347 return 0;
2348
2349/** @todo Rewrite this algorithm. See performance defect XYZ. */
2350
2351 /*
2352 * A cheap optimization to stop wasting so much time here on big setups.
2353 */
2354 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2355 if ( pGVMM->cHaltedEMTs == 0
2356 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2357 return 0;
2358
2359 /*
2360 * Only one thread doing this at a time.
2361 */
2362 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2363 return 0;
2364
2365 /*
2366 * The first pass will wake up VMs which have actually expired
2367 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2368 */
2369 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2370 uint64_t u64Min = UINT64_MAX;
2371 unsigned cWoken = 0;
2372 unsigned cHalted = 0;
2373 unsigned cTodo2nd = 0;
2374 unsigned cTodo3rd = 0;
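    /* Illustrative numbers (assumed, not the configured defaults): with
       u64Now = 1 000 000 ns, nsEarlyWakeUp1 = 25 000 and nsEarlyWakeUp2 = 50 000,
       an EMT whose u64HaltExpire is <= 1 000 000 gets woken right here in the
       first pass, one expiring by 1 025 000 is queued for the 2nd pass, one
       expiring by 1 050 000 for the 3rd pass, and anything later only
       contributes to u64Min. */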
2375 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2376 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2377 i = pGVMM->aHandles[i].iNext)
2378 {
2379 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2380 if ( RT_VALID_PTR(pCurGVM)
2381 && pCurGVM->u32Magic == GVM_MAGIC)
2382 {
2383 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2384 {
2385 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2386 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2387 if (u64)
2388 {
2389 if (u64 <= u64Now)
2390 {
2391 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2392 {
2393 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2394 AssertRC(rc);
2395 cWoken++;
2396 }
2397 }
2398 else
2399 {
2400 cHalted++;
2401 if (u64 <= uNsEarlyWakeUp1)
2402 cTodo2nd++;
2403 else if (u64 <= uNsEarlyWakeUp2)
2404 cTodo3rd++;
2405 else if (u64 < u64Min)
2406 u64Min = u64;
2407 }
2408 }
2409 }
2410 }
2411 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2412 }
2413
2414 if (cTodo2nd)
2415 {
2416 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2417 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2418 i = pGVMM->aHandles[i].iNext)
2419 {
2420 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2421 if ( RT_VALID_PTR(pCurGVM)
2422 && pCurGVM->u32Magic == GVM_MAGIC)
2423 {
2424 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2425 {
2426 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2427 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2428 if ( u64
2429 && u64 <= uNsEarlyWakeUp1)
2430 {
2431 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2432 {
2433 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2434 AssertRC(rc);
2435 cWoken++;
2436 }
2437 }
2438 }
2439 }
2440 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2441 }
2442 }
2443
2444 if (cTodo3rd)
2445 {
2446 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2447 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2448 i = pGVMM->aHandles[i].iNext)
2449 {
2450 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2451 if ( RT_VALID_PTR(pCurGVM)
2452 && pCurGVM->u32Magic == GVM_MAGIC)
2453 {
2454 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2455 {
2456 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2457 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2458 if ( u64
2459 && u64 <= uNsEarlyWakeUp2)
2460 {
2461 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2462 {
2463 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2464 AssertRC(rc);
2465 cWoken++;
2466 }
2467 }
2468 }
2469 }
2470 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2471 }
2472 }
2473
2474 /*
2475 * Set the minimum value.
2476 */
2477 pGVMM->uNsNextEmtWakeup = u64Min;
2478
2479 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2480 return cWoken;
2481}
2482
2483
2484#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2485/**
2486 * Timer callback for the EMT high-resolution wake-up timer.
2487 *
2488 * @param pTimer The timer handle.
2489 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2490 * @param iTick The current tick.
2491 */
2492static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2493{
2494 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2495 NOREF(pTimer); NOREF(iTick);
2496
2497 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2498 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2499 {
2500 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2501 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2502 }
2503 else
2504 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2505
2506 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2507 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2508}
2509#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2510
2511
2512/**
2513 * Halt the EMT thread.
2514 *
2515 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2516 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2517 * @param pGVM The global (ring-0) VM structure.
2518 * @param pGVCpu The global (ring-0) CPU structure of the calling
2519 * EMT.
2520 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2521 * @thread EMT(pGVCpu).
2522 */
2523GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2524{
2525 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2526 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2527 PGVMM pGVMM;
2528 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2529
2530 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2531 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2532
2533 /*
2534 * If we're doing early wake-ups, we must take the UsedList lock before we
2535 * start querying the current time.
2536 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2537 */
2538 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2539 if (fDoEarlyWakeUps)
2540 {
2541 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2542 }
2543
2544 /* GIP hack: We may frequently be sleeping for short intervals where the
2545 difference between GIP and system time matters on systems with high resolution
2546 system time. So, convert the input from GIP to System time in that case. */
2547 Assert(ASMIntAreEnabled());
2548 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2549 const uint64_t u64NowGip = RTTimeNanoTS();
2550
2551 if (fDoEarlyWakeUps)
2552 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2553
2554 /*
2555 * Go to sleep if we must...
2556 * Cap the sleep time to 1 second to be on the safe side.
2557 */
2558 int rc;
2559 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2560 if ( u64NowGip < u64ExpireGipTime
2561 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2562 ? pGVMM->nsMinSleepCompany
2563 : pGVMM->nsMinSleepAlone)
2564#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2565 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2566#endif
2567 )
2568 )
2569 {
2570 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2571 if (cNsInterval > RT_NS_1SEC)
2572 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2573 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2574 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2575 if (fDoEarlyWakeUps)
2576 {
2577 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2578 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2579 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2580 }
2581
2582#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2583 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2584 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2585 {
2586 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2587 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2588 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2589 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2590 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2591 }
2592#endif
2593
2594 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2595 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2596 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2597
2598 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2599 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2600
2601#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2602 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2603 { /* likely */ }
2604 else
2605 {
2606 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2607 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2608 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2609 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2610 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2611 }
2612#endif
2613
2614 /* Reset the semaphore to try to prevent a few false wake-ups. */
2615 if (rc == VINF_SUCCESS)
2616 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2617 else if (rc == VERR_TIMEOUT)
2618 {
2619 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2620 rc = VINF_SUCCESS;
2621 }
2622 }
2623 else
2624 {
2625 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2626 if (fDoEarlyWakeUps)
2627 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2628 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2629 rc = VINF_SUCCESS;
2630 }
2631
2632 return rc;
2633}
2634
2635
2636/**
2637 * Halt the EMT thread.
2638 *
2639 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2640 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2641 * @param pGVM The global (ring-0) VM structure.
2642 * @param idCpu The Virtual CPU ID of the calling EMT.
2643 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2644 * @thread EMT(idCpu).
2645 */
2646GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2647{
2648 PGVMM pGVMM;
2649 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2650 if (RT_SUCCESS(rc))
2651 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2652 return rc;
2653}
2654
2655
2656
2657/**
2658 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2659 * a sleeping EMT.
2660 *
2661 * @retval VINF_SUCCESS if successfully woken up.
2662 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2663 *
2664 * @param pGVM The global (ring-0) VM structure.
2665 * @param pGVCpu The global (ring-0) VCPU structure.
2666 */
2667DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2668{
2669 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2670
2671 /*
2672 * Signal the semaphore regardless of whether it's currently blocked on it.
2673 *
2674 * The reason for this is that there is absolutely no way we can be 100%
2675 * certain that it isn't *about* to go to sleep on it and just got
2676 * delayed a bit en route. So, we will always signal the semaphore when
2677 * it is flagged as halted in the VMM.
2678 */
2679/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2680 int rc;
2681 if (pGVCpu->gvmm.s.u64HaltExpire)
2682 {
2683 rc = VINF_SUCCESS;
2684 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2685 }
2686 else
2687 {
2688 rc = VINF_GVM_NOT_BLOCKED;
2689 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2690 }
2691
2692 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2693 AssertRC(rc2);
2694
2695 return rc;
2696}
2697
2698
2699/**
2700 * Wakes up the halted EMT thread so it can service a pending request.
2701 *
2702 * @returns VBox status code.
2703 * @retval VINF_SUCCESS if successfully woken up.
2704 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2705 *
2706 * @param pGVM The global (ring-0) VM structure.
2707 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2708 * @param fTakeUsedLock Take the used lock or not
2709 * @thread Any but EMT(idCpu).
2710 */
2711GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2712{
2713 /*
2714 * Validate input and take the UsedLock.
2715 */
2716 PGVMM pGVMM;
2717 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2718 if (RT_SUCCESS(rc))
2719 {
2720 if (idCpu < pGVM->cCpus)
2721 {
2722 /*
2723 * Do the actual job.
2724 */
2725 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2726
2727 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2728 {
2729 /*
2730 * While we're here, do a round of scheduling.
2731 */
2732 Assert(ASMIntAreEnabled());
2733 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2734 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2735 }
2736 }
2737 else
2738 rc = VERR_INVALID_CPU_ID;
2739
2740 if (fTakeUsedLock)
2741 {
2742 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2743 AssertRC(rc2);
2744 }
2745 }
2746
2747 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2748 return rc;
2749}
2750
2751
2752/**
2753 * Wakes up the halted EMT thread so it can service a pending request.
2754 *
2755 * @returns VBox status code.
2756 * @retval VINF_SUCCESS if successfully woken up.
2757 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2758 *
2759 * @param pGVM The global (ring-0) VM structure.
2760 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2761 * @thread Any but EMT(idCpu).
2762 */
2763GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2764{
2765 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2766}
2767
2768
2769/**
2770 * Wakes up the halted EMT thread so it can service a pending request, without
2771 * taking the used lock.
2772 *
2773 * @returns VBox status code.
2774 * @retval VINF_SUCCESS if successfully woken up.
2775 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2776 *
2777 * @param pGVM The global (ring-0) VM structure.
2778 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2779 * @thread Any but EMT(idCpu).
2780 * @deprecated Don't use in new code if possible! Use the GVM variant.
2781 */
2782GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2783{
2784 PGVMM pGVMM;
2785 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2786 if (RT_SUCCESS(rc))
2787 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2788 return rc;
2789}
2790
2791
2792/**
2793 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2794 * the Virtual CPU if it's still busy executing guest code.
2795 *
2796 * @returns VBox status code.
2797 * @retval VINF_SUCCESS if poked successfully.
2798 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2799 *
2800 * @param pGVM The global (ring-0) VM structure.
2801 * @param pVCpu The cross context virtual CPU structure.
2802 */
2803DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2804{
2805 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2806
2807 RTCPUID idHostCpu = pVCpu->idHostCpu;
2808 if ( idHostCpu == NIL_RTCPUID
2809 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2810 {
2811 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2812 return VINF_GVM_NOT_BUSY_IN_GC;
2813 }
2814
2815 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2816 RTMpPokeCpu(idHostCpu);
2817 return VINF_SUCCESS;
2818}
2819
2820
2821/**
2822 * Pokes an EMT if it's still busy running guest code.
2823 *
2824 * @returns VBox status code.
2825 * @retval VINF_SUCCESS if poked successfully.
2826 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2827 *
2828 * @param pGVM The global (ring-0) VM structure.
2829 * @param idCpu The ID of the virtual CPU to poke.
2830 * @param fTakeUsedLock Take the used lock or not
2831 */
2832GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2833{
2834 /*
2835 * Validate input and take the UsedLock.
2836 */
2837 PGVMM pGVMM;
2838 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2839 if (RT_SUCCESS(rc))
2840 {
2841 if (idCpu < pGVM->cCpus)
2842 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2843 else
2844 rc = VERR_INVALID_CPU_ID;
2845
2846 if (fTakeUsedLock)
2847 {
2848 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2849 AssertRC(rc2);
2850 }
2851 }
2852
2853 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2854 return rc;
2855}
2856
2857
2858/**
2859 * Pokes an EMT if it's still busy running guest code.
2860 *
2861 * @returns VBox status code.
2862 * @retval VINF_SUCCESS if poked successfully.
2863 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2864 *
2865 * @param pGVM The global (ring-0) VM structure.
2866 * @param idCpu The ID of the virtual CPU to poke.
2867 */
2868GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2869{
2870 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2871}
2872
2873
2874/**
2875 * Pokes an EMT if it's still busy running guest code, without taking the
2876 * used lock.
2877 *
2878 * @returns VBox status code.
2879 * @retval VINF_SUCCESS if poked successfully.
2880 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2881 *
2882 * @param pGVM The global (ring-0) VM structure.
2883 * @param idCpu The ID of the virtual CPU to poke.
2884 *
2885 * @deprecated Don't use in new code if possible! Use the GVM variant.
2886 */
2887GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2888{
2889 PGVMM pGVMM;
2890 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2891 if (RT_SUCCESS(rc))
2892 {
2893 if (idCpu < pGVM->cCpus)
2894 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2895 else
2896 rc = VERR_INVALID_CPU_ID;
2897 }
2898 return rc;
2899}
2900
2901
2902/**
2903 * Wakes up a set of halted EMT threads so they can service pending requests.
2904 *
2905 * @returns VBox status code, no informational stuff.
2906 *
2907 * @param pGVM The global (ring-0) VM structure.
2908 * @param pSleepSet The set of sleepers to wake up.
2909 * @param pPokeSet The set of CPUs to poke.
2910 */
2911GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2912{
2913 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2914 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2915 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2916
2917 /*
2918 * Validate input and take the UsedLock.
2919 */
2920 PGVMM pGVMM;
2921 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2922 if (RT_SUCCESS(rc))
2923 {
2924 rc = VINF_SUCCESS;
2925 VMCPUID idCpu = pGVM->cCpus;
2926 while (idCpu-- > 0)
2927 {
2928 /* Don't try to poke or wake up ourselves. */
2929 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2930 continue;
2931
2932 /* just ignore errors for now. */
2933 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2934 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2935 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2936 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2937 }
2938
2939 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2940 AssertRC(rc2);
2941 }
2942
2943 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2944 return rc;
2945}
2946
2947
2948/**
2949 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2950 *
2951 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2952 * @param pGVM The global (ring-0) VM structure.
2953 * @param pReq Pointer to the request packet.
2954 */
2955GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2956{
2957 /*
2958 * Validate input and pass it on.
2959 */
2960 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2961 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2962
2963 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2964}
2965
2966
2967
2968/**
2969 * Poll the schedule to see if someone else should get a chance to run.
2970 *
2971 * This is a bit hackish and will not work too well if the machine is
2972 * under heavy load from non-VM processes.
2973 *
2974 * @returns VINF_SUCCESS if not yielded.
2975 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2976 * @param pGVM The global (ring-0) VM structure.
2977 * @param idCpu The Virtual CPU ID of the calling EMT.
2978 * @param fYield Whether to yield or not.
2979 * This is for when we're spinning in the halt loop.
2980 * @thread EMT(idCpu).
2981 */
2982GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2983{
2984 /*
2985 * Validate input.
2986 */
2987 PGVMM pGVMM;
2988 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2989 if (RT_SUCCESS(rc))
2990 {
2991 /*
2992 * We currently only implement helping with wakeups (fYield = false), so don't
2993 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2994 */
2995 if (!fYield && pGVMM->fDoEarlyWakeUps)
2996 {
2997 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2998 pGVM->gvmm.s.StatsSched.cPollCalls++;
2999
3000 Assert(ASMIntAreEnabled());
3001 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
3002
3003 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
3004
3005 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3006 }
3007 /*
3008 * Not quite sure what we could do here...
3009 */
3010 else if (fYield)
3011 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
3012 else
3013 rc = VINF_SUCCESS;
3014 }
3015
3016 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
3017 return rc;
3018}
3019
3020
3021#ifdef GVMM_SCHED_WITH_PPT
3022/**
3023 * Timer callback for the periodic preemption timer.
3024 *
3025 * @param pTimer The timer handle.
3026 * @param pvUser Pointer to the per cpu structure.
3027 * @param iTick The current tick.
3028 */
3029static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3030{
3031 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
3032 NOREF(pTimer); NOREF(iTick);
3033
3034 /*
3035 * Termination check
3036 */
3037 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
3038 return;
3039
3040 /*
3041 * Do the house keeping.
3042 */
3043 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3044
3045 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3046 {
3047 /*
3048 * Historicize the max frequency.
3049 */
3050 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3051 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3052 pCpu->Ppt.iTickHistorization = 0;
3053 pCpu->Ppt.uDesiredHz = 0;
3054
3055 /*
3056 * Check whether the current timer frequency needs changing.
3057 */
3058 uint32_t uHistMaxHz = 0;
3059 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3060 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3061 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3062 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3063 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3064 else if (uHistMaxHz)
3065 {
3066 /*
3067 * Reprogram it.
3068 */
3069 pCpu->Ppt.cChanges++;
3070 pCpu->Ppt.iTickHistorization = 0;
3071 pCpu->Ppt.uTimerHz = uHistMaxHz;
3072 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3073 pCpu->Ppt.cNsInterval = cNsInterval;
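            /* The division below rounds to roughly the nearest whole tick; assuming
               (purely for illustration) GVMMHOSTCPU_PPT_HIST_INTERVAL_NS were
               20 000 000 ns and the new rate 4 kHz (cNsInterval = 250 000 ns), it
               would yield (20 000 000 + 125 000 - 1) / 250 000 = 80 ticks per
               historization interval. */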
3074 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3075 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3076 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3077 / cNsInterval;
3078 else
3079 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3080 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3081
3082 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3083 RTTimerChangeInterval(pTimer, cNsInterval);
3084 }
3085 else
3086 {
3087 /*
3088 * Stop it.
3089 */
3090 pCpu->Ppt.fStarted = false;
3091 pCpu->Ppt.uTimerHz = 0;
3092 pCpu->Ppt.cNsInterval = 0;
3093 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3094
3095 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3096 RTTimerStop(pTimer);
3097 }
3098 }
3099 else
3100 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3101}
3102#endif /* GVMM_SCHED_WITH_PPT */
3103
3104
3105/**
3106 * Updates the periodic preemption timer for the calling CPU.
3107 *
3108 * The caller must have disabled preemption!
3109 * The caller must check that the host can do high resolution timers.
3110 *
3111 * @param pGVM The global (ring-0) VM structure.
3112 * @param idHostCpu The current host CPU id.
3113 * @param uHz The desired frequency.
3114 */
3115GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3116{
3117 NOREF(pGVM);
3118#ifdef GVMM_SCHED_WITH_PPT
3119 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3120 Assert(RTTimerCanDoHighResolution());
3121
3122 /*
3123 * Resolve the per CPU data.
3124 */
3125 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3126 PGVMM pGVMM = g_pGVMM;
3127 if ( !RT_VALID_PTR(pGVMM)
3128 || pGVMM->u32Magic != GVMM_MAGIC)
3129 return;
3130 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3131 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3132 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3133 && pCpu->idCpu == idHostCpu,
3134 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3135
3136 /*
3137 * Check whether we need to do anything about the timer.
3138 * We have to be a little bit careful since we might be racing the timer
3139 * callback here.
3140 */
3141 if (uHz > 16384)
3142 uHz = 16384; /** @todo add a query method for this! */
3143 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3144 && uHz >= pCpu->Ppt.uMinHz
3145 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3146 {
3147 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3148
3149 pCpu->Ppt.uDesiredHz = uHz;
3150 uint32_t cNsInterval = 0;
3151 if (!pCpu->Ppt.fStarted)
3152 {
3153 pCpu->Ppt.cStarts++;
3154 pCpu->Ppt.fStarted = true;
3155 pCpu->Ppt.fStarting = true;
3156 pCpu->Ppt.iTickHistorization = 0;
3157 pCpu->Ppt.uTimerHz = uHz;
3158 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3159 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3160 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3161 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3162 / cNsInterval;
3163 else
3164 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3165 }
3166
3167 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3168
3169 if (cNsInterval)
3170 {
3171 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3172 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3173 AssertRC(rc);
3174
3175 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3176 if (RT_FAILURE(rc))
3177 pCpu->Ppt.fStarted = false;
3178 pCpu->Ppt.fStarting = false;
3179 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3180 }
3181 }
3182#else /* !GVMM_SCHED_WITH_PPT */
3183 NOREF(idHostCpu); NOREF(uHz);
3184#endif /* !GVMM_SCHED_WITH_PPT */
3185}
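/* Minimal caller sketch (illustrative only): per the preconditions documented
 * above, a caller disables preemption and checks for high resolution timer
 * support before calling; uHz stands for whatever preemption rate the caller
 * has computed.
 *
 *      RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *      RTThreadPreemptDisable(&PreemptState);
 *      if (RTTimerCanDoHighResolution())
 *          GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uHz);
 *      RTThreadPreemptRestore(&PreemptState);
 */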
3186
3187
3188/**
3189 * Calls @a pfnCallback for each VM in the system.
3190 *
3191 * This will enumerate the VMs while holding the global VM used list lock in
3192 * shared mode. So, only suitable for simple work. If more expensive work
3193 * needs doing, a different approach must be taken as using this API would
3194 * otherwise block VM creation and destruction.
3195 *
3196 * @returns VBox status code.
3197 * @param pfnCallback The callback function.
3198 * @param pvUser User argument to the callback.
3199 */
3200GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3201{
3202 PGVMM pGVMM;
3203 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3204
3205 int rc = VINF_SUCCESS;
3206 GVMMR0_USED_SHARED_LOCK(pGVMM);
3207 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3208 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3209 i = pGVMM->aHandles[i].iNext, cLoops++)
3210 {
3211 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3212 if ( RT_VALID_PTR(pGVM)
3213 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3214 && pGVM->u32Magic == GVM_MAGIC)
3215 {
3216 rc = pfnCallback(pGVM, pvUser);
3217 if (rc != VINF_SUCCESS)
3218 break;
3219 }
3220
3221 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3222 }
3223 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3224 return rc;
3225}
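/* Minimal callback sketch (illustrative only): counting the EMTs of every VM the
 * enumeration visits.  The callback shape matches the invocation above, i.e. it
 * receives the PGVM and the user argument and returns an int status; returning
 * anything other than VINF_SUCCESS stops the enumeration.
 *
 *      static DECLCALLBACK(int) exampleCountEmts(PGVM pGVM, void *pvUser)
 *      {
 *          *(uint32_t *)pvUser += pGVM->cCpus;
 *          return VINF_SUCCESS;
 *      }
 *
 *      uint32_t cEmts = 0;
 *      int rc = GVMMR0EnumVMs(exampleCountEmts, &cEmts);
 */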
3226
3227
3228/**
3229 * Retrieves the GVMM statistics visible to the caller.
3230 *
3231 * @returns VBox status code.
3232 *
3233 * @param pStats Where to put the statistics.
3234 * @param pSession The current session.
3235 * @param pGVM The GVM to obtain statistics for. Optional.
3236 */
3237GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3238{
3239 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3240
3241 /*
3242 * Validate input.
3243 */
3244 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3245 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3246 pStats->cVMs = 0; /* (crash before taking the sem...) */
3247
3248 /*
3249 * Take the lock and get the VM statistics.
3250 */
3251 PGVMM pGVMM;
3252 if (pGVM)
3253 {
3254 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3255 if (RT_FAILURE(rc))
3256 return rc;
3257 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3258
3259 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3260 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3261 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3262 while (iCpu-- > 0)
3263 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3264 }
3265 else
3266 {
3267 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3268 RT_ZERO(pStats->SchedVM);
3269 RT_ZERO(pStats->aVCpus);
3270
3271 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3272 AssertRCReturn(rc, rc);
3273 }
3274
3275 /*
3276 * Enumerate the VMs and add the ones visible to the statistics.
3277 */
3278 pStats->cVMs = 0;
3279 pStats->cEMTs = 0;
3280 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3281
3282 for (unsigned i = pGVMM->iUsedHead;
3283 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3284 i = pGVMM->aHandles[i].iNext)
3285 {
3286 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3287 void *pvObj = pGVMM->aHandles[i].pvObj;
3288 if ( RT_VALID_PTR(pvObj)
3289 && RT_VALID_PTR(pOtherGVM)
3290 && pOtherGVM->u32Magic == GVM_MAGIC
3291 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3292 {
3293 pStats->cVMs++;
3294 pStats->cEMTs += pOtherGVM->cCpus;
3295
3296 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3297 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3298 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3299 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3300 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3301
3302 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3303 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3304 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3305
3306 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3307 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3308
3309 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3310 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3311 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3312 }
3313 }
3314
3315 /*
3316 * Copy out the per host CPU statistics.
3317 */
3318 uint32_t iDstCpu = 0;
3319 uint32_t cSrcCpus = pGVMM->cHostCpus;
3320 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3321 {
3322 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3323 {
3324 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3325 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3326#ifdef GVMM_SCHED_WITH_PPT
3327 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3328 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3329 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3330 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3331#else
3332 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3333 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3334 pStats->aHostCpus[iDstCpu].cChanges = 0;
3335 pStats->aHostCpus[iDstCpu].cStarts = 0;
3336#endif
3337 iDstCpu++;
3338 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3339 break;
3340 }
3341 }
3342 pStats->cHostCpus = iDstCpu;
3343
3344 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3345
3346 return VINF_SUCCESS;
3347}
3348
3349
3350/**
3351 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3352 *
3353 * @returns see GVMMR0QueryStatistics.
3354 * @param pGVM The global (ring-0) VM structure. Optional.
3355 * @param pReq Pointer to the request packet.
3356 * @param pSession The current session.
3357 */
3358GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3359{
3360 /*
3361 * Validate input and pass it on.
3362 */
3363 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3364 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3365 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3366
3367 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3368}
3369
3370
3371/**
3372 * Resets the specified GVMM statistics.
3373 *
3374 * @returns VBox status code.
3375 *
3376 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3377 * @param pSession The current session.
3378 * @param pGVM The GVM to reset statistics for. Optional.
3379 */
3380GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3381{
3382 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3383
3384 /*
3385 * Validate input.
3386 */
3387 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3388 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3389
3390 /*
3391 * Take the lock and get the VM statistics.
3392 */
3393 PGVMM pGVMM;
3394 if (pGVM)
3395 {
3396 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3397 if (RT_FAILURE(rc))
3398 return rc;
3399# define MAYBE_RESET_FIELD(field) \
3400 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3401 MAYBE_RESET_FIELD(cHaltCalls);
3402 MAYBE_RESET_FIELD(cHaltBlocking);
3403 MAYBE_RESET_FIELD(cHaltTimeouts);
3404 MAYBE_RESET_FIELD(cHaltNotBlocking);
3405 MAYBE_RESET_FIELD(cHaltWakeUps);
3406 MAYBE_RESET_FIELD(cWakeUpCalls);
3407 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3408 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3409 MAYBE_RESET_FIELD(cPokeCalls);
3410 MAYBE_RESET_FIELD(cPokeNotBusy);
3411 MAYBE_RESET_FIELD(cPollCalls);
3412 MAYBE_RESET_FIELD(cPollHalts);
3413 MAYBE_RESET_FIELD(cPollWakeUps);
3414# undef MAYBE_RESET_FIELD
3415 }
3416 else
3417 {
3418 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3419
3420 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3421 AssertRCReturn(rc, rc);
3422 }
3423
3424 /*
3425 * Enumerate the VMs and add the ones visible to the statistics.
3426 */
3427 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3428 {
3429 for (unsigned i = pGVMM->iUsedHead;
3430 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3431 i = pGVMM->aHandles[i].iNext)
3432 {
3433 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3434 void *pvObj = pGVMM->aHandles[i].pvObj;
3435 if ( RT_VALID_PTR(pvObj)
3436 && RT_VALID_PTR(pOtherGVM)
3437 && pOtherGVM->u32Magic == GVM_MAGIC
3438 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3439 {
3440# define MAYBE_RESET_FIELD(field) \
3441 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3442 MAYBE_RESET_FIELD(cHaltCalls);
3443 MAYBE_RESET_FIELD(cHaltBlocking);
3444 MAYBE_RESET_FIELD(cHaltTimeouts);
3445 MAYBE_RESET_FIELD(cHaltNotBlocking);
3446 MAYBE_RESET_FIELD(cHaltWakeUps);
3447 MAYBE_RESET_FIELD(cWakeUpCalls);
3448 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3449 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3450 MAYBE_RESET_FIELD(cPokeCalls);
3451 MAYBE_RESET_FIELD(cPokeNotBusy);
3452 MAYBE_RESET_FIELD(cPollCalls);
3453 MAYBE_RESET_FIELD(cPollHalts);
3454 MAYBE_RESET_FIELD(cPollWakeUps);
3455# undef MAYBE_RESET_FIELD
3456 }
3457 }
3458 }
3459
3460 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3461
3462 return VINF_SUCCESS;
3463}
3464
3465
3466/**
3467 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3468 *
3469 * @returns see GVMMR0ResetStatistics.
3470 * @param pGVM The global (ring-0) VM structure. Optional.
3471 * @param pReq Pointer to the request packet.
3472 * @param pSession The current session.
3473 */
3474GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3475{
3476 /*
3477 * Validate input and pass it on.
3478 */
3479 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3480 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3481 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3482
3483 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3484}
3485