VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 108968

Last change on this file was in revision 108968, checked in by vboxsync, 3 weeks ago

VMM,Main,Devices: Respect VBOX_VMM_TARGET_ARMV8 correctly on amd64 hosts (for IEM debugging purposes). jiraref:VBP-1598

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 125.1 KB
1/* $Id: GVMMR0.cpp 108968 2025-04-14 20:45:36Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * is both for session cleanup purposes and for having a point where it is
39 * possible to implement usage policies later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-CPU
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency for all VMs running on a CPU over the last ~160 ms
48 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * The TMCalcHostTimerFrequency() part of this takes the max
52 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
53 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
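/*
 * Illustrative sketch (not part of the actual timer callback): how the
 * effective per-CPU timer frequency follows from the historized values
 * described above.  It assumes the GVMMHOSTCPU layout defined further down
 * in this file; pCpu is a hypothetical pointer to one such entry.
 *
 * @code
 *  uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);        // latest EMT hint
 *  for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
 *      uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);                // max over the history
 *  // 8 entries * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS (20 ms) gives the ~160 ms window.
 * @endcode
 */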
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#ifdef RT_ARCH_AMD64
81# include <iprt/asm-amd64-x86.h>
82#endif
83#include <iprt/critsect.h>
84#include <iprt/mem.h>
85#include <iprt/semaphore.h>
86#include <iprt/time.h>
87#include <VBox/log.h>
88#include <iprt/thread.h>
89#include <iprt/process.h>
90#include <iprt/param.h>
91#include <iprt/string.h>
92#include <iprt/assert.h>
93#include <iprt/mem.h>
94#include <iprt/memobj.h>
95#include <iprt/mp.h>
96#include <iprt/cpuset.h>
97#include <iprt/spinlock.h>
98#include <iprt/timer.h>
99
100#include "dtrace/VBoxVMM.h"
101
102
103/*********************************************************************************************************************************
104* Defined Constants And Macros *
105*********************************************************************************************************************************/
106#if (defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)) \
107 && !defined(VBOX_WITH_MINIMAL_R0)
108/** Define this to enable the periodic preemption timer. */
109# define GVMM_SCHED_WITH_PPT
110#endif
111
112#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
113/** Define this to enable the per-EMT high resolution wakeup timers. */
114# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
115#endif
116
117
118/** Special value that GVMMR0DeregisterVCpu sets. */
119#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
120AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
121
122
123/*********************************************************************************************************************************
124* Structures and Typedefs *
125*********************************************************************************************************************************/
126
127/**
128 * Global VM handle.
129 */
130typedef struct GVMHANDLE
131{
132 /** The index of the next handle in the list (free or used). (0 is nil.) */
133 uint16_t volatile iNext;
134 /** Our own index / handle value. */
135 uint16_t iSelf;
136 /** The process ID of the handle owner.
137 * This is used for access checks. */
138 RTPROCESS ProcId;
139 /** The pointer to the ring-0 only (aka global) VM structure. */
140 PGVM pGVM;
141 /** The virtual machine object. */
142 void *pvObj;
143 /** The session this VM is associated with. */
144 PSUPDRVSESSION pSession;
145 /** The ring-0 handle of the EMT0 thread.
146 * This is used for ownership checks as well as looking up a VM handle by thread
147 * at times like assertions. */
148 RTNATIVETHREAD hEMT0;
149} GVMHANDLE;
150/** Pointer to a global VM handle. */
151typedef GVMHANDLE *PGVMHANDLE;
152
153/** Number of GVM handles (including the NIL handle). */
154#if HC_ARCH_BITS == 64
155# define GVMM_MAX_HANDLES 8192
156#else
157# define GVMM_MAX_HANDLES 128
158#endif
159
160/**
161 * Per host CPU GVMM data.
162 */
163typedef struct GVMMHOSTCPU
164{
165 /** Magic number (GVMMHOSTCPU_MAGIC). */
166 uint32_t volatile u32Magic;
167 /** The CPU ID. */
168 RTCPUID idCpu;
169 /** The CPU set index. */
170 uint32_t idxCpuSet;
171
172#ifdef GVMM_SCHED_WITH_PPT
173 /** Periodic preemption timer data. */
174 struct
175 {
176 /** The handle to the periodic preemption timer. */
177 PRTTIMER pTimer;
178 /** Spinlock protecting the data below. */
179 RTSPINLOCK hSpinlock;
180 /** The smallest Hz that we need to care about. (static) */
181 uint32_t uMinHz;
182 /** The number of ticks between each historization. */
183 uint32_t cTicksHistoriziationInterval;
184 /** The current historization tick (counting up to
185 * cTicksHistoriziationInterval and then resetting). */
186 uint32_t iTickHistorization;
187 /** The current timer interval. This is set to 0 when inactive. */
188 uint32_t cNsInterval;
189 /** The current timer frequency. This is set to 0 when inactive. */
190 uint32_t uTimerHz;
191 /** The current max frequency reported by the EMTs.
192 * This gets historized and reset by the timer callback. It is
193 * read without holding the spinlock, so it needs atomic updating. */
194 uint32_t volatile uDesiredHz;
195 /** Whether the timer was started or not. */
196 bool volatile fStarted;
197 /** Set if we're starting the timer. */
198 bool volatile fStarting;
199 /** The index of the next history entry (mod it). */
200 uint32_t iHzHistory;
201 /** Historicized uDesiredHz values. The array wraps around, new entries
202 * are added at iHzHistory. This is updated approximately every
203 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
204 uint32_t aHzHistory[8];
205 /** Statistics counter for recording the number of interval changes. */
206 uint32_t cChanges;
207 /** Statistics counter for recording the number of timer starts. */
208 uint32_t cStarts;
209 } Ppt;
210#endif /* GVMM_SCHED_WITH_PPT */
211
212} GVMMHOSTCPU;
213/** Pointer to the per host CPU GVMM data. */
214typedef GVMMHOSTCPU *PGVMMHOSTCPU;
215/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
216#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
217 /** The interval one history entry should cover (approximately), given in
218 * nanoseconds. */
219#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
220
221
222/**
223 * The GVMM instance data.
224 */
225typedef struct GVMM
226{
227 /** Eyecatcher / magic. */
228 uint32_t u32Magic;
229 /** The index of the head of the free handle chain. (0 is nil.) */
230 uint16_t volatile iFreeHead;
231 /** The index of the head of the active handle chain. (0 is nil.) */
232 uint16_t volatile iUsedHead;
233 /** The number of VMs. */
234 uint16_t volatile cVMs;
235 /** Alignment padding. */
236 uint16_t u16Reserved;
237 /** The number of EMTs. */
238 uint32_t volatile cEMTs;
239 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
240 uint32_t volatile cHaltedEMTs;
241 /** Mini lock for restricting early wake-ups to one thread. */
242 bool volatile fDoingEarlyWakeUps;
243 bool afPadding[3]; /**< explicit alignment padding. */
244 /** When the next halted or sleeping EMT will wake up.
245 * This is set to 0 when it needs recalculating and to UINT64_MAX when
246 * there are no halted or sleeping EMTs in the GVMM. */
247 uint64_t uNsNextEmtWakeup;
248 /** The lock used to serialize VM creation, destruction and associated events that
249 * aren't performance critical. Owners may acquire the list lock. */
250 RTCRITSECT CreateDestroyLock;
251 /** The lock used to serialize used list updates and accesses.
252 * This indirectly includes scheduling since the scheduler will have to walk the
253 * used list to examine running VMs. Owners may not acquire any other locks. */
254 RTCRITSECTRW UsedLock;
255 /** The handle array.
256 * The size of this array defines the maximum number of currently running VMs.
257 * The first entry is unused as it represents the NIL handle. */
258 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
259
260 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
261 * The number of EMTs that means we no longer consider ourselves alone on a
262 * CPU/Core.
263 */
264 uint32_t cEMTsMeansCompany;
265 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
266 * The minimum sleep time for when we're alone, in nanoseconds.
267 */
268 uint32_t nsMinSleepAlone;
269 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
270 * The minimum sleep time for when we've got company, in nanoseconds.
271 */
272 uint32_t nsMinSleepCompany;
273#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
274 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
275 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
276 * nanoseconds.
277 */
278 uint32_t nsMinSleepWithHrTimer;
279#endif
280 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
281 * The limit for the first round of early wake-ups, given in nanoseconds.
282 */
283 uint32_t nsEarlyWakeUp1;
284 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
285 * The limit for the second round of early wake-ups, given in nanoseconds.
286 */
287 uint32_t nsEarlyWakeUp2;
288
289 /** Set if we're doing early wake-ups.
290 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
291 bool volatile fDoEarlyWakeUps;
292
293 /** The number of entries in the host CPU array (aHostCpus). */
294 uint32_t cHostCpus;
295 /** Per host CPU data (variable length). */
296 GVMMHOSTCPU aHostCpus[1];
297} GVMM;
298AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
299AssertCompileMemberAlignment(GVMM, UsedLock, 8);
300AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
301/** Pointer to the GVMM instance data. */
302typedef GVMM *PGVMM;
303
304/** The GVMM::u32Magic value (Charlie Haden). */
305#define GVMM_MAGIC UINT32_C(0x19370806)
306
307
308
309/*********************************************************************************************************************************
310* Global Variables *
311*********************************************************************************************************************************/
312/** Pointer to the GVMM instance data.
313 * (Just my general dislike for global variables.) */
314static PGVMM g_pGVMM = NULL;
315
316/** Macro for obtaining and validating the g_pGVMM pointer.
317 * On failure it will return from the invoking function with the specified return value.
318 *
319 * @param pGVMM The name of the pGVMM variable.
320 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
321 * status codes.
322 */
323#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
324 do { \
325 (pGVMM) = g_pGVMM;\
326 AssertPtrReturn((pGVMM), (rc)); \
327 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
328 } while (0)
329
330/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
331 * On failure it will return from the invoking function.
332 *
333 * @param pGVMM The name of the pGVMM variable.
334 */
335#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
336 do { \
337 (pGVMM) = g_pGVMM;\
338 AssertPtrReturnVoid((pGVMM)); \
339 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
340 } while (0)
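/*
 * Usage sketch for the two macros above (hypothetical function, for
 * illustration only): fetch and validate the instance before touching it,
 * bailing out with VERR_GVMM_INSTANCE if the GVMM hasn't been initialized.
 *
 *      VMMR0_INT_DECL(int) GVMMR0SomeOperation(void)
 *      {
 *          PGVMM pGVMM;
 *          GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
 *          ... safe to dereference pGVMM from here on ...
 *          return VINF_SUCCESS;
 *      }
 */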
341
342
343/*********************************************************************************************************************************
344* Internal Functions *
345*********************************************************************************************************************************/
346static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession);
347static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
348static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
349static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
350
351#ifdef GVMM_SCHED_WITH_PPT
352static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
355static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
356#endif
357
358
359/**
360 * Initializes the GVMM.
361 *
362 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
363 *
364 * @returns VBox status code.
365 */
366VMMR0_INT_DECL(int) GVMMR0Init(void)
367{
368 LogFlow(("GVMMR0Init:\n"));
369
370 /*
371 * Allocate and initialize the instance data.
372 */
373 uint32_t cHostCpus = RTMpGetArraySize();
374 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
375
376 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
377 if (!pGVMM)
378 return VERR_NO_MEMORY;
379 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
380 "GVMM-CreateDestroyLock");
381 if (RT_SUCCESS(rc))
382 {
383 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
384 if (RT_SUCCESS(rc))
385 {
386 pGVMM->u32Magic = GVMM_MAGIC;
387 pGVMM->iUsedHead = 0;
388 pGVMM->iFreeHead = 1;
389
390 /* the nil handle */
391 pGVMM->aHandles[0].iSelf = 0;
392 pGVMM->aHandles[0].iNext = 0;
393
394 /* the tail */
395 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
396 pGVMM->aHandles[i].iSelf = i;
397 pGVMM->aHandles[i].iNext = 0; /* nil */
398
399 /* the rest */
400 while (i-- > 1)
401 {
402 pGVMM->aHandles[i].iSelf = i;
403 pGVMM->aHandles[i].iNext = i + 1;
404 }
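/* The resulting free chain is 1 -> 2 -> ... -> GVMM_MAX_HANDLES - 1 -> 0 (nil),
   with iFreeHead = 1; entry 0 stays reserved as the NIL handle. */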
405
406 /* The default configuration values. */
407 uint32_t cNsResolution = RTSemEventMultiGetResolution();
408 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the CPU count or something... */
409 if (cNsResolution >= 5*RT_NS_100US)
410 {
411 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
412 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
413 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
414 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
415 }
416 else if (cNsResolution > RT_NS_100US)
417 {
418 pGVMM->nsMinSleepAlone = cNsResolution / 2;
419 pGVMM->nsMinSleepCompany = cNsResolution / 4;
420 pGVMM->nsEarlyWakeUp1 = 0;
421 pGVMM->nsEarlyWakeUp2 = 0;
422 }
423 else
424 {
425 pGVMM->nsMinSleepAlone = 2000;
426 pGVMM->nsMinSleepCompany = 2000;
427 pGVMM->nsEarlyWakeUp1 = 0;
428 pGVMM->nsEarlyWakeUp2 = 0;
429 }
430#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
431 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
432#endif
433 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
434
435 /* The host CPU data. */
436 pGVMM->cHostCpus = cHostCpus;
437 uint32_t iCpu = cHostCpus;
438 RTCPUSET PossibleSet;
439 RTMpGetSet(&PossibleSet);
440 while (iCpu-- > 0)
441 {
442 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
443#ifdef GVMM_SCHED_WITH_PPT
444 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
445 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
446 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
447 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
448 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
450 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
451 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
453 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
454 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
455 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
456#endif
457
458 if (RTCpuSetIsMember(&PossibleSet, iCpu))
459 {
460 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
461 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
462
463#ifdef GVMM_SCHED_WITH_PPT
464 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
465 50*1000*1000 /* whatever */,
466 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
467 gvmmR0SchedPeriodicPreemptionTimerCallback,
468 &pGVMM->aHostCpus[iCpu]);
469 if (RT_SUCCESS(rc))
470 {
471 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
472 if (RT_FAILURE(rc))
473 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
474 }
475 else
476 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
477 if (RT_FAILURE(rc))
478 {
479 while (iCpu < cHostCpus)
480 {
481 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
482 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
483 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
484 iCpu++;
485 }
486 break;
487 }
488#endif
489 }
490 else
491 {
492 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
493 pGVMM->aHostCpus[iCpu].u32Magic = 0;
494 }
495 }
496 if (RT_SUCCESS(rc))
497 {
498 g_pGVMM = pGVMM;
499 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
500 return VINF_SUCCESS;
501 }
502
503 /* bail out. */
504 RTCritSectRwDelete(&pGVMM->UsedLock);
505 }
506 else
507 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
508 RTCritSectDelete(&pGVMM->CreateDestroyLock);
509 }
510 else
511 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525VMMR0_INT_DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance data and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592VMMR0_INT_DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
631 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 pGVMM->nsMinSleepWithHrTimer = u64Value;
635 else
636 rc = VERR_OUT_OF_RANGE;
637 }
638#endif
639 else if (!strcmp(pszName, "EarlyWakeUp1"))
640 {
641 if (u64Value <= RT_NS_100MS)
642 {
643 pGVMM->nsEarlyWakeUp1 = u64Value;
644 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
645 }
646 else
647 rc = VERR_OUT_OF_RANGE;
648 }
649 else if (!strcmp(pszName, "EarlyWakeUp2"))
650 {
651 if (u64Value <= RT_NS_100MS)
652 {
653 pGVMM->nsEarlyWakeUp2 = u64Value;
654 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
655 }
656 else
657 rc = VERR_OUT_OF_RANGE;
658 }
659 else
660 rc = VERR_CFGM_VALUE_NOT_FOUND;
661 return rc;
662}
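/*
 * Usage sketch (illustrative only): names carry the "/GVMM/" prefix and values
 * are range checked against the limits given in the @gcfgm markup above, e.g.:
 *
 *      rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000); // 0.5 ms, in ns
 */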
663
664
665/**
666 * A quick hack for getting global config values.
667 *
668 * @returns VBox status code.
669 *
670 * @param pSession The session handle. Used for authentication.
671 * @param pszName The variable name.
672 * @param pu64Value Where to return the value.
673 */
674VMMR0_INT_DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
675{
676 /*
677 * Validate input.
678 */
679 PGVMM pGVMM;
680 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
681 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
682 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
683 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
684
685 /*
686 * String switch time!
687 */
688 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
689 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
690 int rc = VINF_SUCCESS;
691 pszName += sizeof("/GVMM/") - 1;
692 if (!strcmp(pszName, "cEMTsMeansCompany"))
693 *pu64Value = pGVMM->cEMTsMeansCompany;
694 else if (!strcmp(pszName, "MinSleepAlone"))
695 *pu64Value = pGVMM->nsMinSleepAlone;
696 else if (!strcmp(pszName, "MinSleepCompany"))
697 *pu64Value = pGVMM->nsMinSleepCompany;
698#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
699 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
700 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
701#endif
702 else if (!strcmp(pszName, "EarlyWakeUp1"))
703 *pu64Value = pGVMM->nsEarlyWakeUp1;
704 else if (!strcmp(pszName, "EarlyWakeUp2"))
705 *pu64Value = pGVMM->nsEarlyWakeUp2;
706 else
707 rc = VERR_CFGM_VALUE_NOT_FOUND;
708 return rc;
709}
710
711
712/**
713 * Acquire the 'used' lock in shared mode.
714 *
715 * This prevents destruction of the VM while we're in ring-0.
716 *
717 * @returns IPRT status code, see RTCritSectRwEnterShared.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
720 */
721#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Release the 'used' lock when owning it in shared mode.
725 *
726 * @returns IPRT status code, see RTCritSectRwLeaveShared.
727 * @param a_pGVMM The GVMM instance data.
728 * @sa GVMMR0_USED_SHARED_LOCK
729 */
730#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
731
732/**
733 * Acquire the 'used' lock in exclusive mode.
734 *
735 * Only use this function when making changes to the used list.
736 *
737 * @returns IPRT status code, see RTCritSectRwEnterExcl.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
742
743/**
744 * Release the 'used' lock when owning it in exclusive mode.
745 *
746 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
747 * @param a_pGVMM The GVMM instance data.
748 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
749 */
750#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
751
752
753/**
754 * Try acquire the 'create & destroy' lock.
755 *
756 * @returns IPRT status code, see RTCritSectEnter.
757 * @param pGVMM The GVMM instance data.
758 */
759DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
760{
761 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
762 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
763 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
764 return rc;
765}
766
767
768/**
769 * Release the 'create & destroy' lock.
770 *
771 * @returns IPRT status code, see RTCritSectLeave.
772 * @param pGVMM The GVMM instance data.
773 */
774DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
775{
776 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
777 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
778 AssertRC(rc);
779 return rc;
780}
781
782
783/**
784 * Request wrapper for the GVMMR0CreateVM API.
785 *
786 * @returns VBox status code.
787 * @param pReq The request buffer.
788 * @param pSession The session handle. The VM will be associated with this.
789 */
790VMMR0_INT_DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
791{
792 /*
793 * Validate the request.
794 */
795 if (!RT_VALID_PTR(pReq))
796 return VERR_INVALID_POINTER;
797 if (pReq->Hdr.cbReq != sizeof(*pReq))
798 return VERR_INVALID_PARAMETER;
799 if (pReq->pSession != pSession)
800 return VERR_INVALID_POINTER;
801
802 /* Check that VBoxVMM and VMMR0 are likely to have the same idea about the structures. */
803 if (pReq->cbVM != sizeof(VM))
804 {
805 LogRel(("GVMMR0CreateVMReq: cbVM=%#x, expected %#x\n", pReq->cbVM, sizeof(VM)));
806 return VERR_GVM_MISMATCH_VM_SIZE;
807 }
808 if (pReq->cbVCpu != sizeof(VMCPU))
809 {
810 LogRel(("GVMMR0CreateVMReq: cbVCpu=%#x, expected %#x\n", pReq->cbVCpu, sizeof(VMCPU)));
811 return VERR_GVM_MISMATCH_VMCPU_SIZE;
812 }
813#ifdef VM_STRUCT_VERSION_NON_NATIVE_TARGETS
814 uint32_t const uExpectedVersion = pReq->enmTarget == VMTARGET_NATIVE
815 ? VM_STRUCT_VERSION : VM_STRUCT_VERSION_NON_NATIVE_TARGETS;
816#else
817 uint32_t const uExpectedVersion = VM_STRUCT_VERSION;
818#endif
819 if (pReq->uStructVersion != uExpectedVersion)
820 {
821 LogRel(("GVMMR0CreateVMReq: uStructVersion=%#x, expected %#x\n", pReq->uStructVersion, uExpectedVersion));
822 return VERR_GVM_MISMATCH_VM_STRUCT_VER;
823 }
824 if (pReq->uSvnRevision != VMMGetSvnRev())
825 {
826 LogRel(("GVMMR0CreateVMReq: uSvnRevision=%u, expected %u\n", pReq->uSvnRevision, VMMGetSvnRev()));
827 return VERR_GVM_MISMATCH_VMCPU_SIZE;
828 }
829
830 /*
831 * Execute it.
832 */
833 PGVM pGVM;
834 pReq->pVMR0 = NULL;
835 pReq->pVMR3 = NIL_RTR3PTR;
836 int rc = GVMMR0CreateVM(pSession, pReq->enmTarget, pReq->cCpus, &pGVM);
837 if (RT_SUCCESS(rc))
838 {
839 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
840 pReq->pVMR3 = pGVM->pVMR3;
841 }
842 return rc;
843}
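/*
 * Ring-3 usage sketch (illustrative, not the authoritative VMR3Create code):
 * the request is filled so that the size/version checks above pass and is then
 * handed to ring-0.  The exact call path (SUPR3CallVMMR0Ex with the
 * VMMR0_DO_GVMM_CREATE_VM operation) is an assumption here.
 *
 *      GVMMCREATEVMREQ Req;
 *      RT_ZERO(Req);
 *      Req.Hdr.u32Magic   = SUPVMMR0REQHDR_MAGIC;
 *      Req.Hdr.cbReq      = sizeof(Req);
 *      Req.pSession       = pSession;
 *      Req.cCpus          = cCpus;
 *      Req.cbVM           = sizeof(VM);
 *      Req.cbVCpu         = sizeof(VMCPU);
 *      Req.uStructVersion = VM_STRUCT_VERSION;
 *      Req.uSvnRevision   = VMMGetSvnRev();
 *      Req.enmTarget      = VMTARGET_NATIVE;
 *      Req.pVMR3          = NIL_RTR3PTR;
 *      int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &Req.Hdr);
 *      // On success Req.pVMR3 points to the ring-3 mapping of the new VM structure.
 */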
844
845
846/**
847 * Allocates the VM structure and registers it with GVM.
848 *
849 * The caller will become the VM owner and thereby the EMT.
850 *
851 * @returns VBox status code.
852 * @param pSession The support driver session.
853 * @param enmTarget The target platform architecture of the VM.
854 * @param cCpus Number of virtual CPUs for the new VM.
855 * @param ppGVM Where to store the pointer to the VM structure.
856 *
857 * @thread EMT.
858 */
859VMMR0_INT_DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, VMTARGET enmTarget, uint32_t cCpus, PGVM *ppGVM)
860{
861 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
862 PGVMM pGVMM;
863 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
864
865 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
866 *ppGVM = NULL;
867
868 if ( cCpus == 0
869 || cCpus > VMM_MAX_CPU_COUNT)
870 return VERR_INVALID_PARAMETER;
871 if ( enmTarget != VMTARGET_X86
872 && enmTarget != VMTARGET_ARMV8)
873 return VERR_INVALID_PARAMETER;
874
875 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
876 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
877 RTPROCESS ProcId = RTProcSelf();
878 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
879
880 /*
881 * The whole allocation process is protected by the lock.
882 */
883 int rc = gvmmR0CreateDestroyLock(pGVMM);
884 AssertRCReturn(rc, rc);
885
886 /*
887 * Only one VM per session.
888 */
889 if (SUPR0GetSessionVM(pSession) != NULL)
890 {
891 gvmmR0CreateDestroyUnlock(pGVMM);
892 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
893 return VERR_ALREADY_EXISTS;
894 }
895
896 /*
897 * Allocate a handle first so we don't waste resources unnecessarily.
898 */
899 uint16_t iHandle = pGVMM->iFreeHead;
900 if (iHandle)
901 {
902 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
903
904 /* consistency checks, a bit paranoid as always. */
905 if ( !pHandle->pGVM
906 && !pHandle->pvObj
907 && pHandle->iSelf == iHandle)
908 {
909 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
910 if (pHandle->pvObj)
911 {
912 /*
913 * Move the handle from the free to used list and perform permission checks.
914 */
915 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
916 AssertRC(rc);
917
918 pGVMM->iFreeHead = pHandle->iNext;
919 pHandle->iNext = pGVMM->iUsedHead;
920 pGVMM->iUsedHead = iHandle;
921 pGVMM->cVMs++;
922
923 pHandle->pGVM = NULL;
924 pHandle->pSession = pSession;
925 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
926 pHandle->ProcId = NIL_RTPROCESS;
927
928 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
929
930 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
931 if (RT_SUCCESS(rc))
932 {
933 /*
934 * Allocate memory for the VM structure (combined VM + GVM).
935 */
936 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
937 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
938 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
939 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
940 if (RT_SUCCESS(rc))
941 {
942 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
943 AssertPtr(pGVM);
944
945 /*
946 * Initialise the structure.
947 */
948 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
949 gvmmR0InitPerVMData(pGVM, iHandle, enmTarget, cCpus, pSession);
950 pGVM->gvmm.s.VMMemObj = hVMMemObj;
951#ifndef VBOX_WITH_MINIMAL_R0
952 bool const fWithFullR0 = !VM_IS_NON_NATIVE_WITH_LIMITED_R0(pGVM);
953 rc = fWithFullR0 ? GMMR0InitPerVMData(pGVM) : VINF_SUCCESS;
954 int rc2 = fWithFullR0 ? PGMR0InitPerVMData(pGVM, hVMMemObj) : VINF_SUCCESS;
955#else
956 int rc2 = VINF_SUCCESS;
957#endif
958 int rc3 = VMMR0InitPerVMData(pGVM);
959#ifndef VBOX_WITH_MINIMAL_R0
960 if (fWithFullR0)
961 {
962 CPUMR0InitPerVMData(pGVM);
963 DBGFR0InitPerVMData(pGVM);
964 PDMR0InitPerVMData(pGVM);
965 IOMR0InitPerVMData(pGVM);
966 TMR0InitPerVMData(pGVM);
967 }
968#endif
969 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
970 {
971 /*
972 * Allocate page array.
973 * This currently has to be made available to ring-3, but this should change eventually.
974 */
975 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
976 if (RT_SUCCESS(rc))
977 {
978 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
979 for (uint32_t iPage = 0; iPage < cPages; iPage++)
980 {
981 paPages[iPage].uReserved = 0;
982 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
983 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
984 }
985
986 /*
987 * Map the page array, VM and VMCPU structures into ring-3.
988 */
989 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
990 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
991 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
992 0 /*offSub*/, sizeof(VM));
993 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
994 {
995 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
996 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
997 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
998 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
999 }
1000 if (RT_SUCCESS(rc))
1001 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
1002 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
1003 NIL_RTR0PROCESS);
1004 if (RT_SUCCESS(rc))
1005 {
1006 /*
1007 * Initialize all the VM pointers.
1008 */
1009 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
1010 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
1011
1012 for (VMCPUID i = 0; i < cCpus; i++)
1013 {
1014 pGVM->aCpus[i].pVMR0 = pGVM;
1015 pGVM->aCpus[i].pVMR3 = pVMR3;
1016 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
1017 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
1018 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
1019 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
1020 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
1021 }
1022
1023 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
1024 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
1025 ("%p\n", pGVM->paVMPagesR3));
1026
1027#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1028 /*
1029 * Create the high resolution wake-up timer for EMT 0, ignore failures.
1030 */
1031 if (RTTimerCanDoHighResolution())
1032 {
1033 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
1034 0 /*one-shot, no interval*/,
1035 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
1036 &pGVM->aCpus[0]);
1037 if (RT_FAILURE(rc4))
1038 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
1039 }
1040#endif
1041
1042 /*
1043 * Complete the handle - take the UsedLock sem just to be careful.
1044 */
1045 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1046 AssertRC(rc);
1047
1048 pHandle->pGVM = pGVM;
1049 pHandle->hEMT0 = hEMT0;
1050 pHandle->ProcId = ProcId;
1051 pGVM->pVMR3 = pVMR3;
1052 pGVM->pVMR3Unsafe = pVMR3;
1053 pGVM->aCpus[0].hEMT = hEMT0;
1054 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1055 pGVM->aCpus[0].cEmtHashCollisions = 0;
1056 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1057 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1058 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1059 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1060 pGVMM->cEMTs += cCpus;
1061
1062 /* Associate it with the session and create the context hook for EMT0. */
1063 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1064 if (RT_SUCCESS(rc))
1065 {
1066 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1067 if (RT_SUCCESS(rc))
1068 {
1069 /*
1070 * Done!
1071 */
1072 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1073
1074 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1075 gvmmR0CreateDestroyUnlock(pGVMM);
1076
1077#ifndef VBOX_WITH_MINIMAL_R0
1078 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1079#endif
1080
1081 *ppGVM = pGVM;
1082 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1083 return VINF_SUCCESS;
1084 }
1085
1086 SUPR0SetSessionVM(pSession, NULL, NULL);
1087 }
1088 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1089 }
1090
1091 /* Cleanup mappings. */
1092 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1093 {
1094 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1095 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1096 }
1097 for (VMCPUID i = 0; i < cCpus; i++)
1098 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1099 {
1100 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1101 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1102 }
1103 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1104 {
1105 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1106 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1107 }
1108 }
1109 }
1110 else
1111 {
1112 if (RT_SUCCESS_NP(rc))
1113 rc = rc2;
1114 if (RT_SUCCESS_NP(rc))
1115 rc = rc3;
1116 AssertStmt(RT_FAILURE_NP(rc), rc = VERR_IPE_UNEXPECTED_STATUS);
1117 }
1118 }
1119 }
1120 /* else: The user wasn't permitted to create this VM. */
1121
1122 /*
1123 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1124 * object reference here. A little extra mess because of non-recursive lock.
1125 */
1126 void *pvObj = pHandle->pvObj;
1127 pHandle->pvObj = NULL;
1128 gvmmR0CreateDestroyUnlock(pGVMM);
1129
1130 SUPR0ObjRelease(pvObj, pSession);
1131
1132 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1133 return rc;
1134 }
1135
1136 rc = VERR_NO_MEMORY;
1137 }
1138 else
1139 rc = VERR_GVMM_IPE_1;
1140 }
1141 else
1142 rc = VERR_GVM_TOO_MANY_VMS;
1143
1144 gvmmR0CreateDestroyUnlock(pGVMM);
1145 return rc;
1146}
1147
1148
1149/**
1150 * Initializes the per VM data belonging to GVMM.
1151 *
1152 * @param pGVM Pointer to the global VM structure.
1153 * @param hSelf The handle.
1154 * @param enmTarget The target platform architecture of the VM.
1155 * @param cCpus The CPU count.
1156 * @param pSession The session this VM is associated with.
1157 */
1158static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMTARGET enmTarget, VMCPUID cCpus, PSUPDRVSESSION pSession)
1159{
1160 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1161 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1162 AssertCompileMemberAlignment(VM, cpum, 64);
1163 AssertCompileMemberAlignment(VM, tm, 64);
1164
1165 /* GVM: */
1166 pGVM->u32Magic = GVM_MAGIC;
1167 pGVM->hSelf = hSelf;
1168 pGVM->cCpus = cCpus;
1169 pGVM->enmTarget = enmTarget;
1170 pGVM->pSession = pSession;
1171 pGVM->pSelf = pGVM;
1172
1173 /* VM: */
1174 pGVM->enmVMState = VMSTATE_CREATING;
1175 pGVM->hSelfUnsafe = hSelf;
1176 pGVM->pSessionUnsafe = pSession;
1177 pGVM->pVMR0ForCall = pGVM;
1178 pGVM->cCpusUnsafe = cCpus;
1179 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1180#ifdef VM_STRUCT_VERSION_ALT
1181 pGVM->uStructVersion = enmTarget == VMTARGET_DEFAULT ? VM_STRUCT_VERSION : VM_STRUCT_VERSION_ALT;
1182#else
1183 pGVM->uStructVersion = VM_STRUCT_VERSION;
1184#endif
1185 pGVM->cbSelf = sizeof(VM);
1186 pGVM->cbVCpu = sizeof(VMCPU);
1187 pGVM->enmTargetUnsafe = enmTarget;
1188
1189 /* GVMM: */
1190 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1191 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1192 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1193 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1194 pGVM->gvmm.s.fDoneVMMR0Init = false;
1195 pGVM->gvmm.s.fDoneVMMR0Term = false;
1196
1197 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1198 {
1199 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1200 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1201 }
1202 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1203
1204 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1205 {
1206 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1207 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1208 }
1209
1210 /*
1211 * Per virtual CPU.
1212 */
1213 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1214 {
1215 pGVM->aCpus[i].idCpu = i;
1216 pGVM->aCpus[i].idCpuUnsafe = i;
1217 pGVM->aCpus[i].enmTarget = enmTarget;
1218 pGVM->aCpus[i].enmTargetUnsafe = enmTarget;
1219 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1220 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1221 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1222 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1223 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1224 pGVM->aCpus[i].pGVM = pGVM;
1225 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1226 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1227 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1228 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1229 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1230 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1231 }
1232}
1233
1234
1235/**
1236 * Does the VM initialization.
1237 *
1238 * @returns VBox status code.
1239 * @param pGVM The global (ring-0) VM structure.
1240 */
1241VMMR0_INT_DECL(int) GVMMR0InitVM(PGVM pGVM)
1242{
1243 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1244
1245 int rc = VERR_INTERNAL_ERROR_3;
1246 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1247 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1248 {
1249 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1250 {
1251 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1252 if (RT_FAILURE(rc))
1253 {
1254 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1255 break;
1256 }
1257 }
1258 }
1259 else
1260 rc = VERR_WRONG_ORDER;
1261
1262 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1263 return rc;
1264}
1265
1266
1267/**
1268 * Indicates that we're done with the ring-0 initialization
1269 * of the VM.
1270 *
1271 * @param pGVM The global (ring-0) VM structure.
1272 * @thread EMT(0)
1273 */
1274VMMR0_INT_DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1275{
1276 /* Set the indicator. */
1277 pGVM->gvmm.s.fDoneVMMR0Init = true;
1278}
1279
1280
1281/**
1282 * Indicates that we're doing the ring-0 termination of the VM.
1283 *
1284 * @returns true if termination hasn't been done already, false if it has.
1285 * @param pGVM Pointer to the global VM structure. Optional.
1286 * @thread EMT(0) or session cleanup thread.
1287 */
1288VMMR0_INT_DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1289{
1290 /* Validate the VM structure, state and handle. */
1291 AssertPtrReturn(pGVM, false);
1292
1293 /* Set the indicator. */
1294 if (pGVM->gvmm.s.fDoneVMMR0Term)
1295 return false;
1296 pGVM->gvmm.s.fDoneVMMR0Term = true;
1297 return true;
1298}
1299
1300
1301/**
1302 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1303 *
1304 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1305 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1306 * would've been nice if the caller were actually the EMT thread, or if we somehow
1307 * could've associated the calling thread with the VM up front.
1308 *
1309 * @returns VBox status code.
1310 * @param pGVM The global (ring-0) VM structure.
1311 *
1312 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1313 */
1314VMMR0_INT_DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1315{
1316 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1317 PGVMM pGVMM;
1318 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1319
1320 /*
1321 * Validate the VM structure, state and caller.
1322 */
1323 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1324 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1325 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1326 VERR_WRONG_ORDER);
1327
1328 uint32_t hGVM = pGVM->hSelf;
1329 ASMCompilerBarrier();
1330 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1331 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1332
1333 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1334 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1335
1336 RTPROCESS ProcId = RTProcSelf();
1337 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1338 AssertReturn( ( pHandle->hEMT0 == hSelf
1339 && pHandle->ProcId == ProcId)
1340 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1341
1342 /*
1343 * Lookup the handle and destroy the object.
1344 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1345 * object, we take some precautions against racing callers just in case...
1346 */
1347 int rc = gvmmR0CreateDestroyLock(pGVMM);
1348 AssertRC(rc);
1349
1350 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1351 if ( pHandle->pGVM == pGVM
1352 && ( ( pHandle->hEMT0 == hSelf
1353 && pHandle->ProcId == ProcId)
1354 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1355 && RT_VALID_PTR(pHandle->pvObj)
1356 && RT_VALID_PTR(pHandle->pSession)
1357 && RT_VALID_PTR(pHandle->pGVM)
1358 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1359 {
1360 /* Check that other EMTs have deregistered. */
1361 uint32_t cNotDeregistered = 0;
1362 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1363 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1364 if (cNotDeregistered == 0)
1365 {
1366 /* Grab the object pointer. */
1367 void *pvObj = pHandle->pvObj;
1368 pHandle->pvObj = NULL;
1369 gvmmR0CreateDestroyUnlock(pGVMM);
1370
1371 SUPR0ObjRelease(pvObj, pHandle->pSession);
1372 }
1373 else
1374 {
1375 gvmmR0CreateDestroyUnlock(pGVMM);
1376 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1377 }
1378 }
1379 else
1380 {
1381 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1382 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1383 gvmmR0CreateDestroyUnlock(pGVMM);
1384 rc = VERR_GVMM_IPE_2;
1385 }
1386
1387 return rc;
1388}
1389
1390
1391/**
1392 * Performs VM cleanup task as part of object destruction.
1393 *
1394 * @param pGVM The GVM pointer.
1395 */
1396static void gvmmR0CleanupVM(PGVM pGVM)
1397{
1398 if ( pGVM->gvmm.s.fDoneVMMR0Init
1399 && !pGVM->gvmm.s.fDoneVMMR0Term)
1400 {
1401 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1402 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1403 {
1404 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1405 VMMR0TermVM(pGVM, NIL_VMCPUID);
1406 }
1407 else
1408 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1409 }
1410
1411#ifndef VBOX_WITH_MINIMAL_R0
1412 if (!VM_IS_NON_NATIVE_WITH_LIMITED_R0(pGVM))
1413 {
1414 GMMR0CleanupVM(pGVM);
1415# ifdef VBOX_WITH_NEM_R0
1416 NEMR0CleanupVM(pGVM);
1417# endif
1418 PDMR0CleanupVM(pGVM);
1419 IOMR0CleanupVM(pGVM);
1420 DBGFR0CleanupVM(pGVM);
1421 PGMR0CleanupVM(pGVM);
1422 TMR0CleanupVM(pGVM);
1423 }
1424#endif
1425 VMMR0CleanupVM(pGVM);
1426}
1427
1428
1429/**
1430 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1431 *
1432 * pvUser1 is the GVM instance pointer.
1433 * pvUser2 is the handle pointer.
1434 */
1435static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1436{
1437 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1438
1439 NOREF(pvObj);
1440
1441 /*
1442 * Some quick, paranoid, input validation.
1443 */
1444 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1445 AssertPtr(pHandle);
1446 PGVMM pGVMM = (PGVMM)pvUser1;
1447 Assert(pGVMM == g_pGVMM);
1448 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1449 if ( !iHandle
1450 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1451 || iHandle != pHandle->iSelf)
1452 {
1453 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1454 return;
1455 }
1456
1457 int rc = gvmmR0CreateDestroyLock(pGVMM);
1458 AssertRC(rc);
1459 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1460 AssertRC(rc);
1461
1462 /*
1463 * This is a tad slow but a doubly linked list is too much hassle.
1464 */
1465 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1466 {
1467 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1468 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1469 gvmmR0CreateDestroyUnlock(pGVMM);
1470 return;
1471 }
1472
1473 if (pGVMM->iUsedHead == iHandle)
1474 pGVMM->iUsedHead = pHandle->iNext;
1475 else
1476 {
1477 uint16_t iPrev = pGVMM->iUsedHead;
1478 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1479 while (iPrev)
1480 {
1481 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1482 {
1483 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1484 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1485 gvmmR0CreateDestroyUnlock(pGVMM);
1486 return;
1487 }
1488 if (RT_UNLIKELY(c-- <= 0))
1489 {
1490 iPrev = 0;
1491 break;
1492 }
1493
1494 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1495 break;
1496 iPrev = pGVMM->aHandles[iPrev].iNext;
1497 }
1498 if (!iPrev)
1499 {
1500 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1501 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1502 gvmmR0CreateDestroyUnlock(pGVMM);
1503 return;
1504 }
1505
1506 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1507 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1508 }
1509 pHandle->iNext = 0;
1510 pGVMM->cVMs--;
1511
1512 /*
1513 * Do the global cleanup round.
1514 */
1515 PGVM pGVM = pHandle->pGVM;
1516 if ( RT_VALID_PTR(pGVM)
1517 && pGVM->u32Magic == GVM_MAGIC)
1518 {
1519 pGVMM->cEMTs -= pGVM->cCpus;
1520
1521 if (pGVM->pSession)
1522 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1523
1524 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1525
1526 gvmmR0CleanupVM(pGVM);
1527
1528 /*
1529 * Do the GVMM cleanup - must be done last.
1530 */
1531 /* The VM and VM pages mappings/allocations. */
1532 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1533 {
1534 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1535 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1536 }
1537
1538 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1539 {
1540 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1541 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1542 }
1543
1544 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1545 {
1546 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1547 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1548 }
1549
1550 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1551 {
1552 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1553 {
1554 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1555 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1556 }
1557 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1558 {
1559 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1560 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1561 }
1562#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1563 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1564 {
1565 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1566 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1567 }
1568#endif
1569 }
1570
1571 /* the GVM structure itself. */
1572 pGVM->u32Magic |= UINT32_C(0x80000000);
1573 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1574 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1575 pGVM = NULL;
1576
1577 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1578 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1579 AssertRC(rc);
1580 }
1581 /* else: GVMMR0CreateVM cleanup. */
1582
1583 /*
1584 * Free the handle.
1585 */
1586 pHandle->iNext = pGVMM->iFreeHead;
1587 pGVMM->iFreeHead = iHandle;
1588 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1589 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1590 ASMAtomicWriteNullPtr(&pHandle->pSession);
1591 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1592 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1593
1594 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1595 gvmmR0CreateDestroyUnlock(pGVMM);
1596 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1597}
1598
1599
1600/**
1601 * Registers the calling thread as the EMT of a Virtual CPU.
1602 *
1603 * Note that VCPU 0 is automatically registered during VM creation.
1604 *
1605 * @returns VBox status code
1606 * @param pGVM The global (ring-0) VM structure.
1607 * @param idCpu VCPU id to register the current thread as.
1608 */
1609VMMR0_INT_DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1610{
1611 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1612
1613 /*
1614 * Validate the VM structure, state and handle.
1615 */
1616 PGVMM pGVMM;
1617 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1618 if (RT_SUCCESS(rc))
1619 {
1620 if (idCpu < pGVM->cCpus)
1621 {
1622 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1623 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1624
1625 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1626
1627 /* Check that the EMT isn't already assigned to a thread. */
1628 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1629 {
1630 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1631
1632 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1633 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1634 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1635 if (RT_SUCCESS(rc))
1636 {
1637 /*
1638 * Do the assignment, then try setup the hook. Undo if that fails.
1639 */
1640 unsigned cCollisions = 0;
1641 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1642 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1643 {
1644 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1645 do
1646 {
1647 cCollisions++;
1648 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1649 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1650 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1651 }
1652 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1653 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1654
1655 pGVCpu->hNativeThreadR0 = hNativeSelf;
1656 pGVCpu->hEMT = hNativeSelf;
1657 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1658 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1659
1660 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1661 if (RT_SUCCESS(rc))
1662 {
1663#ifndef VBOX_WITH_MINIMAL_R0
1664 CPUMR0RegisterVCpuThread(pGVCpu);
1665#endif
1666
1667#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1668 /*
1669 * Create the high resolution wake-up timer, ignore failures.
1670 */
1671 if (RTTimerCanDoHighResolution())
1672 {
1673 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1674 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1675 if (RT_FAILURE(rc2))
1676 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1677 }
1678#endif
1679 }
1680 else
1681 {
1682 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1683 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1684 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1685 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1686 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1687 }
1688 }
1689 }
1690 else
1691 rc = VERR_ACCESS_DENIED;
1692
1693 gvmmR0CreateDestroyUnlock(pGVMM);
1694 }
1695 else
1696 rc = VERR_INVALID_CPU_ID;
1697 }
1698 return rc;
1699}
1700
1701
1702/**
1703 * Deregisters the calling thread as the EMT of a Virtual CPU.
1704 *
1705 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1706 *
1707 * @returns VBox status code
1708 * @param pGVM The global (ring-0) VM structure.
1709 * @param idCpu VCPU id to deregister the current thread from.
1710 */
1711VMMR0_INT_DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1712{
1713 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1714
1715 /*
1716 * Validate the VM structure, state and handle.
1717 */
1718 PGVMM pGVMM;
1719 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1720 if (RT_SUCCESS(rc))
1721 {
1722 /*
1723 * Take the destruction lock and recheck the handle state to
1724 * prevent racing GVMMR0DestroyVM.
1725 */
1726 gvmmR0CreateDestroyLock(pGVMM);
1727
1728 uint32_t hSelf = pGVM->hSelf;
1729 ASMCompilerBarrier();
1730 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1731 && pGVMM->aHandles[hSelf].pvObj != NULL
1732 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1733 {
1734 /*
1735 * Do per-EMT cleanups.
1736 */
1737 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1738
1739 /*
1740 * Invalidate hEMT. We don't use NIL here as that would allow
1741 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1742 */
1743 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1744 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1745
1746 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1747 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1748 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1749 }
1750
1751 gvmmR0CreateDestroyUnlock(pGVMM);
1752 }
1753 return rc;
1754}
1755
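/*
 * Illustrative usage sketch (not part of the original sources): how an
 * additional EMT might bracket its lifetime with the two APIs above.  VCPU 0
 * is registered automatically by VM creation, so only idCpu >= 1 takes this
 * path; the function name and the request-servicing placeholder are
 * assumptions for illustration only.
 */
#if 0 /* example only, never compiled */
static int exampleEmtThreadBody(PGVM pGVM, VMCPUID idCpu)
{
    /* Bind the calling native thread to the given virtual CPU. */
    int rc = GVMMR0RegisterVCpu(pGVM, idCpu);
    if (RT_FAILURE(rc))
        return rc;

    /* ... service ring-0 requests for this VCPU here ... */

    /* Unbind again; the slot cannot be re-registered afterwards. */
    return GVMMR0DeregisterVCpu(pGVM, idCpu);
}
#endif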
1756
1757/**
1758 * Registers the caller as a given worker thread.
1759 *
1760 * This enables the thread to enter ring-0 critical sections.
1761 *
1762 * @returns VBox status code.
1763 * @param pGVM The global (ring-0) VM structure.
1764 * @param enmWorker The worker thread this is supposed to be.
1765 * @param hNativeSelfR3 The ring-3 native self of the caller.
1766 */
1767VMMR0_INT_DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1768{
1769 /*
1770 * Validate input.
1771 */
1772 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1773 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1774 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1775 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1776 PGVMM pGVMM;
1777 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1778 AssertRCReturn(rc, rc);
1779 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1780
1781 /*
1782 * Grab the big lock and check the VM state again.
1783 */
1784 uint32_t const hSelf = pGVM->hSelf;
1785 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1786 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1787 && pGVMM->aHandles[hSelf].pvObj != NULL
1788 && pGVMM->aHandles[hSelf].pGVM == pGVM
1789 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1790 {
1791 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1792 {
1793 /*
1794 * Check that the thread isn't an EMT or serving in some other worker capacity.
1795 */
1796 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1797 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1798 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1799                 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1800 rc = VERR_INVALID_PARAMETER);
1801 if (RT_SUCCESS(rc))
1802 {
1803 /*
1804 * Do the registration.
1805 */
1806 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1807 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1808 {
1809 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1810 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1811 rc = VINF_SUCCESS;
1812 }
1813 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1814 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1815 rc = VERR_ALREADY_EXISTS;
1816 else
1817 rc = VERR_RESOURCE_BUSY;
1818 }
1819 }
1820 else
1821 rc = VERR_VM_INVALID_VM_STATE;
1822 }
1823 else
1824 rc = VERR_INVALID_VM_HANDLE;
1825 gvmmR0CreateDestroyUnlock(pGVMM);
1826 return rc;
1827}
1828
1829
1830/**
1831 * Deregisters a worker thread (caller).
1832 *
1833 * The worker thread cannot be re-created and re-registered; instead, the given
1834 * @a enmWorker slot becomes invalid.
1835 *
1836 * @returns VBox status code.
1837 * @param pGVM The global (ring-0) VM structure.
1838 * @param enmWorker The worker thread this is supposed to be.
1839 */
1840VMMR0_INT_DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1841{
1842 /*
1843 * Validate input.
1844 */
1845 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1846 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1847 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1848 PGVMM pGVMM;
1849 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1850 AssertRCReturn(rc, rc);
1851
1852 /*
1853 * Grab the big lock and check the VM state again.
1854 */
1855 uint32_t const hSelf = pGVM->hSelf;
1856 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1857 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1858 && pGVMM->aHandles[hSelf].pvObj != NULL
1859 && pGVMM->aHandles[hSelf].pGVM == pGVM
1860 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1861 {
1862 /*
1863 * Do the deregistration.
1864 * This will prevent any other thread from registering as the worker later.
1865 */
1866 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1867 {
1868 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1869 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1870 rc = VINF_SUCCESS;
1871 }
1872 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1873 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1874 rc = VINF_SUCCESS;
1875 else
1876 rc = VERR_NOT_OWNER;
1877 }
1878 else
1879 rc = VERR_INVALID_VM_HANDLE;
1880 gvmmR0CreateDestroyUnlock(pGVMM);
1881 return rc;
1882}
1883
1884
1885/**
1886 * Lookup a GVM structure by its handle.
1887 *
1888 * @returns The GVM pointer on success, NULL on failure.
1889 * @param hGVM The global VM handle. Asserts on bad handle.
1890 */
1891VMMR0_INT_DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1892{
1893 PGVMM pGVMM;
1894 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1895
1896 /*
1897 * Validate.
1898 */
1899 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1900 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1901
1902 /*
1903 * Look it up.
1904 */
1905 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1906 AssertPtrReturn(pHandle->pvObj, NULL);
1907 PGVM pGVM = pHandle->pGVM;
1908 AssertPtrReturn(pGVM, NULL);
1909
1910 return pGVM;
1911}
1912
1913
1914/**
1915 * Check that the given GVM and VM structures match up.
1916 *
1917 * The calling thread must be in the same process as the VM. All current lookups
1918 * are by threads inside the same process, so this will not be an issue.
1919 *
1920 * @returns VBox status code.
1921 * @param pGVM The global (ring-0) VM structure.
1922 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1923 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1924 * shared mode when requested.
1925 *
1926 * Be very careful if not taking the lock as it's
1927 * possible that the VM will disappear then!
1928 *
1929 * @remark This will not assert on an invalid pGVM but will just return an error silently.
1930 */
1931static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1932{
1933 /*
1934 * Check the pointers.
1935 */
1936 int rc;
1937 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1938 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1939 {
1940 /*
1941 * Get the pGVMM instance and check the VM handle.
1942 */
1943 PGVMM pGVMM;
1944 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1945
1946 uint16_t hGVM = pGVM->hSelf;
1947 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1948 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1949 {
1950 RTPROCESS const pidSelf = RTProcSelf();
1951 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1952 if (fTakeUsedLock)
1953 {
1954 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1955 AssertRCReturn(rc, rc);
1956 }
1957
1958 if (RT_LIKELY( pHandle->pGVM == pGVM
1959 && pHandle->ProcId == pidSelf
1960 && RT_VALID_PTR(pHandle->pvObj)))
1961 {
1962 /*
1963 * Some more VM data consistency checks.
1964 */
1965 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1966 && pGVM->hSelfUnsafe == hGVM
1967 && pGVM->pSelf == pGVM))
1968 {
1969 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1970 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1971 {
1972 *ppGVMM = pGVMM;
1973 return VINF_SUCCESS;
1974 }
1975 rc = VERR_INCONSISTENT_VM_HANDLE;
1976 }
1977 else
1978 rc = VERR_INCONSISTENT_VM_HANDLE;
1979 }
1980 else
1981 rc = VERR_INVALID_VM_HANDLE;
1982
1983 if (fTakeUsedLock)
1984 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1985 }
1986 else
1987 rc = VERR_INVALID_VM_HANDLE;
1988 }
1989 else
1990 rc = VERR_INVALID_POINTER;
1991 return rc;
1992}
1993
1994
1995/**
1996 * Validates a GVM/VM pair.
1997 *
1998 * @returns VBox status code.
1999 * @param pGVM The global (ring-0) VM structure.
2000 */
2001VMMR0_INT_DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
2002{
2003 PGVMM pGVMM;
2004 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2005}
2006
2007
2008/**
2009 * Check that the given GVM and VM structures match up.
2010 *
2011 * The calling thread must be in the same process as the VM. All current lookups
2012 * are by threads inside the same process, so this will not be an issue.
2013 *
2014 * @returns VBox status code.
2015 * @param pGVM The global (ring-0) VM structure.
2016 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
2017 * @param ppGVMM Where to store the pointer to the GVMM instance data.
2018 * @thread EMT
2019 *
2020 * @remarks This will assert in all failure paths.
2021 */
2022static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
2023{
2024 /*
2025 * Check the pointers.
2026 */
2027 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
2028 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
2029
2030 /*
2031 * Get the pGVMM instance and check the VM handle.
2032 */
2033 PGVMM pGVMM;
2034 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2035
2036 uint16_t hGVM = pGVM->hSelf;
2037 ASMCompilerBarrier();
2038 AssertReturn( hGVM != NIL_GVM_HANDLE
2039 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
2040
2041 RTPROCESS const pidSelf = RTProcSelf();
2042 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
2043 AssertReturn( pHandle->pGVM == pGVM
2044 && pHandle->ProcId == pidSelf
2045 && RT_VALID_PTR(pHandle->pvObj),
2046 VERR_INVALID_HANDLE);
2047
2048 /*
2049 * Check the EMT claim.
2050 */
2051 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2052 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2053 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2054
2055 /*
2056 * Some more VM data consistency checks.
2057 */
2058 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2059 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2060 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
2061 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2062
2063 *ppGVMM = pGVMM;
2064 return VINF_SUCCESS;
2065}
2066
2067
2068/**
2069 * Validates a GVM/EMT pair.
2070 *
2071 * @returns VBox status code.
2072 * @param pGVM The global (ring-0) VM structure.
2073 * @param idCpu The Virtual CPU ID of the calling EMT.
2074 * @thread EMT(idCpu)
2075 */
2076VMMR0_INT_DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2077{
2078 PGVMM pGVMM;
2079 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2080}
2081
2082
2083/**
2084 * Looks up the VM belonging to the specified EMT thread.
2085 *
2086 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2087 * unnecessary kernel panics when the EMT thread hits an assertion. The
2088 * caller may or may not be an EMT thread.
2089 *
2090 * @returns Pointer to the VM on success, NULL on failure.
2091 * @param hEMT The native thread handle of the EMT.
2092 * NIL_RTNATIVETHREAD means the current thread
2093 */
2094VMMR0_INT_DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2095{
2096 /*
2097 * No assertions here as we're usually called in an AssertMsgN or
2098 * RTAssert* context.
2099 */
2100 PGVMM pGVMM = g_pGVMM;
2101 if ( !RT_VALID_PTR(pGVMM)
2102 || pGVMM->u32Magic != GVMM_MAGIC)
2103 return NULL;
2104
2105 if (hEMT == NIL_RTNATIVETHREAD)
2106 hEMT = RTThreadNativeSelf();
2107 RTPROCESS ProcId = RTProcSelf();
2108
2109 /*
2110 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2111 */
2112/** @todo introduce some pid hash table here, please. */
2113 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2114 {
2115 if ( pGVMM->aHandles[i].iSelf == i
2116 && pGVMM->aHandles[i].ProcId == ProcId
2117 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2118 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2119 {
2120 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2121 return pGVMM->aHandles[i].pGVM;
2122
2123 /* This is fairly safe with the current process-per-VM approach. */
2124 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2125 VMCPUID const cCpus = pGVM->cCpus;
2126 ASMCompilerBarrier();
2127 if ( cCpus < 1
2128 || cCpus > VMM_MAX_CPU_COUNT)
2129 continue;
2130 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2131 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2132 return pGVMM->aHandles[i].pGVM;
2133 }
2134 }
2135 return NULL;
2136}
2137
2138
2139/**
2140 * Looks up the GVMCPU belonging to the specified EMT thread.
2141 *
2142 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2143 * unnecessary kernel panics when the EMT thread hits an assertion. The
2144 * caller may or may not be an EMT thread.
2145 *
2146 * @returns Pointer to the VCPU on success, NULL on failure.
2147 * @param hEMT The native thread handle of the EMT.
2148 * NIL_RTNATIVETHREAD means the current thread
2149 */
2150VMMR0_INT_DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2151{
2152 /*
2153 * No assertions here as we're usually called in an AssertMsgN,
2154 * RTAssert*, Log and LogRel contexts.
2155 */
2156 PGVMM pGVMM = g_pGVMM;
2157 if ( !RT_VALID_PTR(pGVMM)
2158 || pGVMM->u32Magic != GVMM_MAGIC)
2159 return NULL;
2160
2161 if (hEMT == NIL_RTNATIVETHREAD)
2162 hEMT = RTThreadNativeSelf();
2163 RTPROCESS ProcId = RTProcSelf();
2164
2165 /*
2166 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2167 */
2168/** @todo introduce some pid hash table here, please. */
2169 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2170 {
2171 if ( pGVMM->aHandles[i].iSelf == i
2172 && pGVMM->aHandles[i].ProcId == ProcId
2173 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2174 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2175 {
2176 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2177 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2178 return &pGVM->aCpus[0];
2179
2180 /* This is fairly safe with the current process-per-VM approach. */
2181 VMCPUID const cCpus = pGVM->cCpus;
2182 ASMCompilerBarrier();
2183 ASMCompilerBarrier();
2184 if ( cCpus < 1
2185 || cCpus > VMM_MAX_CPU_COUNT)
2186 continue;
2187 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2188 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2189 return &pGVM->aCpus[idCpu];
2190 }
2191 }
2192 return NULL;
2193}
2194
2195
2196/**
2197 * Get the GVMCPU structure for the given EMT.
2198 *
2199 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2200 * @param pGVM The global (ring-0) VM structure.
2201 * @param hEMT The native thread handle of the EMT.
2202 * NIL_RTNATIVETHREAD means the current thread
2203 */
2204VMMR0_INT_DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2205{
2206 /*
2207 * Validate & adjust input.
2208 */
2209 AssertPtr(pGVM);
2210 Assert(pGVM->u32Magic == GVM_MAGIC);
2211 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2212 {
2213 hEMT = RTThreadNativeSelf();
2214 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2215 }
2216
2217 /*
2218 * Find the matching hash table entry.
2219 * See similar code in GVMMR0GetRing3ThreadForSelf.
2220 */
2221 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2222 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2223 { /* likely */ }
2224 else
2225 {
2226#ifdef VBOX_STRICT
2227 unsigned cCollisions = 0;
2228#endif
2229 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2230 for (;;)
2231 {
2232 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2233 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2234 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2235 break;
2236 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2237 {
2238#ifdef VBOX_STRICT
2239 uint32_t idxCpu = pGVM->cCpus;
2240 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2241 while (idxCpu-- > 0)
2242 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2243#endif
2244 return NULL;
2245 }
2246 }
2247 }
2248
2249 /*
2250 * Validate the VCpu number and translate it into a pointer.
2251 */
2252 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2253 AssertReturn(idCpu < pGVM->cCpus, NULL);
2254 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2255 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2256 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2257 return pGVCpu;
2258}
2259
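/*
 * Minimal sketch of the double-hashing probe used by the EMT hash lookups
 * above (illustrative only, not part of the original sources).  The table
 * size and the two hash values are stand-ins; the real GVMM_EMT_HASH_1/2
 * macros and GVMM_EMT_HASH_SIZE are defined elsewhere in this file.
 */
#if 0 /* example only, never compiled */
# define EXAMPLE_HASH_SIZE 32 /* assumed power-of-two size for the sketch */

static uint32_t exampleHashLookup(RTNATIVETHREAD const *paTable, RTNATIVETHREAD hKey,
                                  uint32_t uHash1, uint32_t uHash2)
{
    uint32_t idxHash = uHash1 % EXAMPLE_HASH_SIZE;
    if (paTable[idxHash] == hKey)
        return idxHash;
    uint32_t const idxStep = uHash2 | 1; /* odd step, so every slot of the power-of-two table gets probed */
    for (unsigned cProbes = 0; cProbes < EXAMPLE_HASH_SIZE; cProbes++)
    {
        idxHash = (idxHash + idxStep) % EXAMPLE_HASH_SIZE;
        if (paTable[idxHash] == hKey)
            return idxHash;
        if (paTable[idxHash] == NIL_RTNATIVETHREAD) /* an empty slot terminates the probe sequence */
            return UINT32_MAX;
    }
    return UINT32_MAX; /* table full, key not present */
}
#endif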
2260
2261/**
2262 * Get the native ring-3 thread handle for the caller.
2263 *
2264 * This works for EMTs and registered workers.
2265 *
2266 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2267 * @param pGVM The global (ring-0) VM structure.
2268 */
2269VMMR0_INT_DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2270{
2271 /*
2272 * Validate input.
2273 */
2274 AssertPtr(pGVM);
2275 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2276 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2277 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2278
2279 /*
2280 * Find the matching hash table entry.
2281 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2282 */
2283 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2284 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2285 { /* likely */ }
2286 else
2287 {
2288#ifdef VBOX_STRICT
2289 unsigned cCollisions = 0;
2290#endif
2291 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2292 for (;;)
2293 {
2294 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2295 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2296 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2297 break;
2298 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2299 {
2300#ifdef VBOX_STRICT
2301 uint32_t idxCpu = pGVM->cCpus;
2302 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2303 while (idxCpu-- > 0)
2304 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2305#endif
2306
2307 /*
2308 * Not an EMT, so see if it's a worker thread.
2309 */
2310 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2311 while (--idx > GVMMWORKERTHREAD_INVALID)
2312 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2313 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2314
2315 return NIL_RTNATIVETHREAD;
2316 }
2317 }
2318 }
2319
2320 /*
2321 * Validate the VCpu number and translate it into a pointer.
2322 */
2323 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2324 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2325 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2326 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2327 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2328 return pGVCpu->hNativeThread;
2329}
2330
2331
2332/**
2333 * Converts a pointer with the GVM structure to a host physical address.
2334 *
2335 * @returns Host physical address.
2336 * @param pGVM The global (ring-0) VM structure.
2337 * @param pv The address to convert.
2338 * @thread EMT
2339 */
2340VMMR0_INT_DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2341{
2342 AssertPtr(pGVM);
2343 Assert(pGVM->u32Magic == GVM_MAGIC);
2344 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2345 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2346 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2347}
2348
2349
2350/**
2351 * This will wake up expired and soon-to-be expired VMs.
2352 *
2353 * @returns Number of VMs that have been woken up.
2354 * @param pGVMM Pointer to the GVMM instance data.
2355 * @param u64Now The current time.
2356 */
2357static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2358{
2359 /*
2360 * Skip this if early wake-ups have been disabled, either because of high
2361 * resolution wake-ups or by the user.
2362 */
2363 if (!pGVMM->fDoEarlyWakeUps)
2364 return 0;
2365
2366/** @todo Rewrite this algorithm. See performance defect XYZ. */
2367
2368 /*
2369 * A cheap optimization to stop wasting so much time here on big setups.
2370 */
2371 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2372 if ( pGVMM->cHaltedEMTs == 0
2373 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2374 return 0;
2375
2376 /*
2377 * Only one thread doing this at a time.
2378 */
2379 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2380 return 0;
2381
2382 /*
2383 * The first pass will wake up VMs which have actually expired
2384 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2385 */
2386 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2387 uint64_t u64Min = UINT64_MAX;
2388 unsigned cWoken = 0;
2389 unsigned cHalted = 0;
2390 unsigned cTodo2nd = 0;
2391 unsigned cTodo3rd = 0;
2392 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2393 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2394 i = pGVMM->aHandles[i].iNext)
2395 {
2396 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2397 if ( RT_VALID_PTR(pCurGVM)
2398 && pCurGVM->u32Magic == GVM_MAGIC)
2399 {
2400 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2401 {
2402 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2403 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2404 if (u64)
2405 {
2406 if (u64 <= u64Now)
2407 {
2408 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2409 {
2410 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2411 AssertRC(rc);
2412 cWoken++;
2413 }
2414 }
2415 else
2416 {
2417 cHalted++;
2418 if (u64 <= uNsEarlyWakeUp1)
2419 cTodo2nd++;
2420 else if (u64 <= uNsEarlyWakeUp2)
2421 cTodo3rd++;
2422 else if (u64 < u64Min)
2423                         u64Min = u64;
2424 }
2425 }
2426 }
2427 }
2428 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2429 }
2430
2431 if (cTodo2nd)
2432 {
2433 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2434 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2435 i = pGVMM->aHandles[i].iNext)
2436 {
2437 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2438 if ( RT_VALID_PTR(pCurGVM)
2439 && pCurGVM->u32Magic == GVM_MAGIC)
2440 {
2441 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2442 {
2443 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2444 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2445 if ( u64
2446 && u64 <= uNsEarlyWakeUp1)
2447 {
2448 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2449 {
2450 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2451 AssertRC(rc);
2452 cWoken++;
2453 }
2454 }
2455 }
2456 }
2457 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2458 }
2459 }
2460
2461 if (cTodo3rd)
2462 {
2463 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2464 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2465 i = pGVMM->aHandles[i].iNext)
2466 {
2467 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2468 if ( RT_VALID_PTR(pCurGVM)
2469 && pCurGVM->u32Magic == GVM_MAGIC)
2470 {
2471 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2472 {
2473 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2474 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2475 if ( u64
2476 && u64 <= uNsEarlyWakeUp2)
2477 {
2478 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2479 {
2480 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2481 AssertRC(rc);
2482 cWoken++;
2483 }
2484 }
2485 }
2486 }
2487 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2488 }
2489 }
2490
2491 /*
2492 * Set the minimum value.
2493 */
2494 pGVMM->uNsNextEmtWakeup = u64Min;
2495
2496 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2497 return cWoken;
2498}
2499
2500
2501#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2502/**
2503 * Timer callback for the EMT high-resolution wake-up timer.
2504 *
2505 * @param pTimer The timer handle.
2506 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2507 * @param iTick The current tick.
2508 */
2509static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2510{
2511 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2512 NOREF(pTimer); NOREF(iTick);
2513
2514 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2515 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2516 {
2517 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2518 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2519 }
2520 else
2521 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2522
2523 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2524 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2525}
2526#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2527
2528
2529/**
2530 * Halt the EMT thread.
2531 *
2532 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2533 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2534 * @param pGVM The global (ring-0) VM structure.
2535 * @param pGVCpu The global (ring-0) CPU structure of the calling
2536 * EMT.
2537 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2538 * @thread EMT(pGVCpu).
2539 */
2540VMMR0_INT_DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2541{
2542 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2543 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2544 PGVMM pGVMM;
2545 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2546
2547 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2548 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2549
2550 /*
2551 * If we're doing early wake-ups, we must take the UsedList lock before we
2552 * start querying the current time.
2553 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2554 */
2555 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2556 if (fDoEarlyWakeUps)
2557 {
2558 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2559 }
2560
2561 /* GIP hack: We may frequently be sleeping for short intervals where the
2562 difference between GIP and system time matters on systems with high resolution
2563 system time. So, convert the input from GIP to System time in that case. */
2564 Assert(ASMIntAreEnabled());
2565 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2566 const uint64_t u64NowGip = RTTimeNanoTS();
2567
2568 if (fDoEarlyWakeUps)
2569 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2570
2571 /*
2572 * Go to sleep if we must...
2573 * Cap the sleep time to 1 second to be on the safe side.
2574 */
2575 int rc;
2576 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2577 if ( u64NowGip < u64ExpireGipTime
2578 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2579 ? pGVMM->nsMinSleepCompany
2580 : pGVMM->nsMinSleepAlone)
2581#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2582 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2583#endif
2584 )
2585 )
2586 {
2587 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2588 if (cNsInterval > RT_NS_1SEC)
2589 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2590 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2591 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2592 if (fDoEarlyWakeUps)
2593 {
2594 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2595 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2596 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2597 }
2598
2599#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2600 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2601 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2602 {
2603 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2604 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2605 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2606 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2607 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2608 }
2609#endif
2610
2611 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2612 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2613 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2614
2615 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2616 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2617
2618#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2619 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2620 { /* likely */ }
2621 else
2622 {
2623 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2624 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2625 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2626 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2627 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2628 }
2629#endif
2630
2631 /* Reset the semaphore to try to prevent a few false wake-ups. */
2632 if (rc == VINF_SUCCESS)
2633 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2634 else if (rc == VERR_TIMEOUT)
2635 {
2636 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2637 rc = VINF_SUCCESS;
2638 }
2639 }
2640 else
2641 {
2642 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2643 if (fDoEarlyWakeUps)
2644 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2645 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2646 rc = VINF_SUCCESS;
2647 }
2648
2649 return rc;
2650}
2651
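/*
 * Illustrative sketch (not part of the original sources): how an EMT might
 * turn a relative timeout into the absolute GIP-time deadline expected by
 * GVMMR0SchedHalt.  The 5 ms timeout is an arbitrary example value.
 */
#if 0 /* example only, never compiled */
static int exampleHaltForUpTo5Ms(PGVM pGVM, PGVMCPU pGVCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + 5 * RT_NS_1MS; /* GIP time */
    /* VINF_SUCCESS covers both the timeout and an explicit wake-up/poke;
       VERR_INTERRUPTED means a signal is pending for the thread. */
    return GVMMR0SchedHalt(pGVM, pGVCpu, u64ExpireGipTime);
}
#endif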
2652
2653/**
2654 * Halt the EMT thread.
2655 *
2656 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2657 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2658 * @param pGVM The global (ring-0) VM structure.
2659 * @param idCpu The Virtual CPU ID of the calling EMT.
2660 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2661 * @thread EMT(idCpu).
2662 */
2663VMMR0_INT_DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2664{
2665 PGVMM pGVMM;
2666 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2667 if (RT_SUCCESS(rc))
2668 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2669 return rc;
2670}
2671
2672
2673
2674/**
2675 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2676 * a sleeping EMT.
2677 *
2678 * @retval VINF_SUCCESS if successfully woken up.
2679 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2680 *
2681 * @param pGVM The global (ring-0) VM structure.
2682 * @param pGVCpu The global (ring-0) VCPU structure.
2683 */
2684DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2685{
2686 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2687
2688 /*
2689 * Signal the semaphore regardless of whether it's currently blocked on it.
2690 *
2691 * The reason for this is that there is absolutely no way we can be 100%
2692 * certain that it isn't *about* to go to sleep on it and just got
2693 * delayed a bit en route. So, we will always signal the semaphore when
2694 * it is flagged as halted in the VMM.
2695 */
2696/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2697 int rc;
2698 if (pGVCpu->gvmm.s.u64HaltExpire)
2699 {
2700 rc = VINF_SUCCESS;
2701 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2702 }
2703 else
2704 {
2705 rc = VINF_GVM_NOT_BLOCKED;
2706 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2707 }
2708
2709 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2710 AssertRC(rc2);
2711
2712 return rc;
2713}
2714
2715
2716/**
2717 * Wakes up the halted EMT thread so it can service a pending request.
2718 *
2719 * @returns VBox status code.
2720 * @retval VINF_SUCCESS if successfully woken up.
2721 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2722 *
2723 * @param pGVM The global (ring-0) VM structure.
2724 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2725 * @param fTakeUsedLock Take the used lock or not
2726 * @thread Any but EMT(idCpu).
2727 */
2728VMMR0_INT_DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2729{
2730 /*
2731 * Validate input and take the UsedLock.
2732 */
2733 PGVMM pGVMM;
2734 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2735 if (RT_SUCCESS(rc))
2736 {
2737 if (idCpu < pGVM->cCpus)
2738 {
2739 /*
2740 * Do the actual job.
2741 */
2742 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2743
2744 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2745 {
2746 /*
2747 * While we're here, do a round of scheduling.
2748 */
2749 Assert(ASMIntAreEnabled());
2750 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2751 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2752 }
2753 }
2754 else
2755 rc = VERR_INVALID_CPU_ID;
2756
2757 if (fTakeUsedLock)
2758 {
2759 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2760 AssertRC(rc2);
2761 }
2762 }
2763
2764 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2765 return rc;
2766}
2767
2768
2769/**
2770 * Wakes up the halted EMT thread so it can service a pending request.
2771 *
2772 * @returns VBox status code.
2773 * @retval VINF_SUCCESS if successfully woken up.
2774 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2775 *
2776 * @param pGVM The global (ring-0) VM structure.
2777 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2778 * @thread Any but EMT(idCpu).
2779 */
2780VMMR0_INT_DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2781{
2782 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2783}
2784
2785
2786/**
2787 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2788 * parameter and no used locking.
2789 *
2790 * @returns VBox status code.
2791 * @retval VINF_SUCCESS if successfully woken up.
2792 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2793 *
2794 * @param pGVM The global (ring-0) VM structure.
2795 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2796 * @thread Any but EMT(idCpu).
2797 * @deprecated Don't use in new code if possible! Use the GVM variant.
2798 */
2799VMMR0_INT_DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2800{
2801 PGVMM pGVMM;
2802 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2803 if (RT_SUCCESS(rc))
2804 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2805 return rc;
2806}
2807
2808
2809/**
2810 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2811 * the Virtual CPU if it's still busy executing guest code.
2812 *
2813 * @returns VBox status code.
2814 * @retval VINF_SUCCESS if poked successfully.
2815 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2816 *
2817 * @param pGVM The global (ring-0) VM structure.
2818 * @param pVCpu The cross context virtual CPU structure.
2819 */
2820DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2821{
2822 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2823
2824 RTCPUID idHostCpu = pVCpu->idHostCpu;
2825 if ( idHostCpu == NIL_RTCPUID
2826 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2827 {
2828 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2829 return VINF_GVM_NOT_BUSY_IN_GC;
2830 }
2831
2832 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2833 RTMpPokeCpu(idHostCpu);
2834 return VINF_SUCCESS;
2835}
2836
2837
2838/**
2839 * Pokes an EMT if it's still busy running guest code.
2840 *
2841 * @returns VBox status code.
2842 * @retval VINF_SUCCESS if poked successfully.
2843 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2844 *
2845 * @param pGVM The global (ring-0) VM structure.
2846 * @param idCpu The ID of the virtual CPU to poke.
2847 * @param fTakeUsedLock Take the used lock or not
2848 */
2849VMMR0_INT_DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2850{
2851 /*
2852 * Validate input and take the UsedLock.
2853 */
2854 PGVMM pGVMM;
2855 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2856 if (RT_SUCCESS(rc))
2857 {
2858 if (idCpu < pGVM->cCpus)
2859 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2860 else
2861 rc = VERR_INVALID_CPU_ID;
2862
2863 if (fTakeUsedLock)
2864 {
2865 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2866 AssertRC(rc2);
2867 }
2868 }
2869
2870 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2871 return rc;
2872}
2873
2874
2875/**
2876 * Pokes an EMT if it's still busy running guest code.
2877 *
2878 * @returns VBox status code.
2879 * @retval VINF_SUCCESS if poked successfully.
2880 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2881 *
2882 * @param pGVM The global (ring-0) VM structure.
2883 * @param idCpu The ID of the virtual CPU to poke.
2884 */
2885VMMR0_INT_DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2886{
2887 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2888}
2889
2890
2891/**
2892 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2893 * used locking.
2894 *
2895 * @returns VBox status code.
2896 * @retval VINF_SUCCESS if poked successfully.
2897 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2898 *
2899 * @param pGVM The global (ring-0) VM structure.
2900 * @param idCpu The ID of the virtual CPU to poke.
2901 *
2902 * @deprecated Don't use in new code if possible! Use the GVM variant.
2903 */
2904VMMR0_INT_DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2905{
2906 PGVMM pGVMM;
2907 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2908 if (RT_SUCCESS(rc))
2909 {
2910 if (idCpu < pGVM->cCpus)
2911 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2912 else
2913 rc = VERR_INVALID_CPU_ID;
2914 }
2915 return rc;
2916}
2917
2918
2919/**
2920 * Wakes up a set of halted EMT threads so they can service pending requests.
2921 *
2922 * @returns VBox status code, no informational stuff.
2923 *
2924 * @param pGVM The global (ring-0) VM structure.
2925 * @param pSleepSet The set of sleepers to wake up.
2926 * @param pPokeSet The set of CPUs to poke.
2927 */
2928VMMR0_INT_DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2929{
2930 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2931 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2932 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2933
2934 /*
2935 * Validate input and take the UsedLock.
2936 */
2937 PGVMM pGVMM;
2938 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2939 if (RT_SUCCESS(rc))
2940 {
2941 rc = VINF_SUCCESS;
2942 VMCPUID idCpu = pGVM->cCpus;
2943 while (idCpu-- > 0)
2944 {
2945 /* Don't try poke or wake up ourselves. */
2946 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2947 continue;
2948
2949 /* just ignore errors for now. */
2950 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2951 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2952 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2953 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2954 }
2955
2956 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2957 AssertRC(rc2);
2958 }
2959
2960 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2961 return rc;
2962}
2963
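/*
 * Illustrative sketch (not part of the original sources): building the two
 * CPU sets for GVMMR0SchedWakeUpAndPokeCpus.  The VMCPUSET_EMPTY/VMCPUSET_ADD
 * macros are assumed from VBox/vmm/vmcpuset.h, and the per-CPU state check
 * mirrors the one in gvmmR0SchedPokeOne above.
 */
#if 0 /* example only, never compiled */
static int exampleKickAllOtherCpus(PGVM pGVM, VMCPUID idCpuSelf)
{
    VMCPUSET SleepSet;
    VMCPUSET PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (idCpu != idCpuSelf)
        {
            if (VMCPU_GET_STATE(&pGVM->aCpus[idCpu]) == VMCPUSTATE_STARTED_EXEC)
                VMCPUSET_ADD(&PokeSet, idCpu);   /* busy executing guest code -> poke it */
            else
                VMCPUSET_ADD(&SleepSet, idCpu);  /* possibly halted -> signal its halt semaphore */
        }
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
}
#endif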
2964
2965/**
2966 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2967 *
2968 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2969 * @param pGVM The global (ring-0) VM structure.
2970 * @param pReq Pointer to the request packet.
2971 */
2972VMMR0_INT_DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2973{
2974 /*
2975 * Validate input and pass it on.
2976 */
2977 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2978 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2979
2980 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2981}
2982
2983
2984
2985/**
2986 * Poll the schedule to see if someone else should get a chance to run.
2987 *
2988 * This is a bit hackish and will not work too well if the machine is
2989 * under heavy load from non-VM processes.
2990 *
2991 * @returns VINF_SUCCESS if not yielded.
2992 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2993 * @param pGVM The global (ring-0) VM structure.
2994 * @param idCpu The Virtual CPU ID of the calling EMT.
2995 * @param fYield Whether to yield or not.
2996 * This is for when we're spinning in the halt loop.
2997 * @thread EMT(idCpu).
2998 */
2999VMMR0_INT_DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
3000{
3001 /*
3002 * Validate input.
3003 */
3004 PGVMM pGVMM;
3005 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
3006 if (RT_SUCCESS(rc))
3007 {
3008 /*
3009 * We currently only implement helping with wake-ups (fYield = false), so don't
3010 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
3011 */
3012 if (!fYield && pGVMM->fDoEarlyWakeUps)
3013 {
3014 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
3015 pGVM->gvmm.s.StatsSched.cPollCalls++;
3016
3017 Assert(ASMIntAreEnabled());
3018 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
3019
3020 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
3021
3022 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3023 }
3024 /*
3025 * Not quite sure what we could do here...
3026 */
3027 else if (fYield)
3028 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
3029 else
3030 rc = VINF_SUCCESS;
3031 }
3032
3033 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
3034 return rc;
3035}
3036
3037
3038#ifdef GVMM_SCHED_WITH_PPT
3039/**
3040 * Timer callback for the periodic preemption timer.
3041 *
3042 * @param pTimer The timer handle.
3043 * @param pvUser Pointer to the per cpu structure.
3044 * @param iTick The current tick.
3045 */
3046static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
3047{
3048 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
3049 NOREF(pTimer); NOREF(iTick);
3050
3051 /*
3052 * Termination check
3053 */
3054 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
3055 return;
3056
3057 /*
3058 * Do the house keeping.
3059 */
3060 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3061
3062 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3063 {
3064 /*
3065 * Historicize the max frequency.
3066 */
3067 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3068 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3069 pCpu->Ppt.iTickHistorization = 0;
3070 pCpu->Ppt.uDesiredHz = 0;
3071
3072 /*
3073 * Check whether the current timer frequency needs changing.
3074 */
3075 uint32_t uHistMaxHz = 0;
3076 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3077 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3078 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3079 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3080 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3081 else if (uHistMaxHz)
3082 {
3083 /*
3084 * Reprogram it.
3085 */
3086 pCpu->Ppt.cChanges++;
3087 pCpu->Ppt.iTickHistorization = 0;
3088 pCpu->Ppt.uTimerHz = uHistMaxHz;
3089 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3090 pCpu->Ppt.cNsInterval = cNsInterval;
3091 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3092 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3093 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3094 / cNsInterval;
3095 else
3096 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3097 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3098
3099 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3100 RTTimerChangeInterval(pTimer, cNsInterval);
3101 }
3102 else
3103 {
3104 /*
3105 * Stop it.
3106 */
3107 pCpu->Ppt.fStarted = false;
3108 pCpu->Ppt.uTimerHz = 0;
3109 pCpu->Ppt.cNsInterval = 0;
3110 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3111
3112 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3113 RTTimerStop(pTimer);
3114 }
3115 }
3116 else
3117 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3118}
3119#endif /* GVMM_SCHED_WITH_PPT */
3120
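/*
 * Worked example for the cTicksHistoriziationInterval rounding used in the
 * callback above and in GVMMR0SchedUpdatePeriodicPreemptionTimer below
 * (illustrative only; GVMMHOSTCPU_PPT_HIST_INTERVAL_NS is assumed to be
 * 20 ms here, its real value is defined elsewhere in this file):
 *
 *      uHistMaxHz  = 2000 Hz
 *      cNsInterval = RT_NS_1SEC / 2000 = 500 000 ns
 *      cTicks      = (20 000 000 + 20 000 000 / 2 - 1) / 500 000 = 59
 *
 * so with a 2 kHz timer the Hz history would be advanced roughly every 59
 * ticks (about 30 ms with the assumed interval constant).
 */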
3121
3122/**
3123 * Updates the periodic preemption timer for the calling CPU.
3124 *
3125 * The caller must have disabled preemption!
3126 * The caller must check that the host can do high resolution timers.
3127 *
3128 * @param pGVM The global (ring-0) VM structure.
3129 * @param idHostCpu The current host CPU id.
3130 * @param uHz The desired frequency.
3131 */
3132VMMR0_INT_DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3133{
3134 NOREF(pGVM);
3135#ifdef GVMM_SCHED_WITH_PPT
3136 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3137 Assert(RTTimerCanDoHighResolution());
3138
3139 /*
3140 * Resolve the per CPU data.
3141 */
3142 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3143 PGVMM pGVMM = g_pGVMM;
3144 if ( !RT_VALID_PTR(pGVMM)
3145 || pGVMM->u32Magic != GVMM_MAGIC)
3146 return;
3147 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3148 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3149 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3150 && pCpu->idCpu == idHostCpu,
3151 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3152
3153 /*
3154 * Check whether we need to do anything about the timer.
3155 * We have to be a little bit careful since we might race the timer
3156 * callback here.
3157 */
3158 if (uHz > 16384)
3159 uHz = 16384; /** @todo add a query method for this! */
3160 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3161 && uHz >= pCpu->Ppt.uMinHz
3162 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3163 {
3164 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3165
3166 pCpu->Ppt.uDesiredHz = uHz;
3167 uint32_t cNsInterval = 0;
3168 if (!pCpu->Ppt.fStarted)
3169 {
3170 pCpu->Ppt.cStarts++;
3171 pCpu->Ppt.fStarted = true;
3172 pCpu->Ppt.fStarting = true;
3173 pCpu->Ppt.iTickHistorization = 0;
3174 pCpu->Ppt.uTimerHz = uHz;
3175 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3176 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3177 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3178 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3179 / cNsInterval;
3180 else
3181 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3182 }
3183
3184 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3185
3186 if (cNsInterval)
3187 {
3188 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3189 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3190 AssertRC(rc);
3191
3192 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3193 if (RT_FAILURE(rc))
3194 pCpu->Ppt.fStarted = false;
3195 pCpu->Ppt.fStarting = false;
3196 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3197 }
3198 }
3199#else /* !GVMM_SCHED_WITH_PPT */
3200 NOREF(idHostCpu); NOREF(uHz);
3201#endif /* !GVMM_SCHED_WITH_PPT */
3202}
3203
3204
3205/**
3206 * Calls @a pfnCallback for each VM in the system.
3207 *
3208 * This will enumerate the VMs while holding the global VM used list lock in
3209 * shared mode. So, only suitable for simple work. If more expensive work
3210 * needs doing, a different approach must be taken as using this API would
3211 * otherwise block VM creation and destruction.
3212 *
3213 * @returns VBox status code.
3214 * @param pfnCallback The callback function.
3215 * @param pvUser User argument to the callback.
3216 */
3217VMMR0_INT_DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3218{
3219 PGVMM pGVMM;
3220 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3221
3222 int rc = VINF_SUCCESS;
3223 GVMMR0_USED_SHARED_LOCK(pGVMM);
3224 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3225 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3226 i = pGVMM->aHandles[i].iNext, cLoops++)
3227 {
3228 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3229 if ( RT_VALID_PTR(pGVM)
3230 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3231 && pGVM->u32Magic == GVM_MAGIC)
3232 {
3233 rc = pfnCallback(pGVM, pvUser);
3234 if (rc != VINF_SUCCESS)
3235 break;
3236 }
3237
3238 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3239 }
3240 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3241 return rc;
3242}
3243
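/*
 * Illustrative sketch (not part of the original sources): a trivial
 * enumeration callback that counts the VMs visible to GVMMR0EnumVMs.  The
 * callback signature matches the pfnCallback(pGVM, pvUser) invocation above;
 * returning anything other than VINF_SUCCESS stops the enumeration.
 */
#if 0 /* example only, never compiled */
static DECLCALLBACK(int) exampleCountVMsCallback(PGVM pGVM, void *pvUser)
{
    RT_NOREF(pGVM);
    *(uint32_t *)pvUser += 1;   /* just count the VM */
    return VINF_SUCCESS;
}

static uint32_t exampleCountVMs(void)
{
    uint32_t cVMs = 0;
    GVMMR0EnumVMs(exampleCountVMsCallback, &cVMs);
    return cVMs;
}
#endif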
3244
3245/**
3246 * Retrieves the GVMM statistics visible to the caller.
3247 *
3248 * @returns VBox status code.
3249 *
3250 * @param pStats Where to put the statistics.
3251 * @param pSession The current session.
3252 * @param pGVM The GVM to obtain statistics for. Optional.
3253 */
3254VMMR0_INT_DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3255{
3256 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3257
3258 /*
3259 * Validate input.
3260 */
3261 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3262 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3263 pStats->cVMs = 0; /* (crash before taking the sem...) */
3264
3265 /*
3266 * Take the lock and get the VM statistics.
3267 */
3268 PGVMM pGVMM;
3269 if (pGVM)
3270 {
3271 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3272 if (RT_FAILURE(rc))
3273 return rc;
3274 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3275
3276 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3277 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3278 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3279 while (iCpu-- > 0)
3280 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3281 }
3282 else
3283 {
3284 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3285 RT_ZERO(pStats->SchedVM);
3286 RT_ZERO(pStats->aVCpus);
3287
3288 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3289 AssertRCReturn(rc, rc);
3290 }
3291
3292 /*
3293 * Enumerate the VMs and add the ones visible to the statistics.
3294 */
3295 pStats->cVMs = 0;
3296 pStats->cEMTs = 0;
3297 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3298
3299 for (unsigned i = pGVMM->iUsedHead;
3300 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3301 i = pGVMM->aHandles[i].iNext)
3302 {
3303 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3304 void *pvObj = pGVMM->aHandles[i].pvObj;
3305 if ( RT_VALID_PTR(pvObj)
3306 && RT_VALID_PTR(pOtherGVM)
3307 && pOtherGVM->u32Magic == GVM_MAGIC
3308 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3309 {
3310 pStats->cVMs++;
3311 pStats->cEMTs += pOtherGVM->cCpus;
3312
3313 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3314 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3315 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3316 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3317 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3318
3319 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3320 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3321 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3322
3323 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3324 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3325
3326 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3327 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3328 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3329 }
3330 }
3331
3332 /*
3333 * Copy out the per host CPU statistics.
3334 */
3335 uint32_t iDstCpu = 0;
3336 uint32_t cSrcCpus = pGVMM->cHostCpus;
3337 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3338 {
3339 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3340 {
3341 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3342 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3343#ifdef GVMM_SCHED_WITH_PPT
3344 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3345 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3346 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3347 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3348#else
3349 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3350 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3351 pStats->aHostCpus[iDstCpu].cChanges = 0;
3352 pStats->aHostCpus[iDstCpu].cStarts = 0;
3353#endif
3354 iDstCpu++;
3355 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3356 break;
3357 }
3358 }
3359 pStats->cHostCpus = iDstCpu;
3360
3361 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3362
3363 return VINF_SUCCESS;
3364}
3365
3366
3367/**
3368 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3369 *
3370 * @returns see GVMMR0QueryStatistics.
3371 * @param pGVM The global (ring-0) VM structure. Optional.
3372 * @param pReq Pointer to the request packet.
3373 * @param pSession The current session.
3374 */
3375VMMR0_INT_DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3376{
3377 /*
3378 * Validate input and pass it on.
3379 */
3380 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3381 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3382 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3383
3384 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3385}
3386
3387
3388/**
3389 * Resets the specified GVMM statistics.
3390 *
3391 * @returns VBox status code.
3392 *
3393 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3394 * @param pSession The current session.
3395 * @param pGVM The GVM to reset statistics for. Optional.
3396 */
3397VMMR0_INT_DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3398{
3399 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3400
3401 /*
3402 * Validate input.
3403 */
3404 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3405 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3406
3407 /*
3408 * Take the lock and get the VM statistics.
3409 */
3410 PGVMM pGVMM;
3411 if (pGVM)
3412 {
3413 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3414 if (RT_FAILURE(rc))
3415 return rc;
3416# define MAYBE_RESET_FIELD(field) \
3417 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3418 MAYBE_RESET_FIELD(cHaltCalls);
3419 MAYBE_RESET_FIELD(cHaltBlocking);
3420 MAYBE_RESET_FIELD(cHaltTimeouts);
3421 MAYBE_RESET_FIELD(cHaltNotBlocking);
3422 MAYBE_RESET_FIELD(cHaltWakeUps);
3423 MAYBE_RESET_FIELD(cWakeUpCalls);
3424 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3425 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3426 MAYBE_RESET_FIELD(cPokeCalls);
3427 MAYBE_RESET_FIELD(cPokeNotBusy);
3428 MAYBE_RESET_FIELD(cPollCalls);
3429 MAYBE_RESET_FIELD(cPollHalts);
3430 MAYBE_RESET_FIELD(cPollWakeUps);
3431# undef MAYBE_RESET_FIELD
3432 }
3433 else
3434 {
3435 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3436
3437 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3438 AssertRCReturn(rc, rc);
3439 }
3440
3441 /*
3442 * Enumerate the VMs and add the ones visible to the statistics.
3443 */
3444 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3445 {
3446 for (unsigned i = pGVMM->iUsedHead;
3447 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3448 i = pGVMM->aHandles[i].iNext)
3449 {
3450 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3451 void *pvObj = pGVMM->aHandles[i].pvObj;
3452 if ( RT_VALID_PTR(pvObj)
3453 && RT_VALID_PTR(pOtherGVM)
3454 && pOtherGVM->u32Magic == GVM_MAGIC
3455 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3456 {
3457# define MAYBE_RESET_FIELD(field) \
3458 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3459 MAYBE_RESET_FIELD(cHaltCalls);
3460 MAYBE_RESET_FIELD(cHaltBlocking);
3461 MAYBE_RESET_FIELD(cHaltTimeouts);
3462 MAYBE_RESET_FIELD(cHaltNotBlocking);
3463 MAYBE_RESET_FIELD(cHaltWakeUps);
3464 MAYBE_RESET_FIELD(cWakeUpCalls);
3465 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3466 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3467 MAYBE_RESET_FIELD(cPokeCalls);
3468 MAYBE_RESET_FIELD(cPokeNotBusy);
3469 MAYBE_RESET_FIELD(cPollCalls);
3470 MAYBE_RESET_FIELD(cPollHalts);
3471 MAYBE_RESET_FIELD(cPollWakeUps);
3472# undef MAYBE_RESET_FIELD
3473 }
3474 }
3475 }
3476
3477 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3478
3479 return VINF_SUCCESS;
3480}
3481
3482
3483/**
3484 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3485 *
3486 * @returns see GVMMR0ResetStatistics.
3487 * @param pGVM The global (ring-0) VM structure. Optional.
3488 * @param pReq Pointer to the request packet.
3489 * @param pSession The current session.
3490 */
3491VMMR0_INT_DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3492{
3493 /*
3494 * Validate input and pass it on.
3495 */
3496 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3497 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3498 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3499
3500 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3501}
3502