VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp

Last change on this file was 106920, checked in by vboxsync, 4 weeks ago

/Config.kmk,Devices/Makefile.kmk,VMM/*: Introducing VBOX_WITH_MINIMAL_R0 for win.arm64 and similar build configurations not really needing much from VMMR0.r0/VBoxSup.sys. jiraref:VBP-1449

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 122.8 KB
1/* $Id: GVMMR0.cpp 106920 2024-11-11 01:09:38Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/** @page pg_gvmm GVMM - The Global VM Manager
30 *
31 * The Global VM Manager lives in ring-0. Its main function at the moment is
32 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
33 * each of them, and assign them unique identifiers (so GMM can track page
34 * owners). The GVMM also manages some of the host CPU resources, like the
35 * periodic preemption timer.
36 *
37 * The GVMM will create a ring-0 object for each VM when it is registered; this
38 * is both for session cleanup purposes and for having a point where it is
39 * possible to implement usage policies later (in SUPR0ObjRegister).
40 *
41 *
42 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
43 *
44 * On systems that sport a high resolution kernel timer API, we use per-CPU
45 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
46 * execution. The timer frequency is calculated by taking the max
47 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
48 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
49 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
50 *
51 * The TMCalcHostTimerFrequency() part of things takes the max
52 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
53 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
54 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
55 * AMD-V and raw-mode execution environments.
56 */
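/*
 * A minimal, illustrative sketch (not taken from the real code) of how the
 * effective timer frequency can be derived from the per-CPU history kept in
 * GVMMHOSTCPU::Ppt further down in this file: eight aHzHistory entries of
 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS (20 ms) each is where the ~160 ms window
 * above comes from.  The real decision logic lives in
 * gvmmR0SchedPeriodicPreemptionTimerCallback() and also factors in Ppt.uMinHz
 * and some hysteresis.
 *
 * @code
 *  static uint32_t gvmmR0ExamplePickTimerHz(uint32_t const *pauHzHistory, size_t cEntries,
 *                                           uint32_t uCurrentDesiredHz)
 *  {
 *      uint32_t uMaxHz = uCurrentDesiredHz;    // the not-yet-historicized value counts too
 *      for (size_t i = 0; i < cEntries; i++)   // max over the ~160 ms history window
 *          if (pauHzHistory[i] > uMaxHz)
 *              uMaxHz = pauHzHistory[i];
 *      return uMaxHz;
 *  }
 * @endcode
 */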
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP LOG_GROUP_GVMM
63#include <VBox/vmm/gvmm.h>
64#include <VBox/vmm/gmm.h>
65#include "GVMMR0Internal.h"
66#include <VBox/vmm/dbgf.h>
67#include <VBox/vmm/iom.h>
68#include <VBox/vmm/pdm.h>
69#include <VBox/vmm/pgm.h>
70#include <VBox/vmm/vmm.h>
71#ifdef VBOX_WITH_NEM_R0
72# include <VBox/vmm/nem.h>
73#endif
74#include <VBox/vmm/vmcpuset.h>
75#include <VBox/vmm/vmcc.h>
76#include <VBox/param.h>
77#include <VBox/err.h>
78
79#include <iprt/asm.h>
80#ifdef RT_ARCH_AMD64
81# include <iprt/asm-amd64-x86.h>
82#endif
83#include <iprt/critsect.h>
84#include <iprt/mem.h>
85#include <iprt/semaphore.h>
86#include <iprt/time.h>
87#include <VBox/log.h>
88#include <iprt/thread.h>
89#include <iprt/process.h>
90#include <iprt/param.h>
91#include <iprt/string.h>
92#include <iprt/assert.h>
93#include <iprt/mem.h>
94#include <iprt/memobj.h>
95#include <iprt/mp.h>
96#include <iprt/cpuset.h>
97#include <iprt/spinlock.h>
98#include <iprt/timer.h>
99
100#include "dtrace/VBoxVMM.h"
101
102
103/*********************************************************************************************************************************
104* Defined Constants And Macros *
105*********************************************************************************************************************************/
106#if (defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)) \
107 && !defined(VBOX_WITH_MINIMAL_R0)
108/** Define this to enable the periodic preemption timer. */
109# define GVMM_SCHED_WITH_PPT
110#endif
111
112#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
113/** Define this to enable the per-EMT high resolution wakeup timers. */
114# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
115#endif
116
117
118/** Special value that GVMMR0DeregisterVCpu sets. */
119#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
120AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
121
122
123/*********************************************************************************************************************************
124* Structures and Typedefs *
125*********************************************************************************************************************************/
126
127/**
128 * Global VM handle.
129 */
130typedef struct GVMHANDLE
131{
132 /** The index of the next handle in the list (free or used). (0 is nil.) */
133 uint16_t volatile iNext;
134 /** Our own index / handle value. */
135 uint16_t iSelf;
136 /** The process ID of the handle owner.
137 * This is used for access checks. */
138 RTPROCESS ProcId;
139 /** The pointer to the ring-0 only (aka global) VM structure. */
140 PGVM pGVM;
141 /** The virtual machine object. */
142 void *pvObj;
143 /** The session this VM is associated with. */
144 PSUPDRVSESSION pSession;
145 /** The ring-0 handle of the EMT0 thread.
146 * This is used for ownership checks as well as looking up a VM handle by thread
147 * at times like assertions. */
148 RTNATIVETHREAD hEMT0;
149} GVMHANDLE;
150/** Pointer to a global VM handle. */
151typedef GVMHANDLE *PGVMHANDLE;
152
153/** Number of GVM handles (including the NIL handle). */
154#if HC_ARCH_BITS == 64
155# define GVMM_MAX_HANDLES 8192
156#else
157# define GVMM_MAX_HANDLES 128
158#endif
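/*
 * A minimal sketch of how a handle moves from the free chain to the used chain
 * (entry 0 is the nil handle and terminates both chains).  The real code in
 * GVMMR0CreateVM below does this under the exclusive 'used' lock and also
 * registers a SUPDRV object for session cleanup; this helper is illustration
 * only and not part of the actual implementation.
 *
 * @code
 *  static uint16_t gvmmR0ExampleAllocHandle(PGVMM pGVMM)
 *  {
 *      uint16_t const iHandle = pGVMM->iFreeHead;      // 0 means the free chain is empty
 *      if (iHandle)
 *      {
 *          PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
 *          pGVMM->iFreeHead   = pHandle->iNext;        // unlink from the free chain
 *          pHandle->iNext     = pGVMM->iUsedHead;      // push onto the used chain
 *          pGVMM->iUsedHead   = iHandle;
 *          pGVMM->cVMs++;
 *      }
 *      return iHandle;
 *  }
 * @endcode
 */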
159
160/**
161 * Per host CPU GVMM data.
162 */
163typedef struct GVMMHOSTCPU
164{
165 /** Magic number (GVMMHOSTCPU_MAGIC). */
166 uint32_t volatile u32Magic;
167 /** The CPU ID. */
168 RTCPUID idCpu;
169 /** The CPU set index. */
170 uint32_t idxCpuSet;
171
172#ifdef GVMM_SCHED_WITH_PPT
173 /** Periodic preemption timer data. */
174 struct
175 {
176 /** The handle to the periodic preemption timer. */
177 PRTTIMER pTimer;
178 /** Spinlock protecting the data below. */
179 RTSPINLOCK hSpinlock;
180 /** The smallest Hz that we need to care about. (static) */
181 uint32_t uMinHz;
182 /** The number of ticks between each historization. */
183 uint32_t cTicksHistoriziationInterval;
184 /** The current historization tick (counting up to
185 * cTicksHistoriziationInterval and then resetting). */
186 uint32_t iTickHistorization;
187 /** The current timer interval. This is set to 0 when inactive. */
188 uint32_t cNsInterval;
189 /** The current timer frequency. This is set to 0 when inactive. */
190 uint32_t uTimerHz;
191 /** The current max frequency reported by the EMTs.
192 * This gets historicized and reset by the timer callback. This is
193 * read without holding the spinlock, so needs atomic updating. */
194 uint32_t volatile uDesiredHz;
195 /** Whether the timer was started or not. */
196 bool volatile fStarted;
197 /** Set if we're starting the timer. */
198 bool volatile fStarting;
199 /** The index of the next history entry (mod it). */
200 uint32_t iHzHistory;
201 /** Historicized uDesiredHz values. The array wraps around, new entries
202 * are added at iHzHistory. This is updated approximately every
203 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
204 uint32_t aHzHistory[8];
205 /** Statistics counter for recording the number of interval changes. */
206 uint32_t cChanges;
207 /** Statistics counter for recording the number of timer starts. */
208 uint32_t cStarts;
209 } Ppt;
210#endif /* GVMM_SCHED_WITH_PPT */
211
212} GVMMHOSTCPU;
213/** Pointer to the per host CPU GVMM data. */
214typedef GVMMHOSTCPU *PGVMMHOSTCPU;
215/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
216#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
217/** The interval one history entry should cover (approximately), given in
218 * nanoseconds. */
219#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
220
221
222/**
223 * The GVMM instance data.
224 */
225typedef struct GVMM
226{
227 /** Eyecatcher / magic. */
228 uint32_t u32Magic;
229 /** The index of the head of the free handle chain. (0 is nil.) */
230 uint16_t volatile iFreeHead;
231 /** The index of the head of the active handle chain. (0 is nil.) */
232 uint16_t volatile iUsedHead;
233 /** The number of VMs. */
234 uint16_t volatile cVMs;
235 /** Alignment padding. */
236 uint16_t u16Reserved;
237 /** The number of EMTs. */
238 uint32_t volatile cEMTs;
239 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
240 uint32_t volatile cHaltedEMTs;
241 /** Mini lock for restricting early wake-ups to one thread. */
242 bool volatile fDoingEarlyWakeUps;
243 bool afPadding[3]; /**< explicit alignment padding. */
244 /** When the next halted or sleeping EMT will wake up.
245 * This is set to 0 when it needs recalculating and to UINT64_MAX when
246 * there are no halted or sleeping EMTs in the GVMM. */
247 uint64_t uNsNextEmtWakeup;
248 /** The lock used to serialize VM creation, destruction and associated events that
249 * aren't performance critical. Owners may acquire the list lock. */
250 RTCRITSECT CreateDestroyLock;
251 /** The lock used to serialize used list updates and accesses.
252 * This indirectly includes scheduling since the scheduler will have to walk the
253 * used list to examine running VMs. Owners may not acquire any other locks. */
254 RTCRITSECTRW UsedLock;
255 /** The handle array.
256 * The size of this array defines the maximum number of currently running VMs.
257 * The first entry is unused as it represents the NIL handle. */
258 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
259
260 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
261 * The number of EMTs that means we no longer consider ourselves alone on a
262 * CPU/Core.
263 */
264 uint32_t cEMTsMeansCompany;
265 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
266 * The minimum sleep time for when we're alone, in nanoseconds.
267 */
268 uint32_t nsMinSleepAlone;
269 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
270 * The minimum sleep time for when we've got company, in nanoseconds.
271 */
272 uint32_t nsMinSleepCompany;
273#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
274 /** @gcfgm{/GVMM/MinSleepWithHrWakeUp,32-bit,0, 100000000, 5000, ns}
275 * The minimum sleep time for when we've got a high-resolution wake-up timer, in
276 * nanoseconds.
277 */
278 uint32_t nsMinSleepWithHrTimer;
279#endif
280 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
281 * The limit for the first round of early wake-ups, given in nanoseconds.
282 */
283 uint32_t nsEarlyWakeUp1;
284 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
285 * The limit for the second round of early wake-ups, given in nanoseconds.
286 */
287 uint32_t nsEarlyWakeUp2;
288
289 /** Set if we're doing early wake-ups.
290 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
291 bool volatile fDoEarlyWakeUps;
292
293 /** The number of entries in the host CPU array (aHostCpus). */
294 uint32_t cHostCpus;
295 /** Per host CPU data (variable length). */
296 GVMMHOSTCPU aHostCpus[1];
297} GVMM;
298AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
299AssertCompileMemberAlignment(GVMM, UsedLock, 8);
300AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
301/** Pointer to the GVMM instance data. */
302typedef GVMM *PGVMM;
303
304/** The GVMM::u32Magic value (Charlie Haden). */
305#define GVMM_MAGIC UINT32_C(0x19370806)
306
307
308
309/*********************************************************************************************************************************
310* Global Variables *
311*********************************************************************************************************************************/
312/** Pointer to the GVMM instance data.
313 * (Just my general dislike for global variables.) */
314static PGVMM g_pGVMM = NULL;
315
316/** Macro for obtaining and validating the g_pGVMM pointer.
317 * On failure it will return from the invoking function with the specified return value.
318 *
319 * @param pGVMM The name of the pGVMM variable.
320 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
321 * status codes.
322 */
323#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
324 do { \
325 (pGVMM) = g_pGVMM;\
326 AssertPtrReturn((pGVMM), (rc)); \
327 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
328 } while (0)
329
330/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
331 * On failure it will return from the invoking function.
332 *
333 * @param pGVMM The name of the pGVMM variable.
334 */
335#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
336 do { \
337 (pGVMM) = g_pGVMM;\
338 AssertPtrReturnVoid((pGVMM)); \
339 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
340 } while (0)
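/*
 * Typical usage of the two macros above, following the pattern of
 * GVMMR0SetConfig and the other entry points further down: declare a local
 * pGVMM, let the macro validate the global instance (returning on failure),
 * then use it.  The function below is a hypothetical example, not part of the
 * actual API.
 *
 * @code
 *  static uint32_t gvmmR0ExampleGetVMCount(void)
 *  {
 *      PGVMM pGVMM;
 *      GVMM_GET_VALID_INSTANCE(pGVMM, 0);  // 0 is returned on a missing or corrupt instance
 *      return pGVMM->cVMs;
 *  }
 * @endcode
 */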
341
342
343/*********************************************************************************************************************************
344* Internal Functions *
345*********************************************************************************************************************************/
346static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
347static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
348static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
349static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
350
351#ifdef GVMM_SCHED_WITH_PPT
352static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
353#endif
354#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
355static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
356#endif
357
358
359/**
360 * Initializes the GVMM.
361 *
362 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
363 *
364 * @returns VBox status code.
365 */
366GVMMR0DECL(int) GVMMR0Init(void)
367{
368 LogFlow(("GVMMR0Init:\n"));
369
370 /*
371 * Allocate and initialize the instance data.
372 */
373 uint32_t cHostCpus = RTMpGetArraySize();
374 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
375
376 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
377 if (!pGVMM)
378 return VERR_NO_MEMORY;
379 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
380 "GVMM-CreateDestroyLock");
381 if (RT_SUCCESS(rc))
382 {
383 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
384 if (RT_SUCCESS(rc))
385 {
386 pGVMM->u32Magic = GVMM_MAGIC;
387 pGVMM->iUsedHead = 0;
388 pGVMM->iFreeHead = 1;
389
390 /* the nil handle */
391 pGVMM->aHandles[0].iSelf = 0;
392 pGVMM->aHandles[0].iNext = 0;
393
394 /* the tail */
395 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
396 pGVMM->aHandles[i].iSelf = i;
397 pGVMM->aHandles[i].iNext = 0; /* nil */
398
399 /* the rest */
400 while (i-- > 1)
401 {
402 pGVMM->aHandles[i].iSelf = i;
403 pGVMM->aHandles[i].iNext = i + 1;
404 }
405
406 /* The default configuration values. */
407 uint32_t cNsResolution = RTSemEventMultiGetResolution();
408 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
409 if (cNsResolution >= 5*RT_NS_100US)
410 {
411 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
412 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
413 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
414 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
415 }
416 else if (cNsResolution > RT_NS_100US)
417 {
418 pGVMM->nsMinSleepAlone = cNsResolution / 2;
419 pGVMM->nsMinSleepCompany = cNsResolution / 4;
420 pGVMM->nsEarlyWakeUp1 = 0;
421 pGVMM->nsEarlyWakeUp2 = 0;
422 }
423 else
424 {
425 pGVMM->nsMinSleepAlone = 2000;
426 pGVMM->nsMinSleepCompany = 2000;
427 pGVMM->nsEarlyWakeUp1 = 0;
428 pGVMM->nsEarlyWakeUp2 = 0;
429 }
430#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
431 pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
432#endif
433 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
434
435 /* The host CPU data. */
436 pGVMM->cHostCpus = cHostCpus;
437 uint32_t iCpu = cHostCpus;
438 RTCPUSET PossibleSet;
439 RTMpGetSet(&PossibleSet);
440 while (iCpu-- > 0)
441 {
442 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
443#ifdef GVMM_SCHED_WITH_PPT
444 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
445 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
446 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
447 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
448 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
449 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
450 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
451 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
452 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
453 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
454 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
455 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
456#endif
457
458 if (RTCpuSetIsMember(&PossibleSet, iCpu))
459 {
460 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
461 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
462
463#ifdef GVMM_SCHED_WITH_PPT
464 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
465 50*1000*1000 /* whatever */,
466 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
467 gvmmR0SchedPeriodicPreemptionTimerCallback,
468 &pGVMM->aHostCpus[iCpu]);
469 if (RT_SUCCESS(rc))
470 {
471 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
472 if (RT_FAILURE(rc))
473 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
474 }
475 else
476 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
477 if (RT_FAILURE(rc))
478 {
479 while (iCpu < cHostCpus)
480 {
481 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
482 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
483 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
484 iCpu++;
485 }
486 break;
487 }
488#endif
489 }
490 else
491 {
492 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
493 pGVMM->aHostCpus[iCpu].u32Magic = 0;
494 }
495 }
496 if (RT_SUCCESS(rc))
497 {
498 g_pGVMM = pGVMM;
499 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
500 return VINF_SUCCESS;
501 }
502
503 /* bail out. */
504 RTCritSectRwDelete(&pGVMM->UsedLock);
505 }
506 else
507 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
508 RTCritSectDelete(&pGVMM->CreateDestroyLock);
509 }
510 else
511 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
512
513 RTMemFree(pGVMM);
514 return rc;
515}
516
517
518/**
519 * Terminates the GVMM.
520 *
521 * This is called while owning the loader semaphore (see supdrvLdrFree()).
522 * And unless something is wrong, there should be absolutely no VMs
523 * registered at this point.
524 */
525GVMMR0DECL(void) GVMMR0Term(void)
526{
527 LogFlow(("GVMMR0Term:\n"));
528
529 PGVMM pGVMM = g_pGVMM;
530 g_pGVMM = NULL;
531 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
532 {
533 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
534 return;
535 }
536
537 /*
538 * First of all, stop all active timers.
539 */
540 uint32_t cActiveTimers = 0;
541 uint32_t iCpu = pGVMM->cHostCpus;
542 while (iCpu-- > 0)
543 {
544 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
545#ifdef GVMM_SCHED_WITH_PPT
546 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
547 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
548 cActiveTimers++;
549#endif
550 }
551 if (cActiveTimers)
552 RTThreadSleep(1); /* fudge */
553
554 /*
555 * Invalidate the instance data and free resources.
556 */
557 pGVMM->u32Magic = ~GVMM_MAGIC;
558 RTCritSectRwDelete(&pGVMM->UsedLock);
559 RTCritSectDelete(&pGVMM->CreateDestroyLock);
560
561 pGVMM->iFreeHead = 0;
562 if (pGVMM->iUsedHead)
563 {
564 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
565 pGVMM->iUsedHead = 0;
566 }
567
568#ifdef GVMM_SCHED_WITH_PPT
569 iCpu = pGVMM->cHostCpus;
570 while (iCpu-- > 0)
571 {
572 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
573 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
574 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
575 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
576 }
577#endif
578
579 RTMemFree(pGVMM);
580}
581
582
583/**
584 * A quick hack for setting global config values.
585 *
586 * @returns VBox status code.
587 *
588 * @param pSession The session handle. Used for authentication.
589 * @param pszName The variable name.
590 * @param u64Value The new value.
591 */
592GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
593{
594 /*
595 * Validate input.
596 */
597 PGVMM pGVMM;
598 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
599 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
600 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 {
611 if (u64Value <= UINT32_MAX)
612 pGVMM->cEMTsMeansCompany = u64Value;
613 else
614 rc = VERR_OUT_OF_RANGE;
615 }
616 else if (!strcmp(pszName, "MinSleepAlone"))
617 {
618 if (u64Value <= RT_NS_100MS)
619 pGVMM->nsMinSleepAlone = u64Value;
620 else
621 rc = VERR_OUT_OF_RANGE;
622 }
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 {
625 if (u64Value <= RT_NS_100MS)
626 pGVMM->nsMinSleepCompany = u64Value;
627 else
628 rc = VERR_OUT_OF_RANGE;
629 }
630#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
631 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 pGVMM->nsMinSleepWithHrTimer = u64Value;
635 else
636 rc = VERR_OUT_OF_RANGE;
637 }
638#endif
639 else if (!strcmp(pszName, "EarlyWakeUp1"))
640 {
641 if (u64Value <= RT_NS_100MS)
642 {
643 pGVMM->nsEarlyWakeUp1 = u64Value;
644 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
645 }
646 else
647 rc = VERR_OUT_OF_RANGE;
648 }
649 else if (!strcmp(pszName, "EarlyWakeUp2"))
650 {
651 if (u64Value <= RT_NS_100MS)
652 {
653 pGVMM->nsEarlyWakeUp2 = u64Value;
654 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
655 }
656 else
657 rc = VERR_OUT_OF_RANGE;
658 }
659 else
660 rc = VERR_CFGM_VALUE_NOT_FOUND;
661 return rc;
662}
663
664
665/**
666 * A quick hack for getting global config values.
667 *
668 * @returns VBox status code.
669 *
670 * @param pSession The session handle. Used for authentication.
671 * @param pszName The variable name.
672 * @param pu64Value Where to return the value.
673 */
674GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
675{
676 /*
677 * Validate input.
678 */
679 PGVMM pGVMM;
680 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
681 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
682 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
683 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
684
685 /*
686 * String switch time!
687 */
688 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
689 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
690 int rc = VINF_SUCCESS;
691 pszName += sizeof("/GVMM/") - 1;
692 if (!strcmp(pszName, "cEMTsMeansCompany"))
693 *pu64Value = pGVMM->cEMTsMeansCompany;
694 else if (!strcmp(pszName, "MinSleepAlone"))
695 *pu64Value = pGVMM->nsMinSleepAlone;
696 else if (!strcmp(pszName, "MinSleepCompany"))
697 *pu64Value = pGVMM->nsMinSleepCompany;
698#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
699 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
700 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
701#endif
702 else if (!strcmp(pszName, "EarlyWakeUp1"))
703 *pu64Value = pGVMM->nsEarlyWakeUp1;
704 else if (!strcmp(pszName, "EarlyWakeUp2"))
705 *pu64Value = pGVMM->nsEarlyWakeUp2;
706 else
707 rc = VERR_CFGM_VALUE_NOT_FOUND;
708 return rc;
709}
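/*
 * Usage sketch for the two config accessors above: names carry the "/GVMM/"
 * prefix and the nanosecond values are range checked against RT_NS_100MS.  The
 * session pointer is assumed to be a valid support driver session obtained
 * elsewhere; the helper below is illustration only.
 *
 * @code
 *  static int gvmmR0ExampleTuneScheduler(PSUPDRVSESSION pSession)
 *  {
 *      int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000); // 0.5 ms
 *      if (RT_SUCCESS(rc))
 *      {
 *          uint64_t u64Value = 0;
 *          rc = GVMMR0QueryConfig(pSession, "/GVMM/MinSleepAlone", &u64Value);
 *          AssertStmt(RT_FAILURE(rc) || u64Value == 500000, rc = VERR_INTERNAL_ERROR);
 *      }
 *      return rc;
 *  }
 * @endcode
 */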
710
711
712/**
713 * Acquire the 'used' lock in shared mode.
714 *
715 * This prevents destruction of the VM while we're in ring-0.
716 *
717 * @returns IPRT status code, see RTSemFastMutexRequest.
718 * @param a_pGVMM The GVMM instance data.
719 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
720 */
721#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
722
723/**
724 * Release the 'used' lock when owning it in shared mode.
725 *
726 * @returns IPRT status code, see RTSemFastMutexRequest.
727 * @param a_pGVMM The GVMM instance data.
728 * @sa GVMMR0_USED_SHARED_LOCK
729 */
730#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
731
732/**
733 * Acquire the 'used' lock in exclusive mode.
734 *
735 * Only use this function when making changes to the used list.
736 *
737 * @returns IPRT status code, see RTSemFastMutexRequest.
738 * @param a_pGVMM The GVMM instance data.
739 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
740 */
741#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
742
743/**
744 * Release the 'used' lock when owning it in exclusive mode.
745 *
746 * @returns IPRT status code, see RTSemFastMutexRelease.
747 * @param a_pGVMM The GVMM instance data.
748 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
749 */
750#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
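/*
 * A sketch of the read-side pattern these macros support: enter the 'used'
 * lock shared, walk the used list via iUsedHead/iNext, and leave again.  The
 * scheduler code later in this file follows this shape; the error handling and
 * the anti-loop paranoia of the real code are omitted here.
 *
 * @code
 *  static uint32_t gvmmR0ExampleCountUsedHandles(PGVMM pGVMM)
 *  {
 *      uint32_t cHandles = 0;
 *      GVMMR0_USED_SHARED_LOCK(pGVMM);
 *      for (uint16_t i = pGVMM->iUsedHead; i != 0 && i < RT_ELEMENTS(pGVMM->aHandles); i = pGVMM->aHandles[i].iNext)
 *          cHandles++;
 *      GVMMR0_USED_SHARED_UNLOCK(pGVMM);
 *      return cHandles;
 *  }
 * @endcode
 */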
751
752
753/**
754 * Try acquire the 'create & destroy' lock.
755 *
756 * @returns IPRT status code, see RTSemFastMutexRequest.
757 * @param pGVMM The GVMM instance data.
758 */
759DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
760{
761 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
762 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
763 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
764 return rc;
765}
766
767
768/**
769 * Release the 'create & destroy' lock.
770 *
771 * @returns IPRT status code, see RTSemFastMutexRequest.
772 * @param pGVMM The GVMM instance data.
773 */
774DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
775{
776 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
777 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
778 AssertRC(rc);
779 return rc;
780}
781
782
783/**
784 * Request wrapper for the GVMMR0CreateVM API.
785 *
786 * @returns VBox status code.
787 * @param pReq The request buffer.
788 * @param pSession The session handle. The VM will be associated with this.
789 */
790GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
791{
792 /*
793 * Validate the request.
794 */
795 if (!RT_VALID_PTR(pReq))
796 return VERR_INVALID_POINTER;
797 if (pReq->Hdr.cbReq != sizeof(*pReq))
798 return VERR_INVALID_PARAMETER;
799 if (pReq->pSession != pSession)
800 return VERR_INVALID_POINTER;
801
802 /*
803 * Execute it.
804 */
805 PGVM pGVM;
806 pReq->pVMR0 = NULL;
807 pReq->pVMR3 = NIL_RTR3PTR;
808 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
809 if (RT_SUCCESS(rc))
810 {
811 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
812 pReq->pVMR3 = pGVM->pVMR3;
813 }
814 return rc;
815}
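/*
 * A rough sketch of the ring-3 side of this request, assuming the usual
 * SUPR3CallVMMR0Ex path, the VMMR0_DO_GVMM_CREATE_VM operation and the
 * SUPVMMR0REQHDR_MAGIC constant; the authoritative version is VMR3Create in
 * VMM/VMMR3/VM.cpp.  On success pVMR3 holds the ring-3 mapping of the newly
 * created VM structure.
 *
 * @code
 *  static int vmR3ExampleCreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
 *  {
 *      GVMMCREATEVMREQ CreateVMReq;
 *      CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);
 *      CreateVMReq.pSession     = pSession;
 *      CreateVMReq.cCpus        = cCpus;           // 1..VMM_MAX_CPU_COUNT
 *      CreateVMReq.pVMR0        = NIL_RTR0PTR;     // out
 *      CreateVMReq.pVMR3        = NIL_RTR3PTR;     // out
 *      int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &CreateVMReq.Hdr);
 *      if (RT_SUCCESS(rc))
 *          *ppVM = CreateVMReq.pVMR3;              // ring-3 view of the new VM
 *      return rc;
 *  }
 * @endcode
 */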
816
817
818/**
819 * Allocates the VM structure and registers it with GVM.
820 *
821 * The caller will become the VM owner and thereby the EMT.
822 *
823 * @returns VBox status code.
824 * @param pSession The support driver session.
825 * @param cCpus Number of virtual CPUs for the new VM.
826 * @param ppGVM Where to store the pointer to the VM structure.
827 *
828 * @thread EMT.
829 */
830GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
831{
832 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
833 PGVMM pGVMM;
834 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
835
836 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
837 *ppGVM = NULL;
838
839 if ( cCpus == 0
840 || cCpus > VMM_MAX_CPU_COUNT)
841 return VERR_INVALID_PARAMETER;
842
843 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
844 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
845 RTPROCESS ProcId = RTProcSelf();
846 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
847
848 /*
849 * The whole allocation process is protected by the lock.
850 */
851 int rc = gvmmR0CreateDestroyLock(pGVMM);
852 AssertRCReturn(rc, rc);
853
854 /*
855 * Only one VM per session.
856 */
857 if (SUPR0GetSessionVM(pSession) != NULL)
858 {
859 gvmmR0CreateDestroyUnlock(pGVMM);
860 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
861 return VERR_ALREADY_EXISTS;
862 }
863
864 /*
865 * Allocate a handle first so we don't waste resources unnecessarily.
866 */
867 uint16_t iHandle = pGVMM->iFreeHead;
868 if (iHandle)
869 {
870 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
871
872 /* consistency checks, a bit paranoid as always. */
873 if ( !pHandle->pGVM
874 && !pHandle->pvObj
875 && pHandle->iSelf == iHandle)
876 {
877 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
878 if (pHandle->pvObj)
879 {
880 /*
881 * Move the handle from the free to used list and perform permission checks.
882 */
883 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
884 AssertRC(rc);
885
886 pGVMM->iFreeHead = pHandle->iNext;
887 pHandle->iNext = pGVMM->iUsedHead;
888 pGVMM->iUsedHead = iHandle;
889 pGVMM->cVMs++;
890
891 pHandle->pGVM = NULL;
892 pHandle->pSession = pSession;
893 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
894 pHandle->ProcId = NIL_RTPROCESS;
895
896 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
897
898 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
899 if (RT_SUCCESS(rc))
900 {
901 /*
902 * Allocate memory for the VM structure (combined VM + GVM).
903 */
904 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
905 const uint32_t cPages = RT_ALIGN_32(cbVM, HOST_PAGE_SIZE) >> HOST_PAGE_SHIFT;
906 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
907 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << HOST_PAGE_SHIFT, false /* fExecutable */);
908 if (RT_SUCCESS(rc))
909 {
910 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
911 AssertPtr(pGVM);
912
913 /*
914 * Initialise the structure.
915 */
916 RT_BZERO(pGVM, cPages << HOST_PAGE_SHIFT);
917 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
918 pGVM->gvmm.s.VMMemObj = hVMMemObj;
919#ifndef VBOX_WITH_MINIMAL_R0
920 rc = GMMR0InitPerVMData(pGVM);
921 int rc2 = PGMR0InitPerVMData(pGVM, hVMMemObj);
922#else
923 int rc2 = VINF_SUCCESS;
924#endif
925 int rc3 = VMMR0InitPerVMData(pGVM);
926#ifndef VBOX_WITH_MINIMAL_R0
927 CPUMR0InitPerVMData(pGVM);
928 DBGFR0InitPerVMData(pGVM);
929 PDMR0InitPerVMData(pGVM);
930 IOMR0InitPerVMData(pGVM);
931 TMR0InitPerVMData(pGVM);
932#endif
933 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
934 {
935 /*
936 * Allocate page array.
937 * This currently has to be made available to ring-3, but this should change eventually.
938 */
939 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
940 if (RT_SUCCESS(rc))
941 {
942 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
943 for (uint32_t iPage = 0; iPage < cPages; iPage++)
944 {
945 paPages[iPage].uReserved = 0;
946 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
947 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
948 }
949
950 /*
951 * Map the page array, VM and VMCPU structures into ring-3.
952 */
953 AssertCompileSizeAlignment(VM, HOST_PAGE_SIZE);
954 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
955 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
956 0 /*offSub*/, sizeof(VM));
957 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
958 {
959 AssertCompileSizeAlignment(VMCPU, HOST_PAGE_SIZE);
960 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
961 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
962 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
963 }
964 if (RT_SUCCESS(rc))
965 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
966 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
967 NIL_RTR0PROCESS);
968 if (RT_SUCCESS(rc))
969 {
970 /*
971 * Initialize all the VM pointers.
972 */
973 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
974 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
975
976 for (VMCPUID i = 0; i < cCpus; i++)
977 {
978 pGVM->aCpus[i].pVMR0 = pGVM;
979 pGVM->aCpus[i].pVMR3 = pVMR3;
980 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
981 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
982 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
983 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
984 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
985 }
986
987 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
988 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
989 ("%p\n", pGVM->paVMPagesR3));
990
991#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
992 /*
993 * Create the high resolution wake-up timer for EMT 0, ignore failures.
994 */
995 if (RTTimerCanDoHighResolution())
996 {
997 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
998 0 /*one-shot, no interval*/,
999 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
1000 &pGVM->aCpus[0]);
1001 if (RT_FAILURE(rc4))
1002 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
1003 }
1004#endif
1005
1006 /*
1007 * Complete the handle - take the UsedLock sem just to be careful.
1008 */
1009 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1010 AssertRC(rc);
1011
1012 pHandle->pGVM = pGVM;
1013 pHandle->hEMT0 = hEMT0;
1014 pHandle->ProcId = ProcId;
1015 pGVM->pVMR3 = pVMR3;
1016 pGVM->pVMR3Unsafe = pVMR3;
1017 pGVM->aCpus[0].hEMT = hEMT0;
1018 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
1019 pGVM->aCpus[0].cEmtHashCollisions = 0;
1020 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1021 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1022 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1023 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1024 pGVMM->cEMTs += cCpus;
1025
1026 /* Associate it with the session and create the context hook for EMT0. */
1027 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1028 if (RT_SUCCESS(rc))
1029 {
1030 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1031 if (RT_SUCCESS(rc))
1032 {
1033 /*
1034 * Done!
1035 */
1036 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1037
1038 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1039 gvmmR0CreateDestroyUnlock(pGVMM);
1040
1041#ifndef VBOX_WITH_MINIMAL_R0
1042 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1043#endif
1044
1045 *ppGVM = pGVM;
1046 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1047 return VINF_SUCCESS;
1048 }
1049
1050 SUPR0SetSessionVM(pSession, NULL, NULL);
1051 }
1052 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1053 }
1054
1055 /* Cleanup mappings. */
1056 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1057 {
1058 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1059 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1060 }
1061 for (VMCPUID i = 0; i < cCpus; i++)
1062 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1063 {
1064 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1065 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1066 }
1067 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1068 {
1069 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1070 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1071 }
1072 }
1073 }
1074 else
1075 {
1076 if (RT_SUCCESS_NP(rc))
1077 rc = rc2;
1078 if (RT_SUCCESS_NP(rc))
1079 rc = rc3;
1080 AssertStmt(RT_FAILURE_NP(rc), rc = VERR_IPE_UNEXPECTED_STATUS);
1081 }
1082 }
1083 }
1084 /* else: The user wasn't permitted to create this VM. */
1085
1086 /*
1087 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1088 * object reference here. A little extra mess because of non-recursive lock.
1089 */
1090 void *pvObj = pHandle->pvObj;
1091 pHandle->pvObj = NULL;
1092 gvmmR0CreateDestroyUnlock(pGVMM);
1093
1094 SUPR0ObjRelease(pvObj, pSession);
1095
1096 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1097 return rc;
1098 }
1099
1100 rc = VERR_NO_MEMORY;
1101 }
1102 else
1103 rc = VERR_GVMM_IPE_1;
1104 }
1105 else
1106 rc = VERR_GVM_TOO_MANY_VMS;
1107
1108 gvmmR0CreateDestroyUnlock(pGVMM);
1109 return rc;
1110}
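/*
 * What the mapping work in GVMMR0CreateVM means for ring-3, sketched: the VM
 * structure and each VMCPU get separate read/write user mappings, so ring-3
 * reaches a VMCPU through the apCpusR3 table set up above rather than by
 * pointer arithmetic on pVM.  The helper name is made up for illustration.
 *
 * @code
 *  static PVMCPU vmR3ExampleGetVCpu(PVM pVM, VMCPUID idCpu)
 *  {
 *      AssertReturn(idCpu < pVM->cCpus, NULL);
 *      return pVM->apCpusR3[idCpu];    // filled in by GVMMR0CreateVM
 *  }
 * @endcode
 */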
1111
1112
1113/**
1114 * Initializes the per VM data belonging to GVMM.
1115 *
1116 * @param pGVM Pointer to the global VM structure.
1117 * @param hSelf The handle.
1118 * @param cCpus The CPU count.
1119 * @param pSession The session this VM is associated with.
1120 */
1121static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1122{
1123 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1124 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1125 AssertCompileMemberAlignment(VM, cpum, 64);
1126 AssertCompileMemberAlignment(VM, tm, 64);
1127
1128 /* GVM: */
1129 pGVM->u32Magic = GVM_MAGIC;
1130 pGVM->hSelf = hSelf;
1131 pGVM->cCpus = cCpus;
1132 pGVM->pSession = pSession;
1133 pGVM->pSelf = pGVM;
1134
1135 /* VM: */
1136 pGVM->enmVMState = VMSTATE_CREATING;
1137 pGVM->hSelfUnsafe = hSelf;
1138 pGVM->pSessionUnsafe = pSession;
1139 pGVM->pVMR0ForCall = pGVM;
1140 pGVM->cCpusUnsafe = cCpus;
1141 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1142 pGVM->uStructVersion = 1;
1143 pGVM->cbSelf = sizeof(VM);
1144 pGVM->cbVCpu = sizeof(VMCPU);
1145
1146 /* GVMM: */
1147 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1148 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1149 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1150 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1151 pGVM->gvmm.s.fDoneVMMR0Init = false;
1152 pGVM->gvmm.s.fDoneVMMR0Term = false;
1153
1154 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1155 {
1156 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1157 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1158 }
1159 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1160
1161 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1162 {
1163 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1164 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1165 }
1166
1167 /*
1168 * Per virtual CPU.
1169 */
1170 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1171 {
1172 pGVM->aCpus[i].idCpu = i;
1173 pGVM->aCpus[i].idCpuUnsafe = i;
1174 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1175 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1176 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1177 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1178 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1179 pGVM->aCpus[i].pGVM = pGVM;
1180 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1181 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1182 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1183 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1184 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1185 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1186 }
1187}
1188
1189
1190/**
1191 * Does the VM initialization.
1192 *
1193 * @returns VBox status code.
1194 * @param pGVM The global (ring-0) VM structure.
1195 */
1196GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1197{
1198 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1199
1200 int rc = VERR_INTERNAL_ERROR_3;
1201 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1202 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1203 {
1204 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1205 {
1206 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1207 if (RT_FAILURE(rc))
1208 {
1209 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1210 break;
1211 }
1212 }
1213 }
1214 else
1215 rc = VERR_WRONG_ORDER;
1216
1217 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1218 return rc;
1219}
1220
1221
1222/**
1223 * Indicates that we're done with the ring-0 initialization
1224 * of the VM.
1225 *
1226 * @param pGVM The global (ring-0) VM structure.
1227 * @thread EMT(0)
1228 */
1229GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1230{
1231 /* Set the indicator. */
1232 pGVM->gvmm.s.fDoneVMMR0Init = true;
1233}
1234
1235
1236/**
1237 * Indicates that we're doing the ring-0 termination of the VM.
1238 *
1239 * @returns true if termination hasn't been done already, false if it has.
1240 * @param pGVM Pointer to the global VM structure. Optional.
1241 * @thread EMT(0) or session cleanup thread.
1242 */
1243GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1244{
1245 /* Validate the VM structure, state and handle. */
1246 AssertPtrReturn(pGVM, false);
1247
1248 /* Set the indicator. */
1249 if (pGVM->gvmm.s.fDoneVMMR0Term)
1250 return false;
1251 pGVM->gvmm.s.fDoneVMMR0Term = true;
1252 return true;
1253}
1254
1255
1256/**
1257 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1258 *
1259 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1260 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1261 * would've been nice if the caller was actually the EMT thread or if we somehow
1262 * could've associated the calling thread with the VM up front.
1263 *
1264 * @returns VBox status code.
1265 * @param pGVM The global (ring-0) VM structure.
1266 *
1267 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1268 */
1269GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1270{
1271 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1272 PGVMM pGVMM;
1273 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1274
1275 /*
1276 * Validate the VM structure, state and caller.
1277 */
1278 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1279 AssertReturn(!((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1280 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1281 VERR_WRONG_ORDER);
1282
1283 uint32_t hGVM = pGVM->hSelf;
1284 ASMCompilerBarrier();
1285 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1286 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1287
1288 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1289 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1290
1291 RTPROCESS ProcId = RTProcSelf();
1292 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1293 AssertReturn( ( pHandle->hEMT0 == hSelf
1294 && pHandle->ProcId == ProcId)
1295 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1296
1297 /*
1298 * Lookup the handle and destroy the object.
1299 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1300 * object, we take some precautions against racing callers just in case...
1301 */
1302 int rc = gvmmR0CreateDestroyLock(pGVMM);
1303 AssertRC(rc);
1304
1305 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1306 if ( pHandle->pGVM == pGVM
1307 && ( ( pHandle->hEMT0 == hSelf
1308 && pHandle->ProcId == ProcId)
1309 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1310 && RT_VALID_PTR(pHandle->pvObj)
1311 && RT_VALID_PTR(pHandle->pSession)
1312 && RT_VALID_PTR(pHandle->pGVM)
1313 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1314 {
1315 /* Check that other EMTs have deregistered. */
1316 uint32_t cNotDeregistered = 0;
1317 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1318 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1319 if (cNotDeregistered == 0)
1320 {
1321 /* Grab the object pointer. */
1322 void *pvObj = pHandle->pvObj;
1323 pHandle->pvObj = NULL;
1324 gvmmR0CreateDestroyUnlock(pGVMM);
1325
1326 SUPR0ObjRelease(pvObj, pHandle->pSession);
1327 }
1328 else
1329 {
1330 gvmmR0CreateDestroyUnlock(pGVMM);
1331 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1332 }
1333 }
1334 else
1335 {
1336 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1337 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1338 gvmmR0CreateDestroyUnlock(pGVMM);
1339 rc = VERR_GVMM_IPE_2;
1340 }
1341
1342 return rc;
1343}
1344
1345
1346/**
1347 * Performs VM cleanup task as part of object destruction.
1348 *
1349 * @param pGVM The GVM pointer.
1350 */
1351static void gvmmR0CleanupVM(PGVM pGVM)
1352{
1353 if ( pGVM->gvmm.s.fDoneVMMR0Init
1354 && !pGVM->gvmm.s.fDoneVMMR0Term)
1355 {
1356 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1357 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1358 {
1359 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1360 VMMR0TermVM(pGVM, NIL_VMCPUID);
1361 }
1362 else
1363 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1364 }
1365
1366#ifndef VBOX_WITH_MINIMAL_R0
1367 GMMR0CleanupVM(pGVM);
1368# ifdef VBOX_WITH_NEM_R0
1369 NEMR0CleanupVM(pGVM);
1370# endif
1371 PDMR0CleanupVM(pGVM);
1372 IOMR0CleanupVM(pGVM);
1373 DBGFR0CleanupVM(pGVM);
1374 PGMR0CleanupVM(pGVM);
1375 TMR0CleanupVM(pGVM);
1376#endif
1377 VMMR0CleanupVM(pGVM);
1378}
1379
1380
1381/**
1382 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1383 *
1384 * pvUser1 is the GVM instance pointer.
1385 * pvUser2 is the handle pointer.
1386 */
1387static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1388{
1389 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1390
1391 NOREF(pvObj);
1392
1393 /*
1394 * Some quick, paranoid, input validation.
1395 */
1396 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1397 AssertPtr(pHandle);
1398 PGVMM pGVMM = (PGVMM)pvUser1;
1399 Assert(pGVMM == g_pGVMM);
1400 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1401 if ( !iHandle
1402 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1403 || iHandle != pHandle->iSelf)
1404 {
1405 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1406 return;
1407 }
1408
1409 int rc = gvmmR0CreateDestroyLock(pGVMM);
1410 AssertRC(rc);
1411 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1412 AssertRC(rc);
1413
1414 /*
1415 * This is a tad slow but a doubly linked list is too much hassle.
1416 */
1417 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1418 {
1419 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1420 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1421 gvmmR0CreateDestroyUnlock(pGVMM);
1422 return;
1423 }
1424
1425 if (pGVMM->iUsedHead == iHandle)
1426 pGVMM->iUsedHead = pHandle->iNext;
1427 else
1428 {
1429 uint16_t iPrev = pGVMM->iUsedHead;
1430 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1431 while (iPrev)
1432 {
1433 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1434 {
1435 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1436 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1437 gvmmR0CreateDestroyUnlock(pGVMM);
1438 return;
1439 }
1440 if (RT_UNLIKELY(c-- <= 0))
1441 {
1442 iPrev = 0;
1443 break;
1444 }
1445
1446 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1447 break;
1448 iPrev = pGVMM->aHandles[iPrev].iNext;
1449 }
1450 if (!iPrev)
1451 {
1452 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1453 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1454 gvmmR0CreateDestroyUnlock(pGVMM);
1455 return;
1456 }
1457
1458 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1459 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1460 }
1461 pHandle->iNext = 0;
1462 pGVMM->cVMs--;
1463
1464 /*
1465 * Do the global cleanup round.
1466 */
1467 PGVM pGVM = pHandle->pGVM;
1468 if ( RT_VALID_PTR(pGVM)
1469 && pGVM->u32Magic == GVM_MAGIC)
1470 {
1471 pGVMM->cEMTs -= pGVM->cCpus;
1472
1473 if (pGVM->pSession)
1474 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1475
1476 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1477
1478 gvmmR0CleanupVM(pGVM);
1479
1480 /*
1481 * Do the GVMM cleanup - must be done last.
1482 */
1483 /* The VM and VM pages mappings/allocations. */
1484 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1485 {
1486 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1487 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1488 }
1489
1490 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1491 {
1492 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1493 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1494 }
1495
1496 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1497 {
1498 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1499 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1500 }
1501
1502 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1503 {
1504 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1505 {
1506 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1507 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1508 }
1509 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1510 {
1511 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1512 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1513 }
1514#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1515 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1516 {
1517 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1518 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1519 }
1520#endif
1521 }
1522
1523 /* the GVM structure itself. */
1524 pGVM->u32Magic |= UINT32_C(0x80000000);
1525 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1526 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1527 pGVM = NULL;
1528
1529 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1530 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1531 AssertRC(rc);
1532 }
1533 /* else: GVMMR0CreateVM cleanup. */
1534
1535 /*
1536 * Free the handle.
1537 */
1538 pHandle->iNext = pGVMM->iFreeHead;
1539 pGVMM->iFreeHead = iHandle;
1540 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1541 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1542 ASMAtomicWriteNullPtr(&pHandle->pSession);
1543 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1544 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1545
1546 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1547 gvmmR0CreateDestroyUnlock(pGVMM);
1548 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1549}
1550
1551
1552/**
1553 * Registers the calling thread as the EMT of a Virtual CPU.
1554 *
1555 * Note that VCPU 0 is automatically registered during VM creation.
1556 *
1557 * @returns VBox status code
1558 * @param pGVM The global (ring-0) VM structure.
1559 * @param idCpu VCPU id to register the current thread as.
1560 */
1561GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1562{
1563 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1564
1565 /*
1566 * Validate the VM structure, state and handle.
1567 */
1568 PGVMM pGVMM;
1569 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1570 if (RT_SUCCESS(rc))
1571 {
1572 if (idCpu < pGVM->cCpus)
1573 {
1574 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1575 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1576
1577 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1578
1579 /* Check that the EMT isn't already assigned to a thread. */
1580 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1581 {
1582 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1583
1584 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1585 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1586 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1587 if (RT_SUCCESS(rc))
1588 {
1589 /*
1590 * Do the assignment, then try setup the hook. Undo if that fails.
1591 */
1592 unsigned cCollisions = 0;
1593 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1594 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1595 {
1596 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1597 do
1598 {
1599 cCollisions++;
1600 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1601 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1602 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1603 }
1604 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1605 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1606
1607 pGVCpu->hNativeThreadR0 = hNativeSelf;
1608 pGVCpu->hEMT = hNativeSelf;
1609 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1610 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1611
1612 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1613 if (RT_SUCCESS(rc))
1614 {
1615#ifndef VBOX_WITH_MINIMAL_R0
1616 CPUMR0RegisterVCpuThread(pGVCpu);
1617#endif
1618
1619#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1620 /*
1621 * Create the high resolution wake-up timer, ignore failures.
1622 */
1623 if (RTTimerCanDoHighResolution())
1624 {
1625 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1626 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1627 if (RT_FAILURE(rc2))
1628 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1629 }
1630#endif
1631 }
1632 else
1633 {
1634 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1635 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1636 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1637 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1638 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1639 }
1640 }
1641 }
1642 else
1643 rc = VERR_ACCESS_DENIED;
1644
1645 gvmmR0CreateDestroyUnlock(pGVMM);
1646 }
1647 else
1648 rc = VERR_INVALID_CPU_ID;
1649 }
1650 return rc;
1651}
1652
1653
1654/**
1655 * Deregisters the calling thread as the EMT of a Virtual CPU.
1656 *
1657 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1658 *
1659 * @returns VBox status code
1660 * @param pGVM The global (ring-0) VM structure.
1661 * @param idCpu VCPU id to deregister the current thread as.
1662 */
1663GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1664{
1665 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1666
1667 /*
1668 * Validate the VM structure, state and handle.
1669 */
1670 PGVMM pGVMM;
1671 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1672 if (RT_SUCCESS(rc))
1673 {
1674 /*
1675 * Take the destruction lock and recheck the handle state to
1676 * prevent racing GVMMR0DestroyVM.
1677 */
1678 gvmmR0CreateDestroyLock(pGVMM);
1679
1680 uint32_t hSelf = pGVM->hSelf;
1681 ASMCompilerBarrier();
1682 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1683 && pGVMM->aHandles[hSelf].pvObj != NULL
1684 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1685 {
1686 /*
1687 * Do per-EMT cleanups.
1688 */
1689 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1690
1691 /*
1692 * Invalidate hEMT. We don't use NIL here as that would allow
1693 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1694 */
1695 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1696 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1697
1698 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1699 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1700 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1701 }
1702
1703 gvmmR0CreateDestroyUnlock(pGVMM);
1704 }
1705 return rc;
1706}
1707
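/*
 * A minimal usage sketch (illustrative only): a non-zero VCPU's EMT is expected
 * to register itself before use and to deregister before the thread terminates,
 * while VCPU 0 goes through GVMMR0DestroyVM instead.  The GVMMR0RegisterVCpu
 * signature is assumed here to mirror GVMMR0DeregisterVCpu.
 */
#if 0
static int exampleEmtLifecycle(PGVM pGVM, VMCPUID idCpu)
{
    /* Called on the EMT thread for idCpu (idCpu != 0). */
    int rc = GVMMR0RegisterVCpu(pGVM, idCpu);       /* claim the VCPU for this native thread */
    if (RT_SUCCESS(rc))
    {
        /* ... run the EMT loop ... */
        rc = GVMMR0DeregisterVCpu(pGVM, idCpu);     /* invalidates hEMT so re-registration is refused */
    }
    return rc;
}
#endif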
1708
1709/**
1710 * Registers the caller as a given worker thread.
1711 *
1712 * This enables the thread to operate critical sections in ring-0.
1713 *
1714 * @returns VBox status code.
1715 * @param pGVM The global (ring-0) VM structure.
1716 * @param enmWorker The worker thread this is supposed to be.
1717 * @param hNativeSelfR3 The ring-3 native self of the caller.
1718 */
1719GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1720{
1721 /*
1722 * Validate input.
1723 */
1724 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1725 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1726 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1727 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1728 PGVMM pGVMM;
1729 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1730 AssertRCReturn(rc, rc);
1731 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1732
1733 /*
1734 * Grab the big lock and check the VM state again.
1735 */
1736 uint32_t const hSelf = pGVM->hSelf;
1737 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1738 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1739 && pGVMM->aHandles[hSelf].pvObj != NULL
1740 && pGVMM->aHandles[hSelf].pGVM == pGVM
1741 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1742 {
1743 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1744 {
1745 /*
1746 * Check that the thread isn't an EMT or serving in some other worker capacity.
1747 */
1748 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1749 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1750 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1751            AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread != hNativeSelf,
1752 rc = VERR_INVALID_PARAMETER);
1753 if (RT_SUCCESS(rc))
1754 {
1755 /*
1756 * Do the registration.
1757 */
1758 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1759 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1760 {
1761 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1762 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1763 rc = VINF_SUCCESS;
1764 }
1765 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1766 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1767 rc = VERR_ALREADY_EXISTS;
1768 else
1769 rc = VERR_RESOURCE_BUSY;
1770 }
1771 }
1772 else
1773 rc = VERR_VM_INVALID_VM_STATE;
1774 }
1775 else
1776 rc = VERR_INVALID_VM_HANDLE;
1777 gvmmR0CreateDestroyUnlock(pGVMM);
1778 return rc;
1779}
1780
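/*
 * A minimal usage sketch (illustrative only): a ring-0 worker registers itself
 * using the ring-3 thread handle passed down from its creator, and treats
 * VERR_ALREADY_EXISTS as a benign repeat of the same registration.
 * GVMMWORKERTHREAD_EXAMPLE is a hypothetical enum value; real callers pass one
 * of the values defined between GVMMWORKERTHREAD_INVALID and GVMMWORKERTHREAD_END.
 */
#if 0
static int exampleRegisterWorker(PGVM pGVM, RTNATIVETHREAD hNativeSelfR3)
{
    int rc = GVMMR0RegisterWorkerThread(pGVM, GVMMWORKERTHREAD_EXAMPLE /* hypothetical */, hNativeSelfR3);
    if (rc == VERR_ALREADY_EXISTS)
        rc = VINF_SUCCESS;  /* the same thread registering the same slot twice */
    return rc;
}
#endif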
1781
1782/**
1783 * Deregisters a worker thread (caller).
1784 *
1785 * The worker thread cannot be re-created and re-registered; instead, the given
1786 * @a enmWorker slot becomes invalid.
1787 *
1788 * @returns VBox status code.
1789 * @param pGVM The global (ring-0) VM structure.
1790 * @param enmWorker The worker thread this is supposed to be.
1791 */
1792GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1793{
1794 /*
1795 * Validate input.
1796 */
1797 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1798 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1799 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1800 PGVMM pGVMM;
1801 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1802 AssertRCReturn(rc, rc);
1803
1804 /*
1805 * Grab the big lock and check the VM state again.
1806 */
1807 uint32_t const hSelf = pGVM->hSelf;
1808 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1809 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1810 && pGVMM->aHandles[hSelf].pvObj != NULL
1811 && pGVMM->aHandles[hSelf].pGVM == pGVM
1812 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1813 {
1814 /*
1815 * Do the deregistration.
1816 * This will prevent any other thread from registering as this worker later.
1817 */
1818 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1819 {
1820 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1821 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1822 rc = VINF_SUCCESS;
1823 }
1824 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1825 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1826 rc = VINF_SUCCESS;
1827 else
1828 rc = VERR_NOT_OWNER;
1829 }
1830 else
1831 rc = VERR_INVALID_VM_HANDLE;
1832 gvmmR0CreateDestroyUnlock(pGVMM);
1833 return rc;
1834}
1835
1836
1837/**
1838 * Lookup a GVM structure by its handle.
1839 *
1840 * @returns The GVM pointer on success, NULL on failure.
1841 * @param hGVM The global VM handle. Asserts on bad handle.
1842 */
1843GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1844{
1845 PGVMM pGVMM;
1846 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1847
1848 /*
1849 * Validate.
1850 */
1851 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1852 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1853
1854 /*
1855 * Look it up.
1856 */
1857 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1858 AssertPtrReturn(pHandle->pvObj, NULL);
1859 PGVM pGVM = pHandle->pGVM;
1860 AssertPtrReturn(pGVM, NULL);
1861
1862 return pGVM;
1863}
1864
1865
1866/**
1867 * Check that the given GVM and VM structures match up.
1868 *
1869 * The calling thread must be in the same process as the VM. All current lookups
1870 * are by threads inside the same process, so this will not be an issue.
1871 *
1872 * @returns VBox status code.
1873 * @param pGVM The global (ring-0) VM structure.
1874 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1875 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1876 * shared mode when requested.
1877 *
1878 * Be very careful if not taking the lock as it's
1879 * possible that the VM will disappear then!
1880 *
1881 * @remark This will not assert on an invalid pGVM but try to return silently.
1882 */
1883static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1884{
1885 /*
1886 * Check the pointers.
1887 */
1888 int rc;
1889 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1890 && ((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0 ))
1891 {
1892 /*
1893 * Get the pGVMM instance and check the VM handle.
1894 */
1895 PGVMM pGVMM;
1896 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1897
1898 uint16_t hGVM = pGVM->hSelf;
1899 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1900 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1901 {
1902 RTPROCESS const pidSelf = RTProcSelf();
1903 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1904 if (fTakeUsedLock)
1905 {
1906 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1907 AssertRCReturn(rc, rc);
1908 }
1909
1910 if (RT_LIKELY( pHandle->pGVM == pGVM
1911 && pHandle->ProcId == pidSelf
1912 && RT_VALID_PTR(pHandle->pvObj)))
1913 {
1914 /*
1915 * Some more VM data consistency checks.
1916 */
1917 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1918 && pGVM->hSelfUnsafe == hGVM
1919 && pGVM->pSelf == pGVM))
1920 {
1921 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1922 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1923 {
1924 *ppGVMM = pGVMM;
1925 return VINF_SUCCESS;
1926 }
1927 rc = VERR_INCONSISTENT_VM_HANDLE;
1928 }
1929 else
1930 rc = VERR_INCONSISTENT_VM_HANDLE;
1931 }
1932 else
1933 rc = VERR_INVALID_VM_HANDLE;
1934
1935 if (fTakeUsedLock)
1936 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1937 }
1938 else
1939 rc = VERR_INVALID_VM_HANDLE;
1940 }
1941 else
1942 rc = VERR_INVALID_POINTER;
1943 return rc;
1944}
1945
1946
1947/**
1948 * Validates a GVM/VM pair.
1949 *
1950 * @returns VBox status code.
1951 * @param pGVM The global (ring-0) VM structure.
1952 */
1953GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1954{
1955 PGVMM pGVMM;
1956 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1957}
1958
1959
1960/**
1961 * Check that the given GVM and VM structures match up.
1962 *
1963 * The calling thread must be in the same process as the VM. All current lookups
1964 * are by threads inside the same process, so this will not be an issue.
1965 *
1966 * @returns VBox status code.
1967 * @param pGVM The global (ring-0) VM structure.
1968 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1969 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1970 * @thread EMT
1971 *
1972 * @remarks This will assert in all failure paths.
1973 */
1974static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1975{
1976 /*
1977 * Check the pointers.
1978 */
1979 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1980 AssertReturn(((uintptr_t)pGVM & HOST_PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1981
1982 /*
1983 * Get the pGVMM instance and check the VM handle.
1984 */
1985 PGVMM pGVMM;
1986 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1987
1988 uint16_t hGVM = pGVM->hSelf;
1989 ASMCompilerBarrier();
1990 AssertReturn( hGVM != NIL_GVM_HANDLE
1991 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1992
1993 RTPROCESS const pidSelf = RTProcSelf();
1994 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1995 AssertReturn( pHandle->pGVM == pGVM
1996 && pHandle->ProcId == pidSelf
1997 && RT_VALID_PTR(pHandle->pvObj),
1998 VERR_INVALID_HANDLE);
1999
2000 /*
2001 * Check the EMT claim.
2002 */
2003 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
2004 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
2005 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
2006
2007 /*
2008 * Some more VM data consistency checks.
2009 */
2010 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
2011 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
2012 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
2013 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
2014
2015 *ppGVMM = pGVMM;
2016 return VINF_SUCCESS;
2017}
2018
2019
2020/**
2021 * Validates a GVM/EMT pair.
2022 *
2023 * @returns VBox status code.
2024 * @param pGVM The global (ring-0) VM structure.
2025 * @param idCpu The Virtual CPU ID of the calling EMT.
2026 * @thread EMT(idCpu)
2027 */
2028GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2029{
2030 PGVMM pGVMM;
2031 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2032}
2033
2034
2035/**
2036 * Looks up the VM belonging to the specified EMT thread.
2037 *
2038 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2039 * unnecessary kernel panics when the EMT thread hits an assertion. The
2040 * caller may or may not be an EMT thread.
2041 *
2042 * @returns Pointer to the VM on success, NULL on failure.
2043 * @param hEMT The native thread handle of the EMT.
2044 * NIL_RTNATIVETHREAD means the current thread
2045 */
2046GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
2047{
2048 /*
2049 * No assertions here as we're usually called in an AssertMsgN or
2050 * RTAssert* context.
2051 */
2052 PGVMM pGVMM = g_pGVMM;
2053 if ( !RT_VALID_PTR(pGVMM)
2054 || pGVMM->u32Magic != GVMM_MAGIC)
2055 return NULL;
2056
2057 if (hEMT == NIL_RTNATIVETHREAD)
2058 hEMT = RTThreadNativeSelf();
2059 RTPROCESS ProcId = RTProcSelf();
2060
2061 /*
2062 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2063 */
2064/** @todo introduce some pid hash table here, please. */
2065 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2066 {
2067 if ( pGVMM->aHandles[i].iSelf == i
2068 && pGVMM->aHandles[i].ProcId == ProcId
2069 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2070 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2071 {
2072 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2073 return pGVMM->aHandles[i].pGVM;
2074
2075 /* This is fairly safe with the current process-per-VM approach. */
2076 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2077 VMCPUID const cCpus = pGVM->cCpus;
2078 ASMCompilerBarrier();
2079 if ( cCpus < 1
2080 || cCpus > VMM_MAX_CPU_COUNT)
2081 continue;
2082 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2083 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2084 return pGVMM->aHandles[i].pGVM;
2085 }
2086 }
2087 return NULL;
2088}
2089
2090
2091/**
2092 * Looks up the GVMCPU belonging to the specified EMT thread.
2093 *
2094 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2095 * unnecessary kernel panics when the EMT thread hits an assertion. The
2096 * caller may or may not be an EMT thread.
2097 *
2098 * @returns Pointer to the VCPU on success, NULL on failure.
2099 * @param hEMT The native thread handle of the EMT.
2100 * NIL_RTNATIVETHREAD means the current thread
2101 */
2102GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2103{
2104 /*
2105 * No assertions here as we're usually called in an AssertMsgN,
2106 * RTAssert*, Log and LogRel contexts.
2107 */
2108 PGVMM pGVMM = g_pGVMM;
2109 if ( !RT_VALID_PTR(pGVMM)
2110 || pGVMM->u32Magic != GVMM_MAGIC)
2111 return NULL;
2112
2113 if (hEMT == NIL_RTNATIVETHREAD)
2114 hEMT = RTThreadNativeSelf();
2115 RTPROCESS ProcId = RTProcSelf();
2116
2117 /*
2118 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2119 */
2120/** @todo introduce some pid hash table here, please. */
2121 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2122 {
2123 if ( pGVMM->aHandles[i].iSelf == i
2124 && pGVMM->aHandles[i].ProcId == ProcId
2125 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2126 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2127 {
2128 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2129 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2130 return &pGVM->aCpus[0];
2131
2132 /* This is fairly safe with the current process-per-VM approach. */
2133 VMCPUID const cCpus = pGVM->cCpus;
2134 ASMCompilerBarrier();
2136 if ( cCpus < 1
2137 || cCpus > VMM_MAX_CPU_COUNT)
2138 continue;
2139 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2140 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2141 return &pGVM->aCpus[idCpu];
2142 }
2143 }
2144 return NULL;
2145}
2146
2147
2148/**
2149 * Get the GVMCPU structure for the given EMT.
2150 *
2151 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2152 * @param pGVM The global (ring-0) VM structure.
2153 * @param hEMT The native thread handle of the EMT.
2154 * NIL_RTNATIVETHREAD means the current thread
2155 */
2156GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2157{
2158 /*
2159 * Validate & adjust input.
2160 */
2161 AssertPtr(pGVM);
2162 Assert(pGVM->u32Magic == GVM_MAGIC);
2163 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2164 {
2165 hEMT = RTThreadNativeSelf();
2166 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2167 }
2168
2169 /*
2170 * Find the matching hash table entry.
2171 * See similar code in GVMMR0GetRing3ThreadForSelf.
2172 */
2173 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2174 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2175 { /* likely */ }
2176 else
2177 {
2178#ifdef VBOX_STRICT
2179 unsigned cCollisions = 0;
2180#endif
2181 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2182 for (;;)
2183 {
2184 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2185 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2186 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2187 break;
2188 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2189 {
2190#ifdef VBOX_STRICT
2191 uint32_t idxCpu = pGVM->cCpus;
2192 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2193 while (idxCpu-- > 0)
2194 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2195#endif
2196 return NULL;
2197 }
2198 }
2199 }
2200
2201 /*
2202 * Validate the VCpu number and translate it into a pointer.
2203 */
2204 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2205 AssertReturn(idCpu < pGVM->cCpus, NULL);
2206 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2207 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2208 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2209 return pGVCpu;
2210}
2211
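/*
 * A minimal usage sketch (illustrative only): resolving the caller's own GVMCPU
 * via the EMT hash.  Passing NIL_RTNATIVETHREAD makes the function use the
 * current thread; a NULL return simply means the caller is not an EMT of pGVM.
 */
#if 0
static VMCPUID exampleGetOwnCpuId(PGVM pGVM)
{
    PGVMCPU pGVCpu = GVMMR0GetGVCpuByGVMandEMT(pGVM, NIL_RTNATIVETHREAD);
    return pGVCpu ? pGVCpu->idCpu : NIL_VMCPUID;
}
#endif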
2212
2213/**
2214 * Get the native ring-3 thread handle for the caller.
2215 *
2216 * This works for EMTs and registered workers.
2217 *
2218 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2219 * @param pGVM The global (ring-0) VM structure.
2220 */
2221GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2222{
2223 /*
2224 * Validate input.
2225 */
2226 AssertPtr(pGVM);
2227 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2228 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2229 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2230
2231 /*
2232 * Find the matching hash table entry.
2233 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2234 */
2235 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2236 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2237 { /* likely */ }
2238 else
2239 {
2240#ifdef VBOX_STRICT
2241 unsigned cCollisions = 0;
2242#endif
2243 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2244 for (;;)
2245 {
2246 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2247 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2248 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2249 break;
2250 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2251 {
2252#ifdef VBOX_STRICT
2253 uint32_t idxCpu = pGVM->cCpus;
2254 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2255 while (idxCpu-- > 0)
2256 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2257#endif
2258
2259 /*
2260 * Not an EMT, so see if it's a worker thread.
2261 */
2262 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2263 while (--idx > GVMMWORKERTHREAD_INVALID)
2264 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2265 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2266
2267 return NIL_RTNATIVETHREAD;
2268 }
2269 }
2270 }
2271
2272 /*
2273 * Validate the VCpu number and translate it into a pointer.
2274 */
2275 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2276 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2277 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2278 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2279 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2280 return pGVCpu->hNativeThread;
2281}
2282
2283
2284/**
2285 * Converts a pointer within the GVM structure to a host physical address.
2286 *
2287 * @returns Host physical address.
2288 * @param pGVM The global (ring-0) VM structure.
2289 * @param pv The address to convert.
2290 * @thread EMT
2291 */
2292GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2293{
2294 AssertPtr(pGVM);
2295 Assert(pGVM->u32Magic == GVM_MAGIC);
2296 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2297 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2298 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> HOST_PAGE_SHIFT) | ((uintptr_t)pv & HOST_PAGE_OFFSET_MASK);
2299}
2300
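/*
 * A worked illustration of the conversion above (illustrative only), assuming
 * 4KiB host pages (HOST_PAGE_SHIFT == 12); the variables are hypothetical.
 */
#if 0
uintptr_t const off     = 0x1234;        /* offset of pv into the GVM allocation        */
size_t    const iPage   = off >> 12;     /* -> page index 1 of pGVM->gvmm.s.VMMemObj    */
uintptr_t const offPage = off & 0xfff;   /* -> 0x234, OR'ed onto the page's physical address */
#endif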
2301
2302/**
2303 * This will wake up expired and soon-to-be expired VMs.
2304 *
2305 * @returns Number of VMs that have been woken up.
2306 * @param pGVMM Pointer to the GVMM instance data.
2307 * @param u64Now The current time.
2308 */
2309static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2310{
2311 /*
2312 * Skip this if it has been disabled because of high resolution wake-ups or by
2313 * the user.
2314 */
2315 if (!pGVMM->fDoEarlyWakeUps)
2316 return 0;
2317
2318/** @todo Rewrite this algorithm. See performance defect XYZ. */
2319
2320 /*
2321 * A cheap optimization to stop wasting so much time here on big setups.
2322 */
2323 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2324 if ( pGVMM->cHaltedEMTs == 0
2325 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2326 return 0;
2327
2328 /*
2329 * Only one thread doing this at a time.
2330 */
2331 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2332 return 0;
2333
2334 /*
2335 * The first pass will wake up VMs which have actually expired
2336 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2337 */
2338 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2339 uint64_t u64Min = UINT64_MAX;
2340 unsigned cWoken = 0;
2341 unsigned cHalted = 0;
2342 unsigned cTodo2nd = 0;
2343 unsigned cTodo3rd = 0;
2344 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2345 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2346 i = pGVMM->aHandles[i].iNext)
2347 {
2348 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2349 if ( RT_VALID_PTR(pCurGVM)
2350 && pCurGVM->u32Magic == GVM_MAGIC)
2351 {
2352 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2353 {
2354 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2355 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2356 if (u64)
2357 {
2358 if (u64 <= u64Now)
2359 {
2360 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2361 {
2362 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2363 AssertRC(rc);
2364 cWoken++;
2365 }
2366 }
2367 else
2368 {
2369 cHalted++;
2370 if (u64 <= uNsEarlyWakeUp1)
2371 cTodo2nd++;
2372 else if (u64 <= uNsEarlyWakeUp2)
2373 cTodo3rd++;
2374 else if (u64 < u64Min)
2375 u64Min = u64;
2376 }
2377 }
2378 }
2379 }
2380 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2381 }
2382
2383 if (cTodo2nd)
2384 {
2385 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2386 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2387 i = pGVMM->aHandles[i].iNext)
2388 {
2389 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2390 if ( RT_VALID_PTR(pCurGVM)
2391 && pCurGVM->u32Magic == GVM_MAGIC)
2392 {
2393 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2394 {
2395 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2396 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2397 if ( u64
2398 && u64 <= uNsEarlyWakeUp1)
2399 {
2400 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2401 {
2402 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2403 AssertRC(rc);
2404 cWoken++;
2405 }
2406 }
2407 }
2408 }
2409 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2410 }
2411 }
2412
2413 if (cTodo3rd)
2414 {
2415 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2416 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2417 i = pGVMM->aHandles[i].iNext)
2418 {
2419 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2420 if ( RT_VALID_PTR(pCurGVM)
2421 && pCurGVM->u32Magic == GVM_MAGIC)
2422 {
2423 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2424 {
2425 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2426 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2427 if ( u64
2428 && u64 <= uNsEarlyWakeUp2)
2429 {
2430 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2431 {
2432 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2433 AssertRC(rc);
2434 cWoken++;
2435 }
2436 }
2437 }
2438 }
2439 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2440 }
2441 }
2442
2443 /*
2444 * Set the minimum value.
2445 */
2446 pGVMM->uNsNextEmtWakeup = u64Min;
2447
2448 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2449 return cWoken;
2450}
2451
2452
2453#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2454/**
2455 * Timer callback for the EMT high-resolution wake-up timer.
2456 *
2457 * @param pTimer The timer handle.
2458 * @param pvUser The global (ring-0) CPU structure for the EMT to wake up.
2459 * @param iTick The current tick.
2460 */
2461static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2462{
2463 PGVMCPU pGVCpu = (PGVMCPU)pvUser;
2464 NOREF(pTimer); NOREF(iTick);
2465
2466 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2467 if (pGVCpu->gvmm.s.u64HaltExpire != 0)
2468 {
2469 RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2470 pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
2471 }
2472 else
2473 pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;
2474
2475 if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
2476 pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
2477}
2478#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2479
2480
2481/**
2482 * Halt the EMT thread.
2483 *
2484 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2485 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2486 * @param pGVM The global (ring-0) VM structure.
2487 * @param pGVCpu The global (ring-0) CPU structure of the calling
2488 * EMT.
2489 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2490 * @thread EMT(pGVCpu).
2491 */
2492GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2493{
2494 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2495 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2496 PGVMM pGVMM;
2497 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2498
2499 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2500 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2501
2502 /*
2503 * If we're doing early wake-ups, we must take the UsedList lock before we
2504 * start querying the current time.
2505 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2506 */
2507 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2508 if (fDoEarlyWakeUps)
2509 {
2510 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2511 }
2512
2513 /* GIP hack: We may be frequently sleeping for short intervals where the
2514 difference between GIP and system time matters on systems with high resolution
2515 system time. So, convert the input from GIP to System time in that case. */
2516 Assert(ASMGetFlags() & X86_EFL_IF);
2517 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2518 const uint64_t u64NowGip = RTTimeNanoTS();
2519
2520 if (fDoEarlyWakeUps)
2521 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2522
2523 /*
2524 * Go to sleep if we must...
2525 * Cap the sleep time to 1 second to be on the safe side.
2526 */
2527 int rc;
2528 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2529 if ( u64NowGip < u64ExpireGipTime
2530 && ( cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2531 ? pGVMM->nsMinSleepCompany
2532 : pGVMM->nsMinSleepAlone)
2533#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2534 || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
2535#endif
2536 )
2537 )
2538 {
2539 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2540 if (cNsInterval > RT_NS_1SEC)
2541 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2542 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2543 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2544 if (fDoEarlyWakeUps)
2545 {
2546 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2547 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2548 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2549 }
2550
2551#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2552 if ( pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
2553 && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
2554 {
2555 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
2556 RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
2557 pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
2558 pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
2559 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
2560 }
2561#endif
2562
2563 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2564 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2565 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2566
2567 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2568 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2569
2570#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
2571 if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
2572 { /* likely */ }
2573 else
2574 {
2575 STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
2576 RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
2577 pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
2578 pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
2579 STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
2580 }
2581#endif
2582
2583 /* Reset the semaphore to try to prevent a few false wake-ups. */
2584 if (rc == VINF_SUCCESS)
2585 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2586 else if (rc == VERR_TIMEOUT)
2587 {
2588 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2589 rc = VINF_SUCCESS;
2590 }
2591 }
2592 else
2593 {
2594 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2595 if (fDoEarlyWakeUps)
2596 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2597 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2598 rc = VINF_SUCCESS;
2599 }
2600
2601 return rc;
2602}
2603
2604
2605/**
2606 * Halt the EMT thread.
2607 *
2608 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2609 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2610 * @param pGVM The global (ring-0) VM structure.
2611 * @param idCpu The Virtual CPU ID of the calling EMT.
2612 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2613 * @thread EMT(idCpu).
2614 */
2615GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2616{
2617 PGVMM pGVMM;
2618 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2619 if (RT_SUCCESS(rc))
2620 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2621 return rc;
2622}
2623
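/*
 * A minimal usage sketch (illustrative only): an EMT halting for roughly 250
 * microseconds.  The deadline is expressed as GIP time, which is what
 * RTTimeNanoTS() returns in this context, and the call returns early if another
 * thread wakes up or pokes this VCPU.
 */
#if 0
static int exampleHaltBriefly(PGVM pGVM, VMCPUID idCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + 250 * RT_NS_1US;
    return GVMMR0SchedHaltReq(pGVM, idCpu, u64ExpireGipTime);
}
#endif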
2624
2625
2626/**
2627 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2628 * a sleeping EMT.
2629 *
2630 * @retval VINF_SUCCESS if successfully woken up.
2631 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2632 *
2633 * @param pGVM The global (ring-0) VM structure.
2634 * @param pGVCpu The global (ring-0) VCPU structure.
2635 */
2636DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2637{
2638 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2639
2640 /*
2641 * Signal the semaphore regardless of whether it's currently blocked on it.
2642 *
2643 * The reason for this is that there is absolutely no way we can be 100%
2644 * certain that it isn't *about* to go to sleep on it and just got
2645 * delayed a bit en route. So, we will always signal the semaphore when
2646 * it is flagged as halted in the VMM.
2647 */
2648/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2649 int rc;
2650 if (pGVCpu->gvmm.s.u64HaltExpire)
2651 {
2652 rc = VINF_SUCCESS;
2653 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2654 }
2655 else
2656 {
2657 rc = VINF_GVM_NOT_BLOCKED;
2658 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2659 }
2660
2661 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2662 AssertRC(rc2);
2663
2664 return rc;
2665}
2666
2667
2668/**
2669 * Wakes up the halted EMT thread so it can service a pending request.
2670 *
2671 * @returns VBox status code.
2672 * @retval VINF_SUCCESS if successfully woken up.
2673 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2674 *
2675 * @param pGVM The global (ring-0) VM structure.
2676 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2677 * @param fTakeUsedLock Take the used lock or not
2678 * @thread Any but EMT(idCpu).
2679 */
2680GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2681{
2682 /*
2683 * Validate input and take the UsedLock.
2684 */
2685 PGVMM pGVMM;
2686 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2687 if (RT_SUCCESS(rc))
2688 {
2689 if (idCpu < pGVM->cCpus)
2690 {
2691 /*
2692 * Do the actual job.
2693 */
2694 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2695
2696 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2697 {
2698 /*
2699 * While we're here, do a round of scheduling.
2700 */
2701 Assert(ASMGetFlags() & X86_EFL_IF);
2702 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2703 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2704 }
2705 }
2706 else
2707 rc = VERR_INVALID_CPU_ID;
2708
2709 if (fTakeUsedLock)
2710 {
2711 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2712 AssertRC(rc2);
2713 }
2714 }
2715
2716 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2717 return rc;
2718}
2719
2720
2721/**
2722 * Wakes up the halted EMT thread so it can service a pending request.
2723 *
2724 * @returns VBox status code.
2725 * @retval VINF_SUCCESS if successfully woken up.
2726 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2727 *
2728 * @param pGVM The global (ring-0) VM structure.
2729 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2730 * @thread Any but EMT(idCpu).
2731 */
2732GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2733{
2734 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2735}
2736
2737
2738/**
2739 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2740 * parameter and no used locking.
2741 *
2742 * @returns VBox status code.
2743 * @retval VINF_SUCCESS if successfully woken up.
2744 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2745 *
2746 * @param pGVM The global (ring-0) VM structure.
2747 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2748 * @thread Any but EMT(idCpu).
2749 * @deprecated Don't use in new code if possible! Use the GVM variant.
2750 */
2751GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2752{
2753 PGVMM pGVMM;
2754 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2755 if (RT_SUCCESS(rc))
2756 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2757 return rc;
2758}
2759
2760
2761/**
2762 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2763 * the Virtual CPU if it's still busy executing guest code.
2764 *
2765 * @returns VBox status code.
2766 * @retval VINF_SUCCESS if poked successfully.
2767 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2768 *
2769 * @param pGVM The global (ring-0) VM structure.
2770 * @param pVCpu The cross context virtual CPU structure.
2771 */
2772DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2773{
2774 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2775
2776 RTCPUID idHostCpu = pVCpu->idHostCpu;
2777 if ( idHostCpu == NIL_RTCPUID
2778 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2779 {
2780 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2781 return VINF_GVM_NOT_BUSY_IN_GC;
2782 }
2783
2784 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2785 RTMpPokeCpu(idHostCpu);
2786 return VINF_SUCCESS;
2787}
2788
2789
2790/**
2791 * Pokes an EMT if it's still busy running guest code.
2792 *
2793 * @returns VBox status code.
2794 * @retval VINF_SUCCESS if poked successfully.
2795 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2796 *
2797 * @param pGVM The global (ring-0) VM structure.
2798 * @param idCpu The ID of the virtual CPU to poke.
2799 * @param fTakeUsedLock Take the used lock or not
2800 */
2801GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2802{
2803 /*
2804 * Validate input and take the UsedLock.
2805 */
2806 PGVMM pGVMM;
2807 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2808 if (RT_SUCCESS(rc))
2809 {
2810 if (idCpu < pGVM->cCpus)
2811 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2812 else
2813 rc = VERR_INVALID_CPU_ID;
2814
2815 if (fTakeUsedLock)
2816 {
2817 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2818 AssertRC(rc2);
2819 }
2820 }
2821
2822 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2823 return rc;
2824}
2825
2826
2827/**
2828 * Pokes an EMT if it's still busy running guest code.
2829 *
2830 * @returns VBox status code.
2831 * @retval VINF_SUCCESS if poked successfully.
2832 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2833 *
2834 * @param pGVM The global (ring-0) VM structure.
2835 * @param idCpu The ID of the virtual CPU to poke.
2836 */
2837GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2838{
2839 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2840}
2841
2842
2843/**
2844 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2845 * used locking.
2846 *
2847 * @returns VBox status code.
2848 * @retval VINF_SUCCESS if poked successfully.
2849 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2850 *
2851 * @param pGVM The global (ring-0) VM structure.
2852 * @param idCpu The ID of the virtual CPU to poke.
2853 *
2854 * @deprecated Don't use in new code if possible! Use the GVM variant.
2855 */
2856GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2857{
2858 PGVMM pGVMM;
2859 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2860 if (RT_SUCCESS(rc))
2861 {
2862 if (idCpu < pGVM->cCpus)
2863 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2864 else
2865 rc = VERR_INVALID_CPU_ID;
2866 }
2867 return rc;
2868}
2869
2870
2871/**
2872 * Wakes up a set of halted EMT threads so they can service pending requests.
2873 *
2874 * @returns VBox status code, no informational stuff.
2875 *
2876 * @param pGVM The global (ring-0) VM structure.
2877 * @param pSleepSet The set of sleepers to wake up.
2878 * @param pPokeSet The set of CPUs to poke.
2879 */
2880GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2881{
2882 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2883 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2884 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2885
2886 /*
2887 * Validate input and take the UsedLock.
2888 */
2889 PGVMM pGVMM;
2890 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2891 if (RT_SUCCESS(rc))
2892 {
2893 rc = VINF_SUCCESS;
2894 VMCPUID idCpu = pGVM->cCpus;
2895 while (idCpu-- > 0)
2896 {
2897 /* Don't try poke or wake up ourselves. */
2898 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2899 continue;
2900
2901 /* just ignore errors for now. */
2902 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2903 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2904 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2905 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2906 }
2907
2908 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2909 AssertRC(rc2);
2910 }
2911
2912 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2913 return rc;
2914}
2915
2916
2917/**
2918 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2919 *
2920 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2921 * @param pGVM The global (ring-0) VM structure.
2922 * @param pReq Pointer to the request packet.
2923 */
2924GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2925{
2926 /*
2927 * Validate input and pass it on.
2928 */
2929 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2930 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2931
2932 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2933}
2934
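/*
 * A minimal usage sketch (illustrative only) of filling in the request packet
 * validated above.  VMCPUSET_EMPTY and VMCPUSET_ADD are assumed to be the
 * standard set helpers from VBox/vmm/vmcpuset.h alongside the VMCPUSET_IS_PRESENT
 * macro used in this file, and the header magic initialization follows the usual
 * SUPVMMR0REQHDR pattern (assumed).
 */
#if 0
static int exampleWakeUpVCpu1PokeVCpu2(PGVM pGVM)
{
    GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;   /* assumed standard request header init */
    Req.Hdr.cbReq    = sizeof(Req);
    VMCPUSET_EMPTY(&Req.SleepSet);
    VMCPUSET_EMPTY(&Req.PokeSet);
    VMCPUSET_ADD(&Req.SleepSet, 1);            /* wake up a halted VCPU 1                  */
    VMCPUSET_ADD(&Req.PokeSet,  2);            /* poke VCPU 2 if it's executing guest code */
    return GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, &Req);
}
#endif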
2935
2936
2937/**
2938 * Poll the schedule to see if someone else should get a chance to run.
2939 *
2940 * This is a bit hackish and will not work too well if the machine is
2941 * under heavy load from non-VM processes.
2942 *
2943 * @returns VINF_SUCCESS if not yielded.
2944 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2945 * @param pGVM The global (ring-0) VM structure.
2946 * @param idCpu The Virtual CPU ID of the calling EMT.
2947 * @param fYield Whether to yield or not.
2948 * This is for when we're spinning in the halt loop.
2949 * @thread EMT(idCpu).
2950 */
2951GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2952{
2953 /*
2954 * Validate input.
2955 */
2956 PGVMM pGVMM;
2957 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2958 if (RT_SUCCESS(rc))
2959 {
2960 /*
2961 * We currently only implement helping with wake-ups (fYield = false), so don't
2962 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2963 */
2964 if (!fYield && pGVMM->fDoEarlyWakeUps)
2965 {
2966 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2967 pGVM->gvmm.s.StatsSched.cPollCalls++;
2968
2969 Assert(ASMGetFlags() & X86_EFL_IF);
2970 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2971
2972 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2973
2974 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2975 }
2976 /*
2977 * Not quite sure what we could do here...
2978 */
2979 else if (fYield)
2980 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2981 else
2982 rc = VINF_SUCCESS;
2983 }
2984
2985 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2986 return rc;
2987}
2988
2989
2990#ifdef GVMM_SCHED_WITH_PPT
2991/**
2992 * Timer callback for the periodic preemption timer.
2993 *
2994 * @param pTimer The timer handle.
2995 * @param pvUser Pointer to the per cpu structure.
2996 * @param iTick The current tick.
2997 */
2998static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2999{
3000 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
3001 NOREF(pTimer); NOREF(iTick);
3002
3003 /*
3004 * Termination check
3005 */
3006 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
3007 return;
3008
3009 /*
3010 * Do the house keeping.
3011 */
3012 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3013
3014 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
3015 {
3016 /*
3017 * Historicize the max frequency.
3018 */
3019 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
3020 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
3021 pCpu->Ppt.iTickHistorization = 0;
3022 pCpu->Ppt.uDesiredHz = 0;
3023
3024 /*
3025 * Check if the current timer frequency needs changing.
3026 */
3027 uint32_t uHistMaxHz = 0;
3028 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3029 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3030 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3031 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3032 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3033 else if (uHistMaxHz)
3034 {
3035 /*
3036 * Reprogram it.
3037 */
3038 pCpu->Ppt.cChanges++;
3039 pCpu->Ppt.iTickHistorization = 0;
3040 pCpu->Ppt.uTimerHz = uHistMaxHz;
3041 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3042 pCpu->Ppt.cNsInterval = cNsInterval;
3043 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3044 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3045 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3046 / cNsInterval;
3047 else
3048 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3049 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3050
3051 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3052 RTTimerChangeInterval(pTimer, cNsInterval);
3053 }
3054 else
3055 {
3056 /*
3057 * Stop it.
3058 */
3059 pCpu->Ppt.fStarted = false;
3060 pCpu->Ppt.uTimerHz = 0;
3061 pCpu->Ppt.cNsInterval = 0;
3062 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3063
3064 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3065 RTTimerStop(pTimer);
3066 }
3067 }
3068 else
3069 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3070}
3071#endif /* GVMM_SCHED_WITH_PPT */
3072
3073
3074/**
3075 * Updates the periodic preemption timer for the calling CPU.
3076 *
3077 * The caller must have disabled preemption!
3078 * The caller must check that the host can do high resolution timers.
3079 *
3080 * @param pGVM The global (ring-0) VM structure.
3081 * @param idHostCpu The current host CPU id.
3082 * @param uHz The desired frequency.
3083 */
3084GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3085{
3086 NOREF(pGVM);
3087#ifdef GVMM_SCHED_WITH_PPT
3088 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3089 Assert(RTTimerCanDoHighResolution());
3090
3091 /*
3092 * Resolve the per CPU data.
3093 */
3094 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3095 PGVMM pGVMM = g_pGVMM;
3096 if ( !RT_VALID_PTR(pGVMM)
3097 || pGVMM->u32Magic != GVMM_MAGIC)
3098 return;
3099 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3100 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3101 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3102 && pCpu->idCpu == idHostCpu,
3103 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3104
3105 /*
3106 * Check whether we need to do anything about the timer.
3107 * We have to be a little bit careful since we might be racing the timer
3108 * callback here.
3109 */
3110 if (uHz > 16384)
3111 uHz = 16384; /** @todo add a query method for this! */
3112 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3113 && uHz >= pCpu->Ppt.uMinHz
3114 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3115 {
3116 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3117
3118 pCpu->Ppt.uDesiredHz = uHz;
3119 uint32_t cNsInterval = 0;
3120 if (!pCpu->Ppt.fStarted)
3121 {
3122 pCpu->Ppt.cStarts++;
3123 pCpu->Ppt.fStarted = true;
3124 pCpu->Ppt.fStarting = true;
3125 pCpu->Ppt.iTickHistorization = 0;
3126 pCpu->Ppt.uTimerHz = uHz;
3127 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3128 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3129 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3130 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3131 / cNsInterval;
3132 else
3133 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3134 }
3135
3136 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3137
3138 if (cNsInterval)
3139 {
3140 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3141 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3142 AssertRC(rc);
3143
3144 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3145 if (RT_FAILURE(rc))
3146 pCpu->Ppt.fStarted = false;
3147 pCpu->Ppt.fStarting = false;
3148 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3149 }
3150 }
3151#else /* !GVMM_SCHED_WITH_PPT */
3152 NOREF(idHostCpu); NOREF(uHz);
3153#endif /* !GVMM_SCHED_WITH_PPT */
3154}
3155
3156
3157/**
3158 * Calls @a pfnCallback for each VM in the system.
3159 *
3160 * This will enumerate the VMs while holding the global VM used list lock in
3161 * shared mode. So, only suitable for simple work. If more expensive work
3162 * needs doing, a different approach must be taken as using this API would
3163 * otherwise block VM creation and destruction.
3164 *
3165 * @returns VBox status code.
3166 * @param pfnCallback The callback function.
3167 * @param pvUser User argument to the callback.
3168 */
3169GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3170{
3171 PGVMM pGVMM;
3172 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3173
3174 int rc = VINF_SUCCESS;
3175 GVMMR0_USED_SHARED_LOCK(pGVMM);
3176 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3177 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3178 i = pGVMM->aHandles[i].iNext, cLoops++)
3179 {
3180 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3181 if ( RT_VALID_PTR(pGVM)
3182 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3183 && pGVM->u32Magic == GVM_MAGIC)
3184 {
3185 rc = pfnCallback(pGVM, pvUser);
3186 if (rc != VINF_SUCCESS)
3187 break;
3188 }
3189
3190 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3191 }
3192 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3193 return rc;
3194}
3195
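/*
 * A minimal callback sketch (illustrative only): counting EMTs across all VMs.
 * The callback shape is inferred from the call site above (pGVM plus the user
 * argument, with anything other than VINF_SUCCESS stopping the enumeration).
 */
#if 0
static DECLCALLBACK(int) exampleCountEmtsCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;
    return VINF_SUCCESS;                    /* continue enumerating */
}

static uint32_t exampleCountAllEmts(void)
{
    uint32_t cEmts = 0;
    GVMMR0EnumVMs(exampleCountEmtsCallback, &cEmts);
    return cEmts;
}
#endif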
3196
3197/**
3198 * Retrieves the GVMM statistics visible to the caller.
3199 *
3200 * @returns VBox status code.
3201 *
3202 * @param pStats Where to put the statistics.
3203 * @param pSession The current session.
3204 * @param pGVM The GVM to obtain statistics for. Optional.
3205 */
3206GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3207{
3208 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3209
3210 /*
3211 * Validate input.
3212 */
3213 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3214 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3215 pStats->cVMs = 0; /* (crash before taking the sem...) */
3216
3217 /*
3218 * Take the lock and get the VM statistics.
3219 */
3220 PGVMM pGVMM;
3221 if (pGVM)
3222 {
3223 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3224 if (RT_FAILURE(rc))
3225 return rc;
3226 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3227
3228 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3229 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3230 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3231 while (iCpu-- > 0)
3232 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3233 }
3234 else
3235 {
3236 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3237 RT_ZERO(pStats->SchedVM);
3238 RT_ZERO(pStats->aVCpus);
3239
3240 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3241 AssertRCReturn(rc, rc);
3242 }
3243
3244 /*
3245 * Enumerate the VMs and add the ones visible to the statistics.
3246 */
3247 pStats->cVMs = 0;
3248 pStats->cEMTs = 0;
3249 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3250
3251 for (unsigned i = pGVMM->iUsedHead;
3252 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3253 i = pGVMM->aHandles[i].iNext)
3254 {
3255 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3256 void *pvObj = pGVMM->aHandles[i].pvObj;
3257 if ( RT_VALID_PTR(pvObj)
3258 && RT_VALID_PTR(pOtherGVM)
3259 && pOtherGVM->u32Magic == GVM_MAGIC
3260 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3261 {
3262 pStats->cVMs++;
3263 pStats->cEMTs += pOtherGVM->cCpus;
3264
3265 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3266 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3267 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3268 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3269 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3270
3271 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3272 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3273 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3274
3275 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3276 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3277
3278 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3279 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3280 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3281 }
3282 }
3283
3284 /*
3285 * Copy out the per host CPU statistics.
3286 */
3287 uint32_t iDstCpu = 0;
3288 uint32_t cSrcCpus = pGVMM->cHostCpus;
3289 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3290 {
3291 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3292 {
3293 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3294 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3295#ifdef GVMM_SCHED_WITH_PPT
3296 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3297 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3298 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3299 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3300#else
3301 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3302 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3303 pStats->aHostCpus[iDstCpu].cChanges = 0;
3304 pStats->aHostCpus[iDstCpu].cStarts = 0;
3305#endif
3306 iDstCpu++;
3307 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3308 break;
3309 }
3310 }
3311 pStats->cHostCpus = iDstCpu;
3312
3313 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3314
3315 return VINF_SUCCESS;
3316}
3317
3318
3319/**
3320 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3321 *
3322 * @returns see GVMMR0QueryStatistics.
3323 * @param pGVM The global (ring-0) VM structure. Optional.
3324 * @param pReq Pointer to the request packet.
3325 * @param pSession The current session.
3326 */
3327GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3328{
3329 /*
3330 * Validate input and pass it on.
3331 */
3332 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3333 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3334 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3335
3336 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3337}
3338
3339
3340/**
3341 * Resets the specified GVMM statistics.
3342 *
3343 * @returns VBox status code.
3344 *
3345 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
3346 * @param pSession The current session.
3347 * @param pGVM The GVM to reset statistics for. Optional.
3348 */
3349GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3350{
3351 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3352
3353 /*
3354 * Validate input.
3355 */
3356 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3357 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3358
3359 /*
3360 * Take the lock and get the VM statistics.
3361 */
3362 PGVMM pGVMM;
3363 if (pGVM)
3364 {
3365 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3366 if (RT_FAILURE(rc))
3367 return rc;
3368# define MAYBE_RESET_FIELD(field) \
3369 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3370 MAYBE_RESET_FIELD(cHaltCalls);
3371 MAYBE_RESET_FIELD(cHaltBlocking);
3372 MAYBE_RESET_FIELD(cHaltTimeouts);
3373 MAYBE_RESET_FIELD(cHaltNotBlocking);
3374 MAYBE_RESET_FIELD(cHaltWakeUps);
3375 MAYBE_RESET_FIELD(cWakeUpCalls);
3376 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3377 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3378 MAYBE_RESET_FIELD(cPokeCalls);
3379 MAYBE_RESET_FIELD(cPokeNotBusy);
3380 MAYBE_RESET_FIELD(cPollCalls);
3381 MAYBE_RESET_FIELD(cPollHalts);
3382 MAYBE_RESET_FIELD(cPollWakeUps);
3383# undef MAYBE_RESET_FIELD
3384 }
3385 else
3386 {
3387 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3388
3389 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3390 AssertRCReturn(rc, rc);
3391 }
3392
3393 /*
3394 * Enumerate the VMs and add the ones visible to the statistics.
3395 */
3396 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3397 {
3398 for (unsigned i = pGVMM->iUsedHead;
3399 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3400 i = pGVMM->aHandles[i].iNext)
3401 {
3402 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3403 void *pvObj = pGVMM->aHandles[i].pvObj;
3404 if ( RT_VALID_PTR(pvObj)
3405 && RT_VALID_PTR(pOtherGVM)
3406 && pOtherGVM->u32Magic == GVM_MAGIC
3407 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3408 {
3409# define MAYBE_RESET_FIELD(field) \
3410 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3411 MAYBE_RESET_FIELD(cHaltCalls);
3412 MAYBE_RESET_FIELD(cHaltBlocking);
3413 MAYBE_RESET_FIELD(cHaltTimeouts);
3414 MAYBE_RESET_FIELD(cHaltNotBlocking);
3415 MAYBE_RESET_FIELD(cHaltWakeUps);
3416 MAYBE_RESET_FIELD(cWakeUpCalls);
3417 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3418 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3419 MAYBE_RESET_FIELD(cPokeCalls);
3420 MAYBE_RESET_FIELD(cPokeNotBusy);
3421 MAYBE_RESET_FIELD(cPollCalls);
3422 MAYBE_RESET_FIELD(cPollHalts);
3423 MAYBE_RESET_FIELD(cPollWakeUps);
3424# undef MAYBE_RESET_FIELD
3425 }
3426 }
3427 }
3428
3429 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3430
3431 return VINF_SUCCESS;
3432}
3433
3434
3435/**
3436 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3437 *
3438 * @returns see GVMMR0ResetStatistics.
3439 * @param pGVM The global (ring-0) VM structure. Optional.
3440 * @param pReq Pointer to the request packet.
3441 * @param pSession The current session.
3442 */
3443GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3444{
3445 /*
3446 * Validate input and pass it on.
3447 */
3448 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3449 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3450 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3451
3452 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3453}
3454