VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp

Last change on this file was 109047, checked in by vboxsync, 10 days ago

SUPDrv: Make it build on linux.arm64. jiraref:VBP-1598

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 188.7 KB
Line 
1/* $Id: SUPDrvGip.cpp 109047 2025-04-22 09:39:50Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP LOG_GROUP_SUP_DRV
42#define SUPDRV_AGNOSTIC
43#include "SUPDrvInternal.h"
44#ifndef PAGE_SHIFT
45# include <iprt/param.h>
46#endif
47#include <iprt/asm.h>
48#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
49# include <iprt/asm-amd64-x86.h>
50#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
51# include <iprt/asm-arm.h>
52#else
53# error "Port me!"
54#endif
55#include <iprt/asm-math.h>
56#include <iprt/cpuset.h>
57#include <iprt/handletable.h>
58#include <iprt/mem.h>
59#include <iprt/mp.h>
60#include <iprt/power.h>
61#include <iprt/process.h>
62#include <iprt/semaphore.h>
63#include <iprt/spinlock.h>
64#include <iprt/thread.h>
65#include <iprt/uuid.h>
66#include <iprt/net.h>
67#include <iprt/crc.h>
68#include <iprt/string.h>
69#include <iprt/timer.h>
70#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
71# include <iprt/rand.h>
72# include <iprt/path.h>
73#endif
74#include <iprt/uint128.h>
75#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
76# include <iprt/x86.h>
77#elif defined(RT_ARCH_ARM64)
78# include <iprt/armv8.h>
79#endif
80
81#include <VBox/param.h>
82#include <VBox/log.h>
83#include <VBox/err.h>
84
85#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
86# include "dtrace/SUPDrv.h"
87#else
88/* ... */
89#endif
90
91
92/*********************************************************************************************************************************
93* Defined Constants And Macros *
94*********************************************************************************************************************************/
95/** The frequency by which we recalculate the u32UpdateHz and
96 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
97 *
98 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
99 */
100#define GIP_UPDATEHZ_RECALC_FREQ 0x800
101
102/** A reserved TSC value used for synchronization as well as measurement of
103 * TSC deltas. */
104#define GIP_TSC_DELTA_RSVD UINT64_MAX
105/** The number of TSC delta measurement loops in total (includes primer and
106 * read-time loops). */
107#define GIP_TSC_DELTA_LOOPS 96
108/** The number of cache primer loops. */
109#define GIP_TSC_DELTA_PRIMER_LOOPS 4
110/** The number of loops until we keep computing the minimum read time. */
111#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
112
113/** The TSC frequency refinement period in seconds.
114 * The timer fires after 200ms, then every second, this value just says when
115 * to stop it after that. */
116#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
117/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
118#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
119/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
120#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
121/** The TSC delta value for the initial GIP master - 0 in regular builds.
122 * To test the delta code this can be set to a non-zero value. */
123#if 0
124# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
125#else
126# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
127#endif
128
129AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
130AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
131
132/** @def VBOX_SVN_REV
133 * The makefile should define this if it can. */
134#ifndef VBOX_SVN_REV
135# define VBOX_SVN_REV 0
136#endif
137
138#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
139# define DO_NOT_START_GIP
140#endif
141
142
143/*********************************************************************************************************************************
144* Internal Functions *
145*********************************************************************************************************************************/
146static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
147static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
148static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
149static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
150static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
151#ifdef SUPDRV_USE_TSC_DELTA_THREAD
152static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
153static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
154static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
155#else
156static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt);
157static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
158#endif
159
160
161/*********************************************************************************************************************************
162* Global Variables *
163*********************************************************************************************************************************/
164DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
165SUPR0_EXPORT_SYMBOL(g_pSUPGlobalInfoPage);
166
167
168
169/*
170 *
171 * Misc Common GIP Code
172 * Misc Common GIP Code
173 * Misc Common GIP Code
174 *
175 *
176 */
177
178
179/**
180 * Finds the GIP CPU index corresponding to @a idCpu.
181 *
182 * @returns GIP CPU array index, UINT32_MAX if not found.
183 * @param pGip The GIP.
184 * @param idCpu The CPU ID.
185 */
186static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
187{
188 uint32_t i;
189 for (i = 0; i < pGip->cCpus; i++)
190 if (pGip->aCPUs[i].idCpu == idCpu)
191 return i;
192 return UINT32_MAX;
193}
194
195
196/**
197 * Gets the APIC ID using the best available method.
198 *
199 * @returns APIC ID.
200 * @param pGip The GIP, for SUPGIPGETCPU_XXX.
201 *
202 * @note APIC ID == CPU ID on non-x86 platforms.
203 */
204DECLINLINE(uint32_t) supdrvGipGetApicId(PSUPGLOBALINFOPAGE pGip)
205{
206#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
207 if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_0B)
208 return ASMGetApicIdExt0B();
209 if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_8000001E)
210 return ASMGetApicIdExt8000001E();
211 return ASMGetApicId();
212
213#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
214 RT_NOREF(pGip);
215 return (uint32_t)ASMGetThreadIdRoEL0();
216
217#elif defined(RT_ARCH_ARM64) && defined(RT_OS_LINUX)
218 return (uint32_t)RTMpCurSetIndex();
219
220#else
221# error "port me"
222#endif
223}
224
225
226/**
227 * Gets the APIC ID using the best available method, slow version.
228 *
229 * @note APIC ID == CPU ID on non-x86 platforms.
230 */
231static uint32_t supdrvGipGetApicIdSlow(void)
232{
233#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
234 uint32_t const idApic = ASMGetApicId();
235
236 /* The Intel CPU topology leaf: */
237 uint32_t uOther = ASMCpuId_EAX(0);
238 if (uOther >= UINT32_C(0xb) && RTX86IsValidStdRange(uOther))
239 {
240 uint32_t uEax = 0;
241 uint32_t uEbx = 0;
242 uint32_t uEcx = 0;
243 uint32_t uEdx = 0;
244# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
245 ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
246# else
247 ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
248# endif
249 if ((uEcx >> 8) != 0) /* level type != invalid */
250 {
251 if ((uEdx & 0xff) == idApic)
252 return uEdx;
253 AssertMsgFailed(("ASMGetApicIdExt0B=>%#x idApic=%#x\n", uEdx, idApic));
254 }
255 }
256
257 /* The AMD leaf: */
258 uOther = ASMCpuId_EAX(UINT32_C(0x80000000));
259 if (uOther >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uOther))
260 {
261 uOther = ASMGetApicIdExt8000001E();
262 if ((uOther & 0xff) == idApic)
263 return uOther;
264 AssertMsgFailed(("ASMGetApicIdExt8000001E=>%#x idApic=%#x\n", uOther, idApic));
265 }
266 return idApic;
267
268#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
269 return (uint32_t)ASMGetThreadIdRoEL0();
270
271#elif defined(RT_ARCH_ARM64) && defined(RT_OS_LINUX)
272 return (uint32_t)RTMpCurSetIndex();
273
274#else
275# error "port me"
276#endif
277}
278
279
280
281/*
282 *
283 * GIP Mapping and Unmapping Related Code.
284 * GIP Mapping and Unmapping Related Code.
285 * GIP Mapping and Unmapping Related Code.
286 *
287 *
288 */
289
290
291/**
292 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
293 * updating.
294 *
295 * @param pGipCpu The per CPU structure for this CPU.
296 * @param u64NanoTS The current time.
297 */
298static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
299{
300 /*
301 * Here we don't really care about applying the TSC delta. The re-initialization of this
302 * value is not relevant especially while (re)starting the GIP as the first few ones will
303 * be ignored anyway, see supdrvGipDoUpdateCpu().
304 */
305 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
306 pGipCpu->u64NanoTS = u64NanoTS;
307}
308
309
310/**
311 * Set the current TSC and NanoTS value for the CPU.
312 *
313 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
314 * @param pvUser1 Pointer to the ring-0 GIP mapping.
315 * @param pvUser2 Pointer to the variable holding the current time.
316 */
317static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
318{
319 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
320 uint32_t const idApic = supdrvGipGetApicId(pGip);
321 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
322 {
323 unsigned const iCpu = pGip->aiCpuFromApicId[idApic];
324
325 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
326 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
327 else
328 LogRelMax(64, ("supdrvGipReInitCpuCallback: iCpu=%#x out of bounds (%#zx, idApic=%#x)\n",
329 iCpu, RT_ELEMENTS(pGip->aiCpuFromApicId), idApic));
330 }
331 else
332 LogRelMax(64, ("supdrvGipReInitCpuCallback: idApic=%#x out of bounds (%#zx)\n",
333 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)));
334
335 NOREF(pvUser2);
336}
337
338
339/**
340 * State structure for supdrvGipDetectGetGipCpuCallback.
341 */
342typedef struct SUPDRVGIPDETECTGETCPU
343{
344 /** Bitmap of APIC IDs that has been seen (initialized to zero).
345 * Used to detect duplicate APIC IDs (paranoia). */
346 uint8_t volatile bmApicId[4096 / 8];
347 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
348 * initially). The callback clears the methods not detected. */
349 uint32_t volatile fSupported;
350 /** The first callback detecting any kind of range issues (initialized to
351 * NIL_RTCPUID). */
352 RTCPUID volatile idCpuProblem;
353} SUPDRVGIPDETECTGETCPU;
354/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
355typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
356
357
358/**
359 * Checks for alternative ways of getting the CPU ID.
360 *
361 * This also checks the APIC ID, CPU ID and CPU set index values against the
362 * GIP tables.
363 *
364 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
365 * @param pvUser1 Pointer to the state structure.
366 * @param pvUser2 Pointer to the GIP.
367 */
368static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
369{
370 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
371 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
372 int const iCpuSet = RTMpCpuIdToSetIndex(idCpu);
373 uint32_t fSupported = 0;
374 uint32_t idApic;
375#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
376 uint32_t uEax, uEbx, uEcx, uEdx;
377#else
378 uint32_t const uEax = 0; /* Dummy for LogRel. */
379#endif
380 NOREF(pGip);
381
382 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
383
384#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
385 /*
386 * Check that the CPU ID and CPU set index are interchangable.
387 */
388 if ((RTCPUID)iCpuSet == idCpu)
389 {
390 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
391 if ( iCpuSet >= 0
392 && iCpuSet < RTCPUSET_MAX_CPUS
393 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
394 {
395 PSUPGIPCPU pGipCpu = SUPGetGipCpuBySetIndex(pGip, iCpuSet);
396
397 /*
398 * Check whether the IDTR.LIMIT contains a CPU number.
399 */
400# ifdef RT_ARCH_X86
401 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
402# else
403 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
404# endif
405 RTIDTR Idtr;
406 ASMGetIDTR(&Idtr);
407 if (Idtr.cbIdt >= cbIdt)
408 {
409 uint32_t uTmp = Idtr.cbIdt - cbIdt;
410 uTmp &= RTCPUSET_MAX_CPUS - 1;
411 if (uTmp == idCpu)
412 {
413 RTIDTR Idtr2;
414 ASMGetIDTR(&Idtr2);
415 if (Idtr2.cbIdt == Idtr.cbIdt)
416 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
417 }
418 }
419
420 /*
421 * Check whether RDTSCP is an option.
422 */
423 if (ASMHasCpuId())
424 {
425 if ( RTX86IsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
426 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
427 {
428 uint32_t uAux;
429 ASMReadTscWithAux(&uAux);
430 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
431 {
432 ASMNopPause();
433 ASMReadTscWithAux(&uAux);
434 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
435 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
436 }
437
438 if (pGipCpu)
439 {
440 uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
441 if ( (uAux & UINT16_MAX) == uGroupedAux
442 && pGipCpu->iCpuGroupMember <= UINT8_MAX)
443 {
444 ASMNopPause();
445 ASMReadTscWithAux(&uAux);
446 if ((uAux & UINT16_MAX) == uGroupedAux)
447 fSupported |= SUPGIPGETCPU_RDTSCP_GROUP_IN_CH_NUMBER_IN_CL;
448 }
449 }
450 }
451 }
452 }
453 }
454
455 /*
456 * Check for extended APIC ID methods.
457 */
458 idApic = UINT32_MAX;
459 uEax = ASMCpuId_EAX(0);
460 if (uEax >= UINT32_C(0xb) && RTX86IsValidStdRange(uEax))
461 {
462# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
463 ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
464# else
465 ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
466# endif
467 if ((uEcx >> 8) != 0) /* level type != invalid */
468 {
469 if (RT_LIKELY( uEdx < RT_ELEMENTS(pGip->aiCpuFromApicId)
470 && !ASMBitTest(pState->bmApicId, uEdx)))
471 {
472 if (uEdx == ASMGetApicIdExt0B())
473 {
474 idApic = uEdx;
475 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_0B;
476 }
477 else
478 AssertMsgFailed(("%#x vs %#x\n", uEdx, ASMGetApicIdExt0B()));
479 }
480 }
481 }
482
483 uEax = ASMCpuId_EAX(UINT32_C(0x80000000));
484 if (uEax >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uEax))
485 {
486# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
487 ASMCpuId_Idx_ECX(UINT32_C(0x8000001e), 0, &uEax, &uEbx, &uEcx, &uEdx);
488# else
489 ASMCpuIdExSlow(UINT32_C(0x8000001e), 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
490# endif
491 if (uEax || uEbx || uEcx || uEdx)
492 {
493 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
494 && ( idApic == UINT32_MAX
495 || idApic == uEax)
496 && !ASMBitTest(pState->bmApicId, uEax)))
497 {
498 if (uEax == ASMGetApicIdExt8000001E())
499 {
500 idApic = uEax;
501 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_8000001E;
502 }
503 else
504 AssertMsgFailed(("%#x vs %#x\n", uEax, ASMGetApicIdExt8000001E()));
505 }
506 }
507 }
508
509#else /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
510 fSupported |= SUPGIPGETCPU_TPIDRRO_EL0;
511 idApic = supdrvGipGetApicIdSlow();
512#endif /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
513
514 /*
515 * Check that the APIC ID is unique.
516 */
517#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
518 uEax = ASMGetApicId();
519 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
520 && ( idApic == UINT32_MAX
521 || idApic == uEax)
522 && !ASMAtomicBitTestAndSet(pState->bmApicId, uEax)))
523 {
524 idApic = uEax;
525 fSupported |= SUPGIPGETCPU_APIC_ID;
526 }
527 else
528#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
529 if ( idApic == UINT32_MAX
530 || idApic >= RT_ELEMENTS(pGip->aiCpuFromApicId) /* parnaoia */
531 || ASMAtomicBitTestAndSet(pState->bmApicId, idApic))
532 {
533 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
534 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
535 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x/%#x - duplicate APIC ID.\n",
536 idCpu, iCpuSet, uEax, idApic));
537 }
538
539 /*
540 * Check that the iCpuSet is within the expected range.
541 */
542 if (RT_UNLIKELY( iCpuSet < 0
543 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
544 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
545 {
546 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
547 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
548 idCpu, iCpuSet, idApic));
549 }
550 else
551 {
552 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
553 if (RT_UNLIKELY(idCpu2 != idCpu))
554 {
555 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
556 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
557 idCpu, iCpuSet, idApic, idCpu2));
558 }
559 }
560
561 /*
562 * Update the supported feature mask before we return.
563 */
564 ASMAtomicAndU32(&pState->fSupported, fSupported);
565
566 NOREF(pvUser2);
567}
568
569
570/**
571 * Increase the timer freqency on hosts where this is possible (NT).
572 *
573 * The idea is that more interrupts is better for us... Also, it's better than
574 * we increase the timer frequence, because we might end up getting inaccurate
575 * callbacks if someone else does it.
576 *
577 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
578 */
579static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
580{
581 if (pDevExt->u32SystemTimerGranularityGrant == 0)
582 {
583 uint32_t u32SystemResolution;
584 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
585 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
586 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
587 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
588 )
589 {
590#if 0 /* def VBOX_STRICT - this is somehow triggers bogus assertions on windows 10 */
591 uint32_t u32After = RTTimerGetSystemGranularity();
592 AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
593#endif
594 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
595 }
596 }
597}
598
599
600/**
601 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
602 *
603 * @param pDevExt Clears u32SystemTimerGranularityGrant.
604 */
605static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
606{
607 if (pDevExt->u32SystemTimerGranularityGrant)
608 {
609 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
610 AssertRC(rc2);
611 pDevExt->u32SystemTimerGranularityGrant = 0;
612 }
613}
614
615
616/**
617 * Maps the GIP into userspace and/or get the physical address of the GIP.
618 *
619 * @returns IPRT status code.
620 * @param pSession Session to which the GIP mapping should belong.
621 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
622 * @param pHCPhysGip Where to store the physical address. (optional)
623 *
624 * @remark There is no reference counting on the mapping, so one call to this function
625 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
626 * and remove the session as a GIP user.
627 */
628SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
629{
630 int rc;
631 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
632 RTR3PTR pGipR3 = NIL_RTR3PTR;
633 RTHCPHYS HCPhys = NIL_RTHCPHYS;
634 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
635
636 /*
637 * Validate
638 */
639 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
640 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
641 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
642
643#ifdef SUPDRV_USE_MUTEX_FOR_GIP
644 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
645#else
646 RTSemFastMutexRequest(pDevExt->mtxGip);
647#endif
648 if (pDevExt->pGip)
649 {
650 /*
651 * Map it?
652 */
653 rc = VINF_SUCCESS;
654 if (ppGipR3)
655 {
656 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
657 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
658 RTMEM_PROT_READ, NIL_RTR0PROCESS);
659 if (RT_SUCCESS(rc))
660 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
661 }
662
663 /*
664 * Get physical address.
665 */
666 if (pHCPhysGip && RT_SUCCESS(rc))
667 HCPhys = pDevExt->HCPhysGip;
668
669 /*
670 * Reference globally.
671 */
672 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
673 {
674 pSession->fGipReferenced = 1;
675 pDevExt->cGipUsers++;
676 if (pDevExt->cGipUsers == 1)
677 {
678 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
679 uint64_t u64NanoTS;
680
681 /*
682 * GIP starts/resumes updating again. On windows we bump the
683 * host timer frequency to make sure we don't get stuck in guest
684 * mode and to get better timer (and possibly clock) accuracy.
685 */
686 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
687
688 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
689
690 /*
691 * document me
692 */
693 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
694 {
695 unsigned i;
696 for (i = 0; i < pGipR0->cCpus; i++)
697 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
698 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
699 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
700 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
701 }
702
703 /*
704 * document me
705 */
706 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
707 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
708 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
709 || RTMpGetOnlineCount() == 1)
710 supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
711 else
712 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
713
714 /*
715 * Detect alternative ways to figure the CPU ID in ring-3 and
716 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
717 * and CPU set indexes while we're at it.
718 */
719 if (RT_SUCCESS(rc))
720 {
721 PSUPDRVGIPDETECTGETCPU pDetectState = (PSUPDRVGIPDETECTGETCPU)RTMemTmpAllocZ(sizeof(*pDetectState));
722 if (pDetectState)
723 {
724 pDetectState->fSupported = UINT32_MAX;
725 pDetectState->idCpuProblem = NIL_RTCPUID;
726 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, pDetectState, pGipR0);
727 if (pDetectState->idCpuProblem == NIL_RTCPUID)
728 {
729 if ( pDetectState->fSupported != UINT32_MAX
730 && pDetectState->fSupported != 0)
731 {
732 if (pGipR0->fGetGipCpu != pDetectState->fSupported)
733 {
734 pGipR0->fGetGipCpu = pDetectState->fSupported;
735 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", pDetectState->fSupported));
736 }
737 }
738 else
739 {
740 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
741 pDetectState->fSupported));
742 rc = VERR_UNSUPPORTED_CPU;
743 }
744 }
745 else
746 {
747 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
748 pDetectState->idCpuProblem, pDetectState->idCpuProblem));
749 rc = VERR_INVALID_CPU_ID;
750 }
751 RTMemTmpFree(pDetectState);
752 }
753 else
754 rc = VERR_NO_TMP_MEMORY;
755 }
756
757 /*
758 * Start the GIP timer if all is well..
759 */
760 if (RT_SUCCESS(rc))
761 {
762#ifndef DO_NOT_START_GIP
763 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
764#endif
765 rc = VINF_SUCCESS;
766 }
767
768 /*
769 * Bail out on error.
770 */
771 if (RT_FAILURE(rc))
772 {
773 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
774 pDevExt->cGipUsers = 0;
775 pSession->fGipReferenced = 0;
776 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
777 {
778 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
779 if (RT_SUCCESS(rc2))
780 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
781 }
782 HCPhys = NIL_RTHCPHYS;
783 pGipR3 = NIL_RTR3PTR;
784 }
785 }
786 }
787 }
788 else
789 {
790 rc = VERR_GENERAL_FAILURE;
791 Log(("SUPR0GipMap: GIP is not available!\n"));
792 }
793#ifdef SUPDRV_USE_MUTEX_FOR_GIP
794 RTSemMutexRelease(pDevExt->mtxGip);
795#else
796 RTSemFastMutexRelease(pDevExt->mtxGip);
797#endif
798
799 /*
800 * Write returns.
801 */
802 if (pHCPhysGip)
803 *pHCPhysGip = HCPhys;
804 if (ppGipR3)
805 *ppGipR3 = pGipR3;
806
807#ifdef DEBUG_DARWIN_GIP
808 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
809#else
810 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
811#endif
812 return rc;
813}
814SUPR0_EXPORT_SYMBOL(SUPR0GipMap);
815
816
817/**
818 * Unmaps any user mapping of the GIP and terminates all GIP access
819 * from this session.
820 *
821 * @returns IPRT status code.
822 * @param pSession Session to which the GIP mapping should belong.
823 */
824SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
825{
826 int rc = VINF_SUCCESS;
827 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
828#ifdef DEBUG_DARWIN_GIP
829 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
830 pSession,
831 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
832 pSession->GipMapObjR3));
833#else
834 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
835#endif
836 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
837
838#ifdef SUPDRV_USE_MUTEX_FOR_GIP
839 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
840#else
841 RTSemFastMutexRequest(pDevExt->mtxGip);
842#endif
843
844 /*
845 * GIP test-mode session?
846 */
847 if ( pSession->fGipTestMode
848 && pDevExt->pGip)
849 {
850 supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
851 Assert(!pSession->fGipTestMode);
852 }
853
854 /*
855 * Unmap anything?
856 */
857 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
858 {
859 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
860 AssertRC(rc);
861 if (RT_SUCCESS(rc))
862 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
863 }
864
865 /*
866 * Dereference global GIP.
867 */
868 if (pSession->fGipReferenced && !rc)
869 {
870 pSession->fGipReferenced = 0;
871 if ( pDevExt->cGipUsers > 0
872 && !--pDevExt->cGipUsers)
873 {
874 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
875#ifndef DO_NOT_START_GIP
876 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
877#endif
878 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
879 }
880 }
881
882#ifdef SUPDRV_USE_MUTEX_FOR_GIP
883 RTSemMutexRelease(pDevExt->mtxGip);
884#else
885 RTSemFastMutexRelease(pDevExt->mtxGip);
886#endif
887
888 return rc;
889}
890SUPR0_EXPORT_SYMBOL(SUPR0GipUnmap);
891
892
893/**
894 * Gets the GIP pointer.
895 *
896 * @returns Pointer to the GIP or NULL.
897 */
898SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
899{
900 return g_pSUPGlobalInfoPage;
901}
902
903
904
905
906
907/*
908 *
909 *
910 * GIP Initialization, Termination and CPU Offline / Online Related Code.
911 * GIP Initialization, Termination and CPU Offline / Online Related Code.
912 * GIP Initialization, Termination and CPU Offline / Online Related Code.
913 *
914 *
915 */
916
917/**
918 * Used by supdrvGipInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
919 * to update the TSC frequency related GIP variables.
920 *
921 * @param pGip The GIP.
922 * @param nsElapsed The number of nanoseconds elapsed.
923 * @param cElapsedTscTicks The corresponding number of TSC ticks.
924 * @param iTick The tick number for debugging.
925 */
926static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
927{
928 /*
929 * Calculate the frequency.
930 */
931 uint64_t uCpuHz;
932 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
933 && nsElapsed < UINT32_MAX)
934 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
935 else
936 {
937 RTUINT128U CpuHz, Tmp, Divisor;
938 CpuHz.s.Lo = CpuHz.s.Hi = 0;
939 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
940 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
941 uCpuHz = CpuHz.s.Lo;
942 }
943
944 /*
945 * Update the GIP.
946 */
947 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
948 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
949 {
950 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
951
952 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
953 if (iTick + 1 < pGip->cCpus)
954 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
955 }
956}
957
958
959/**
960 * Timer callback function for TSC frequency refinement in invariant GIP mode.
961 *
962 * This is started during driver init and fires once
963 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
964 *
965 * @param pTimer The timer.
966 * @param pvUser Opaque pointer to the device instance data.
967 * @param iTick The timer tick.
968 */
969static DECLCALLBACK(void) supdrvGipInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
970{
971 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
972 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
973 RTCPUID idCpu;
974 uint64_t cNsElapsed;
975 uint64_t cTscTicksElapsed;
976 uint64_t nsNow;
977 uint64_t uTsc;
978 RTCCUINTREG fEFlags;
979
980 /* Paranoia. */
981 AssertReturnVoid(pGip);
982 AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
983
984 /*
985 * If we got a power event, stop the refinement process.
986 */
987 if (pDevExt->fInvTscRefinePowerEvent)
988 {
989 int rc = RTTimerStop(pTimer); AssertRC(rc);
990 return;
991 }
992
993 /*
994 * Read the TSC and time, noting which CPU we are on.
995 *
996 * Don't bother spinning until RTTimeSystemNanoTS changes, since on
997 * systems where it matters we're in a context where we cannot waste that
998 * much time (DPC watchdog, called from clock interrupt).
999 */
1000 fEFlags = ASMIntDisableFlags();
1001 uTsc = ASMReadTSC();
1002 nsNow = RTTimeSystemNanoTS();
1003 idCpu = RTMpCpuId();
1004 ASMSetFlags(fEFlags);
1005
1006 cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
1007 cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;
1008
1009 /*
1010 * If the above measurement was taken on a different CPU than the one we
1011 * started the process on, cTscTicksElapsed will need to be adjusted with
1012 * the TSC deltas of both the CPUs.
1013 *
1014 * We ASSUME that the delta calculation process takes less time than the
1015 * TSC frequency refinement timer. If it doesn't, we'll complain and
1016 * drop the frequency refinement.
1017 *
1018 * Note! We cannot entirely trust enmUseTscDelta here because it's
1019 * downgraded after each delta calculation.
1020 */
1021 if ( idCpu != pDevExt->idCpuInvarTscRefine
1022 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1023 {
1024 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
1025 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
1026 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1027 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1028 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1029 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1030 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1031 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1032 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1033 {
1034 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1035 {
1036 /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
1037 cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
1038 }
1039 }
1040 /*
1041 * Allow 5 times the refinement period to elapse before we give up on the TSC delta
1042 * calculations.
1043 */
1044 else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
1045 {
1046 SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
1047 (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
1048 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1049 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1050 int rc = RTTimerStop(pTimer); AssertRC(rc);
1051 return;
1052 }
1053 }
1054
1055 /*
1056 * Calculate and update the CPU frequency variables in GIP.
1057 *
1058 * If there is a GIP user already and we've already refined the frequency
1059 * a couple of times, don't update it as we want a stable frequency value
1060 * for all VMs.
1061 */
1062 if ( pDevExt->cGipUsers == 0
1063 || cNsElapsed < RT_NS_1SEC * 2)
1064 {
1065 supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);
1066
1067 /*
1068 * Stop the timer once we've reached the defined refinement period.
1069 */
1070 if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
1071 {
1072 int rc = RTTimerStop(pTimer);
1073 AssertRC(rc);
1074 }
1075 }
1076 else
1077 {
1078 int rc = RTTimerStop(pTimer);
1079 AssertRC(rc);
1080 }
1081}
1082
1083
1084/**
1085 * @callback_method_impl{FNRTPOWERNOTIFICATION}
1086 */
1087static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
1088{
1089 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1090 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1091
1092 /*
1093 * If the TSC frequency refinement timer is running, we need to cancel it so it
1094 * doesn't screw up the frequency after a long suspend.
1095 *
1096 * Recalculate all TSC-deltas on host resume as it may have changed, seen
1097 * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
1098 */
1099 if (enmEvent == RTPOWEREVENT_RESUME)
1100 {
1101 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1102 if ( RT_LIKELY(pGip)
1103 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
1104 && !supdrvOSAreCpusOfflinedOnSuspend())
1105 {
1106#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1107 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1108#else
1109 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
1110 supdrvTscMeasureInitialDeltas(pDevExt);
1111#endif
1112 }
1113 }
1114 else if (enmEvent == RTPOWEREVENT_SUSPEND)
1115 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1116}
1117
1118
1119/**
1120 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
1121 *
1122 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
1123 * the CPU may change the TSC frequence between now and when the timer fires
1124 * (supdrvInitAsyncRefineTscTimer).
1125 *
1126 * @param pDevExt Pointer to the device instance data.
1127 */
1128static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
1129{
1130 uint64_t u64NanoTS;
1131 RTCCUINTREG fEFlags;
1132 int rc;
1133
1134 /*
1135 * Register a power management callback.
1136 */
1137 pDevExt->fInvTscRefinePowerEvent = false;
1138 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
1139 AssertRC(rc); /* ignore */
1140
1141 /*
1142 * Record the TSC and NanoTS as the starting anchor point for refinement
1143 * of the TSC. We try get as close to a clock tick as possible on systems
1144 * which does not provide high resolution time.
1145 */
1146 u64NanoTS = RTTimeSystemNanoTS();
1147 while (RTTimeSystemNanoTS() == u64NanoTS)
1148 ASMNopPause();
1149
1150 fEFlags = ASMIntDisableFlags();
1151 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
1152 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
1153 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
1154 ASMSetFlags(fEFlags);
1155
1156 /*
1157 * Create a timer that runs on the same CPU so we won't have a depencency
1158 * on the TSC-delta and can run in parallel to it. On systems that does not
1159 * implement CPU specific timers we'll apply deltas in the timer callback,
1160 * just like we do for CPUs going offline.
1161 *
1162 * The longer the refinement interval the better the accuracy, at least in
1163 * theory. If it's too long though, ring-3 may already be starting its
1164 * first VMs before we're done. On most systems we will be loading the
1165 * support driver during boot and VMs won't be started for a while yet,
1166 * it is really only a problem during development (especially with
1167 * on-demand driver starting on windows).
1168 *
1169 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
1170 * to calculate the frequency during driver loading, the timer is set
1171 * to fire after 200 ms the first time. It will then reschedule itself
1172 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
1173 * reached or it notices that there is a user land client with GIP
1174 * mapped (we want a stable frequency for all VMs).
1175 */
1176 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
1177 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
1178 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1179 if (RT_SUCCESS(rc))
1180 {
1181 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1182 if (RT_SUCCESS(rc))
1183 return;
1184 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1185 }
1186
1187 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
1188 {
1189 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
1190 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1191 if (RT_SUCCESS(rc))
1192 {
1193 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1194 if (RT_SUCCESS(rc))
1195 return;
1196 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1197 }
1198 }
1199
1200 pDevExt->pInvarTscRefineTimer = NULL;
1201 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
1202}
1203
1204
1205/**
1206 * @callback_method_impl{PFNRTMPWORKER,
1207 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
1208 * the measurements on.}
1209 */
1210static DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1211{
1212 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1213 uint64_t *puTscStop = (uint64_t *)pvUser1;
1214 uint64_t *pnsStop = (uint64_t *)pvUser2;
1215 RT_NOREF1(idCpu);
1216
1217 *puTscStop = ASMReadTSC();
1218 *pnsStop = RTTimeSystemNanoTS();
1219
1220 ASMSetFlags(fEFlags);
1221}
1222
1223
1224/**
1225 * Measures the TSC frequency of the system.
1226 *
1227 * The TSC frequency can vary on systems which are not reported as invariant.
1228 * On such systems the object of this function is to find out what the nominal,
1229 * maximum TSC frequency under 'normal' CPU operation.
1230 *
1231 * @returns VBox status code.
1232 * @param pGip Pointer to the GIP.
1233 * @param fRough Set if we're doing the rough calculation that the
1234 * TSC measuring code needs, where accuracy isn't all
1235 * that important (too high is better than too low).
1236 * When clear we try for best accuracy that we can
1237 * achieve in reasonably short time.
1238 */
1239static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
1240{
1241 uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
1242 int cTriesLeft = fRough ? 4 : 2;
1243 while (cTriesLeft-- > 0)
1244 {
1245 RTCCUINTREG fEFlags;
1246 uint64_t nsStart;
1247 uint64_t nsStop;
1248 uint64_t uTscStart;
1249 uint64_t uTscStop;
1250 RTCPUID idCpuStart;
1251 RTCPUID idCpuStop;
1252
1253 /*
1254 * Synchronize with the host OS clock tick on systems without high
1255 * resolution time API (older Windows version for example).
1256 */
1257 nsStart = RTTimeSystemNanoTS();
1258 while (RTTimeSystemNanoTS() == nsStart)
1259 ASMNopPause();
1260
1261 /*
1262 * Read the TSC and current time, noting which CPU we're on.
1263 */
1264 fEFlags = ASMIntDisableFlags();
1265 uTscStart = ASMReadTSC();
1266 nsStart = RTTimeSystemNanoTS();
1267 idCpuStart = RTMpCpuId();
1268 ASMSetFlags(fEFlags);
1269
1270 /*
1271 * Delay for a while.
1272 */
1273 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1274 {
1275 /*
1276 * Sleep-wait since the TSC frequency is constant, it eases host load.
1277 * Shorter interval produces more variance in the frequency (esp. Windows).
1278 */
1279 uint64_t msElapsed = 0;
1280 uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
1281 / RT_NS_1MS;
1282 do
1283 {
1284 RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
1285 nsStop = RTTimeSystemNanoTS();
1286 msElapsed = (nsStop - nsStart) / RT_NS_1MS;
1287 } while (msElapsed < msDelay);
1288
1289 while (RTTimeSystemNanoTS() == nsStop)
1290 ASMNopPause();
1291 }
1292 else
1293 {
1294 /*
1295 * Busy-wait keeping the frequency up.
1296 */
1297 do
1298 {
1299 ASMNopPause();
1300 nsStop = RTTimeSystemNanoTS();
1301 } while (nsStop - nsStart < RT_NS_100MS);
1302 }
1303
1304 /*
1305 * Read the TSC and time again.
1306 */
1307 fEFlags = ASMIntDisableFlags();
1308 uTscStop = ASMReadTSC();
1309 nsStop = RTTimeSystemNanoTS();
1310 idCpuStop = RTMpCpuId();
1311 ASMSetFlags(fEFlags);
1312
1313 /*
1314 * If the CPU changes, things get a bit complicated and what we
1315 * can get away with depends on the GIP mode / TSC reliability.
1316 */
1317 if (idCpuStop != idCpuStart)
1318 {
1319 bool fDoXCall = false;
1320
1321 /*
1322 * Synchronous TSC mode: we're probably fine as it's unlikely
1323 * that we were rescheduled because of TSC throttling or power
1324 * management reasons, so just go ahead.
1325 */
1326 if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
1327 {
1328 /* Probably ok, maybe we should retry once?. */
1329 Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
1330 }
1331 /*
1332 * If we're just doing the rough measurement, do the cross call and
1333 * get on with things (we don't have deltas!).
1334 */
1335 else if (fRough)
1336 fDoXCall = true;
1337 /*
1338 * Invariant TSC mode: It doesn't matter if we have delta available
1339 * for both CPUs. That is not something we can assume at this point.
1340 *
1341 * Note! We cannot necessarily trust enmUseTscDelta here because it's
1342 * downgraded after each delta calculation and the delta
1343 * calculations may not be complete yet.
1344 */
1345 else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1346 {
1347/** @todo This section of code is never reached atm, consider dropping it later on... */
1348 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1349 {
1350 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
1351 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
1352 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1353 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1354 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1355 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1356 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1357 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1358 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1359 {
1360 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1361 {
1362 uTscStart -= iStartTscDelta;
1363 uTscStop -= iStopTscDelta;
1364 }
1365 }
1366 /*
1367 * Invalid CPU indexes are not caused by online/offline races, so
1368 * we have to trigger driver load failure if that happens as GIP
1369 * and IPRT assumptions are busted on this system.
1370 */
1371 else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
1372 {
1373 SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
1374 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1375 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1376 return VERR_INVALID_CPU_INDEX;
1377 }
1378 /*
1379 * No valid deltas. We retry, if we're on our last retry
1380 * we do the cross call instead just to get a result. The
1381 * frequency will be refined in a few seconds anyway.
1382 */
1383 else if (cTriesLeft > 0)
1384 continue;
1385 else
1386 fDoXCall = true;
1387 }
1388 }
1389 /*
1390 * Asynchronous TSC mode: This is bad, as the reason we usually
1391 * use this mode is to deal with variable TSC frequencies and
1392 * deltas. So, we need to get the TSC from the same CPU as
1393 * started it, we also need to keep that CPU busy. So, retry
1394 * and fall back to the cross call on the last attempt.
1395 */
1396 else
1397 {
1398 Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
1399 if (cTriesLeft > 0)
1400 continue;
1401 fDoXCall = true;
1402 }
1403
1404 if (fDoXCall)
1405 {
1406 /*
1407 * Try read the TSC and timestamp on the start CPU.
1408 */
1409 int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
1410 if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
1411 continue;
1412 }
1413 }
1414
1415 /*
1416 * Calculate the TSC frequency and update it (shared with the refinement timer).
1417 */
1418 supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
1419 return VINF_SUCCESS;
1420 }
1421
1422 Assert(!fRough);
1423 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1424}
1425
1426
1427/**
1428 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1429 *
1430 * @returns Index of the CPU in the cache set.
1431 * @param pGip The GIP.
1432 * @param idCpu The CPU ID.
1433 */
1434static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1435{
1436 uint32_t i, cTries;
1437
1438 /*
1439 * ASSUMES that CPU IDs are constant.
1440 */
1441 for (i = 0; i < pGip->cCpus; i++)
1442 if (pGip->aCPUs[i].idCpu == idCpu)
1443 return i;
1444
1445 cTries = 0;
1446 do
1447 {
1448 for (i = 0; i < pGip->cCpus; i++)
1449 {
1450 bool fRc;
1451 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1452 if (fRc)
1453 return i;
1454 }
1455 } while (cTries++ < 32);
1456 AssertReleaseFailed();
1457 return i - 1;
1458}
1459
1460
1461/**
1462 * The calling CPU should be accounted as online, update GIP accordingly.
1463 *
1464 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1465 *
1466 * @param pDevExt The device extension.
1467 * @param idCpu The CPU ID.
1468 */
1469static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1470{
1471 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1472 int iCpuSet = 0;
1473 uint32_t idApic;
1474 uint32_t i = 0;
1475 uint64_t u64NanoTS = 0;
1476
1477 AssertPtrReturnVoid(pGip);
1478 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1479 AssertRelease(idCpu == RTMpCpuId());
1480 Assert(pGip->cPossibleCpus == RTMpGetCount());
1481
1482 /*
1483 * Do this behind a spinlock with interrupts disabled as this can fire
1484 * on all CPUs simultaneously, see @bugref{6110}.
1485 */
1486 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1487
1488 /*
1489 * Update the globals.
1490 */
1491 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1492 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1493 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1494 if (iCpuSet >= 0)
1495 {
1496 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1497 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1498 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1499 }
1500
1501 /*
1502 * Update the entry.
1503 */
1504 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1505 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1506
1507 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1508
1509 idApic = supdrvGipGetApicIdSlow();
1510 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1511 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1512 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1513
1514 pGip->aCPUs[i].iCpuGroup = 0;
1515 pGip->aCPUs[i].iCpuGroupMember = iCpuSet;
1516#ifdef RT_OS_WINDOWS
1517 supdrvOSGipInitGroupBitsForCpu(pDevExt, pGip, &pGip->aCPUs[i]);
1518#endif
1519
1520 /*
1521 * Update the APIC ID and CPU set index mappings.
1522 */
1523 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
1524 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1525 else
1526 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: idApic=%#x is out of bounds (%#zx, i=%u, iCpuSet=%d)\n",
1527 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId), i, iCpuSet));
1528 if ((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
1529 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1530 else
1531 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: iCpuSet=%d is out of bounds (%#zx, i=%u, idApic=%d)\n",
1532 iCpuSet, RT_ELEMENTS(pGip->aiCpuFromApicId), i, idApic));
1533
1534 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1535 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1536
1537 /* Update the Mp online/offline counter. */
1538 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1539
1540 /* Commit it. */
1541 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1542
1543 RTSpinlockRelease(pDevExt->hGipSpinlock);
1544}
1545
1546
1547/**
1548 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1549 *
1550 * @param idCpu The CPU ID we are running on.
1551 * @param pvUser1 Opaque pointer to the device instance data.
1552 * @param pvUser2 Not used.
1553 */
1554static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1555{
1556 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1557 NOREF(pvUser2);
1558 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1559}
1560
1561
1562/**
1563 * The CPU should be accounted as offline, update the GIP accordingly.
1564 *
1565 * This is used by supdrvGipMpEvent.
1566 *
1567 * @param pDevExt The device extension.
1568 * @param idCpu The CPU ID.
1569 */
1570static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1571{
1572 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1573 int iCpuSet;
1574 unsigned i;
1575
1576 AssertPtrReturnVoid(pGip);
1577 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1578
1579 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1580 AssertReturnVoid(iCpuSet >= 0);
1581
1582 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1583 AssertReturnVoid(i < pGip->cCpus);
1584 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1585
1586 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1587 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1588
1589 /* Update the Mp online/offline counter. */
1590 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1591
1592 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1593 {
1594 /* Reset the TSC delta, we will recalculate it lazily. */
1595 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1596 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1597 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1598 }
1599
1600 /* Commit it. */
1601 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1602
1603 RTSpinlockRelease(pDevExt->hGipSpinlock);
1604}
1605
1606
1607/**
1608 * Multiprocessor event notification callback.
1609 *
1610 * This is used to make sure that the GIP master gets passed on to
1611 * another CPU. It also updates the associated CPU data.
1612 *
1613 * @param enmEvent The event.
1614 * @param idCpu The cpu it applies to.
1615 * @param pvUser Pointer to the device extension.
1616 */
1617static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1618{
1619 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1620 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1621
1622 if (pGip)
1623 {
1624 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1625 switch (enmEvent)
1626 {
1627 case RTMPEVENT_ONLINE:
1628 {
1629 RTThreadPreemptDisable(&PreemptState);
1630 if (idCpu == RTMpCpuId())
1631 {
1632 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1633 RTThreadPreemptRestore(&PreemptState);
1634 }
1635 else
1636 {
1637 RTThreadPreemptRestore(&PreemptState);
1638 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1639 }
1640
1641 /*
1642 * Recompute TSC-delta for the newly online'd CPU.
1643 */
1644 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1645 {
1646#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1647 supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
1648#else
1649 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1650 supdrvTscMeasureDeltaOne(pDevExt, iCpu);
1651#endif
1652 }
1653 break;
1654 }
1655
1656 case RTMPEVENT_OFFLINE:
1657 supdrvGipMpEventOffline(pDevExt, idCpu);
1658 break;
1659 }
1660 }
1661
1662 /*
1663 * Make sure there is a master GIP.
1664 */
1665 if (enmEvent == RTMPEVENT_OFFLINE)
1666 {
1667 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1668 if (idGipMaster == idCpu)
1669 {
1670 /*
1671 * The GIP master is going offline, find a new one.
1672 */
1673 bool fIgnored;
1674 unsigned i;
1675 RTCPUID idNewGipMaster = NIL_RTCPUID;
1676 RTCPUSET OnlineCpus;
1677 RTMpGetOnlineSet(&OnlineCpus);
1678
1679 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1680 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1681 {
1682 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1683 if (idCurCpu != idGipMaster)
1684 {
1685 idNewGipMaster = idCurCpu;
1686 break;
1687 }
1688 }
1689
1690 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1691 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1692 NOREF(fIgnored);
1693 }
1694 }
1695}
1696
1697
1698/**
1699 * On CPU initialization callback for RTMpOnAll.
1700 *
1701 * @param idCpu The CPU ID.
1702 * @param pvUser1 The device extension.
1703 * @param pvUser2 The GIP.
1704 */
1705static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1706{
1707 /* This is good enough, even though it will update some of the globals a
1708 bit to much. */
1709 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1710 NOREF(pvUser2);
1711}
1712
1713#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1714
1715/**
1716 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1717 *
1718 * @param idCpu Ignored.
1719 * @param pvUser1 Where to put the TSC.
1720 * @param pvUser2 Ignored.
1721 */
1722static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1723{
1724 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1725 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1726 RT_NOREF2(idCpu, pvUser2);
1727}
1728
1729
1730/**
1731 * Determine if Async GIP mode is required because of TSC drift.
1732 *
1733 * When using the default/normal timer code it is essential that the time stamp counter
1734 * (TSC) runs never backwards, that is, a read operation to the counter should return
1735 * a bigger value than any previous read operation. This is guaranteed by the latest
1736 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1737 * case we have to choose the asynchronous timer mode.
1738 *
1739 * @param poffMin Pointer to the determined difference between different
1740 * cores (optional, can be NULL).
1741 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1742 */
1743static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1744{
1745 /*
1746 * Just iterate all the cpus 8 times and make sure that the TSC is
1747 * ever increasing. We don't bother taking TSC rollover into account.
1748 */
1749 int iEndCpu = RTMpGetArraySize();
1750 int iCpu;
1751 int cLoops = 8;
1752 bool fAsync = false;
1753 int rc = VINF_SUCCESS;
1754 uint64_t offMax = 0;
1755 uint64_t offMin = ~(uint64_t)0;
1756 uint64_t PrevTsc = ASMReadTSC();
1757
1758 while (cLoops-- > 0)
1759 {
1760 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1761 {
1762 uint64_t CurTsc;
1763 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1764 &CurTsc, (void *)(uintptr_t)iCpu);
1765 if (RT_SUCCESS(rc))
1766 {
1767 if (CurTsc <= PrevTsc)
1768 {
1769 fAsync = true;
1770 offMin = offMax = PrevTsc - CurTsc;
1771 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1772 iCpu, cLoops, CurTsc, PrevTsc));
1773 break;
1774 }
1775
1776 /* Gather statistics (except the first time). */
1777 if (iCpu != 0 || cLoops != 7)
1778 {
1779 uint64_t off = CurTsc - PrevTsc;
1780 if (off < offMin)
1781 offMin = off;
1782 if (off > offMax)
1783 offMax = off;
1784 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1785 }
1786
1787 /* Next */
1788 PrevTsc = CurTsc;
1789 }
1790 else if (rc == VERR_NOT_SUPPORTED)
1791 break;
1792 else
1793 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1794 }
1795
1796 /* broke out of the loop. */
1797 if (iCpu < iEndCpu)
1798 break;
1799 }
1800
1801 if (poffMin)
1802 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1803 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1804 fAsync, iEndCpu, rc, offMin, offMax));
1805# if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1806 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1807# endif
1808 return fAsync;
1809}
1810
1811#endif /* RT_ARCH_AMD64 || RT_ARCH_X86 */
1812
1813
1814/**
1815 * supdrvGipInit() worker that determines the GIP TSC mode.
1816 *
1817 * @returns The most suitable TSC mode.
1818 * @param pDevExt Pointer to the device instance data.
1819 */
1820static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1821{
1822#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1823 uint64_t u64DiffCoresIgnored;
1824 uint32_t uEAX, uEBX, uECX, uEDX;
1825
1826 /*
1827 * Establish whether the CPU advertises TSC as invariant, we need that in
1828 * a couple of places below.
1829 */
1830 bool fInvariantTsc = false;
1831 if (ASMHasCpuId())
1832 {
1833 uEAX = ASMCpuId_EAX(0x80000000);
1834 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1835 {
1836 uEDX = ASMCpuId_EDX(0x80000007);
1837 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1838 fInvariantTsc = true;
1839 }
1840 }
1841
1842 /*
1843 * On single CPU systems, we don't need to consider ASYNC mode.
1844 */
1845 if (RTMpGetCount() <= 1)
1846 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1847
1848 /*
1849 * Allow the user and/or OS specific bits to force async mode.
1850 */
1851 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1852 return SUPGIPMODE_ASYNC_TSC;
1853
1854 /*
1855 * Use invariant mode if the CPU says TSC is invariant.
1856 */
1857 if (fInvariantTsc)
1858 return SUPGIPMODE_INVARIANT_TSC;
1859
1860 /*
1861 * TSC is not invariant and we're on SMP, this presents two problems:
1862 *
1863 * (1) There might be a skew between the CPU, so that cpu0
1864 * returns a TSC that is slightly different from cpu1.
1865 * This screw may be due to (2), bad TSC initialization
1866 * or slightly different TSC rates.
1867 *
1868 * (2) Power management (and other things) may cause the TSC
1869 * to run at a non-constant speed, and cause the speed
1870 * to be different on the cpus. This will result in (1).
1871 *
1872 * If any of the above is detected, we will have to use ASYNC mode.
1873 */
1874 /* (1). Try check for current differences between the cpus. */
1875 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1876 return SUPGIPMODE_ASYNC_TSC;
1877
1878 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1879 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1880 if ( RTX86IsValidStdRange(uEAX)
1881 && (RTX86IsAmdCpu(uEBX, uECX, uEDX) || RTX86IsHygonCpu(uEBX, uECX, uEDX)) )
1882 {
1883 /* Check for APM support. */
1884 uEAX = ASMCpuId_EAX(0x80000000);
1885 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1886 {
1887 uEDX = ASMCpuId_EDX(0x80000007);
1888 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1889 return SUPGIPMODE_ASYNC_TSC;
1890 }
1891 }
1892
1893 return SUPGIPMODE_SYNC_TSC;
1894
1895#elif defined(RT_ARCH_ARM64)
1896 RT_NOREF(pDevExt);
1897 return SUPGIPMODE_INVARIANT_TSC;
1898
1899#else
1900# error "Port me"
1901#endif
1902}
1903
1904
1905/**
1906 * Initializes per-CPU GIP information.
1907 *
1908 * @param pGip Pointer to the GIP.
1909 * @param pCpu Pointer to which GIP CPU to initialize.
1910 * @param u64NanoTS The current nanosecond timestamp.
1911 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1912 */
1913static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1914{
1915 pCpu->u32TransactionId = 2;
1916 pCpu->u64NanoTS = u64NanoTS;
1917 pCpu->u64TSC = ASMReadTSC();
1918 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1919 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1920
1921 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1922 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1923 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1924 ASMAtomicWriteU16(&pCpu->iCpuGroup, 0);
1925 ASMAtomicWriteU16(&pCpu->iCpuGroupMember, UINT16_MAX);
1926 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1927 ASMAtomicWriteU32(&pCpu->iReservedForNumaNode, 0);
1928
1929 /*
1930 * The first time we're called, we don't have a CPU frequency handy,
1931 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1932 * called again and at that point we have a more plausible CPU frequency
1933 * value handy. The frequency history will also be adjusted again on
1934 * the 2nd timer callout (maybe we can skip that now?).
1935 */
1936 if (!uCpuHz)
1937 {
1938 pCpu->u64CpuHz = _4G - 1;
1939 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1940 }
1941 else
1942 {
1943 pCpu->u64CpuHz = uCpuHz;
1944 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1945 }
1946 pCpu->au32TSCHistory[0]
1947 = pCpu->au32TSCHistory[1]
1948 = pCpu->au32TSCHistory[2]
1949 = pCpu->au32TSCHistory[3]
1950 = pCpu->au32TSCHistory[4]
1951 = pCpu->au32TSCHistory[5]
1952 = pCpu->au32TSCHistory[6]
1953 = pCpu->au32TSCHistory[7]
1954 = pCpu->u32UpdateIntervalTSC;
1955}
1956
1957
1958/**
1959 * Initializes the GIP data.
1960 *
1961 * @returns VBox status code.
1962 * @param pDevExt Pointer to the device instance data.
1963 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1964 * @param HCPhys The physical address of the GIP.
1965 * @param u64NanoTS The current nanosecond timestamp.
1966 * @param uUpdateHz The update frequency.
1967 * @param uUpdateIntervalNS The update interval in nanoseconds.
1968 * @param cCpus The CPU count.
1969 * @param cbGipCpuGroups The supdrvOSGipGetGroupTableSize return value we
1970 * used when allocating the GIP structure.
1971 */
1972static int supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1973 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS,
1974 unsigned cCpus, size_t cbGipCpuGroups)
1975{
1976 size_t const cbGip = RT_ALIGN_Z(RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups, PAGE_SIZE);
1977 unsigned i;
1978#ifdef DEBUG_DARWIN_GIP
1979 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1980#else
1981 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1982#endif
1983
1984 /*
1985 * Initialize the structure.
1986 */
1987 memset(pGip, 0, cbGip);
1988
1989 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1990 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1991 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1992 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1993 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1994 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1995 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1996 else
1997 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1998 pGip->cCpus = (uint16_t)cCpus;
1999 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
2000 pGip->u32UpdateHz = uUpdateHz;
2001 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
2002 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
2003 RTCpuSetEmpty(&pGip->OnlineCpuSet);
2004 RTCpuSetEmpty(&pGip->PresentCpuSet);
2005 RTMpGetSet(&pGip->PossibleCpuSet);
2006 pGip->cOnlineCpus = RTMpGetOnlineCount();
2007 pGip->cPresentCpus = RTMpGetPresentCount();
2008 pGip->cPossibleCpus = RTMpGetCount();
2009 pGip->cPossibleCpuGroups = 1;
2010 pGip->idCpuMax = RTMpGetMaxCpuId();
2011 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
2012 pGip->aiCpuFromApicId[i] = UINT16_MAX;
2013 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
2014 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
2015 for (i = 0; i < RT_ELEMENTS(pGip->aoffCpuGroup); i++)
2016 pGip->aoffCpuGroup[i] = UINT32_MAX;
2017 for (i = 0; i < cCpus; i++)
2018 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
2019#ifdef RT_OS_WINDOWS
2020 int rc = supdrvOSInitGipGroupTable(pDevExt, pGip, cbGipCpuGroups);
2021 AssertRCReturn(rc, rc);
2022#endif
2023
2024 /*
2025 * Link it to the device extension.
2026 */
2027 pDevExt->pGip = pGip;
2028 pDevExt->HCPhysGip = HCPhys;
2029 pDevExt->cGipUsers = 0;
2030
2031 return VINF_SUCCESS;
2032}
2033
2034
2035/**
2036 * Creates the GIP.
2037 *
2038 * @returns VBox status code.
2039 * @param pDevExt Instance data. GIP stuff may be updated.
2040 */
2041int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
2042{
2043 PSUPGLOBALINFOPAGE pGip;
2044 size_t cbGip;
2045 size_t cbGipCpuGroups;
2046 RTHCPHYS HCPhysGip;
2047 uint32_t u32SystemResolution;
2048 uint32_t u32Interval;
2049 uint32_t u32MinInterval;
2050 uint32_t uMod;
2051 unsigned cCpus;
2052 int rc;
2053
2054 LogFlow(("supdrvGipCreate:\n"));
2055
2056 /*
2057 * Assert order.
2058 */
2059 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
2060 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
2061 Assert(!pDevExt->pGipTimer);
2062#ifdef SUPDRV_USE_MUTEX_FOR_GIP
2063 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
2064 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
2065#else
2066 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
2067 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
2068#endif
2069
2070 /*
2071 * Check the CPU count.
2072 */
2073 cCpus = RTMpGetArraySize();
2074 if (cCpus > RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)))
2075 {
2076 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)));
2077 return VERR_TOO_MANY_CPUS;
2078 }
2079
2080 /*
2081 * Allocate a contiguous set of pages with a default kernel mapping.
2082 */
2083#ifdef RT_OS_WINDOWS
2084 cbGipCpuGroups = supdrvOSGipGetGroupTableSize(pDevExt);
2085#else
2086 cbGipCpuGroups = 0;
2087#endif
2088 cbGip = RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups;
2089 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, cbGip, NIL_RTHCPHYS /*PhysHighest*/, false /*fExecutable*/);
2090 if (RT_FAILURE(rc))
2091 {
2092 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
2093 return rc;
2094 }
2095 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
2096 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
2097
2098 /*
2099 * Find a reasonable update interval and initialize the structure.
2100 */
2101 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
2102 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
2103 * See @bugref{6710}. */
2104 u32MinInterval = RT_NS_10MS;
2105 u32SystemResolution = RTTimerGetSystemGranularity();
2106 u32Interval = u32MinInterval;
2107 uMod = u32MinInterval % u32SystemResolution;
2108 if (uMod)
2109 u32Interval += u32SystemResolution - uMod;
2110
2111 rc = supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval,
2112 cCpus, cbGipCpuGroups);
2113
2114 /*
2115 * Important sanity check... (Sets rc)
2116 */
2117 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
2118 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
2119 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
2120 {
2121 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
2122 rc = VERR_INTERNAL_ERROR_2;
2123 }
2124
2125 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
2126 AssertStmt( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
2127 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED,
2128 rc = VERR_INTERNAL_ERROR_3);
2129
2130 /*
2131 * Do the TSC frequency measurements.
2132 *
2133 * If we're in invariant TSC mode, just to a quick preliminary measurement
2134 * that the TSC-delta measurement code can use to yield cross calls.
2135 *
2136 * If we're in any of the other two modes, neither which require MP init,
2137 * notifications or deltas for the job, do the full measurement now so
2138 * that supdrvGipInitOnCpu() can populate the TSC interval and history
2139 * array with more reasonable values.
2140 */
2141 if (RT_SUCCESS(rc))
2142 {
2143 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
2144 {
2145 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
2146 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
2147 }
2148 else
2149 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
2150 if (RT_SUCCESS(rc))
2151 {
2152 /*
2153 * Start TSC-delta measurement thread before we start getting MP
2154 * events that will try kick it into action (includes the
2155 * RTMpOnAll/supdrvGipInitOnCpu call below).
2156 */
2157 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
2158 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
2159#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2160 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2161 rc = supdrvTscDeltaThreadInit(pDevExt);
2162#endif
2163 if (RT_SUCCESS(rc))
2164 {
2165 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
2166 if (RT_SUCCESS(rc))
2167 {
2168 /*
2169 * Do GIP initialization on all online CPUs. Wake up the
2170 * TSC-delta thread afterwards.
2171 */
2172 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
2173 if (RT_SUCCESS(rc))
2174 {
2175#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2176 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
2177#else
2178 uint16_t iCpu;
2179 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2180 {
2181 /*
2182 * Measure the TSC deltas now that we have MP notifications.
2183 */
2184 int cTries = 5;
2185 do
2186 {
2187 rc = supdrvTscMeasureInitialDeltas(pDevExt);
2188 if ( rc != VERR_TRY_AGAIN
2189 && rc != VERR_CPU_OFFLINE)
2190 break;
2191 } while (--cTries > 0);
2192 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2193 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
2194 }
2195 else
2196 {
2197 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2198 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
2199 }
2200 if (RT_SUCCESS(rc))
2201#endif
2202 {
2203 /*
2204 * Create the timer.
2205 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
2206 */
2207 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
2208 {
2209 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
2210 supdrvGipAsyncTimer, pDevExt);
2211 if (rc == VERR_NOT_SUPPORTED)
2212 {
2213 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
2214 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
2215 }
2216 }
2217 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2218 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
2219 supdrvGipSyncAndInvariantTimer, pDevExt);
2220 if (RT_SUCCESS(rc))
2221 {
2222 /*
2223 * We're good.
2224 */
2225 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
2226 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2227
2228 g_pSUPGlobalInfoPage = pGip;
2229 return VINF_SUCCESS;
2230 }
2231
2232 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
2233 Assert(!pDevExt->pGipTimer);
2234 }
2235 }
2236 else
2237 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
2238 }
2239 else
2240 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
2241 }
2242 else
2243 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
2244 }
2245 else
2246 OSDBGPRINT(("supdrvGipCreate: supdrvTscMeasureInitialDeltas failed. rc=%Rrc\n", rc));
2247 }
2248
2249 /* Releases timer frequency increase too. */
2250 supdrvGipDestroy(pDevExt);
2251 return rc;
2252}
2253
2254
2255/**
2256 * Invalidates the GIP data upon termination.
2257 *
2258 * @param pGip Pointer to the read-write kernel mapping of the GIP.
2259 */
2260static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2261{
2262 unsigned i;
2263 pGip->u32Magic = 0;
2264 for (i = 0; i < pGip->cCpus; i++)
2265 {
2266 pGip->aCPUs[i].u64NanoTS = 0;
2267 pGip->aCPUs[i].u64TSC = 0;
2268 pGip->aCPUs[i].iTSCHistoryHead = 0;
2269 pGip->aCPUs[i].u64TSCSample = 0;
2270 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2271 }
2272}
2273
2274
2275/**
2276 * Terminates the GIP.
2277 *
2278 * @param pDevExt Instance data. GIP stuff may be updated.
2279 */
2280void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2281{
2282 int rc;
2283#ifdef DEBUG_DARWIN_GIP
2284 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2285 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2286 pDevExt->pGipTimer, pDevExt->GipMemObj));
2287#endif
2288
2289 /*
2290 * Stop receiving MP notifications before tearing anything else down.
2291 */
2292 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2293
2294#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2295 /*
2296 * Terminate the TSC-delta measurement thread and resources.
2297 */
2298 supdrvTscDeltaTerm(pDevExt);
2299#endif
2300
2301 /*
2302 * Destroy the TSC-refinement timer.
2303 */
2304 if (pDevExt->pInvarTscRefineTimer)
2305 {
2306 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2307 pDevExt->pInvarTscRefineTimer = NULL;
2308 }
2309
2310 /*
2311 * Invalid the GIP data.
2312 */
2313 if (pDevExt->pGip)
2314 {
2315 supdrvGipTerm(pDevExt->pGip);
2316 pDevExt->pGip = NULL;
2317 }
2318 g_pSUPGlobalInfoPage = NULL;
2319
2320 /*
2321 * Destroy the timer and free the GIP memory object.
2322 */
2323 if (pDevExt->pGipTimer)
2324 {
2325 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2326 pDevExt->pGipTimer = NULL;
2327 }
2328
2329 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2330 {
2331 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2332 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2333 }
2334
2335 /*
2336 * Finally, make sure we've release the system timer resolution request
2337 * if one actually succeeded and is still pending.
2338 */
2339 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2340}
2341
2342
2343
2344
2345/*
2346 *
2347 *
2348 * GIP Update Timer Related Code
2349 * GIP Update Timer Related Code
2350 * GIP Update Timer Related Code
2351 *
2352 *
2353 */
2354
2355
2356/**
2357 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2358 * updates all the per cpu data except the transaction id.
2359 *
2360 * @param pDevExt The device extension.
2361 * @param pGipCpu Pointer to the per cpu data.
2362 * @param u64NanoTS The current time stamp.
2363 * @param u64TSC The current TSC.
2364 * @param iTick The current timer tick.
2365 *
2366 * @remarks Can be called with interrupts disabled!
2367 */
2368static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2369{
2370 uint64_t u64TSCDelta;
2371 bool fUpdateCpuHz;
2372 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2373 AssertPtrReturnVoid(pGip);
2374
2375 /* Delta between this and the previous update. */
2376 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2377
2378 /*
2379 * Update the NanoTS.
2380 */
2381 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2382
2383 /*
2384 * Calc TSC delta.
2385 */
2386 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2387 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2388
2389 /*
2390 * Determine if we need to update the CPU (TSC) frequency calculation.
2391 *
2392 * We don't need to keep recalculating the frequency when it's invariant,
2393 * unless the special tstGIP-2 testing mode is enabled.
2394 */
2395 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2396 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2397 { /* likely*/ }
2398 else
2399 {
2400 uint32_t fGipFlags = pGip->fFlags;
2401 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2402 {
2403 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2404 {
2405 /* Cache the TSC frequency before forcing updates due to test mode. */
2406 if (!fUpdateCpuHz)
2407 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2408 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2409 }
2410 fUpdateCpuHz = true;
2411 }
2412 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2413 {
2414 /* Restore the cached TSC frequency if any. */
2415 if (!fUpdateCpuHz)
2416 {
2417 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2418 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2419 }
2420 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2421 }
2422 }
2423
2424 /*
2425 * Calculate the CPU (TSC) frequency if necessary.
2426 */
2427 if (fUpdateCpuHz)
2428 {
2429 uint64_t u64CpuHz;
2430 uint32_t u32UpdateIntervalTSC;
2431 uint32_t u32UpdateIntervalTSCSlack;
2432 uint32_t u32TransactionId;
2433 unsigned iTSCHistoryHead;
2434
2435 if (u64TSCDelta >> 32)
2436 {
2437 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2438 pGipCpu->cErrors++;
2439 }
2440
2441 /*
2442 * On the 2nd and 3rd callout, reset the history with the current TSC
2443 * interval since the values entered by supdrvGipInit are totally off.
2444 * The interval on the 1st callout completely unreliable, the 2nd is a bit
2445 * better, while the 3rd should be most reliable.
2446 */
2447 /** @todo Could we drop this now that we initializes the history
2448 * with nominal TSC frequency values? */
2449 u32TransactionId = pGipCpu->u32TransactionId;
2450 if (RT_UNLIKELY( ( u32TransactionId == 5
2451 || u32TransactionId == 7)
2452 && ( iTick == 2
2453 || iTick == 3) ))
2454 {
2455 unsigned i;
2456 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2457 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2458 }
2459
2460 /*
2461 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2462 * Wait until we have at least one full history since the above history reset. The
2463 * assumption is that the majority of the previous history values will be tolerable.
2464 * See @bugref{6710#c67}.
2465 */
2466 /** @todo Could we drop the fudging there now that we initializes the history
2467 * with nominal TSC frequency values? */
2468 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2469 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2470 {
2471 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2472 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2473 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2474 {
2475 uint32_t u32;
2476 u32 = pGipCpu->au32TSCHistory[0];
2477 u32 += pGipCpu->au32TSCHistory[1];
2478 u32 += pGipCpu->au32TSCHistory[2];
2479 u32 += pGipCpu->au32TSCHistory[3];
2480 u32 >>= 2;
2481 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2482 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2483 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2484 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2485 u64TSCDelta >>= 2;
2486 u64TSCDelta += u32;
2487 u64TSCDelta >>= 1;
2488 }
2489 }
2490
2491 /*
2492 * TSC History.
2493 */
2494 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2495 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2496 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2497 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2498
2499 /*
2500 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2501 *
2502 * On Windows, we have an occasional (but recurring) sour value that messed up
2503 * the history but taking only 1 interval reduces the precision overall.
2504 */
2505 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2506 || pGip->u32UpdateHz >= 1000)
2507 {
2508 uint32_t u32;
2509 u32 = pGipCpu->au32TSCHistory[0];
2510 u32 += pGipCpu->au32TSCHistory[1];
2511 u32 += pGipCpu->au32TSCHistory[2];
2512 u32 += pGipCpu->au32TSCHistory[3];
2513 u32 >>= 2;
2514 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2515 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2516 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2517 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2518 u32UpdateIntervalTSC >>= 2;
2519 u32UpdateIntervalTSC += u32;
2520 u32UpdateIntervalTSC >>= 1;
2521
2522 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2523 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2524 }
2525 else if (pGip->u32UpdateHz >= 90)
2526 {
2527 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2528 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2529 u32UpdateIntervalTSC >>= 1;
2530
2531 /* value chosen on a 2GHz thinkpad running windows */
2532 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2533 }
2534 else
2535 {
2536 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2537
2538 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
2539 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2540 }
2541 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2542
2543 /*
2544 * CpuHz.
2545 */
2546 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2547 u64CpuHz /= pGip->u32UpdateIntervalNS;
2548 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2549 }
2550}
2551
2552
2553/**
2554 * Updates the GIP.
2555 *
2556 * @param pDevExt The device extension.
2557 * @param u64NanoTS The current nanosecond timestamp.
2558 * @param u64TSC The current TSC timestamp.
2559 * @param idCpu The CPU ID.
2560 * @param iTick The current timer tick.
2561 *
2562 * @remarks Can be called with interrupts disabled!
2563 */
2564static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2565{
2566 /*
2567 * Determine the relevant CPU data.
2568 */
2569 PSUPGIPCPU pGipCpu;
2570 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2571 AssertPtrReturnVoid(pGip);
2572
2573 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2574 pGipCpu = &pGip->aCPUs[0];
2575 else
2576 {
2577 unsigned iCpu;
2578 uint32_t idApic = supdrvGipGetApicId(pGip);
2579 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
2580 { /* likely */ }
2581 else
2582 return;
2583 iCpu = pGip->aiCpuFromApicId[idApic];
2584 if (RT_LIKELY(iCpu < pGip->cCpus))
2585 { /* likely */ }
2586 else
2587 return;
2588 pGipCpu = &pGip->aCPUs[iCpu];
2589 if (RT_LIKELY(pGipCpu->idCpu == idCpu))
2590 { /* likely */ }
2591 else
2592 return;
2593 }
2594
2595 /*
2596 * Start update transaction.
2597 */
2598 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2599 {
2600 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
2601 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2602 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2603 pGipCpu->cErrors++;
2604 return;
2605 }
2606
2607 /*
2608 * Recalc the update frequency every 0x800th time.
2609 */
2610 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2611 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2612 {
2613 if (pGip->u64NanoTSLastUpdateHz)
2614 {
2615#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2616 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2617 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2618 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2619 {
2620 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2621 * calculation on non-invariant hosts if it changes the history decision
2622 * taken in supdrvGipDoUpdateCpu(). */
2623 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2624 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2625 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2626 }
2627#endif
2628 }
2629 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2630 }
2631
2632 /*
2633 * Update the data.
2634 */
2635 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2636
2637 /*
2638 * Complete transaction.
2639 */
2640 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2641}
2642
2643
2644/**
2645 * Updates the per cpu GIP data for the calling cpu.
2646 *
2647 * @param pDevExt The device extension.
2648 * @param u64NanoTS The current nanosecond timestamp.
2649 * @param u64TSC The current TSC timesaver.
2650 * @param idCpu The CPU ID.
2651 * @param idApic The APIC id for the CPU index.
2652 * @param iTick The current timer tick.
2653 *
2654 * @remarks Can be called with interrupts disabled!
2655 */
2656static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2657 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2658{
2659 uint32_t iCpu;
2660 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2661
2662 /*
2663 * Avoid a potential race when a CPU online notification doesn't fire on
2664 * the onlined CPU but the tick creeps in before the event notification is
2665 * run.
2666 */
2667 if (RT_LIKELY(iTick != 1))
2668 { /* likely*/ }
2669 else
2670 {
2671 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2672 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2673 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2674 }
2675
2676 iCpu = pGip->aiCpuFromApicId[idApic];
2677 if (RT_LIKELY(iCpu < pGip->cCpus))
2678 {
2679 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2680 if (pGipCpu->idCpu == idCpu)
2681 {
2682 /*
2683 * Start update transaction.
2684 */
2685 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2686 {
2687 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2688 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2689 pGipCpu->cErrors++;
2690 return;
2691 }
2692
2693 /*
2694 * Update the data.
2695 */
2696 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2697
2698 /*
2699 * Complete transaction.
2700 */
2701 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2702 }
2703 }
2704}
2705
2706
2707/**
2708 * Timer callback function for the sync and invariant GIP modes.
2709 *
2710 * @param pTimer The timer.
2711 * @param pvUser Opaque pointer to the device extension.
2712 * @param iTick The timer tick.
2713 */
2714static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2715{
2716 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2717 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2718 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2719 uint64_t u64TSC = ASMReadTSC();
2720 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2721 RT_NOREF1(pTimer);
2722
2723 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2724 {
2725 /*
2726 * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
2727 * missing timer ticks is not an option for GIP because the GIP users
2728 * will end up incrementing the time in 1ns per time getter call until
2729 * there is a complete timer update. So, if the delta has yet to be
2730 * calculated, we just pretend it is zero for now (the GIP users
2731 * probably won't have it for a wee while either and will do the same).
2732 *
2733 * We could maybe on some platforms try cross calling a CPU with a
2734 * working delta here, but it's not worth the hassle since the
2735 * likelihood of this happening is really low. On Windows, Linux, and
2736 * Solaris timers fire on the CPU they were registered/started on.
2737 * Darwin timers doesn't necessarily (they are high priority threads).
2738 */
2739 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2740 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2741 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2742 Assert(!ASMIntAreEnabled());
2743 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2744 {
2745 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2746 if (iTscDelta != INT64_MAX)
2747 u64TSC -= iTscDelta;
2748 }
2749 }
2750
2751 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2752
2753 ASMSetFlags(fEFlags);
2754}
2755
2756
2757/**
2758 * Timer callback function for async GIP mode.
2759 * @param pTimer The timer.
2760 * @param pvUser Opaque pointer to the device extension.
2761 * @param iTick The timer tick.
2762 */
2763static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2764{
2765 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2766 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2767 RTCPUID idCpu = RTMpCpuId();
2768 uint64_t u64TSC = ASMReadTSC();
2769 uint64_t NanoTS = RTTimeSystemNanoTS();
2770 RT_NOREF1(pTimer);
2771
2772 /** @todo reset the transaction number and whatnot when iTick == 1. */
2773 if (pDevExt->idGipMaster == idCpu)
2774 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2775 else
2776 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, supdrvGipGetApicId(pDevExt->pGip), iTick);
2777
2778 ASMSetFlags(fEFlags);
2779}
2780
2781
2782
2783
2784/*
2785 *
2786 *
2787 * TSC Delta Measurements And Related Code
2788 * TSC Delta Measurements And Related Code
2789 * TSC Delta Measurements And Related Code
2790 *
2791 *
2792 */
2793
2794
/*
 * Select the TSC delta measurement algorithm.
 *
 * Method #2 is the one being built; change the condition to 0 to build with
 * method #1 instead.
 */
#if 1
# define GIP_TSC_DELTA_METHOD_2
#else
# define GIP_TSC_DELTA_METHOD_1
#endif
2803
/** Assumed maximum cache line size in bytes, used for padding variables so
 * they end up away from other cache lines.  Better too large than too small!
 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines, while the 486 thru
 *          Pentium III had 32 byte cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE           128
2810
2811
/**
 * One entry in the result table of TSC delta measurement algorithm \#2.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** Sequence number field for the recording side ("mine"). */
    uint32_t    iSeqMine;
    /** Sequence number field for the other side. */
    uint32_t    iSeqOther;
    /** TSC value belonging to this entry. */
    uint64_t    uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;
2821
2822/**
2823 * TSC delta measurement algorithm \#2 Data.
2824 */
2825typedef struct SUPDRVTSCDELTAMETHOD2
2826{
2827 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2828 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2829 /** The current sequence number of this worker. */
2830 uint32_t volatile iCurSeqNo;
2831 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2832 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2833 /** Result table. */
2834 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2835} SUPDRVTSCDELTAMETHOD2;
2836/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
2837typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2838
2839
2840/**
2841 * The TSC delta synchronization struct, version 2.
2842 *
2843 * The synchronization variable is completely isolated in its own cache line
2844 * (provided our max cache line size estimate is correct).
2845 */
2846typedef struct SUPTSCDELTASYNC2
2847{
2848 /** Padding to make sure the uVar1 is in its own cache line. */
2849 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2850
2851 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2852 volatile uint32_t uSyncVar;
2853 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2854 volatile uint32_t uSyncSeq;
2855
2856 /** Padding to make sure the uVar1 is in its own cache line. */
2857 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2858
2859 /** Start RDTSC value. Put here mainly to save stack space. */
2860 uint64_t uTscStart;
2861 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2862 uint64_t cMaxTscTicks;
2863} SUPTSCDELTASYNC2;
2864AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2865typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2866
/** @name GIP_TSC_DELTA_SYNC2_XXX - Values for the TSC delta synchronization
 *        variable.
 * @{ */
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT       UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT      UINT32_C(0x0fff)

/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY               UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY              UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO                  UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO               UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT             UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL               UINT32_C(0x1fff)
/** @} */
2884
2885
2886/**
2887 * Argument package/state passed by supdrvTscMeasureDeltaOne() to the RTMpOn
2888 * callback worker.
 *
 * The master-only and worker-only parts are separated from each other and
 * from the shared header by cache line sized padding arrays.
 *
2889 * @todo add (NOTE(review): the original todo is incomplete; what is to be
 *       added is not stated.)
2890 */
2891typedef struct SUPDRVGIPTSCDELTARGS
2892{
2893 /** The device extension. */
2894 PSUPDRVDEVEXT pDevExt;
2895 /** Pointer to the GIP CPU array entry for the worker. */
2896 PSUPGIPCPU pWorker;
2897 /** Pointer to the GIP CPU array entry for the master. */
2898 PSUPGIPCPU pMaster;
2899 /** The maximum number of ticks to spend in supdrvTscMeasureDeltaCallback.
2900 * (This is what we need a rough TSC frequency for.) */
2901 uint64_t cMaxTscTicks;
2902 /** Used to abort synchronization setup.  Set to true by
 * supdrvTscMeasureDeltaCallbackAbortSyncSetup(). */
2903 bool volatile fAbortSetup;
2904
2905 /** Padding to make sure the master variables live in their own cache lines. */
2906 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2907
2908 /** @name Master
2909 * @{ */
2910 /** The time the master spent in the MP worker. */
2911 uint64_t cElapsedMasterTscTicks;
2912 /** The iTry value when stopped at. */
2913 uint32_t iTry;
2914 /** Set if the run timed out.  Written by supdrvTscDeltaSync2_Before and by
 * supdrvTscMeasureDeltaCallbackAbortSyncSetup() on their timeout paths. */
2915 bool volatile fTimedOut;
2916 /** Pointer to the master's synchronization struct (on stack). */
2917 PSUPTSCDELTASYNC2 volatile pSyncMaster;
2918 /** Master data union. */
2919 union
2920 {
2921 /** Data (master) for delta verification. */
2922 struct
2923 {
2924 /** Verification test TSC values for the master. */
2925 uint64_t volatile auTscs[32];
2926 } Verify;
2927 /** Data (master) for measurement method \#2. */
2928 struct
2929 {
2930 /** Data and sequence number. */
2931 SUPDRVTSCDELTAMETHOD2 Data;
2932 /** The lag setting for the next run. */
2933 bool fLag;
2934 /** Number of hits, accumulated by
 * supdrvTscDeltaMethod2ProcessDataOnMaster(). */
2935 uint32_t cHits;
2936 } M2;
2937 } uMaster;
2938 /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
2939 * VERR_TRY_AGAIN on timeout. */
2940 int32_t rcVerify;
2941#ifdef TSCDELTA_VERIFY_WITH_STATS
2942 /** The maximum difference between TSC read during delta verification. */
2943 int64_t cMaxVerifyTscTicks;
2944 /** The minimum difference between two TSC reads during verification. */
2945 int64_t cMinVerifyTscTicks;
2946 /** The bad TSC diff, worker relative to master (= worker - master).
2947 * Negative value means the worker is behind the master. */
2948 int64_t iVerifyBadTscDiff;
2949#endif
2950 /** @} */
2951
2952 /** Padding to make sure the worker variables live in their own cache line. */
2953 uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2954
2955 /** @name Proletarian
2956 * @{ */
2957 /** Pointer to the worker's synchronization struct (on stack). */
2958 PSUPTSCDELTASYNC2 volatile pSyncWorker;
2959 /** The time the worker spent in the MP worker. */
2960 uint64_t cElapsedWorkerTscTicks;
2961 /** Worker data union. */
2962 union
2963 {
2964 /** Data (worker) for delta verification. */
2965 struct
2966 {
2967 /** Verification test TSC values for the worker. */
2968 uint64_t volatile auTscs[32];
2969 } Verify;
2970 /** Data (worker) for measurement method \#2. */
2971 struct
2972 {
2973 /** Data and sequence number. */
2974 SUPDRVTSCDELTAMETHOD2 Data;
2975 /** The lag setting for the next run (set by master). */
2976 bool fLag;
2977 } M2;
2978 } uWorker;
2979 /** @} */
2980
2981 /** Padding to make sure the above is in its own cache line. */
2982 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2983} SUPDRVGIPTSCDELTARGS;
/** Pointer to a TSC delta measurement argument package. */
2984typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2985
2986
2987/** @name Macros that implement the basic synchronization steps common to
2988 * the algorithms.
2989 *
2990 * Must be used from a loop as the timeouts are implemented via 'break' statements
2991 * at the moment.
2992 *
2993 * @{
2994 */
/* Spin loop debugging aid: when enabled, a counter is kept per function and a
 * breakpoint is raised each time its low 25 bits wrap around to zero.  The
 * macros expand to no-ops otherwise. */
2995#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
2996# define TSCDELTA_DBG_VARS() uint32_t iDbgCounter
2997# define TSCDELTA_DBG_START_LOOP() do { iDbgCounter = 0; } while (0)
2998# define TSCDELTA_DBG_CHECK_LOOP() \
2999 do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
3000#else
3001# define TSCDELTA_DBG_VARS() ((void)0)
3002# define TSCDELTA_DBG_START_LOOP() ((void)0)
3003# define TSCDELTA_DBG_CHECK_LOOP() ((void)0)
3004#endif
/* Sync state tracing, compiled out; change the '#if 0' to enable SUPR0Printf
 * output.  The argument must be a parenthesized printf style argument list. */
3005#if 0
3006# define TSCDELTA_DBG_SYNC_MSG(a_Args) SUPR0Printf a_Args
3007#else
3008# define TSCDELTA_DBG_SYNC_MSG(a_Args) ((void)0)
3009#endif
/* Second tracing channel, compiled out by default like the one above. */
3010#if 0
3011# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
3012#else
3013# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
3014#endif
/* Tracing channel used by the loop-exit messages (the #89/#9x ones), compiled
 * out by default. */
3015#if 0
3016# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
3017#else
3018# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
3019#endif
3020
3021
/**
 * The common 'before' synchronization step for the master and the worker.
 *
 * Takes the two parties through the READY -> STEADY -> GO handshake on the
 * uSyncVar members and then makes a bounded number of attempts (16) at getting
 * them into mostly lockstep execution using the uSyncSeq members.
 *
 * @returns true when the handshake completed; interrupts are then disabled and
 *          the flags to restore are stored in @a *pfEFlags.  Note that true is
 *          also returned when the lockstep attempts were given up.
 *          false when an unexpected state was seen or the wait for STEADY
 *          timed out; in that case the interrupt flags have been restored
 *          (or were never changed) before returning.
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure.
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   pfEFlags    Where to return the saved flags register value.
 * @param   pArgs       The argument/state data; fTimedOut is set on timeout.
 */
3022static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3023 bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
3024{
3025 uint32_t iMySeq = fIsMaster ? 0 : 256;
3026 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
3027 uint32_t u32Tmp;
3028 uint32_t iSync2Loops = 0;
3029 RTCCUINTREG fEFlags;
3030 TSCDELTA_DBG_VARS();
3031
3032#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3033 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
3034#else
3035 *pfEFlags = 0;
3036#endif
3037
3038 /*
3039 * The master tells the worker to get on its mark.
3040 */
3041 if (fIsMaster)
3042 {
3043 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3044 { /* likely*/ }
3045 else
3046 {
3047 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3048 return false;
3049 }
3050 }
3051
3052 /*
3053 * Wait for the on your mark signal (ack in the master case). We process timeouts here.
3054 */
3055 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
3056 for (;;)
3057 {
 /* Interrupts are disabled before each check so that we leave the loop
    with them off; they are re-enabled again below if we have to keep waiting. */
3058 fEFlags = ASMIntDisableFlags();
3059 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3060 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
3061 break;
3062 ASMSetFlags(fEFlags);
3063 ASMNopPause();
3064
3065 /* Abort? */
3066 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
3067 {
3068 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3069 return false;
3070 }
3071
3072 /* Check for timeouts every so often (not every loop in case RDTSC is
3073 trapping or something). Must check the first time around. */
3074#if 0 /* For debugging the timeout paths. */
3075 static uint32_t volatile xxx;
3076#endif
3077 if ( ( (iSync2Loops & 0x3ff) == 0
3078 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
3079#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
3080 || (!fIsMaster && (++xxx & 0xf) == 0)
3081#endif
3082 )
3083 {
3084 /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
3085 ignore the timeout if we've got the go ahead already (simpler). */
3086 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
3087 {
3088 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
 /* Also flag the timeout in the peer's sync variable if it is in the STEADY state. */
3089 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
3090 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3091 return false;
3092 }
3093 }
3094 iSync2Loops++;
3095 }
3096
3097 /*
3098 * Interrupts are now disabled and will remain disabled until we do
3099 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
3100 */
3101 *pfEFlags = fEFlags;
3102
3103 /*
3104 * The worker tells the master that it is on its mark and that the master
3105 * need to get into position as well.
3106 */
3107 if (!fIsMaster)
3108 {
3109 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3110 { /* likely */ }
3111 else
3112 {
3113 ASMSetFlags(fEFlags);
3114 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3115 return false;
3116 }
3117 }
3118
3119 /*
3120 * The master sends the 'go' to the worker and wait for ACK.
3121 */
3122 if (fIsMaster)
3123 {
3124 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3125 { /* likely */ }
3126 else
3127 {
3128 ASMSetFlags(fEFlags);
3129 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3130 return false;
3131 }
3132 }
3133
3134 /*
3135 * Wait for the 'go' signal (ack in the master case).
 * No timeout check here; any state other than STEADY or GO aborts.
3136 */
3137 TSCDELTA_DBG_START_LOOP();
3138 for (;;)
3139 {
3140 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3141 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
3142 break;
3143 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
3144 { /* likely */ }
3145 else
3146 {
3147 ASMSetFlags(fEFlags);
3148 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3149 return false;
3150 }
3151
3152 TSCDELTA_DBG_CHECK_LOOP();
3153 ASMNopPause();
3154 }
3155
3156 /*
3157 * The worker acks the 'go' (shouldn't fail).
3158 */
3159 if (!fIsMaster)
3160 {
3161 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3162 { /* likely */ }
3163 else
3164 {
3165 ASMSetFlags(fEFlags);
3166 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3167 return false;
3168 }
3169 }
3170
3171 /*
3172 * Try enter mostly lockstep execution with it.
 *
 * Each round we publish our sequence number in our own uSyncSeq, swap it
 * into the peer's uSyncSeq and then re-read our own.  The round succeeds
 * when the value swapped out of the peer's variable equals the value now
 * found in ours.  The master counts from 0 and the worker from 256, and
 * UINT32_MAX is the 'giving up' marker.
3173 */
3174 for (;;)
3175 {
3176 uint32_t iOtherSeq1, iOtherSeq2;
3177 ASMCompilerBarrier();
3178 ASMSerializeInstruction();
3179
3180 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
3181 ASMNopPause();
3182 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
3183 ASMNopPause();
3184 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);
3185
3186 ASMCompilerBarrier();
3187 if (iOtherSeq1 == iOtherSeq2)
3188 return true;
3189
3190 /* Did the other guy give up? Should we give up? */
3191 if ( iOtherSeq1 == UINT32_MAX
3192 || iOtherSeq2 == UINT32_MAX)
3193 return true;
3194 if (++iMySeq >= iMaxSeq)
3195 {
3196 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
3197 return true;
3198 }
3199 ASMNopPause();
3200 }
3201}
3202
/* Master wrapper around supdrvTscDeltaSync2_Before: breaks out of the
 * enclosing loop when the sync step fails.  The trailing 'else do {} while (0)'
 * makes the macro swallow the semicolon following the invocation. */
3203#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3204 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3205 { /*likely*/ } \
3206 else if (true) \
3207 { \
3208 TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
3209 break; \
3210 } else do {} while (0)
/* Worker counterpart of TSCDELTA_MASTER_SYNC_BEFORE (passes fIsMaster=false);
 * it likewise breaks out of the enclosing loop on failure. */
3211#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3212 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3213 { /*likely*/ } \
3214 else if (true) \
3215 { \
3216 TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
3217 break; \
3218 } else do {} while (0)
3219
3220
/**
 * The common 'after' synchronization step for the master and the worker.
 *
 * Restores the interrupt flags saved by the 'before' step and then spins until
 * our own sync variable leaves the GO state.
 *
 * @returns true if the sync variable became READY (or, for the worker only,
 *          STEADY), false if any state other than GO was seen while waiting.
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure (unused).
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   fEFlags     The flags value to restore before waiting.
 */
3221static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3222 bool fIsMaster, RTCCUINTREG fEFlags)
3223{
3224 TSCDELTA_DBG_VARS();
3225 RT_NOREF1(pOtherSync);
3226
3227 /*
3228 * Wait for the 'ready' signal. In the master's case, this means the
3229 * worker has completed its data collection, while in the worker's case it
3230 * means the master is done processing the data and it's time for the next
3231 * loop iteration (or whatever).
3232 */
3233 ASMSetFlags(fEFlags);
3234 TSCDELTA_DBG_START_LOOP();
3235 for (;;)
3236 {
3237 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3238 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
3239 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
3240 return true;
3241 ASMNopPause();
3242 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
3243 { /* likely */}
3244 else
3245 {
 /* Note: the message text says 'other' regardless of fIsMaster. */
3246 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
3247 return false; /* shouldn't ever happen! */
3248 }
3249 TSCDELTA_DBG_CHECK_LOOP();
3250 ASMNopPause();
3251 }
3252}
3253
/* Master wrapper around supdrvTscDeltaSync2_After: waits for the worker to
 * finish its data collection and breaks out of the enclosing loop on failure. */
3254#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3255 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
3256 { /* likely */ } \
3257 else if (true) \
3258 { \
3259 TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
3260 break; \
3261 } else do {} while (0)
3262
/* Used by the master once it has processed the data of a round: moves the
 * worker's sync variable from GO back to READY, breaking out of the enclosing
 * loop if the worker's variable is not in the GO state. */
3263#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
3264 /* \
3265 * Tell the worker that we're done processing the data and ready for the next round. \
3266 */ \
3267 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3268 { /* likely */ } \
3269 else if (true)\
3270 { \
3271 TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3272 break; \
3273 } else do {} while (0)
3274
/* Worker 'after' step: first moves the master's sync variable from GO to READY
 * (restoring the interrupt flags and breaking out of the enclosing loop if
 * that fails), then waits in supdrvTscDeltaSync2_After for the master's kick. */
3275#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3276 if (true) { \
3277 /* \
3278 * Tell the master that we're done collecting data and wait for the next round to start. \
3279 */ \
3280 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3281 { /* likely */ } \
3282 else \
3283 { \
3284 ASMSetFlags(a_fEFlags); \
3285 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3286 break; \
3287 } \
3288 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
3289 { /* likely */ } \
3290 else \
3291 { \
3292 TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
3293 break; \
3294 } \
3295 } else do {} while (0)
3296/** @} */
3297
3298
3299#ifdef GIP_TSC_DELTA_METHOD_1
3300/**
3301 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3302 *
3303 *
3304 * We ignore the first few runs of the loop in order to prime the
3305 * cache. Also, we need to be careful about using 'pause' instruction
3306 * in critical busy-wait loops in this code - it can cause undesired
3307 * behaviour with hyperthreading.
3308 *
3309 * We try to minimize the measurement error by computing the minimum
3310 * read time of the compare statement in the worker by taking TSC
3311 * measurements across it.
3312 *
3313 * It must be noted that the computed minimum read time is mostly to
3314 * eliminate huge deltas when the worker is too early and doesn't by
3315 * itself help produce more accurate deltas. We allow two times the
3316 * computed minimum as an arbitrary acceptable threshold. Therefore,
3317 * it is still possible to get negative deltas where there are none
3318 * when the worker is earlier. As long as these occasional negative
3319 * deltas are lower than the time it takes to exit guest-context and
3320 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3321 * that jumped backwards. It is due to the existence of the negative
3322 * deltas that we don't recompute the delta with the master and
3323 * worker interchanged to eliminate the remaining measurement error.
3324 *
 * The result is stored in the worker's SUPGIPCPU::i64TSCDelta by the master.
 * A failing sync step ends the loop early (the sync macros 'break').
3325 *
3326 * @param pArgs The argument/state data.
3327 * @param pMySync My synchronization structure.
3328 * @param pOtherSync My partner's synchronization structure.
3329 * @param fIsMaster Set if master, clear if worker.
3330 * @param iTry The attempt number (unused).
3331 */
3332static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3333 bool fIsMaster, uint32_t iTry)
3334{
3335 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3336 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3337 uint64_t uMinCmpReadTime = UINT64_MAX;
3338 unsigned iLoop;
3339 NOREF(iTry);
3340
3341 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3342 {
3343 RTCCUINTREG fEFlags;
3344 if (fIsMaster)
3345 {
3346 /*
3347 * The master.
3348 */
3349 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3350 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3351 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3352 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3353
 /* Publish our TSC reading; repeat in the case that the reading happens
    to equal the reserved 'no sample' value the worker is polling for. */
3354 do
3355 {
3356 ASMSerializeInstruction();
3357 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3358 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3359
3360 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3361
3362 /* Process the data. */
3363 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3364 {
3365 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3366 {
 /* Worker sample minus the master sample adjusted by the master's own delta. */
3367 int64_t iDelta = pGipCpuWorker->u64TSCSample
3368 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
 /* Keep the smallest delta at or above the initial master value,
    or else the largest one below it; INT64_MAX in the worker's
    field is always replaced. */
3369 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3370 ? iDelta < pGipCpuWorker->i64TSCDelta
3371 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3372 pGipCpuWorker->i64TSCDelta = iDelta;
3373 }
3374 }
3375
3376 /* Reset our TSC sample and tell the worker to move on. */
3377 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3378 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3379 }
3380 else
3381 {
3382 /*
3383 * The worker.
3384 */
3385 uint64_t uTscWorker;
3386 uint64_t uTscWorkerFlushed;
3387 uint64_t uCmpReadTime;
3388
3389 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3390 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3391
3392 /*
3393 * Keep reading the TSC until we notice that the master has read his. Reading
3394 * the TSC -after- the master has updated the memory is way too late. We thus
3395 * compensate by trying to measure how long it took for the worker to notice
3396 * the memory flushed from the master.
3397 */
3398 do
3399 {
3400 ASMSerializeInstruction();
3401 uTscWorker = ASMReadTSC();
3402 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3403 ASMSerializeInstruction();
3404 uTscWorkerFlushed = ASMReadTSC();
3405
3406 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3407 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3408 {
3409 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
3410 if (uCmpReadTime < (uMinCmpReadTime << 1))
3411 {
3412 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3413 if (uCmpReadTime < uMinCmpReadTime)
3414 uMinCmpReadTime = uCmpReadTime;
3415 }
3416 else
 /* Too slow at noticing the master's write: hand over the reserved value so the master skips this round. */
3417 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3418 }
3419 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3420 {
 /* Read time calibration rounds: only track the minimum compare/read time. */
3421 if (uCmpReadTime < uMinCmpReadTime)
3422 uMinCmpReadTime = uCmpReadTime;
3423 }
3424
3425 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3426 }
3427 }
3428
3429 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3430 pMySync->uSyncVar));
3431
3432 /*
3433 * We must reset the worker TSC sample value in case it gets picked as a
3434 * GIP master later on (it's trashed above, naturally).
3435 */
3436 if (!fIsMaster)
3437 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3438}
3439#endif /* GIP_TSC_DELTA_METHOD_1 */
3440
3441
3442#ifdef GIP_TSC_DELTA_METHOD_2
3443/*
3444 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3445 */
3446
/* Total number of rounds supdrvTscDeltaMethod2Loop runs: 7 plus the primer
 * rounds.  (GIP_TSC_DELTA_M2_PRIMER_LOOPS is defined on the next line, which is
 * fine since macros are only expanded at the point of use.) */
3447# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
/* Number of initial rounds whose data is not processed; zero disables priming. */
3448# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3449
3450
3451static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3452{
3453 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3454 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3455 uint32_t idxResult;
3456 uint32_t cHits = 0;
3457
3458 /*
3459 * Look for matching entries in the master and worker tables.
3460 */
3461 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3462 {
3463 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3464 if (idxOther & 1)
3465 {
3466 idxOther >>= 1;
3467 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3468 {
3469 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3470 {
3471 int64_t iDelta;
3472 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3473 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3474 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3475 ? iDelta < iBestDelta
3476 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3477 iBestDelta = iDelta;
3478 cHits++;
3479 }
3480 }
3481 }
3482 }
3483
3484 /*
3485 * Save the results.
3486 */
3487 if (cHits > 2)
3488 pArgs->pWorker->i64TSCDelta = iBestDelta;
3489 pArgs->uMaster.M2.cHits += cHits;
3490}
3491
3492
3493/**
3494 * The core function of the 2nd TSC delta measurement algorithm.
3495 *
3496 * The idea here is that we have the two CPUs execute the exact same code
3497 * collecting a largish set of TSC samples. The code has one data dependency on
3498 * the other CPU which intention it is to synchronize the execution as well as
3499 * help cross references the two sets of TSC samples (the sequence numbers).
3500 *
3501 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3502 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
3503 * it will help with making the CPUs enter lock step execution occasionally.
3504 *
 * The own sequence number is incremented once before and once after each TSC
 * read, so it is odd while a sample is being taken.  The value recorded in
 * iSeqMine is the odd one.
 *
 * @param pMyData       My data: sequence number and result table (filled completely).
 * @param piOtherSeqNo  Pointer to the other CPU's current sequence number.
 * @param fLag          Whether to execute a pause after each sample.
3505 */
3506static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3507{
3508 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3509 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3510
3511 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3512 ASMSerializeInstruction();
3513 while (cLeft-- > 0)
3514 {
3515 uint64_t uTsc;
 /* Mark the sample as in progress (odd value) and snapshot the other side's counter. */
3516 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3517 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3518 ASMCompilerBarrier();
3519 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3520 uTsc = ASMReadTSC();
 /* Second increment: the sample is done (even value). */
3521 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3522 ASMCompilerBarrier();
3523 ASMSerializeInstruction();
 /* The table entry is only written after the TSC has been read. */
3524 pEntry->iSeqMine = iSeqMine;
3525 pEntry->iSeqOther = iSeqOther;
3526 pEntry->uTsc = uTsc;
3527 pEntry++;
3528 ASMSerializeInstruction();
3529 if (fLag)
3530 ASMNopPause();
3531 }
3532}
3533
3534
3535/**
3536 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3537 *
3538 * See supdrvTscDeltaMethod2CollectData for algorithm details.
 *
 * Runs GIP_TSC_DELTA_M2_LOOPS rounds.  In each round the master picks the lag
 * settings for both parties, both collect a table of samples between the
 * 'before' and 'after' sync steps, and the master then processes the tables.
 * A failing sync step ends the loop early (the sync macros 'break').
3539 *
3540 * @param pArgs The argument/state data.
3541 * @param pMySync My synchronization structure.
3542 * @param pOtherSync My partner's synchronization structure.
3543 * @param fIsMaster Set if master, clear if worker.
3544 * @param iTry The attempt number (unused).
3545 */
3546static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3547 bool fIsMaster, uint32_t iTry)
3548{
3549 unsigned iLoop;
3550 RT_NOREF1(iTry);
3551
3552 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3553 {
3554 RTCCUINTREG fEFlags;
3555 if (fIsMaster)
3556 {
3557 /*
3558 * Adjust the loop lag fudge.
 * (The worker's setting is written here too, before the sync step.)
3559 */
3560# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3561 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3562 {
3563 /* Lag during the priming to be nice to everyone.. */
3564 pArgs->uMaster.M2.fLag = true;
3565 pArgs->uWorker.M2.fLag = true;
3566 }
3567 else
3568# endif
3569 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3570 {
3571 /* 25 % of the body without lagging. */
3572 pArgs->uMaster.M2.fLag = false;
3573 pArgs->uWorker.M2.fLag = false;
3574 }
3575 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3576 {
3577 /* 25 % of the body with both lagging. */
3578 pArgs->uMaster.M2.fLag = true;
3579 pArgs->uWorker.M2.fLag = true;
3580 }
3581 else
3582 {
3583 /* 50% of the body with alternating lag: master on even rounds, worker on odd ones. */
3584 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3585 pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
3586 }
3587
3588 /*
3589 * Sync up with the worker and collect data.
3590 */
3591 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3592 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3593 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3594
3595 /*
3596 * Process the data.
 * (Skipped for primer rounds when priming is configured.)
3597 */
3598# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3599 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3600# endif
3601 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
3602
3603 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3604 }
3605 else
3606 {
3607 /*
3608 * The worker.
3609 */
3610 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3611 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3612 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3613 }
3614 }
3615}
3616
3617#endif /* GIP_TSC_DELTA_METHOD_2 */
3618
3619
3620
/**
 * Verifies a worker TSC delta by letting the master and the worker take turns
 * reading the TSC.
 *
 * The two parties hand the turn back and forth by alternating each other's
 * sync variable between GO_GO and GO, the master reading first.  The master
 * then checks that the delta adjusted readings never go backwards when taken
 * in the order they were made, and stores the verdict in
 * SUPDRVGIPTSCDELTARGS::rcVerify.
 *
 * @returns The value of SUPDRVGIPTSCDELTARGS::rcVerify when the round ran to
 *          completion (the master sets it to VINF_SUCCESS or
 *          VERR_OUT_OF_RANGE).  VERR_TIMEOUT if one of the sync steps failed,
 *          in which case rcVerify is set to VERR_TRY_AGAIN.
 * @param   pArgs           The argument/state data.
 * @param   pMySync         My synchronization structure.
 * @param   pOtherSync      My partner's synchronization structure.
 * @param   fIsMaster       Set if master, clear if worker.
 * @param   iWorkerTscDelta The worker TSC delta to verify (subtracted from the
 *                          worker's readings).
 */
3621static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
3622 PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
3623{
3624 /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
3625 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3626 uint32_t i;
3627 TSCDELTA_DBG_VARS();
3628
 /* Not really a loop: it only exists so the sync macros can 'break' out of it
    to the failure path at the bottom; the normal path returns from inside. */
3629 for (;;)
3630 {
3631 RTCCUINTREG fEFlags;
3632 AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
3633 AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));
3634
3635 if (fIsMaster)
3636 {
3637 uint64_t uTscWorker;
3638 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3639
3640 /*
3641 * Collect TSC, master goes first.
 * Two readings are taken per iteration, hence the even table size asserted above.
3642 */
3643 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
3644 {
3645 /* Read, kick & wait #1. */
3646 uint64_t uTsc = ASMReadTSC();
3647 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3648 ASMSerializeInstruction();
3649 pArgs->uMaster.Verify.auTscs[i] = uTsc;
3650 TSCDELTA_DBG_START_LOOP();
3651 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3652 {
3653 TSCDELTA_DBG_CHECK_LOOP();
3654 ASMNopPause();
3655 }
3656
3657 /* Read, kick & wait #2. */
3658 uTsc = ASMReadTSC();
3659 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3660 ASMSerializeInstruction();
3661 pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
3662 TSCDELTA_DBG_START_LOOP();
3663 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3664 {
3665 TSCDELTA_DBG_CHECK_LOOP();
3666 ASMNopPause();
3667 }
3668 }
3669
3670 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3671
3672 /*
3673 * Process the data.
 * The readings were made in the order master[0], worker[0], master[1],
 * worker[1], and so on; each adjusted reading must not be lower than the one
 * before it.
3674 */
3675#ifdef TSCDELTA_VERIFY_WITH_STATS
3676 pArgs->cMaxVerifyTscTicks = INT64_MIN;
3677 pArgs->cMinVerifyTscTicks = INT64_MAX;
3678 pArgs->iVerifyBadTscDiff = 0;
3679#endif
3680 ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
3681 uTscWorker = 0;
3682 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
3683 {
3684 /* Master vs previous worker entry. */
3685 uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
3686 int64_t iDiff;
3687 if (i > 0)
3688 {
3689 iDiff = uTscMaster - uTscWorker;
3690#ifdef TSCDELTA_VERIFY_WITH_STATS
3691 if (iDiff > pArgs->cMaxVerifyTscTicks)
3692 pArgs->cMaxVerifyTscTicks = iDiff;
3693 if (iDiff < pArgs->cMinVerifyTscTicks)
3694 pArgs->cMinVerifyTscTicks = iDiff;
3695#endif
3696 if (iDiff < 0)
3697 {
3698#ifdef TSCDELTA_VERIFY_WITH_STATS
 /* Negated since iDiff is master - worker here while the field is worker - master. */
3699 pArgs->iVerifyBadTscDiff = -iDiff;
3700#endif
3701 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3702 break;
3703 }
3704 }
3705
3706 /* Worker vs master. */
3707 uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
3708 iDiff = uTscWorker - uTscMaster;
3709#ifdef TSCDELTA_VERIFY_WITH_STATS
3710 if (iDiff > pArgs->cMaxVerifyTscTicks)
3711 pArgs->cMaxVerifyTscTicks = iDiff;
3712 if (iDiff < pArgs->cMinVerifyTscTicks)
3713 pArgs->cMinVerifyTscTicks = iDiff;
3714#endif
3715 if (iDiff < 0)
3716 {
3717#ifdef TSCDELTA_VERIFY_WITH_STATS
3718 pArgs->iVerifyBadTscDiff = iDiff;
3719#endif
3720 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3721 break;
3722 }
3723 }
3724
3725 /* Done. */
3726 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3727 }
3728 else
3729 {
3730 /*
3731 * The worker, master leads.
3732 */
3733 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3734
3735 for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
3736 {
3737 uint64_t uTsc;
3738
3739 /* Wait, Read and Kick #1. */
3740 TSCDELTA_DBG_START_LOOP();
3741 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3742 {
3743 TSCDELTA_DBG_CHECK_LOOP();
3744 ASMNopPause();
3745 }
3746 uTsc = ASMReadTSC();
3747 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3748 ASMSerializeInstruction();
3749 pArgs->uWorker.Verify.auTscs[i] = uTsc;
3750
3751 /* Wait, Read and Kick #2. */
3752 TSCDELTA_DBG_START_LOOP();
3753 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3754 {
3755 TSCDELTA_DBG_CHECK_LOOP();
3756 ASMNopPause();
3757 }
3758 uTsc = ASMReadTSC();
3759 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3760 ASMSerializeInstruction();
3761 pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
3762 }
3763
3764 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3765 }
 /* The round completed; both parties return the master's verdict. */
3766 return pArgs->rcVerify;
3767 }
3768
3769 /*
3770 * Timed out, please retry.
 * (We get here when any of the sync macros above broke out of the loop.)
3771 */
3772 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
3773 return VERR_TIMEOUT;
3774}
3775
3776
3777
3778/**
3779 * Handles the special abort procedure during synchronization setup in
3780 * supdrvTscMeasureDeltaCallbackUnwrapped().
3781 *
3782 * @returns 0 (dummy, ignored)
3783 * @param pArgs Pointer to argument/state data.
3784 * @param pMySync Pointer to my sync structure.
3785 * @param fIsMaster Set if we're the master, clear if worker.
3786 * @param fTimeout Set if it's a timeout.
3787 */
3788DECL_NO_INLINE(static, int)
3789supdrvTscMeasureDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3790{
3791 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3792 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3793 TSCDELTA_DBG_VARS();
3794 RT_NOREF1(pMySync);
3795
3796 /*
3797 * Clear our sync pointer and make sure the abort flag is set.
3798 */
3799 ASMAtomicWriteNullPtr(ppMySync);
3800 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3801 if (fTimeout)
3802 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3803
3804 /*
3805 * Make sure the other party is out of there and won't be touching our
3806 * sync state again (would cause stack corruption).
3807 */
3808 TSCDELTA_DBG_START_LOOP();
3809 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3810 {
3811 ASMNopPause();
3812 ASMNopPause();
3813 ASMNopPause();
3814 TSCDELTA_DBG_CHECK_LOOP();
3815 }
3816
3817 return 0;
3818}
3819
3820
3821/**
3822 * This is used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
3823 * and compute the delta between them.
3824 *
3825 * To reduce code size a good when timeout handling was added, a dummy return
3826 * value had to be added (saves 1-3 lines per timeout case), thus this
3827 * 'Unwrapped' function and the dummy 0 return value.
3828 *
3829 * @returns 0 (dummy, ignored)
3830 * @param idCpu The CPU we are current scheduled on.
3831 * @param pArgs Pointer to a parameter package.
3832 *
3833 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3834 * read the TSC at exactly the same time on both the master and the
3835 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3836 * contention, SMI, pipelining etc. there is no guaranteed way of
3837 * doing this on x86 CPUs.
3838 */
3839static int supdrvTscMeasureDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3840{
3841 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3842 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3843 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3844 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3845 uint32_t iTry;
3846 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3847 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3848 SUPTSCDELTASYNC2 MySync;
3849 PSUPTSCDELTASYNC2 pOtherSync;
3850 int rc;
3851 TSCDELTA_DBG_VARS();
3852
3853 /* A bit of paranoia first. */
3854 if (!pGipCpuMaster || !pGipCpuWorker)
3855 return 0;
3856
3857 /*
3858 * If the CPU isn't part of the measurement, return immediately.
3859 */
3860 if ( !fIsMaster
3861 && idCpu != pGipCpuWorker->idCpu)
3862 return 0;
3863
3864 /*
3865 * Set up my synchronization stuff and wait for the other party to show up.
3866 *
3867 * We don't wait forever since the other party may be off fishing (offline,
3868 * spinning with ints disables, whatever), we must play nice to the rest of
3869 * the system as this context generally isn't one in which we will get
3870 * preempted and we may hold up a number of lower priority interrupts.
3871 */
3872 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3873 ASMAtomicWritePtr(ppMySync, &MySync);
3874 MySync.uTscStart = ASMReadTSC();
3875 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3876
3877 /* Look for the partner, might not be here yet... Special abort considerations. */
3878 iTry = 0;
3879 TSCDELTA_DBG_START_LOOP();
3880 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3881 {
3882 ASMNopPause();
3883 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3884 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3885 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3886 if ( (iTry++ & 0xff) == 0
3887 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3888 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3889 TSCDELTA_DBG_CHECK_LOOP();
3890 ASMNopPause();
3891 }
3892
3893 /* I found my partner, waiting to be found... Special abort considerations. */
3894 if (fIsMaster)
3895 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3896 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3897
3898 iTry = 0;
3899 TSCDELTA_DBG_START_LOOP();
3900 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3901 {
3902 ASMNopPause();
3903 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3904 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3905 if ( (iTry++ & 0xff) == 0
3906 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3907 {
3908 if ( fIsMaster
3909 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3910 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3911 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3912 }
3913 TSCDELTA_DBG_CHECK_LOOP();
3914 }
3915
3916 if (!fIsMaster)
3917 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3918 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3919
3920/** @todo Add a resumable state to pArgs so we don't waste time if we time
3921 * out or something. Timeouts are legit, any of the two CPUs may get
3922 * interrupted. */
3923
3924 /*
3925 * Start by seeing if we have a zero delta between the two CPUs.
3926 * This should normally be the case.
3927 */
3928 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3929 if (RT_SUCCESS(rc))
3930 {
3931 if (fIsMaster)
3932 {
3933 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3934 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3935 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3936 }
3937 }
3938 /*
3939 * If the verification didn't time out, do regular delta measurements.
3940 * We retry this until we get a reasonable value.
3941 */
3942 else if (rc != VERR_TIMEOUT)
3943 {
3944 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3945 for (iTry = 0; iTry < 12; iTry++)
3946 {
3947 /*
3948 * Check the state before we start.
3949 */
3950 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3951 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3952 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3953 {
3954 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3955 break;
3956 }
3957
3958 /*
3959 * Do the measurements.
3960 */
3961#ifdef GIP_TSC_DELTA_METHOD_1
3962 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3963#elif defined(GIP_TSC_DELTA_METHOD_2)
3964 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3965#else
3966# error "huh??"
3967#endif
3968
3969 /*
3970 * Check the state.
3971 */
3972 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3973 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3974 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3975 {
3976 if (fIsMaster)
3977 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3978 else
3979 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3980 break;
3981 }
3982
3983 /*
3984 * Success? If so, stop trying. Master decides.
3985 */
3986 if (fIsMaster)
3987 {
3988 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3989 {
3990 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3991 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3992 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3993 break;
3994 }
3995 }
3996 }
3997 if (fIsMaster)
3998 pArgs->iTry = iTry;
3999 }
4000
4001 /*
4002 * End the synchronization dance. We tell the other that we're done,
4003 * then wait for the same kind of reply.
4004 */
4005 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
4006 ASMAtomicWriteNullPtr(ppMySync);
4007 iTry = 0;
4008 TSCDELTA_DBG_START_LOOP();
4009 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
4010 {
4011 iTry++;
4012 if ( iTry == 0
4013 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
4014 break; /* this really shouldn't happen. */
4015 TSCDELTA_DBG_CHECK_LOOP();
4016 ASMNopPause();
4017 }
4018
4019 /*
4020 * Collect some runtime stats.
4021 */
4022 if (fIsMaster)
4023 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
4024 else
4025 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
4026 return 0;
4027}
4028
4029/**
4030 * Callback used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
4031 * and compute the delta between them.
4032 *
4033 * @param idCpu The CPU we are current scheduled on.
4034 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
4035 * @param pvUser2 Unused.
4036 */
4037static DECLCALLBACK(void) supdrvTscMeasureDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4038{
4039 supdrvTscMeasureDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
4040 RT_NOREF1(pvUser2);
4041}
4042
4043
4044/**
4045 * Measures the TSC delta between the master GIP CPU and one specified worker
4046 * CPU.
4047 *
4048 * @returns VBox status code.
4049 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
4050 * failure.
4051 * @param pDevExt Pointer to the device instance data.
4052 * @param idxWorker The index of the worker CPU from the GIP's array of
4053 * CPUs.
4054 *
4055 * @remarks This must be called with preemption enabled!
4056 */
4057static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
4058{
4059 int rc;
4060 int rc2;
4061 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4062 RTCPUID idMaster = pDevExt->idGipMaster;
4063 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
4064 PSUPGIPCPU pGipCpuMaster;
4065 uint32_t iGipCpuMaster;
4066#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4067 uint32_t u32Tmp;
4068#endif
4069
4070 /* Validate input a bit. */
4071 AssertReturn(pGip, VERR_INVALID_PARAMETER);
4072 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4073 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4074
4075 /*
4076 * Don't attempt measuring the delta for the GIP master.
4077 */
4078 if (pGipCpuWorker->idCpu == idMaster)
4079 {
4080 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
4081 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
4082 return VINF_SUCCESS;
4083 }
4084
4085 /*
4086 * One measurement at a time, at least for now. We might be using
4087 * broadcast IPIs so, so be nice to the rest of the system.
4088 */
4089#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4090 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
4091#else
4092 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
4093#endif
4094 if (RT_FAILURE(rc))
4095 return rc;
4096
4097 /*
4098 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
4099 * try pick a different master. (This fudge only works with multi core systems.)
4100 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
4101 *
4102 * We skip this on AMDs for now as their HTT is different from Intel's and
4103 * it doesn't seem to have any favorable effect on the results.
4104 *
4105 * If the master is offline, we need a new master too, so share the code.
4106 */
4107 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
4108 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
4109 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
4110#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4111 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
4112 && pGip->cOnlineCpus > 2
4113 && ASMHasCpuId()
4114 && RTX86IsValidStdRange(ASMCpuId_EAX(0))
4115 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
4116 && ( !ASMIsAmdCpu()
4117 || RTX86GetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
4118 || ( RTX86GetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
4119 && RTX86GetCpuModelAMD(u32Tmp) >= 0x02) ) )
4120 || !RTMpIsCpuOnline(idMaster) )
4121 {
4122 uint32_t i;
4123 for (i = 0; i < pGip->cCpus; i++)
4124 if ( i != iGipCpuMaster
4125 && i != idxWorker
4126 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
4127 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
4128 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
4129 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
4130 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
4131 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
4132 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
4133 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
4134 {
4135 iGipCpuMaster = i;
4136 pGipCpuMaster = &pGip->aCPUs[i];
4137 idMaster = pGipCpuMaster->idCpu;
4138 break;
4139 }
4140 }
4141#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
4142
4143 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
4144 {
4145 /*
4146 * Initialize data package for the RTMpOnPair callback.
4147 */
4148 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
4149 if (pArgs)
4150 {
4151 pArgs->pWorker = pGipCpuWorker;
4152 pArgs->pMaster = pGipCpuMaster;
4153 pArgs->pDevExt = pDevExt;
4154 pArgs->pSyncMaster = NULL;
4155 pArgs->pSyncWorker = NULL;
4156 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
4157
4158 /*
4159 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
4160 * and supdrvTscMeasureDeltaCallback can use it as a success check.
4161 */
4162 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
4163 * that when doing the restart loop reorg. */
4164 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
4165 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
4166 supdrvTscMeasureDeltaCallback, pArgs, NULL);
4167 if (RT_SUCCESS(rc))
4168 {
4169#if 0
4170 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
4171 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
4172 pArgs->fTimedOut ? " timed out" :"");
4173#endif
4174#if 0
4175 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
4176 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
4177#endif
4178 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
4179 {
4180 /*
4181 * Work the TSC delta applicability rating. It starts
4182 * optimistic in supdrvGipInit, we downgrade it here.
4183 */
4184 SUPGIPUSETSCDELTA enmRating;
4185 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
4186 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
4187 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
4188 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
4189 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
4190 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
4191 else
4192 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
4193 if (pGip->enmUseTscDelta < enmRating)
4194 {
4195 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
4196 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
4197 }
4198 }
4199 else
4200 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4201 }
4202 /** @todo return try-again if we get an offline CPU error. */
4203
4204 RTMemFree(pArgs);
4205 }
4206 else
4207 rc = VERR_NO_MEMORY;
4208 }
4209 else
4210 rc = VERR_CPU_OFFLINE;
4211
4212 /*
4213 * We're done now.
4214 */
4215#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4216 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4217#else
4218 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4219#endif
4220 return rc;
4221}
4222
4223
4224/**
4225 * Resets the TSC-delta related TSC samples and optionally the deltas
4226 * themselves.
4227 *
4228 * @param pDevExt Pointer to the device instance data.
4229 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
4230 *
4231 * @remarks This might be called while holding a spinlock!
4232 */
4233static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
4234{
4235 unsigned iCpu;
4236 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4237 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4238 {
4239 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
4240 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
4241 if (fResetTscDeltas)
4242 {
4243 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
4244 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
4245 }
4246 }
4247}
4248
4249
4250/**
4251 * Picks an online CPU as the master TSC for TSC-delta computations.
4252 *
4253 * @returns VBox status code.
4254 * @param pDevExt Pointer to the device instance data.
4255 * @param pidxMaster Where to store the CPU array index of the chosen
4256 * master. Optional, can be NULL.
4257 */
4258static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
4259{
4260 /*
4261 * Pick the first CPU online as the master TSC and make it the new GIP master based
4262 * on the APIC ID.
4263 *
4264 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
4265 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
4266 * master as this point since the sync/async timer isn't created yet.
4267 */
4268 unsigned iCpu;
4269 uint32_t idxMaster = UINT32_MAX;
4270 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4271 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
4272 {
4273 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
4274 if (idxCpu != UINT16_MAX)
4275 {
4276 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
4277 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
4278 {
4279 idxMaster = idxCpu;
4280 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4281 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4282 if (pidxMaster)
4283 *pidxMaster = idxMaster;
4284 return VINF_SUCCESS;
4285 }
4286 }
4287 }
4288 return VERR_CPU_OFFLINE;
4289}
4290
4291
4292/**
4293 * Performs the initial measurements of the TSC deltas between CPUs.
4294 *
4295 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4296 * triggered by it if threaded.
4297 *
4298 * @returns VBox status code.
4299 * @param pDevExt Pointer to the device instance data.
4300 *
4301 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4302 * idCpu, GIP's online CPU set which are populated in
4303 * supdrvGipInitOnCpu().
4304 */
4305static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt)
4306{
4307 PSUPGIPCPU pGipCpuMaster;
4308 unsigned iCpu;
4309 unsigned iOddEven;
4310 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4311 uint32_t idxMaster = UINT32_MAX;
4312 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4313
4314 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4315 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
4316 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4317 if (RT_FAILURE(rc))
4318 {
4319 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4320 return rc;
4321 }
4322 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4323 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4324 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4325
4326 /*
4327 * If there is only a single CPU online we have nothing to do.
4328 */
4329 if (pGip->cOnlineCpus <= 1)
4330 {
4331 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4332 return VINF_SUCCESS;
4333 }
4334
4335 /*
4336 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4337 * master). We do the CPUs with the even numbered APIC IDs first so that
4338 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4339 */
4340 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4341 {
4342 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4343 {
4344 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4345 if ( iCpu != idxMaster
4346 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4347 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4348 {
4349 rc = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4350 if (RT_FAILURE(rc))
4351 {
4352 SUPR0Printf("supdrvTscMeasureDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4353 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4354 break;
4355 }
4356
4357 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4358 {
4359 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4360 rc = VERR_TRY_AGAIN;
4361 break;
4362 }
4363 }
4364 }
4365 }
4366
4367 return rc;
4368}
4369
4370
4371#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4372
4373/**
4374 * Switches the TSC-delta measurement thread into the butchered state.
4375 *
4376 * @returns VBox status code.
4377 * @param pDevExt Pointer to the device instance data.
4378 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4379 * @param pszFailed An error message to log.
4380 * @param rcFailed The error code to exit the thread with.
4381 */
4382static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4383{
4384 if (!fSpinlockHeld)
4385 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4386
4387 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4388 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4389 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4390 return rcFailed;
4391}
4392
4393
4394/**
4395 * The TSC-delta measurement thread.
4396 *
4397 * @returns VBox status code.
4398 * @param hThread The thread handle.
4399 * @param pvUser Opaque pointer to the device instance data.
4400 */
4401static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4402{
4403 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4404 int rc = VERR_INTERNAL_ERROR_2;
4405 for (;;)
4406 {
4407 /*
4408 * Switch on the current state.
4409 */
4410 SUPDRVTSCDELTATHREADSTATE enmState;
4411 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4412 enmState = pDevExt->enmTscDeltaThreadState;
4413 switch (enmState)
4414 {
4415 case kTscDeltaThreadState_Creating:
4416 {
4417 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4418 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4419 if (RT_FAILURE(rc))
4420 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4421 RT_FALL_THRU();
4422 }
4423
4424 case kTscDeltaThreadState_Listening:
4425 {
4426 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4427
4428 /*
4429 * Linux counts uninterruptible sleeps as load, hence we shall do a
4430 * regular, interruptible sleep here and ignore wake ups due to signals.
4431 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4432 */
4433 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4434 if ( RT_FAILURE(rc)
4435 && rc != VERR_TIMEOUT
4436 && rc != VERR_INTERRUPTED)
4437 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4438 RTThreadUserReset(hThread);
4439 break;
4440 }
4441
4442 case kTscDeltaThreadState_WaitAndMeasure:
4443 {
4444 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4445 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4446 if (RT_FAILURE(rc))
4447 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4448 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4449 RTThreadSleep(1);
4450 RT_FALL_THRU();
4451 }
4452
4453 case kTscDeltaThreadState_Measuring:
4454 {
4455 if (pDevExt->fTscThreadRecomputeAllDeltas)
4456 {
4457 int cTries = 8;
4458 int cMsWaitPerTry = 10;
4459 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4460 Assert(pGip);
4461 do
4462 {
4463 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4464 rc = supdrvTscMeasureInitialDeltas(pDevExt);
4465 if ( RT_SUCCESS(rc)
4466 || ( RT_FAILURE(rc)
4467 && rc != VERR_TRY_AGAIN
4468 && rc != VERR_CPU_OFFLINE))
4469 {
4470 break;
4471 }
4472 RTThreadSleep(cMsWaitPerTry);
4473 } while (cTries-- > 0);
4474 pDevExt->fTscThreadRecomputeAllDeltas = false;
4475 }
4476 else
4477 {
4478 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4479 unsigned iCpu;
4480
4481 /* Measure TSC-deltas only for the CPUs that are in the set. */
4482 rc = VINF_SUCCESS;
4483 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4484 {
4485 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4486 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4487 {
4488 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4489 {
4490 int rc2 = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4491 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4492 rc = rc2;
4493 }
4494 else
4495 {
4496 /*
4497 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4498 * mark the delta as fine to get the timer thread off our back.
4499 */
4500 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4501 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4502 }
4503 }
4504 }
4505 }
4506 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4507 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4508 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4509 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4510 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4511 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4512 break;
4513 }
4514
4515 case kTscDeltaThreadState_Terminating:
4516 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4517 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4518 return VINF_SUCCESS;
4519
4520 case kTscDeltaThreadState_Butchered:
4521 default:
4522 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4523 }
4524 }
4525 /* not reached */
4526}
4527
4528
4529/**
4530 * Waits for the TSC-delta measurement thread to respond to a state change.
4531 *
4532 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4533 * other error code on internal error.
4534 *
4535 * @param pDevExt The device instance data.
4536 * @param enmCurState The current state.
4537 * @param enmNewState The new state we're waiting for it to enter.
4538 */
4539static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4540 SUPDRVTSCDELTATHREADSTATE enmNewState)
4541{
4542 SUPDRVTSCDELTATHREADSTATE enmActualState;
4543 int rc;
4544
4545 /*
4546 * Wait a short while for the expected state transition.
4547 */
4548 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4549 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4550 enmActualState = pDevExt->enmTscDeltaThreadState;
4551 if (enmActualState == enmNewState)
4552 {
4553 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4554 rc = VINF_SUCCESS;
4555 }
4556 else if (enmActualState == enmCurState)
4557 {
4558 /*
4559 * Wait longer if the state has not yet transitioned to the one we want.
4560 */
4561 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4562 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4563 if ( RT_SUCCESS(rc)
4564 || rc == VERR_TIMEOUT)
4565 {
4566 /*
4567 * Check the state whether we've succeeded.
4568 */
4569 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4570 enmActualState = pDevExt->enmTscDeltaThreadState;
4571 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4572 if (enmActualState == enmNewState)
4573 rc = VINF_SUCCESS;
4574 else if (enmActualState == enmCurState)
4575 {
4576 rc = VERR_TIMEOUT;
4577 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4578 enmActualState, enmNewState));
4579 }
4580 else
4581 {
4582 rc = VERR_INTERNAL_ERROR;
4583 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4584 enmActualState, enmNewState));
4585 }
4586 }
4587 else
4588 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4589 }
4590 else
4591 {
4592 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4593 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4594 enmActualState, enmCurState, enmNewState));
4595 rc = VERR_INTERNAL_ERROR;
4596 }
4597
4598 return rc;
4599}
4600
4601
4602/**
4603 * Signals the TSC-delta thread to start measuring TSC-deltas.
4604 *
4605 * @param pDevExt Pointer to the device instance data.
4606 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4607 */
4608static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4609{
4610 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4611 {
4612 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4613 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4614 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4615 {
4616 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4617 if (fForceAll)
4618 pDevExt->fTscThreadRecomputeAllDeltas = true;
4619 }
4620 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4621 && fForceAll)
4622 pDevExt->fTscThreadRecomputeAllDeltas = true;
4623 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4624 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4625 }
4626}
4627
4628
4629/**
4630 * Terminates the actual thread running supdrvTscDeltaThread().
4631 *
4632 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4633 * supdrvTscDeltaTerm().
4634 *
4635 * @param pDevExt Pointer to the device instance data.
4636 */
4637static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4638{
4639 int rc;
4640 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4641 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4642 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4643 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4644 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4645 if (RT_FAILURE(rc))
4646 {
4647 /* Signal a few more times before giving up. */
4648 int cTriesLeft = 5;
4649 while (--cTriesLeft > 0)
4650 {
4651 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4652 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4653 if (rc != VERR_TIMEOUT)
4654 break;
4655 }
4656 }
4657}
4658
4659
4660/**
4661 * Initializes and spawns the TSC-delta measurement thread.
4662 *
4663 * A thread is required for servicing re-measurement requests from events like
4664 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4665 * under all contexts on all OSs.
4666 *
4667 * @returns VBox status code.
4668 * @param pDevExt Pointer to the device instance data.
4669 *
4670 * @remarks Must only be called -after- initializing GIP and setting up MP
4671 * notifications!
4672 */
4673static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4674{
4675 int rc;
4676 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4677 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4678 if (RT_SUCCESS(rc))
4679 {
4680 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4681 if (RT_SUCCESS(rc))
4682 {
4683 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4684 pDevExt->cMsTscDeltaTimeout = 60000;
4685 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4686 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4687 if (RT_SUCCESS(rc))
4688 {
4689 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4690 if (RT_SUCCESS(rc))
4691 {
4692 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4693 return rc;
4694 }
4695
4696 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4697 supdrvTscDeltaThreadTerminate(pDevExt);
4698 }
4699 else
4700 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4701 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4702 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4703 }
4704 else
4705 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4706 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4707 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4708 }
4709 else
4710 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4711
4712 return rc;
4713}
4714
4715
4716/**
4717 * Terminates the TSC-delta measurement thread and cleanup.
4718 *
4719 * @param pDevExt Pointer to the device instance data.
4720 */
4721static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4722{
4723 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4724 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4725 {
4726 supdrvTscDeltaThreadTerminate(pDevExt);
4727 }
4728
4729 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4730 {
4731 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4732 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4733 }
4734
4735 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4736 {
4737 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4738 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4739 }
4740
4741 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4742}
4743
4744#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4745
4746/**
4747 * Measure the TSC delta for the CPU given by its CPU set index.
4748 *
4749 * @returns VBox status code.
4750 * @retval VERR_INTERRUPTED if interrupted while waiting.
4751 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4752 * measurement.
4753 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4754 *
4755 * @param pSession The caller's session. GIP must've been mapped.
4756 * @param iCpuSet The CPU set index of the CPU to measure.
4757 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4758 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4759 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4760 * ready.
4761 * @param cTries Number of times to try, pass 0 for the default.
4762 */
4763SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4764 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4765{
4766 PSUPDRVDEVEXT pDevExt;
4767 PSUPGLOBALINFOPAGE pGip;
4768 uint16_t iGipCpu;
4769 int rc;
4770#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4771 uint64_t msTsStartWait;
4772 uint32_t iWaitLoop;
4773#endif
4774
4775 /*
4776 * Validate and adjust the input.
4777 */
4778 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4779 if (!pSession->fGipReferenced)
4780 return VERR_WRONG_ORDER;
4781
4782 pDevExt = pSession->pDevExt;
4783 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4784
4785 pGip = pDevExt->pGip;
4786 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4787
4788 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4789 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4790 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4791 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4792
4793 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4794 return VERR_INVALID_FLAGS;
4795
4796 /*
4797 * The request is a noop if the TSC delta isn't being used.
4798 */
4799 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4800 return VINF_SUCCESS;
4801
4802 if (cTries == 0)
4803 cTries = 12;
4804 else if (cTries > 256)
4805 cTries = 256;
4806
4807 if (cMsWaitRetry == 0)
4808 cMsWaitRetry = 2;
4809 else if (cMsWaitRetry > 1000)
4810 cMsWaitRetry = 1000;
4811
4812#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4813 /*
4814 * Has the TSC already been measured and we're not forced to redo it?
4815 */
4816 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4817 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4818 return VINF_SUCCESS;
4819
4820 /*
4821 * Asynchronous request? Forward it to the thread, no waiting.
4822 */
4823 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4824 {
4825 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4826 * to pass those options to the thread somehow and implement it in the
4827 * thread. Check if anyone uses/needs fAsync before implementing this. */
4828 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4829 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4830 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4831 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4832 {
4833 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4834 rc = VINF_SUCCESS;
4835 }
4836 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4837 rc = VERR_THREAD_IS_DEAD;
4838 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4839 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4840 return VINF_SUCCESS;
4841 }
4842
4843 /*
4844 * If a TSC-delta measurement request is already being serviced by the thread,
4845 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4846 */
4847 msTsStartWait = RTTimeSystemMilliTS();
4848 for (iWaitLoop = 0;; iWaitLoop++)
4849 {
4850 uint64_t cMsElapsed;
4851 SUPDRVTSCDELTATHREADSTATE enmState;
4852 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4853 enmState = pDevExt->enmTscDeltaThreadState;
4854 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4855
4856 if (enmState == kTscDeltaThreadState_Measuring)
4857 { /* Must wait, the thread is busy. */ }
4858 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4859 { /* Must wait, this state only says what will happen next. */ }
4860 else if (enmState == kTscDeltaThreadState_Terminating)
4861 { /* Must wait, this state only says what should happen next. */ }
4862 else
4863 break; /* All other states, the thread is either idly listening or dead. */
4864
4865 /* Wait or fail. */
4866 if (cMsWaitThread == 0)
4867 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4868 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4869 if (cMsElapsed >= cMsWaitThread)
4870 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4871
4872 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4873 if (rc == VERR_INTERRUPTED)
4874 return rc;
4875 }
4876#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4877
4878 /*
4879 * Try measure the TSC delta the given number of times.
4880 */
4881 for (;;)
4882 {
4883 /* Unless we're forced to measure the delta, check whether it's done already. */
4884 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4885 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4886 {
4887 rc = VINF_SUCCESS;
4888 break;
4889 }
4890
4891 /* Measure it. */
4892 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
4893 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4894 {
4895 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4896 break;
4897 }
4898
4899 /* Retry? */
4900 if (cTries <= 1)
4901 break;
4902 cTries--;
4903
4904 /* Always delay between retries (be nice to the rest of the system
4905 and avoid the BSOD hounds). */
4906 rc = RTThreadSleep(cMsWaitRetry);
4907 if (rc == VERR_INTERRUPTED)
4908 break;
4909 }
4910
4911 return rc;
4912}
4913SUPR0_EXPORT_SYMBOL(SUPR0TscDeltaMeasureBySetIndex);
4914
4915
4916/**
4917 * Service a TSC-delta measurement request.
4918 *
4919 * @returns VBox status code.
4920 * @param pDevExt Pointer to the device instance data.
4921 * @param pSession The support driver session.
4922 * @param pReq Pointer to the TSC-delta measurement request.
4923 */
4924int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4925{
4926 uint32_t cTries;
4927 uint32_t iCpuSet;
4928 uint32_t fFlags;
4929 RTMSINTERVAL cMsWaitRetry;
4930 RT_NOREF1(pDevExt);
4931
4932 /*
4933 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4934 */
4935 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4936
4937 if (pReq->u.In.idCpu == NIL_RTCPUID)
4938 return VERR_INVALID_CPU_ID;
4939 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4940 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4941 return VERR_INVALID_CPU_ID;
4942
4943 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4944
4945 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4946
4947 fFlags = 0;
4948 if (pReq->u.In.fAsync)
4949 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4950 if (pReq->u.In.fForce)
4951 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4952
4953 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4954 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4955 cTries);
4956}
4957
4958
4959/**
4960 * Reads TSC with delta applied.
4961 *
4962 * Will try to resolve delta value INT64_MAX before applying it. This is the
4963 * main purpose of this function, to handle the case where the delta needs to be
4964 * determined.
4965 *
4966 * @returns VBox status code.
4967 * @param pDevExt Pointer to the device instance data.
4968 * @param pSession The support driver session.
4969 * @param pReq Pointer to the TSC-read request.
4970 */
4971int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4972{
4973 PSUPGLOBALINFOPAGE pGip;
4974 int rc;
4975
4976 /*
4977 * Validate. We require the client to have mapped GIP (no asserting on
4978 * ring-3 preconditions).
4979 */
4980 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4981 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4982 return VERR_WRONG_ORDER;
4983 pGip = pDevExt->pGip;
4984 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4985
4986 /*
4987 * We're usually here because we need to apply delta, but we shouldn't be
4988 * upset if the GIP is some different mode.
4989 */
4990 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4991 {
4992 uint32_t cTries = 0;
4993 for (;;)
4994 {
4995 /*
4996 * Start by gathering the data, using CLI for disabling preemption
4997 * while we do that.
4998 */
4999 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5000 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5001 int iGipCpu = 0; /* gcc maybe used uninitialized */
5002 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5003 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5004 {
5005 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
5006 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5007 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5008 ASMSetFlags(fEFlags);
5009
5010 /*
5011 * If we're lucky we've got a delta, but no predictions here
5012 * as this I/O control is normally only used when the TSC delta
5013 * is set to INT64_MAX.
5014 */
5015 if (i64Delta != INT64_MAX)
5016 {
5017 pReq->u.Out.u64AdjustedTsc -= i64Delta;
5018 rc = VINF_SUCCESS;
5019 break;
5020 }
5021
5022 /* Give up after a few times. */
5023 if (cTries >= 4)
5024 {
5025 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
5026 break;
5027 }
5028
5029 /* Need to measure the delta an try again. */
5030 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
5031 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
5032 /** @todo should probably delay on failure... dpc watchdogs */
5033 }
5034 else
5035 {
5036 /* This really shouldn't happen. */
5037 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
5038 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5039 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5040 ASMSetFlags(fEFlags);
5041 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
5042 break;
5043 }
5044 }
5045 }
5046 else
5047 {
5048 /*
5049 * No delta to apply. Easy. Deal with preemption the lazy way.
5050 */
5051 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5052 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5053 int iGipCpu = 0; /* gcc may be used uninitialized */
5054 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5055 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5056 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5057 else
5058 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5059 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5060 ASMSetFlags(fEFlags);
5061 rc = VINF_SUCCESS;
5062 }
5063
5064 return rc;
5065}
5066
5067
5068/**
5069 * Worker for supdrvIOCtl_GipSetFlags.
5070 *
5071 * @returns VBox status code.
5072 * @retval VERR_WRONG_ORDER if an enable-once-per-session flag is set again for
5073 * a session.
5074 *
5075 * @param pDevExt Pointer to the device instance data.
5076 * @param pSession The support driver session.
5077 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5078 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5079 *
5080 * @remarks Caller must own the GIP mutex.
5081 *
5082 * @remarks This function doesn't validate any of the flags.
5083 */
5084static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
5085{
5086 uint32_t cRefs;
5087 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5088 AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */
5089
5090 /*
5091 * Compute GIP test-mode flags.
5092 */
5093 if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
5094 {
5095 if (!pSession->fGipTestMode)
5096 {
5097 Assert(pDevExt->cGipTestModeRefs < _64K);
5098 pSession->fGipTestMode = true;
5099 cRefs = ++pDevExt->cGipTestModeRefs;
5100 if (cRefs == 1)
5101 {
5102 fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
5103 fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
5104 }
5105 }
5106 else
5107 {
5108 LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
5109 return VERR_WRONG_ORDER;
5110 }
5111 }
5112 else if ( !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
5113 && pSession->fGipTestMode)
5114 {
5115 Assert(pDevExt->cGipTestModeRefs > 0);
5116 Assert(pDevExt->cGipTestModeRefs < _64K);
5117 pSession->fGipTestMode = false;
5118 cRefs = --pDevExt->cGipTestModeRefs;
5119 if (!cRefs)
5120 fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
5121 else
5122 fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
5123 }
5124
5125 /*
5126 * Commit the flags. This should be done as atomically as possible
5127 * since the flag consumers won't be holding the GIP mutex.
5128 */
5129 ASMAtomicOrU32(&pGip->fFlags, fOrMask);
5130 ASMAtomicAndU32(&pGip->fFlags, fAndMask);
5131
5132 return VINF_SUCCESS;
5133}
5134
5135
5136/**
5137 * Sets GIP test mode parameters.
5138 *
5139 * @returns VBox status code.
5140 * @param pDevExt Pointer to the device instance data.
5141 * @param pSession The support driver session.
5142 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5143 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5144 */
5145int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
5146{
5147 PSUPGLOBALINFOPAGE pGip;
5148 int rc;
5149
5150 /*
5151 * Validate. We require the client to have mapped GIP (no asserting on
5152 * ring-3 preconditions).
5153 */
5154 AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
5155 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
5156 return VERR_WRONG_ORDER;
5157 pGip = pDevExt->pGip;
5158 AssertReturn(pGip, VERR_INTERNAL_ERROR_3);
5159
5160 if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
5161 return VERR_INVALID_PARAMETER;
5162 if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
5163 return VERR_INVALID_PARAMETER;
5164
5165 /*
5166 * Don't confuse supdrvGipSetFlags or anyone else by both setting
5167 * and clearing the same flags. AND takes precedence.
5168 */
5169 fOrMask &= fAndMask;
5170
5171 /*
5172 * Take the loader lock to avoid having to think about races between two
5173 * clients changing the flags at the same time (state is not simple).
5174 */
5175#ifdef SUPDRV_USE_MUTEX_FOR_GIP
5176 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
5177#else
5178 RTSemFastMutexRequest(pDevExt->mtxGip);
5179#endif
5180
5181 rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);
5182
5183#ifdef SUPDRV_USE_MUTEX_FOR_GIP
5184 RTSemMutexRelease(pDevExt->mtxGip);
5185#else
5186 RTSemFastMutexRelease(pDevExt->mtxGip);
5187#endif
5188 return rc;
5189}
5190
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette