Index: /trunk/include/iprt/err.h
===================================================================
--- /trunk/include/iprt/err.h	(revision 64233)
+++ /trunk/include/iprt/err.h	(revision 64234)
@@ -951,4 +951,6 @@
  * creating a process as a given user. IQ is also called 'Increase quotas'. */
 #define VERR_PROC_IQ_PRIV_NOT_HELD          (-22413)
+/** The system has too many CPUs. */
+#define VERR_MP_TOO_MANY_CPUS               (-22414)
 /** @} */
 
Index: /trunk/include/iprt/nt/nt.h
===================================================================
--- /trunk/include/iprt/nt/nt.h	(revision 64233)
+++ /trunk/include/iprt/nt/nt.h	(revision 64234)
@@ -2431,4 +2431,16 @@
 typedef  NTSTATUS (NTAPI *PFNKEGETPROCESSORNUMBERFROMINDEX)(KEPROCESSORINDEX idxProcessor, PPROCESSOR_NUMBER pProcNumber);
 typedef  KEPROCESSORINDEX (NTAPI *PFNKEGETPROCESSORINDEXFROMNUMBER)(const PROCESSOR_NUMBER *pProcNumber);
+typedef  NTSTATUS (NTAPI *PFNKEGETPROCESSORNUMBERFROMINDEX)(KEPROCESSORINDEX ProcIndex, PROCESSOR_NUMBER *pProcNumber); /* NOTE(review): repeats the typedef of the same name two lines up (identical type). */
+typedef  KEPROCESSORINDEX (NTAPI *PFNKEGETCURRENTPROCESSORNUMBEREX)(PROCESSOR_NUMBER *pProcNumber); /* pProcNumber is an optional output, hence not const. */
+typedef  KAFFINITY (NTAPI *PFNKEQUERYACTIVEPROCESSORS)(VOID);
+typedef  ULONG   (NTAPI *PFNKEQUERYMAXIMUMPROCESSORCOUNT)(VOID);
+typedef  ULONG   (NTAPI *PFNKEQUERYMAXIMUMPROCESSORCOUNTEX)(USHORT GroupNumber);
+typedef  USHORT  (NTAPI *PFNKEQUERYMAXIMUMGROUPCOUNT)(VOID);
+typedef  NTSTATUS (NTAPI *PFNKEQUERYLOGICALPROCESSORRELATIONSHIP)(PROCESSOR_NUMBER *pProcNumber,
+                                                                  LOGICAL_PROCESSOR_RELATIONSHIP RelationShipType,
+                                                                  SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pInfo, PULONG pcbInfo);
+typedef  PVOID   (NTAPI *PFNKEREGISTERPROCESSORCHANGECALLBACK)(PPROCESSOR_CALLBACK_FUNCTION pfnCallback, void *pvUser, ULONG fFlags);
+typedef  VOID    (NTAPI *PFNKEDEREGISTERPROCESSORCHANGECALLBACK)(PVOID pvCallback);
+typedef  NTSTATUS (NTAPI *PFNKESETTARGETPROCESSORDPCEX)(KDPC *pDpc, PROCESSOR_NUMBER *pProcNumber);
 
 NTSYSAPI BOOLEAN  NTAPI ObFindHandleForObject(PEPROCESS pProcess, PVOID pvObject, POBJECT_TYPE pObjectType,
Index: /trunk/src/VBox/Runtime/r0drv/nt/initterm-r0drv-nt.cpp
===================================================================
--- /trunk/src/VBox/Runtime/r0drv/nt/initterm-r0drv-nt.cpp	(revision 64233)
+++ /trunk/src/VBox/Runtime/r0drv/nt/initterm-r0drv-nt.cpp	(revision 64234)
@@ -33,8 +33,10 @@
 #include <iprt/assert.h>
 #include <iprt/err.h>
+#include <iprt/mem.h>
 #include <iprt/mp.h>
 #include <iprt/string.h>
 #include "internal/initterm.h"
 #include "internal-r0drv-nt.h"
+#include "../mp-r0drv.h"
 #include "symdb.h"
 #include "symdbdata.h"
@@ -51,43 +53,69 @@
  * and update this variable as CPUs comes online. (The code is done already.)
  */
-RTCPUSET                            g_rtMpNtCpuSet;
+RTCPUSET                                g_rtMpNtCpuSet;
+/** Maximum number of processor groups (KeQueryMaximumGroupCount, or 1 when that API is unavailable). */
+uint32_t                                g_cRtMpNtMaxGroups;
+/** Maximum number of processors; set by rtR0NtInitMp and also bounds the NT CPU index range. */
+uint32_t                                g_cRtMpNtMaxCpus;
+/** The handle of the rtR0NtMpProcessorChangeCallback registration, NULL when not registered. */
+static PVOID                            g_pvMpCpuChangeCallback = NULL;
 
 /** ExSetTimerResolution, introduced in W2K. */
-PFNMYEXSETTIMERRESOLUTION           g_pfnrtNtExSetTimerResolution;
+PFNMYEXSETTIMERRESOLUTION               g_pfnrtNtExSetTimerResolution;
 /** KeFlushQueuedDpcs, introduced in XP. */
-PFNMYKEFLUSHQUEUEDDPCS              g_pfnrtNtKeFlushQueuedDpcs;
+PFNMYKEFLUSHQUEUEDDPCS                  g_pfnrtNtKeFlushQueuedDpcs;
 /** HalRequestIpi, version introduced with windows 7. */
-PFNHALREQUESTIPI_W7PLUS             g_pfnrtHalRequestIpiW7Plus;
+PFNHALREQUESTIPI_W7PLUS                 g_pfnrtHalRequestIpiW7Plus;
 /** HalRequestIpi, version valid up to windows vista?? */
-PFNHALREQUESTIPI_PRE_W7             g_pfnrtHalRequestIpiPreW7;
+PFNHALREQUESTIPI_PRE_W7                 g_pfnrtHalRequestIpiPreW7;
 /** Worker for RTMpPokeCpu. */
-PFNRTSENDIPI                        g_pfnrtMpPokeCpuWorker;
+PFNRTSENDIPI                            g_pfnrtMpPokeCpuWorker;
 /** KeIpiGenericCall - Introduced in Windows Server 2003. */
-PFNRTKEIPIGENERICCALL               g_pfnrtKeIpiGenericCall;
+PFNRTKEIPIGENERICCALL                   g_pfnrtKeIpiGenericCall;
+/** KeSetTargetProcessorDpcEx - Introduced in Windows 7. */
+PFNKESETTARGETPROCESSORDPCEX            g_pfnrtKeSetTargetProcessorDpcEx;
 /** KeInitializeAffinityEx - Introducted in Windows 7. */
-PFNKEINITIALIZEAFFINITYEX           g_pfnrtKeInitializeAffinityEx;
+PFNKEINITIALIZEAFFINITYEX               g_pfnrtKeInitializeAffinityEx;
 /** KeAddProcessorAffinityEx - Introducted in Windows 7. */
-PFNKEADDPROCESSORAFFINITYEX         g_pfnrtKeAddProcessorAffinityEx;
-/** KeGetProcessorIndexFromNumber - Introducted in Windows  7. */
-PFNKEGETPROCESSORINDEXFROMNUMBER    g_pfnrtKeGetProcessorIndexFromNumber;
+PFNKEADDPROCESSORAFFINITYEX             g_pfnrtKeAddProcessorAffinityEx;
+/** KeGetProcessorIndexFromNumber - Introduced in Windows 7. */
+PFNKEGETPROCESSORINDEXFROMNUMBER        g_pfnrtKeGetProcessorIndexFromNumber;
+/** KeGetProcessorNumberFromIndex - Introduced in Windows 7. */
+PFNKEGETPROCESSORNUMBERFROMINDEX        g_pfnrtKeGetProcessorNumberFromIndex;
+/** KeGetCurrentProcessorNumberEx - Introduced in Windows 7. */
+PFNKEGETCURRENTPROCESSORNUMBEREX        g_pfnrtKeGetCurrentProcessorNumberEx;
+/** KeQueryActiveProcessors - Introduced in Windows 2000. */
+PFNKEQUERYACTIVEPROCESSORS              g_pfnrtKeQueryActiveProcessors;
+/** KeQueryMaximumProcessorCount   - Introduced in Vista and obsoleted in W7. */
+PFNKEQUERYMAXIMUMPROCESSORCOUNT         g_pfnrtKeQueryMaximumProcessorCount;
+/** KeQueryMaximumProcessorCountEx - Introduced in Windows 7. */
+PFNKEQUERYMAXIMUMPROCESSORCOUNTEX       g_pfnrtKeQueryMaximumProcessorCountEx;
+/** KeQueryMaximumGroupCount - Introduced in Windows 7. */
+PFNKEQUERYMAXIMUMGROUPCOUNT             g_pfnrtKeQueryMaximumGroupCount;
+/** KeQueryLogicalProcessorRelationship - Introduced in Windows 7. */
+PFNKEQUERYLOGICALPROCESSORRELATIONSHIP  g_pfnrtKeQueryLogicalProcessorRelationship;
+/** KeRegisterProcessorChangeCallback - Introduced in Windows 7. */
+PFNKEREGISTERPROCESSORCHANGECALLBACK    g_pfnrtKeRegisterProcessorChangeCallback;
+/** KeDeregisterProcessorChangeCallback - Introduced in Windows 7. */
+PFNKEDEREGISTERPROCESSORCHANGECALLBACK  g_pfnrtKeDeregisterProcessorChangeCallback;
 /** RtlGetVersion, introduced in ??. */
-PFNRTRTLGETVERSION                  g_pfnrtRtlGetVersion;
+PFNRTRTLGETVERSION                      g_pfnrtRtlGetVersion;
 #ifndef RT_ARCH_AMD64
 /** KeQueryInterruptTime - exported/new in Windows 2000. */
-PFNRTKEQUERYINTERRUPTTIME           g_pfnrtKeQueryInterruptTime;
+PFNRTKEQUERYINTERRUPTTIME               g_pfnrtKeQueryInterruptTime;
 /** KeQuerySystemTime - exported/new in Windows 2000. */
-PFNRTKEQUERYSYSTEMTIME              g_pfnrtKeQuerySystemTime;
+PFNRTKEQUERYSYSTEMTIME                  g_pfnrtKeQuerySystemTime;
 #endif
 /** KeQueryInterruptTimePrecise - new in Windows 8. */
-PFNRTKEQUERYINTERRUPTTIMEPRECISE    g_pfnrtKeQueryInterruptTimePrecise;
+PFNRTKEQUERYINTERRUPTTIMEPRECISE        g_pfnrtKeQueryInterruptTimePrecise;
 /** KeQuerySystemTimePrecise - new in Windows 8. */
-PFNRTKEQUERYSYSTEMTIMEPRECISE       g_pfnrtKeQuerySystemTimePrecise;
+PFNRTKEQUERYSYSTEMTIMEPRECISE           g_pfnrtKeQuerySystemTimePrecise;
 
 /** Offset of the _KPRCB::QuantumEnd field. 0 if not found. */
-uint32_t                            g_offrtNtPbQuantumEnd;
+uint32_t                                g_offrtNtPbQuantumEnd;
 /** Size of the _KPRCB::QuantumEnd field. 0 if not found. */
-uint32_t                            g_cbrtNtPbQuantumEnd;
+uint32_t                                g_cbrtNtPbQuantumEnd;
 /** Offset of the _KPRCB::DpcQueueDepth field. 0 if not found. */
-uint32_t                            g_offrtNtPbDpcQueueDepth;
+uint32_t                                g_offrtNtPbDpcQueueDepth;
 
 
@@ -125,7 +153,21 @@
 
     /* Note! We cannot quite say if something is MP or UNI. So, fSmp is
-             redefined to indicate that it must be MP. */
-    pOsVerInfo->fSmp        = RTMpGetCount() >  1
-                           || ulMajorVersion >= 6; /* Vista and later has no UNI kernel AFAIK. */
+             redefined to indicate that it must be MP.
+       Note! RTMpGetCount is not available here. */
+    pOsVerInfo->fSmp = ulMajorVersion >= 6; /* Vista and later has no UNI kernel AFAIK. */
+    if (!pOsVerInfo->fSmp)
+    {
+        if (   g_pfnrtKeQueryMaximumProcessorCountEx
+            && g_pfnrtKeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS) > 1)
+            pOsVerInfo->fSmp = true;
+        else if (   g_pfnrtKeQueryMaximumProcessorCount
+                 && g_pfnrtKeQueryMaximumProcessorCount() > 1)
+            pOsVerInfo->fSmp = true;
+        else if (   g_pfnrtKeQueryActiveProcessors
+                 && g_pfnrtKeQueryActiveProcessors() > 1)
+            pOsVerInfo->fSmp = true;
+        else if (KeNumberProcessors > 1)
+            pOsVerInfo->fSmp = true;
+    }
 }
 
@@ -203,73 +245,367 @@
 
 
+/**
+ * Implements the NT PROCESSOR_CALLBACK_FUNCTION callback function.
+ *
+ * This maintains g_rtMpNtCpuSet and dispatches the MP notification callbacks.
+ * As it is registered with KE_PROCESSOR_CHANGE_ADD_EXISTING, it is also called
+ * for each active CPU, avoiding racing CPU hotplugging (and testing the callback).
+ *
+ * @param   pvUser              User context (not used).
+ * @param   pChangeCtx          Change context (in).
+ * @param   prcOperationStatus  Operation status (in/out, not used here).
+ */
+static VOID __stdcall rtR0NtMpProcessorChangeCallback(void *pvUser, PKE_PROCESSOR_CHANGE_NOTIFY_CONTEXT pChangeCtx,
+                                                      PNTSTATUS prcOperationStatus)
+{
+    RT_NOREF(pvUser, prcOperationStatus);
+    switch (pChangeCtx->State)
+    {
+        case KeProcessorAddCompleteNotify:
+            if (pChangeCtx->NtNumber < RTCPUSET_MAX_CPUS) /* NtNumber is used directly as the RTCPUSET index. */
+            {
+                RTCpuSetAddByIndex(&g_rtMpNtCpuSet, pChangeCtx->NtNumber);
+                rtMpNotificationDoCallbacks(RTMPEVENT_ONLINE, pChangeCtx->NtNumber);
+            }
+            else
+            {
+                /* Out of range for the set: complain loudly, but leave the set untouched. */
+                DbgPrint("rtR0NtMpProcessorChangeCallback: NtNumber=%u (%#x) is higher than RTCPUSET_MAX_CPUS (%d)\n",
+                         pChangeCtx->NtNumber, pChangeCtx->NtNumber, RTCPUSET_MAX_CPUS);
+                AssertMsgFailed(("NtNumber=%u (%#x)\n", pChangeCtx->NtNumber, pChangeCtx->NtNumber));
+            }
+            break;
+
+        case KeProcessorAddStartNotify:
+        case KeProcessorAddFailureNotify:
+            /* ignore - only completed additions change the online set. */
+            break;
+
+        default:
+            AssertMsgFailed(("State=%u\n", pChangeCtx->State));
+    }
+}
+
+
+/**
+ * Wrapper around KeQueryLogicalProcessorRelationship querying RelationGroup info.
+ *
+ * @returns IPRT status code.
+ * @param   ppInfo  Where to return the info on success. Pass to RTMemFree when done.
+ */
+static int rtR0NtInitQueryGroupRelations(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX **ppInfo)
+{
+    ULONG    cbInfo = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) /* Initial size guess. */
+                    + g_cRtMpNtMaxGroups * sizeof(GROUP_RELATIONSHIP);
+    NTSTATUS rcNt;
+    do
+    {
+        SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pInfo = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)RTMemAlloc(cbInfo);
+        if (pInfo)
+        {
+            rcNt = g_pfnrtKeQueryLogicalProcessorRelationship(NULL /*pProcNumber*/, RelationGroup, pInfo, &cbInfo);
+            if (NT_SUCCESS(rcNt))
+            {
+                *ppInfo = pInfo; /* Ownership passes to the caller. */
+                return VINF_SUCCESS;
+            }
+
+            RTMemFree(pInfo);
+            pInfo = NULL;
+        }
+        else
+            rcNt = STATUS_NO_MEMORY; /* Ends the loop; reported by the failure path below. */
+    } while (rcNt == STATUS_INFO_LENGTH_MISMATCH); /* Retry; presumably the API left the required size in cbInfo - confirm. */
+    DbgPrint("IPRT: Fatal: KeQueryLogicalProcessorRelationship failed: %#x\n", rcNt);
+    AssertMsgFailed(("KeQueryLogicalProcessorRelationship failed: %#x\n", rcNt));
+    return RTErrConvertFromNtStatus(rcNt);
+}
+
+
+/**
+ * Initializes multiprocessor globals (max groups/CPUs, online set, hotplug callback, poke worker).
+ * @returns IPRT status code.
+ * @param   pOsVerInfo  The OS version info, used for selecting the RTMpPokeCpu worker.
+ */
+static int rtR0NtInitMp(RTNTSDBOSVER const *pOsVerInfo)
+{
+#define MY_CHECK_BREAK(a_Check, a_DbgPrintArgs) \
+        AssertMsgBreakStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs; rc = VERR_INTERNAL_ERROR_4 )
+#define MY_CHECK_RETURN(a_Check, a_DbgPrintArgs, a_rcRet) \
+        AssertMsgReturnStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs, a_rcRet)
+#define MY_CHECK(a_Check, a_DbgPrintArgs) \
+        AssertMsgStmt(a_Check, a_DbgPrintArgs, DbgPrint a_DbgPrintArgs; rc = VERR_INTERNAL_ERROR_4 )
+
+    /*
+     * API combination checks.
+     */
+    MY_CHECK_RETURN(!g_pfnrtKeSetTargetProcessorDpcEx || g_pfnrtKeGetProcessorNumberFromIndex,
+                    ("IPRT: Fatal: Found KeSetTargetProcessorDpcEx but not KeGetProcessorNumberFromIndex!\n"),
+                    VERR_SYMBOL_NOT_FOUND);
+
+    /*
+     * Get max number of processor groups.
+     */
+    if (g_pfnrtKeQueryMaximumGroupCount)
+    {
+        g_cRtMpNtMaxGroups = g_pfnrtKeQueryMaximumGroupCount();
+        MY_CHECK_RETURN(g_cRtMpNtMaxGroups <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxGroups > 0,
+                        ("IPRT: Fatal: g_cRtMpNtMaxGroups=%u, max %u\n", g_cRtMpNtMaxGroups, RTCPUSET_MAX_CPUS),
+                        VERR_MP_TOO_MANY_CPUS);
+    }
+    else
+        g_cRtMpNtMaxGroups = 1;
+
+    /*
+     * Get max number CPUs.
+     * This also defines the range of NT CPU indexes, RTCPUID and index into RTCPUSET.
+     */
+    if (g_pfnrtKeQueryMaximumProcessorCountEx)
+    {
+        g_cRtMpNtMaxCpus = g_pfnrtKeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS);
+        MY_CHECK_RETURN(g_cRtMpNtMaxCpus <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxCpus > 0,
+                        ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, max %u [KeQueryMaximumProcessorCountEx]\n",
+                         g_cRtMpNtMaxCpus, RTCPUSET_MAX_CPUS),
+                        VERR_MP_TOO_MANY_CPUS);
+    }
+    else if (g_pfnrtKeQueryMaximumProcessorCount)
+    {
+        g_cRtMpNtMaxCpus = g_pfnrtKeQueryMaximumProcessorCount();
+        MY_CHECK_RETURN(g_cRtMpNtMaxCpus <= RTCPUSET_MAX_CPUS && g_cRtMpNtMaxCpus > 0,
+                        ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, max %u [KeQueryMaximumProcessorCount]\n",
+                         g_cRtMpNtMaxCpus, RTCPUSET_MAX_CPUS),
+                        VERR_MP_TOO_MANY_CPUS);
+    }
+    else if (g_pfnrtKeQueryActiveProcessors)
+    {
+        KAFFINITY fActiveProcessors = g_pfnrtKeQueryActiveProcessors();
+        MY_CHECK_RETURN(fActiveProcessors != 0,
+                        ("IPRT: Fatal: KeQueryActiveProcessors returned 0!\n"),
+                        VERR_INTERNAL_ERROR_2);
+        g_cRtMpNtMaxCpus = 0; /* The loop below yields the index of the highest set bit + 1. */
+        do
+        {
+            g_cRtMpNtMaxCpus++;
+            fActiveProcessors >>= 1;
+        } while (fActiveProcessors);
+    }
+    else
+        g_cRtMpNtMaxCpus = KeNumberProcessors;
+
+    /*
+     * Query the details for the groups to figure out which CPUs are online as
+     * well as the NT index limit.
+     */
+    if (g_pfnrtKeQueryLogicalProcessorRelationship)
+    {
+        MY_CHECK_RETURN(g_pfnrtKeGetProcessorIndexFromNumber,
+                        ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeGetProcessorIndexFromNumber!\n"),
+                        VERR_SYMBOL_NOT_FOUND);
+        MY_CHECK_RETURN(g_pfnrtKeGetProcessorNumberFromIndex,
+                        ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeGetProcessorNumberFromIndex!\n"),
+                        VERR_SYMBOL_NOT_FOUND);
+        MY_CHECK_RETURN(g_pfnrtKeSetTargetProcessorDpcEx,
+                        ("IPRT: Fatal: Found KeQueryLogicalProcessorRelationship but not KeSetTargetProcessorDpcEx!\n"),
+                        VERR_SYMBOL_NOT_FOUND);
+
+        SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *pInfo = NULL;
+        int rc = rtR0NtInitQueryGroupRelations(&pInfo);
+        if (RT_FAILURE(rc))
+            return rc;
+
+        AssertReturnStmt(pInfo->Group.MaximumGroupCount == g_cRtMpNtMaxGroups, RTMemFree(pInfo), VERR_INTERNAL_ERROR_3);
+
+        /*
+         * Calc online mask.
+         *
+         * Also check ASSUMPTIONS:
+         *      - Processor indexes going to KeQueryMaximumProcessorCountEx(ALL_PROCESSOR_GROUPS)
+         *      - Processor indexes being assigned to absent hotswappable CPUs, i.e.
+         *        KeGetProcessorIndexFromNumber and KeGetProcessorNumberFromIndex work
+         *        for all possible indexes. [Not yet confirmed!]
+         *      - Processor indexes are assigned in group order.
+         *      - MaximumProcessorCount specifies the highest bit in the active mask.
+         *        This is for confirming process IDs assigned by IPRT in ring-3.
+         */
+        /** @todo Test the latter on a real/virtual system. */
+        RTCpuSetEmpty(&g_rtMpNtCpuSet);
+        uint32_t idxCpuExpect = 0;
+        for (uint32_t idxGroup = 0; RT_SUCCESS(rc) && idxGroup < pInfo->Group.ActiveGroupCount; idxGroup++)
+        {
+            const PROCESSOR_GROUP_INFO *pGrpInfo = &pInfo->Group.GroupInfo[idxGroup];
+            MY_CHECK_BREAK(pGrpInfo->MaximumProcessorCount <= MAXIMUM_PROC_PER_GROUP,
+                           ("IPRT: Fatal: MaximumProcessorCount=%u\n", pGrpInfo->MaximumProcessorCount));
+            MY_CHECK_BREAK(pGrpInfo->ActiveProcessorCount <= MAXIMUM_PROC_PER_GROUP,
+                           ("IPRT: Fatal: ActiveProcessorCount=%u\n", pGrpInfo->ActiveProcessorCount));
+            MY_CHECK_BREAK(pGrpInfo->ActiveProcessorCount <= pGrpInfo->MaximumProcessorCount,
+                           ("IPRT: Fatal: ActiveProcessorCount=%u > MaximumProcessorCount=%u\n",
+                            pGrpInfo->ActiveProcessorCount, pGrpInfo->MaximumProcessorCount));
+            for (uint32_t idxMember = 0; idxMember < pGrpInfo->MaximumProcessorCount; idxMember++, idxCpuExpect++)
+            {
+                PROCESSOR_NUMBER ProcNum;
+                ProcNum.Group    = (USHORT)idxGroup;
+                ProcNum.Number   = (UCHAR)idxMember;
+                ProcNum.Reserved = 0;
+                ULONG idxCpu = g_pfnrtKeGetProcessorIndexFromNumber(&ProcNum);
+                MY_CHECK_BREAK(idxCpu != INVALID_PROCESSOR_INDEX,
+                               ("IPRT: Fatal: KeGetProcessorIndexFromNumber(%u/%u) failed\n", idxGroup, idxMember));
+                MY_CHECK_BREAK(idxCpu < g_cRtMpNtMaxCpus && idxCpu < RTCPUSET_MAX_CPUS,
+                               ("IPRT: Fatal: idxCpu=%u >= g_cRtMpNtMaxCpus=%u (RTCPUSET_MAX_CPUS=%u)\n",
+                                idxCpu, g_cRtMpNtMaxCpus, RTCPUSET_MAX_CPUS));
+                MY_CHECK_BREAK(idxCpu == idxCpuExpect, ("IPRT: Fatal: idxCpu=%u != idxCpuExpect=%u\n", idxCpu, idxCpuExpect));
+
+                ProcNum.Group    = UINT16_MAX; /* Poison values; the roundtrip check below requires them to be overwritten. */
+                ProcNum.Number   = UINT8_MAX;
+                ProcNum.Reserved = UINT8_MAX;
+                NTSTATUS rcNt = g_pfnrtKeGetProcessorNumberFromIndex(idxCpu, &ProcNum);
+                MY_CHECK_BREAK(NT_SUCCESS(rcNt), ("IPRT: Fatal: KeGetProcessorNumberFromIndex(%u,) -> %#x!\n", idxCpu, rcNt));
+                MY_CHECK_BREAK(ProcNum.Group == idxGroup && ProcNum.Number == idxMember,
+                               ("IPRT: Fatal: KeGetProcessorXxxxFromYyyy roundtrip error for %#x! Group: %u vs %u, Number: %u vs %u\n",
+                                idxCpu, ProcNum.Group, idxGroup, ProcNum.Number, idxMember));
+
+                if (pGrpInfo->ActiveProcessorMask & RT_BIT_64(idxMember))
+                    RTCpuSetAddByIndex(&g_rtMpNtCpuSet, idxCpu);
+            }
+        }
+        RTMemFree(pInfo);
+        if (RT_FAILURE(rc)) /* MY_CHECK_BREAK sets rc; the outer loop stops on failure via its RT_SUCCESS(rc) condition. */
+            return rc;
+    }
+    else
+    {
+        /* Legacy: */
+        MY_CHECK_RETURN(g_cRtMpNtMaxGroups == 1, ("IPRT: Fatal: Missing KeQueryLogicalProcessorRelationship!\n"),
+                        VERR_SYMBOL_NOT_FOUND);
+
+        if (g_pfnrtKeQueryActiveProcessors)
+            RTCpuSetFromU64(&g_rtMpNtCpuSet, g_pfnrtKeQueryActiveProcessors());
+        else if (g_cRtMpNtMaxCpus < 64) /* Shifting by 64 would be undefined, so 64 CPUs get the branch below. */
+            RTCpuSetFromU64(&g_rtMpNtCpuSet, (UINT64_C(1) << g_cRtMpNtMaxCpus) - 1);
+        else
+        {
+            MY_CHECK_RETURN(g_cRtMpNtMaxCpus == 64, ("IPRT: Fatal: g_cRtMpNtMaxCpus=%u, expect 64 or less\n", g_cRtMpNtMaxCpus),
+                            VERR_MP_TOO_MANY_CPUS);
+            RTCpuSetFromU64(&g_rtMpNtCpuSet, UINT64_MAX);
+        }
+    }
+
+    /*
+     * Register CPU hot plugging callback.
+     */
+    Assert(g_pvMpCpuChangeCallback == NULL);
+    if (g_pfnrtKeRegisterProcessorChangeCallback)
+    {
+        MY_CHECK_RETURN(g_pfnrtKeDeregisterProcessorChangeCallback,
+                        ("IPRT: Fatal: KeRegisterProcessorChangeCallback without KeDeregisterProcessorChangeCallback!\n"),
+                        VERR_SYMBOL_NOT_FOUND);
+
+        RTCPUSET ActiveSetCopy = g_rtMpNtCpuSet; /* Kept so the computed set can be restored if registration fails. */
+        RTCpuSetEmpty(&g_rtMpNtCpuSet);          /* The callback re-adds the CPUs (KE_PROCESSOR_CHANGE_ADD_EXISTING). */
+        g_pvMpCpuChangeCallback = g_pfnrtKeRegisterProcessorChangeCallback(rtR0NtMpProcessorChangeCallback, NULL /*pvUser*/,
+                                                                           KE_PROCESSOR_CHANGE_ADD_EXISTING);
+        if (!g_pvMpCpuChangeCallback)
+        {
+            AssertFailed();
+            g_rtMpNtCpuSet = ActiveSetCopy;
+        }
+    }
+
+    /*
+     * Special IPI fun for RTMpPokeCpu.
+     *
+     * On Vista and later the DPC method doesn't seem to reliably send IPIs,
+     * so we have to use alternative methods.
+     *
+     * On AMD64 we used to use the HalSendSoftwareInterrupt API (also x86 on
+     * W10+), it looks faster and more convenient to use, however we're either
+     * using it wrong or it doesn't reliably do what we want (see @bugref{8343}).
+     *
+     * The HalRequestIpi API is thus far the only alternative to KeInsertQueueDpc
+     * for doing targeted IPIs.  Trouble with this API is that it changed
+     * fundamentally in Windows 7 when they added support for lots of processors.
+     *
+     * If we really think we cannot use KeInsertQueueDpc, we use the broadcast IPI
+     * API KeIpiGenericCall.
+     */
+    if (   pOsVerInfo->uMajorVer > 6
+        || (pOsVerInfo->uMajorVer == 6 && pOsVerInfo->uMinorVer > 0))
+        g_pfnrtHalRequestIpiPreW7 = NULL;
+    else
+        g_pfnrtHalRequestIpiW7Plus = NULL;
+
+    g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingDpc;
+#ifndef IPRT_TARGET_NT4
+    if (   g_pfnrtHalRequestIpiW7Plus
+        && g_pfnrtKeInitializeAffinityEx
+        && g_pfnrtKeAddProcessorAffinityEx
+        && g_pfnrtKeGetProcessorIndexFromNumber)
+    {
+        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingHalReqestIpiW7Plus\n");
+        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalReqestIpiW7Plus;
+    }
+    else if (pOsVerInfo->uMajorVer >= 6 && g_pfnrtKeIpiGenericCall)
+    {
+        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingBroadcastIpi\n");
+        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingBroadcastIpi;
+    }
+    else
+        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingDpc\n");
+    /* else: Windows XP should always send an IPI -> VERIFY */
+#endif
+
+    return VINF_SUCCESS;
+}
+
+
 DECLHIDDEN(int) rtR0InitNative(void)
 {
     /*
-     * Init the Nt cpu set.
+     * Initialize the function pointers.
      */
 #ifdef IPRT_TARGET_NT4
-    KAFFINITY ActiveProcessors = (UINT64_C(1) << KeNumberProcessors) - UINT64_C(1);
+# define GET_SYSTEM_ROUTINE_EX(a_Prf, a_Name, a_pfnType) do { RT_CONCAT3(g_pfnrt, a_Prf, a_Name) = NULL; } while (0)
 #else
-    KAFFINITY ActiveProcessors = KeQueryActiveProcessors();
+    UNICODE_STRING RoutineName;
+# define GET_SYSTEM_ROUTINE_EX(a_Prf, a_Name, a_pfnType) \
+    do { \
+        RtlInitUnicodeString(&RoutineName, L#a_Name); \
+        RT_CONCAT3(g_pfnrt, a_Prf, a_Name) = (a_pfnType)MmGetSystemRoutineAddress(&RoutineName); \
+    } while (0)
 #endif
-    RTCpuSetEmpty(&g_rtMpNtCpuSet);
-    RTCpuSetFromU64(&g_rtMpNtCpuSet, ActiveProcessors);
-/** @todo Port to W2K8 with > 64 cpus/threads. */
-
-    /*
-     * Initialize the function pointers.
-     */
+#define GET_SYSTEM_ROUTINE(a_Name)                 GET_SYSTEM_ROUTINE_EX(RT_NOTHING, a_Name, decltype(a_Name) *)
+#define GET_SYSTEM_ROUTINE_PRF(a_Prf,a_Name)       GET_SYSTEM_ROUTINE_EX(a_Prf, a_Name, decltype(a_Name) *)
+#define GET_SYSTEM_ROUTINE_TYPE(a_Name, a_pfnType) GET_SYSTEM_ROUTINE_EX(RT_NOTHING, a_Name, a_pfnType)
+
+    GET_SYSTEM_ROUTINE_PRF(Nt,ExSetTimerResolution);
+    GET_SYSTEM_ROUTINE_PRF(Nt,KeFlushQueuedDpcs);
+    GET_SYSTEM_ROUTINE(KeIpiGenericCall);
+    GET_SYSTEM_ROUTINE(KeSetTargetProcessorDpcEx);
+    GET_SYSTEM_ROUTINE(KeInitializeAffinityEx);
+    GET_SYSTEM_ROUTINE(KeAddProcessorAffinityEx);
+    GET_SYSTEM_ROUTINE_TYPE(KeGetProcessorIndexFromNumber, PFNKEGETPROCESSORINDEXFROMNUMBER);
+    GET_SYSTEM_ROUTINE(KeGetProcessorNumberFromIndex);
+    GET_SYSTEM_ROUTINE_TYPE(KeGetCurrentProcessorNumberEx, PFNKEGETCURRENTPROCESSORNUMBEREX);
+    GET_SYSTEM_ROUTINE(KeQueryActiveProcessors);
+    GET_SYSTEM_ROUTINE(KeQueryMaximumProcessorCount);
+    GET_SYSTEM_ROUTINE(KeQueryMaximumProcessorCountEx);
+    GET_SYSTEM_ROUTINE(KeQueryMaximumGroupCount);
+    GET_SYSTEM_ROUTINE(KeQueryLogicalProcessorRelationship);
+    GET_SYSTEM_ROUTINE(KeRegisterProcessorChangeCallback);
+    GET_SYSTEM_ROUTINE(KeDeregisterProcessorChangeCallback);
+
+    GET_SYSTEM_ROUTINE_TYPE(RtlGetVersion, PFNRTRTLGETVERSION);
+#ifndef RT_ARCH_AMD64
+    GET_SYSTEM_ROUTINE(KeQueryInterruptTime);
+    GET_SYSTEM_ROUTINE(KeQuerySystemTime);
+#endif
+    GET_SYSTEM_ROUTINE_TYPE(KeQueryInterruptTimePrecise, PFNRTKEQUERYINTERRUPTTIMEPRECISE);
+    GET_SYSTEM_ROUTINE_TYPE(KeQuerySystemTimePrecise, PFNRTKEQUERYSYSTEMTIMEPRECISE);
+
 #ifdef IPRT_TARGET_NT4
-    g_pfnrtNtExSetTimerResolution = NULL;
-    g_pfnrtNtKeFlushQueuedDpcs = NULL;
     g_pfnrtHalRequestIpiW7Plus = NULL;
     g_pfnrtHalRequestIpiPreW7 = NULL;
-    g_pfnrtKeIpiGenericCall = NULL;
-    g_pfnrtKeInitializeAffinityEx = NULL;
-    g_pfnrtKeAddProcessorAffinityEx = NULL;
-    g_pfnrtKeGetProcessorIndexFromNumber = NULL;
-    g_pfnrtRtlGetVersion = NULL;
-    g_pfnrtKeQueryInterruptTime = NULL;
-    g_pfnrtKeQueryInterruptTimePrecise = NULL;
-    g_pfnrtKeQuerySystemTime = NULL;
-    g_pfnrtKeQuerySystemTimePrecise = NULL;
 #else
-    UNICODE_STRING RoutineName;
-    RtlInitUnicodeString(&RoutineName, L"ExSetTimerResolution");
-    g_pfnrtNtExSetTimerResolution = (PFNMYEXSETTIMERRESOLUTION)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeFlushQueuedDpcs");
-    g_pfnrtNtKeFlushQueuedDpcs = (PFNMYKEFLUSHQUEUEDDPCS)MmGetSystemRoutineAddress(&RoutineName);
-
     RtlInitUnicodeString(&RoutineName, L"HalRequestIpi");
     g_pfnrtHalRequestIpiW7Plus = (PFNHALREQUESTIPI_W7PLUS)MmGetSystemRoutineAddress(&RoutineName);
     g_pfnrtHalRequestIpiPreW7 = (PFNHALREQUESTIPI_PRE_W7)g_pfnrtHalRequestIpiW7Plus;
-
-    RtlInitUnicodeString(&RoutineName, L"KeIpiGenericCall");
-    g_pfnrtKeIpiGenericCall = (PFNRTKEIPIGENERICCALL)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeInitializeAffinityEx");
-    g_pfnrtKeInitializeAffinityEx = (PFNKEINITIALIZEAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeAddProcessorAffinityEx");
-    g_pfnrtKeAddProcessorAffinityEx = (PFNKEADDPROCESSORAFFINITYEX)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeGetProcessorIndexFromNumber");
-    g_pfnrtKeGetProcessorIndexFromNumber = (PFNKEGETPROCESSORINDEXFROMNUMBER)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"RtlGetVersion");
-    g_pfnrtRtlGetVersion = (PFNRTRTLGETVERSION)MmGetSystemRoutineAddress(&RoutineName);
-# ifndef RT_ARCH_AMD64
-    RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTime");
-    g_pfnrtKeQueryInterruptTime = (PFNRTKEQUERYINTERRUPTTIME)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTime");
-    g_pfnrtKeQuerySystemTime = (PFNRTKEQUERYSYSTEMTIME)MmGetSystemRoutineAddress(&RoutineName);
-# endif
-    RtlInitUnicodeString(&RoutineName, L"KeQueryInterruptTimePrecise");
-    g_pfnrtKeQueryInterruptTimePrecise = (PFNRTKEQUERYINTERRUPTTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName);
-
-    RtlInitUnicodeString(&RoutineName, L"KeQuerySystemTimePrecise");
-    g_pfnrtKeQuerySystemTimePrecise = (PFNRTKEQUERYSYSTEMTIMEPRECISE)MmGetSystemRoutineAddress(&RoutineName);
 #endif
 
@@ -403,45 +739,14 @@
 
     /*
-     * Special IPI fun for RTMpPokeCpu.
-     *
-     * On Vista and later the DPC method doesn't seem to reliably send IPIs,
-     * so we have to use alternative methods.
-     *
-     * On AMD64 We used to use the HalSendSoftwareInterrupt API (also x86 on
-     * W10+), it looks faster and more convenient to use, however we're either
-     * using it wrong or it doesn't reliably do what we want (see @bugref{8343}).
-     *
-     * The HalRequestIpip API is thus far the only alternative to KeInsertQueueDpc
-     * for doing targetted IPIs.  Trouble with this API is that it changed
-     * fundamentally in Window 7 when they added support for lots of processors.
-     *
-     * If we really think we cannot use KeInsertQueueDpc, we use the broadcast IPI
-     * API KeIpiGenericCall.
-     */
-    if (   OsVerInfo.uMajorVer > 6
-        || (OsVerInfo.uMajorVer == 6 && OsVerInfo.uMinorVer > 0))
-        g_pfnrtHalRequestIpiPreW7 = NULL;
-    else
-        g_pfnrtHalRequestIpiW7Plus = NULL;
-
-    g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingDpc;
-#ifndef IPRT_TARGET_NT4
-    if (   g_pfnrtHalRequestIpiW7Plus
-        && g_pfnrtKeInitializeAffinityEx
-        && g_pfnrtKeAddProcessorAffinityEx
-        && g_pfnrtKeGetProcessorIndexFromNumber)
-    {
-        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingHalReqestIpiW7Plus\n");
-        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingHalReqestIpiW7Plus;
-    }
-    else if (OsVerInfo.uMajorVer >= 6 && g_pfnrtKeIpiGenericCall)
-    {
-        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingBroadcastIpi\n");
-        g_pfnrtMpPokeCpuWorker = rtMpPokeCpuUsingBroadcastIpi;
-    }
-    else
-        DbgPrint("IPRT: RTMpPoke => rtMpPokeCpuUsingDpc\n");
-    /* else: Windows XP should send always send an IPI -> VERIFY */
-#endif
+     * Initialize multi processor stuff.  This registers a callback, so
+     * we call rtR0TermNative to do the deregistration on failure.
+     */
+    int rc = rtR0NtInitMp(&OsVerInfo);
+    if (RT_FAILURE(rc))
+    {
+        rtR0TermNative();
+        DbgPrint("IPRT: Fatal: rtR0NtInitMp failed: %d\n", rc);
+        return rc;
+    }
 
     return VINF_SUCCESS;
@@ -451,4 +756,14 @@
 DECLHIDDEN(void) rtR0TermNative(void)
 {
+    /*
+     * Deregister the processor change callback (if rtR0NtInitMp registered one).
+     */
+    PVOID pvMpCpuChangeCallback = g_pvMpCpuChangeCallback;
+    g_pvMpCpuChangeCallback = NULL; /* Cleared first, so a repeated call finds nothing to deregister. */
+    if (pvMpCpuChangeCallback)
+    {
+        AssertReturnVoid(g_pfnrtKeDeregisterProcessorChangeCallback); /* rtR0NtInitMp refuses to register without it. */
+        g_pfnrtKeDeregisterProcessorChangeCallback(pvMpCpuChangeCallback);
+    }
 }
 
Index: /trunk/src/VBox/Runtime/r0drv/nt/internal-r0drv-nt.h
===================================================================
--- /trunk/src/VBox/Runtime/r0drv/nt/internal-r0drv-nt.h	(revision 64233)
+++ /trunk/src/VBox/Runtime/r0drv/nt/internal-r0drv-nt.h	(revision 64234)
@@ -53,26 +53,39 @@
 *   Global Variables                                                           *
 *******************************************************************************/
-extern RTCPUSET                         g_rtMpNtCpuSet;
-extern PFNMYEXSETTIMERRESOLUTION        g_pfnrtNtExSetTimerResolution;
-extern PFNMYKEFLUSHQUEUEDDPCS           g_pfnrtNtKeFlushQueuedDpcs;
-extern PFNHALREQUESTIPI_W7PLUS          g_pfnrtHalRequestIpiW7Plus;
-extern PFNHALREQUESTIPI_PRE_W7          g_pfnrtHalRequestIpiPreW7;
-extern PFNHALSENDSOFTWAREINTERRUPT      g_pfnrtNtHalSendSoftwareInterrupt;
-extern PFNRTSENDIPI                     g_pfnrtMpPokeCpuWorker;
-extern PFNRTKEIPIGENERICCALL            g_pfnrtKeIpiGenericCall;
-extern PFNKEINITIALIZEAFFINITYEX        g_pfnrtKeInitializeAffinityEx;
-extern PFNKEADDPROCESSORAFFINITYEX      g_pfnrtKeAddProcessorAffinityEx;
-extern PFNKEGETPROCESSORINDEXFROMNUMBER g_pfnrtKeGetProcessorIndexFromNumber;
+extern RTCPUSET                                g_rtMpNtCpuSet;
+extern uint32_t                                g_cRtMpNtMaxGroups;
+extern uint32_t                                g_cRtMpNtMaxCpus;
 
-extern PFNRTRTLGETVERSION               g_pfnrtRtlGetVersion;
+extern PFNMYEXSETTIMERRESOLUTION               g_pfnrtNtExSetTimerResolution;
+extern PFNMYKEFLUSHQUEUEDDPCS                  g_pfnrtNtKeFlushQueuedDpcs;
+extern PFNHALREQUESTIPI_W7PLUS                 g_pfnrtHalRequestIpiW7Plus;
+extern PFNHALREQUESTIPI_PRE_W7                 g_pfnrtHalRequestIpiPreW7;
+extern PFNHALSENDSOFTWAREINTERRUPT             g_pfnrtNtHalSendSoftwareInterrupt;
+extern PFNRTSENDIPI                            g_pfnrtMpPokeCpuWorker;
+extern PFNRTKEIPIGENERICCALL                   g_pfnrtKeIpiGenericCall;
+extern PFNKESETTARGETPROCESSORDPCEX            g_pfnrtKeSetTargetProcessorDpcEx;
+extern PFNKEINITIALIZEAFFINITYEX               g_pfnrtKeInitializeAffinityEx;
+extern PFNKEADDPROCESSORAFFINITYEX             g_pfnrtKeAddProcessorAffinityEx;
+extern PFNKEGETPROCESSORINDEXFROMNUMBER        g_pfnrtKeGetProcessorIndexFromNumber;
+extern PFNKEGETPROCESSORNUMBERFROMINDEX        g_pfnrtKeGetProcessorNumberFromIndex;
+extern PFNKEGETCURRENTPROCESSORNUMBEREX        g_pfnrtKeGetCurrentProcessorNumberEx;
+extern PFNKEQUERYACTIVEPROCESSORS              g_pfnrtKeQueryActiveProcessors;
+extern PFNKEQUERYMAXIMUMPROCESSORCOUNT         g_pfnrtKeQueryMaximumProcessorCount;
+extern PFNKEQUERYMAXIMUMPROCESSORCOUNTEX       g_pfnrtKeQueryMaximumProcessorCountEx;
+extern PFNKEQUERYMAXIMUMGROUPCOUNT             g_pfnrtKeQueryMaximumGroupCount;
+extern PFNKEQUERYLOGICALPROCESSORRELATIONSHIP  g_pfnrtKeQueryLogicalProcessorRelationship;
+extern PFNKEREGISTERPROCESSORCHANGECALLBACK    g_pfnrtKeRegisterProcessorChangeCallback;
+extern PFNKEDEREGISTERPROCESSORCHANGECALLBACK  g_pfnrtKeDeregisterProcessorChangeCallback;
+extern PFNRTRTLGETVERSION                      g_pfnrtRtlGetVersion;
 #ifndef RT_ARCH_AMD64
-extern PFNRTKEQUERYINTERRUPTTIME        g_pfnrtKeQueryInterruptTime;
-extern PFNRTKEQUERYSYSTEMTIME           g_pfnrtKeQuerySystemTime;
+extern PFNRTKEQUERYINTERRUPTTIME               g_pfnrtKeQueryInterruptTime;
+extern PFNRTKEQUERYSYSTEMTIME                  g_pfnrtKeQuerySystemTime;
 #endif
-extern PFNRTKEQUERYINTERRUPTTIMEPRECISE g_pfnrtKeQueryInterruptTimePrecise;
-extern PFNRTKEQUERYSYSTEMTIMEPRECISE    g_pfnrtKeQuerySystemTimePrecise;
-extern uint32_t                         g_offrtNtPbQuantumEnd;
-extern uint32_t                         g_cbrtNtPbQuantumEnd;
-extern uint32_t                         g_offrtNtPbDpcQueueDepth;
+extern PFNRTKEQUERYINTERRUPTTIMEPRECISE        g_pfnrtKeQueryInterruptTimePrecise;
+extern PFNRTKEQUERYSYSTEMTIMEPRECISE           g_pfnrtKeQuerySystemTimePrecise;
+
+extern uint32_t                                g_offrtNtPbQuantumEnd;
+extern uint32_t                                g_cbrtNtPbQuantumEnd;
+extern uint32_t                                g_offrtNtPbDpcQueueDepth;
 
 
@@ -82,4 +95,6 @@
 int __stdcall rtMpPokeCpuUsingHalReqestIpiPreW7(RTCPUID idCpu);
 
+DECLHIDDEN(int) rtMpNtSetTargetProcessorDpc(KDPC *pDpc, RTCPUID idCpu);
+
 RT_C_DECLS_END
 
Index: /trunk/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp
===================================================================
--- /trunk/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp	(revision 64233)
+++ /trunk/src/VBox/Runtime/r0drv/nt/mp-r0drv-nt.cpp	(revision 64234)
@@ -76,18 +76,15 @@
 
 
-/* test a couple of assumption. */
-AssertCompile(MAXIMUM_PROCESSORS <= RTCPUSET_MAX_CPUS);
-AssertCompile(NIL_RTCPUID >= MAXIMUM_PROCESSORS);
-
-/** @todo
- * We cannot do other than assume a 1:1 relationship between the
- * affinity mask and the process despite the vagueness/warnings in
- * the docs. If someone knows a better way to get this done, please
- * let bird know.
- */
-
-
 RTDECL(RTCPUID) RTMpCpuId(void)
 {
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
+    if (g_pfnrtKeGetCurrentProcessorNumberEx)
+    {
+        KEPROCESSORINDEX idxCpu = g_pfnrtKeGetCurrentProcessorNumberEx(NULL);
+        Assert(idxCpu < RTCPUSET_MAX_CPUS);
+        return idxCpu;
+    }
+
     /* WDK upgrade warning: PCR->Number changed from BYTE to WORD. */
     return KeGetCurrentProcessorNumber();
@@ -97,6 +94,5 @@
 RTDECL(int) RTMpCurSetIndex(void)
 {
-    /* WDK upgrade warning: PCR->Number changed from BYTE to WORD. */
-    return KeGetCurrentProcessorNumber();
+    return (int)RTMpCpuId();
 }
 
@@ -104,5 +100,5 @@
 RTDECL(int) RTMpCurSetIndexAndId(PRTCPUID pidCpu)
 {
-    return *pidCpu = KeGetCurrentProcessorNumber();
+    return *pidCpu = RTMpCpuId();
 }
 
@@ -110,5 +106,6 @@
 RTDECL(int) RTMpCpuIdToSetIndex(RTCPUID idCpu)
 {
-    return idCpu < MAXIMUM_PROCESSORS ? (int)idCpu : -1;
+    /* 1:1 mapping, just do range checks. */
+    return idCpu < RTCPUSET_MAX_CPUS ? (int)idCpu : -1;
 }
 
@@ -116,5 +113,6 @@
 RTDECL(RTCPUID) RTMpCpuIdFromSetIndex(int iCpu)
 {
-    return (unsigned)iCpu < MAXIMUM_PROCESSORS ? iCpu : NIL_RTCPUID;
+    /* 1:1 mapping, just do range checks. */
+    return (unsigned)iCpu < RTCPUSET_MAX_CPUS ? iCpu : NIL_RTCPUID;
 }
 
@@ -122,6 +120,9 @@
 RTDECL(RTCPUID) RTMpGetMaxCpuId(void)
 {
-    /** @todo use KeQueryMaximumProcessorCount on vista+ */
-    return MAXIMUM_PROCESSORS - 1;
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
+    /* According to MSDN the processor indexes go from 0 to the maximum
+       number of CPUs in the system.  We've checked this in initterm-r0drv-nt.cpp. */
+    return g_cRtMpNtMaxCpus - 1;
 }
 
@@ -129,13 +130,7 @@
 RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu)
 {
-    if (idCpu >= MAXIMUM_PROCESSORS)
-        return false;
-
-#if 0 /* this isn't safe at all IRQLs (great work guys) */
-    KAFFINITY Mask = KeQueryActiveProcessors();
-    return !!(Mask & RT_BIT_64(idCpu));
-#else
-    return RTCpuSetIsMember(&g_rtMpNtCpuSet, idCpu);
-#endif
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+    return idCpu < RTCPUSET_MAX_CPUS
+        && RTCpuSetIsMember(&g_rtMpNtCpuSet, idCpu);
 }
 
@@ -143,8 +138,9 @@
 RTDECL(bool) RTMpIsCpuPossible(RTCPUID idCpu)
 {
-    /* Cannot easily distinguish between online and offline cpus. */
-    /** @todo online/present cpu stuff must be corrected for proper W2K8 support
-     *        (KeQueryMaximumProcessorCount). */
-    return RTMpIsCpuOnline(idCpu);
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
+    /* A possible CPU ID is one with a value lower than g_cRtMpNtMaxCpus (see
+       comment in RTMpGetMaxCpuId). */
+    return idCpu < g_cRtMpNtMaxCpus;
 }
 
@@ -153,7 +149,13 @@
 RTDECL(PRTCPUSET) RTMpGetSet(PRTCPUSET pSet)
 {
-    /** @todo online/present cpu stuff must be corrected for proper W2K8 support
-     *        (KeQueryMaximumProcessorCount). */
-    return RTMpGetOnlineSet(pSet);
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
+    /* The set of possible CPU IDs(/indexes) runs from 0 up to, but not
+       including, g_cRtMpNtMaxCpus (see comment in RTMpGetMaxCpuId). */
+    RTCpuSetEmpty(pSet);
+    int idxCpu = g_cRtMpNtMaxCpus;
+    while (idxCpu-- > 0)
+        RTCpuSetAddByIndex(pSet, idxCpu);
+    return pSet;
 }
 
@@ -161,7 +163,6 @@
 RTDECL(RTCPUID) RTMpGetCount(void)
 {
-    /** @todo online/present cpu stuff must be corrected for proper W2K8 support
-     *        (KeQueryMaximumProcessorCount). */
-    return RTMpGetOnlineCount();
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+    return g_cRtMpNtMaxCpus;
 }
 
@@ -169,11 +170,8 @@
 RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet)
 {
-#if 0 /* this isn't safe at all IRQLs (great work guys) */
-    KAFFINITY Mask = KeQueryActiveProcessors();
-    return RTCpuSetFromU64(pSet, Mask);
-#else
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
     *pSet = g_rtMpNtCpuSet;
     return pSet;
-#endif
 }
 
@@ -236,5 +234,5 @@
     PRTMPARGS pArgs = (PRTMPARGS)uUserCtx;
     /*ASMAtomicIncU32(&pArgs->cHits); - not needed */
-    pArgs->pfnWorker(KeGetCurrentProcessorNumber(), pArgs->pvUser1, pArgs->pvUser2);
+    pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2);
     return 0;
 }
@@ -250,5 +248,5 @@
 {
     PRTMPARGS pArgs = (PRTMPARGS)uUserCtx;
-    RTCPUID idCpu = KeGetCurrentProcessorNumber();
+    RTCPUID idCpu = RTMpCpuId();
     if (pArgs->idCpu != idCpu)
     {
@@ -269,5 +267,5 @@
 {
     PRTMPARGS pArgs = (PRTMPARGS)uUserCtx;
-    RTCPUID idCpu = KeGetCurrentProcessorNumber();
+    RTCPUID idCpu = RTMpCpuId();
     if (   pArgs->idCpu  == idCpu
         || pArgs->idCpu2 == idCpu)
@@ -289,5 +287,5 @@
 {
     PRTMPARGS pArgs = (PRTMPARGS)uUserCtx;
-    RTCPUID idCpu = KeGetCurrentProcessorNumber();
+    RTCPUID idCpu = RTMpCpuId();
     if (pArgs->idCpu == idCpu)
     {
@@ -348,5 +346,5 @@
 
     ASMAtomicIncU32(&pArgs->cHits);
-    pArgs->pfnWorker(KeGetCurrentProcessorNumber(), pArgs->pvUser1, pArgs->pvUser2);
+    pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2);
 
     /* Dereference the argument structure. */
@@ -355,4 +353,36 @@
     if (cRefs == 0)
         ExFreePool(pArgs);
+}
+
+
+/**
+ * Wrapper around KeSetTargetProcessorDpcEx / KeSetTargetProcessorDpc.
+ *
+ * This is shared with the timer code.
+ *
+ * @returns VINF_SUCCESS, or the converted NT status on failure (asserted).
+ * @param   pDpc                The DPC to retarget.
+ * @param   idCpu               The ID (processor index) of the new target CPU.
+ */
+DECLHIDDEN(int) rtMpNtSetTargetProcessorDpc(KDPC *pDpc, RTCPUID idCpu)
+{
+    if (g_pfnrtKeSetTargetProcessorDpcEx)
+    {
+        /* Convert the index to a PROCESSOR_NUMBER (group + number) first.
+           NOTE(review): g_pfnrtKeGetProcessorNumberFromIndex is not NULL-checked here - confirm init guarantees it. */
+        PROCESSOR_NUMBER ProcNum;
+        NTSTATUS rcNt = g_pfnrtKeGetProcessorNumberFromIndex(idCpu, &ProcNum);
+        AssertMsgReturn(NT_SUCCESS(rcNt),
+                        ("KeGetProcessorNumberFromIndex(%u) -> %#x\n", idCpu, rcNt),
+                        RTErrConvertFromNtStatus(rcNt));
+
+        rcNt = g_pfnrtKeSetTargetProcessorDpcEx(pDpc, &ProcNum);
+        AssertMsgReturn(NT_SUCCESS(rcNt),
+                        ("KeSetTargetProcessorDpcEx(,%u(%u/%u)) -> %#x\n", idCpu, ProcNum.Group, ProcNum.Number, rcNt),
+                        RTErrConvertFromNtStatus(rcNt));
+    }
+    else
+        KeSetTargetProcessorDpc(pDpc, (int)idCpu);
+    return VINF_SUCCESS;
 }
 
@@ -396,5 +426,5 @@
         return VERR_NOT_SUPPORTED;
 
-    pArgs = (PRTMPARGS)ExAllocatePoolWithTag(NonPagedPool, MAXIMUM_PROCESSORS*sizeof(KDPC) + sizeof(RTMPARGS), (ULONG)'RTMp');
+    pArgs = (PRTMPARGS)ExAllocatePoolWithTag(NonPagedPool, g_cRtMpNtMaxCpus * sizeof(KDPC) + sizeof(RTMPARGS), (ULONG)'RTMp');
     if (!pArgs)
         return VERR_NO_MEMORY;
@@ -410,9 +440,10 @@
     paExecCpuDpcs = (KDPC *)(pArgs + 1);
 
+    int rc;
     if (enmCpuid == RT_NT_CPUID_SPECIFIC)
     {
         KeInitializeDpc(&paExecCpuDpcs[0], rtmpNtDPCWrapper, pArgs);
         KeSetImportanceDpc(&paExecCpuDpcs[0], HighImportance);
-        KeSetTargetProcessorDpc(&paExecCpuDpcs[0], (int)idCpu);
+        rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[0], idCpu);
         pArgs->idCpu = idCpu;
     }
@@ -421,20 +452,27 @@
         KeInitializeDpc(&paExecCpuDpcs[0], rtmpNtDPCWrapper, pArgs);
         KeSetImportanceDpc(&paExecCpuDpcs[0], HighImportance);
-        KeSetTargetProcessorDpc(&paExecCpuDpcs[0], (int)idCpu);
+        rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[0], idCpu);
         pArgs->idCpu = idCpu;
 
         KeInitializeDpc(&paExecCpuDpcs[1], rtmpNtDPCWrapper, pArgs);
         KeSetImportanceDpc(&paExecCpuDpcs[1], HighImportance);
-        KeSetTargetProcessorDpc(&paExecCpuDpcs[1], (int)idCpu2);
+        if (RT_SUCCESS(rc))
+            rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[1], (int)idCpu2);
         pArgs->idCpu2 = idCpu2;
     }
     else
     {
-        for (unsigned i = 0; i < MAXIMUM_PROCESSORS; i++)
+        rc = VINF_SUCCESS;
+        for (unsigned i = 0; i < g_cRtMpNtMaxCpus && RT_SUCCESS(rc); i++)
         {
             KeInitializeDpc(&paExecCpuDpcs[i], rtmpNtDPCWrapper, pArgs);
             KeSetImportanceDpc(&paExecCpuDpcs[i], HighImportance);
-            KeSetTargetProcessorDpc(&paExecCpuDpcs[i], i);
+            rc = rtMpNtSetTargetProcessorDpc(&paExecCpuDpcs[i], i);
         }
+    }
+    if (RT_FAILURE(rc))
+    {
+        ExFreePool(pArgs);
+        return rc;
     }
 
@@ -469,7 +507,7 @@
     else
     {
-        unsigned iSelf = KeGetCurrentProcessorNumber();
-
-        for (unsigned i = 0; i < MAXIMUM_PROCESSORS; i++)
+        unsigned iSelf = RTMpCpuId();
+
+        for (unsigned i = 0; i < g_cRtMpNtMaxCpus; i++)
         {
             if (    (i != iSelf)
@@ -505,5 +543,5 @@
 
     return VINF_SUCCESS;
-#endif /* */
+#endif /* !IPRT_TARGET_NT4 */
 }
 
@@ -611,5 +649,5 @@
     ASMAtomicWriteBool(&pArgs->fExecuting, true);
 
-    pArgs->CallbackArgs.pfnWorker(KeGetCurrentProcessorNumber(), pArgs->CallbackArgs.pvUser1, pArgs->CallbackArgs.pvUser2);
+    pArgs->CallbackArgs.pfnWorker(RTMpCpuId(), pArgs->CallbackArgs.pvUser1, pArgs->CallbackArgs.pvUser2);
 
     ASMAtomicWriteBool(&pArgs->fDone, true);
@@ -683,5 +721,10 @@
     KeInitializeDpc(&pArgs->Dpc, rtMpNtOnSpecificDpcWrapper, pArgs);
     KeSetImportanceDpc(&pArgs->Dpc, HighImportance);
-    KeSetTargetProcessorDpc(&pArgs->Dpc, (int)idCpu);
+    rc = rtMpNtSetTargetProcessorDpc(&pArgs->Dpc, idCpu);
+    if (RT_FAILURE(rc))
+    {
+        ExFreePool(pArgs);
+        return rc;
+    }
 
     /*
@@ -832,13 +875,8 @@
 int rtMpPokeCpuUsingHalReqestIpiW7Plus(RTCPUID idCpu)
 {
-    /*
-     * I think we'll let idCpu be an NT processor number and not a HAL processor
-     * index.  KeAddProcessorAffinityEx is for HAL and uses HAL processor
-     * indexes as input from what I can tell.
-     */
-    PROCESSOR_NUMBER ProcNumber = { /*Group=*/ idCpu / 64, /*Number=*/ idCpu % 64, /* Reserved=*/ 0};
-    KAFFINITY_EX     Target;
+    /* idCpu is an HAL processor index, so we can use it directly. */
+    KAFFINITY_EX Target;
     g_pfnrtKeInitializeAffinityEx(&Target);
-    g_pfnrtKeAddProcessorAffinityEx(&Target, g_pfnrtKeGetProcessorIndexFromNumber(&ProcNumber));
+    g_pfnrtKeAddProcessorAffinityEx(&Target, idCpu);
 
     g_pfnrtHalRequestIpiW7Plus(0, &Target);
@@ -868,18 +906,23 @@
 int rtMpPokeCpuUsingDpc(RTCPUID idCpu)
 {
+    Assert(g_cRtMpNtMaxCpus > 0 && g_cRtMpNtMaxGroups > 0); /* init order */
+
     /*
      * APC fallback.
      */
-    static KDPC s_aPokeDpcs[MAXIMUM_PROCESSORS] = {0};
+    static KDPC s_aPokeDpcs[RTCPUSET_MAX_CPUS] = {0};
     static bool s_fPokeDPCsInitialized = false;
 
     if (!s_fPokeDPCsInitialized)
     {
-        for (unsigned i = 0; i < RT_ELEMENTS(s_aPokeDpcs); i++)
+        for (unsigned i = 0; i < g_cRtMpNtMaxCpus; i++)
         {
             KeInitializeDpc(&s_aPokeDpcs[i], rtMpNtPokeCpuDummy, NULL);
             KeSetImportanceDpc(&s_aPokeDpcs[i], HighImportance);
-            KeSetTargetProcessorDpc(&s_aPokeDpcs[i], (int)i);
+            int rc = rtMpNtSetTargetProcessorDpc(&s_aPokeDpcs[i], i); /* DPC #i targets CPU #i, not the CPU being poked. */
+            if (RT_FAILURE(rc))
+                return rc;
         }
+
         s_fPokeDPCsInitialized = true;
     }
@@ -910,5 +953,5 @@
               ? VERR_CPU_NOT_FOUND
               : VERR_CPU_OFFLINE;
-    /* Calls rtMpSendIpiFallback, rtMpSendIpiWin7AndLater or rtMpSendIpiVista. */
+    /* Calls rtMpPokeCpuUsingDpc, rtMpPokeCpuUsingHalReqestIpiW7Plus or rtMpPokeCpuUsingBroadcastIpi. */
     return g_pfnrtMpPokeCpuWorker(idCpu);
 }
Index: /trunk/src/VBox/Runtime/r0drv/nt/timer-r0drv-nt.cpp
===================================================================
--- /trunk/src/VBox/Runtime/r0drv/nt/timer-r0drv-nt.cpp	(revision 64233)
+++ /trunk/src/VBox/Runtime/r0drv/nt/timer-r0drv-nt.cpp	(revision 64234)
@@ -511,4 +511,5 @@
     pTimer->u64NanoInterval = u64NanoInterval;
     KeInitializeTimerEx(&pTimer->NtTimer, SynchronizationTimer);
+    int rc = VINF_SUCCESS;
     if (pTimer->fOmniTimer)
     {
@@ -519,5 +520,5 @@
          */
         pTimer->idCpu = NIL_RTCPUID;
-        for (unsigned iCpu = 0; iCpu < cSubTimers; iCpu++)
+        for (unsigned iCpu = 0; iCpu < cSubTimers && RT_SUCCESS(rc); iCpu++)
         {
             pTimer->aSubTimers[iCpu].iTick = 0;
@@ -533,5 +534,5 @@
                 KeInitializeDpc(&pTimer->aSubTimers[iCpu].NtDpc, rtTimerNtOmniSlaveCallback, &pTimer->aSubTimers[iCpu]);
             KeSetImportanceDpc(&pTimer->aSubTimers[iCpu].NtDpc, HighImportance);
-            KeSetTargetProcessorDpc(&pTimer->aSubTimers[iCpu].NtDpc, (int)RTMpCpuIdFromSetIndex(iCpu));
+            rc = rtMpNtSetTargetProcessorDpc(&pTimer->aSubTimers[iCpu].NtDpc, iCpu);
         }
         Assert(pTimer->idCpu != NIL_RTCPUID);
@@ -549,9 +550,14 @@
         KeSetImportanceDpc(&pTimer->aSubTimers[0].NtDpc, HighImportance);
         if (pTimer->fSpecificCpu)
-            KeSetTargetProcessorDpc(&pTimer->aSubTimers[0].NtDpc, (int)pTimer->idCpu);
-    }
-
-    *ppTimer = pTimer;
-    return VINF_SUCCESS;
+            rc = rtMpNtSetTargetProcessorDpc(&pTimer->aSubTimers[0].NtDpc, (int)pTimer->idCpu);
+    }
+    if (RT_SUCCESS(rc))
+    {
+        *ppTimer = pTimer;
+        return VINF_SUCCESS;
+    }
+
+    RTMemFree(pTimer);
+    return rc;
 }
 
Index: /trunk/src/VBox/Runtime/r3/win/mp-win.cpp
===================================================================
--- /trunk/src/VBox/Runtime/r3/win/mp-win.cpp	(revision 64233)
+++ /trunk/src/VBox/Runtime/r3/win/mp-win.cpp	(revision 64234)
@@ -109,7 +109,12 @@
      *
      * We ASSUME the the GroupInfo index is the same as the group number.
+     *
      * We ASSUME there are no inactive groups, because otherwise it will
      * be difficult to tell how many possible CPUs we can have and do a
-     * reasonable CPU ID/index partitioning.
+     * reasonable CPU ID/index partitioning. [probably bad assumption]
+     *
+     * We ASSUME that the kernel processor indexes are assigned in group order,
+     * which we match here with our own ID+index assignments.  This claim is
+     * verified by initterm-r0drv-nt.cpp.
      *
      * Note! We will die if there are too many processors!
