Index: /trunk/include/VBox/vmm/nem.h
===================================================================
--- /trunk/include/VBox/vmm/nem.h	(revision 92464)
+++ /trunk/include/VBox/vmm/nem.h	(revision 92465)
@@ -100,5 +100,5 @@
                                                   void *pvRam, void *pvMmio2, uint32_t *puNemRange);
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
-                                                void *pvRam, void *pvMmio2, uint8_t *pu2State);
+                                                void *pvRam, void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange);
 /** @name Flags for NEMR3NotifyPhysMmioExMap and NEMR3NotifyPhysMmioExUnmap.
  * @{ */
@@ -127,7 +127,8 @@
  * @param   fFlags          NEM_NOTIFY_PHYS_ROM_F_XXX.
  * @param   pu2State        New page state or UINT8_MAX to leave as-is.
+ * @param   puNemRange      Access to the relevant PGMRAMRANGE::uNemRange field.
  */
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
-                                                     uint32_t fFlags, uint8_t *pu2State);
+                                                     uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange);
 
 /**
@@ -145,7 +146,8 @@
  * @param   pu2State        Where to return the new NEM page state, UINT8_MAX
  *                          for unchanged.
+ * @param   puNemRange      Access to the relevant PGMRAMRANGE::uNemRange field.
  */
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
-                                                    uint32_t fFlags, uint8_t *pu2State);
+                                                    uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange);
 
 /** @name Flags for NEMR3NotifyPhysRomRegisterEarly and NEMR3NotifyPhysRomRegisterLate.
Index: /trunk/include/VBox/vmm/vm.h
===================================================================
--- /trunk/include/VBox/vmm/vm.h	(revision 92464)
+++ /trunk/include/VBox/vmm/vm.h	(revision 92465)
@@ -1346,5 +1346,5 @@
         struct NEM  s;
 #endif
-        uint8_t     padding[512];       /* multiple of 64 */
+        uint8_t     padding[4608];       /* multiple of 64 */
     } nem;
 
Index: /trunk/include/VBox/vmm/vm.mac
===================================================================
--- /trunk/include/VBox/vmm/vm.mac	(revision 92464)
+++ /trunk/include/VBox/vmm/vm.mac	(revision 92465)
@@ -149,5 +149,5 @@
     .em                     resb 256
     alignb 64
-    .nem                    resb 512
+    .nem                    resb 4608
     alignb 64
     .tm                     resb 10112
Index: /trunk/src/VBox/VMM/VMMAll/NEMAllNativeTemplate-win.cpp.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/NEMAllNativeTemplate-win.cpp.h	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMAll/NEMAllNativeTemplate-win.cpp.h	(revision 92465)
@@ -372,4 +372,6 @@
         ADD_REG64(WHvX64RegisterSfmask, pVCpu->cpum.GstCtx.msrSFMASK);
     }
+    if (fWhat & CPUMCTX_EXTRN_TSC_AUX)
+        ADD_REG64(WHvX64RegisterTscAux, pCtxMsrs->msr.TscAux);
     if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
     {
@@ -392,5 +394,4 @@
         ADD_REG64(WHvX64RegisterMsrMtrrFix4kF0000,  pCtxMsrs->msr.MtrrFix4K_F0000);
         ADD_REG64(WHvX64RegisterMsrMtrrFix4kF8000,  pCtxMsrs->msr.MtrrFix4K_F8000);
-        ADD_REG64(WHvX64RegisterTscAux, pCtxMsrs->msr.TscAux);
 #if 0 /** @todo these registers aren't available? Might explain something.. .*/
         const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pVM);
@@ -667,4 +668,6 @@
 //    const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pVM);
 //#endif
+    if (fWhat & CPUMCTX_EXTRN_TSC_AUX)
+        aenmNames[iReg++] = WHvX64RegisterTscAux;
     if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
     {
@@ -686,5 +689,4 @@
         aenmNames[iReg++] = WHvX64RegisterMsrMtrrFix4kF0000;
         aenmNames[iReg++] = WHvX64RegisterMsrMtrrFix4kF8000;
-        aenmNames[iReg++] = WHvX64RegisterTscAux;
         /** @todo look for HvX64RegisterIa32MiscEnable and HvX64RegisterIa32FeatureControl? */
 //#ifdef LOG_ENABLED
@@ -1011,36 +1013,40 @@
         GET_REG64_LOG7(pVCpu->cpum.GstCtx.msrSFMASK, WHvX64RegisterSfmask, "MSR SFMASK");
     }
-    if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
-    {
-        Assert(aenmNames[iReg] == WHvX64RegisterApicBase);
-        const uint64_t uOldBase = APICGetBaseMsrNoCheck(pVCpu);
-        if (aValues[iReg].Reg64 != uOldBase)
-        {
-            Log7(("NEM/%u: MSR APICBase changed %RX64 -> %RX64 (%RX64)\n",
-                  pVCpu->idCpu, uOldBase, aValues[iReg].Reg64, aValues[iReg].Reg64 ^ uOldBase));
-            int rc2 = APICSetBaseMsr(pVCpu, aValues[iReg].Reg64);
-            AssertLogRelMsg(rc2 == VINF_SUCCESS, ("%Rrc %RX64\n", rc2, aValues[iReg].Reg64));
-        }
-        iReg++;
-
-        GET_REG64_LOG7(pVCpu->cpum.GstCtx.msrPAT, WHvX64RegisterPat, "MSR PAT");
+    if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
+    {
+        PCPUMCTXMSRS const pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pVCpu);
+        if (fWhat & CPUMCTX_EXTRN_TSC_AUX)
+            GET_REG64_LOG7(pCtxMsrs->msr.TscAux, WHvX64RegisterTscAux, "MSR TSC_AUX");
+        if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+        {
+            Assert(aenmNames[iReg] == WHvX64RegisterApicBase);
+            const uint64_t uOldBase = APICGetBaseMsrNoCheck(pVCpu);
+            if (aValues[iReg].Reg64 != uOldBase)
+            {
+                Log7(("NEM/%u: MSR APICBase changed %RX64 -> %RX64 (%RX64)\n",
+                      pVCpu->idCpu, uOldBase, aValues[iReg].Reg64, aValues[iReg].Reg64 ^ uOldBase));
+                int rc2 = APICSetBaseMsr(pVCpu, aValues[iReg].Reg64);
+                AssertLogRelMsg(rc2 == VINF_SUCCESS, ("%Rrc %RX64\n", rc2, aValues[iReg].Reg64));
+            }
+            iReg++;
+
+            GET_REG64_LOG7(pVCpu->cpum.GstCtx.msrPAT, WHvX64RegisterPat, "MSR PAT");
 #if 0 /*def LOG_ENABLED*/ /** @todo something's wrong with HvX64RegisterMtrrCap? (AMD) */
-        GET_REG64_LOG7(pVCpu->cpum.GstCtx.msrPAT, WHvX64RegisterMsrMtrrCap);
+            GET_REG64_LOG7(pVCpu->cpum.GstCtx.msrPAT, WHvX64RegisterMsrMtrrCap);
 #endif
-        PCPUMCTXMSRS pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pVCpu);
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrDefType,      WHvX64RegisterMsrMtrrDefType,     "MSR MTRR_DEF_TYPE");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix64K_00000, WHvX64RegisterMsrMtrrFix64k00000, "MSR MTRR_FIX_64K_00000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix16K_80000, WHvX64RegisterMsrMtrrFix16k80000, "MSR MTRR_FIX_16K_80000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix16K_A0000, WHvX64RegisterMsrMtrrFix16kA0000, "MSR MTRR_FIX_16K_A0000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_C0000,  WHvX64RegisterMsrMtrrFix4kC0000,  "MSR MTRR_FIX_4K_C0000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_C8000,  WHvX64RegisterMsrMtrrFix4kC8000,  "MSR MTRR_FIX_4K_C8000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_D0000,  WHvX64RegisterMsrMtrrFix4kD0000,  "MSR MTRR_FIX_4K_D0000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_D8000,  WHvX64RegisterMsrMtrrFix4kD8000,  "MSR MTRR_FIX_4K_D8000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_E0000,  WHvX64RegisterMsrMtrrFix4kE0000,  "MSR MTRR_FIX_4K_E0000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_E8000,  WHvX64RegisterMsrMtrrFix4kE8000,  "MSR MTRR_FIX_4K_E8000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_F0000,  WHvX64RegisterMsrMtrrFix4kF0000,  "MSR MTRR_FIX_4K_F0000");
-        GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_F8000,  WHvX64RegisterMsrMtrrFix4kF8000,  "MSR MTRR_FIX_4K_F8000");
-        GET_REG64_LOG7(pCtxMsrs->msr.TscAux,           WHvX64RegisterTscAux,             "MSR TSC_AUX");
-        /** @todo look for HvX64RegisterIa32MiscEnable and HvX64RegisterIa32FeatureControl? */
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrDefType,      WHvX64RegisterMsrMtrrDefType,     "MSR MTRR_DEF_TYPE");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix64K_00000, WHvX64RegisterMsrMtrrFix64k00000, "MSR MTRR_FIX_64K_00000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix16K_80000, WHvX64RegisterMsrMtrrFix16k80000, "MSR MTRR_FIX_16K_80000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix16K_A0000, WHvX64RegisterMsrMtrrFix16kA0000, "MSR MTRR_FIX_16K_A0000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_C0000,  WHvX64RegisterMsrMtrrFix4kC0000,  "MSR MTRR_FIX_4K_C0000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_C8000,  WHvX64RegisterMsrMtrrFix4kC8000,  "MSR MTRR_FIX_4K_C8000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_D0000,  WHvX64RegisterMsrMtrrFix4kD0000,  "MSR MTRR_FIX_4K_D0000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_D8000,  WHvX64RegisterMsrMtrrFix4kD8000,  "MSR MTRR_FIX_4K_D8000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_E0000,  WHvX64RegisterMsrMtrrFix4kE0000,  "MSR MTRR_FIX_4K_E0000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_E8000,  WHvX64RegisterMsrMtrrFix4kE8000,  "MSR MTRR_FIX_4K_E8000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_F0000,  WHvX64RegisterMsrMtrrFix4kF0000,  "MSR MTRR_FIX_4K_F0000");
+            GET_REG64_LOG7(pCtxMsrs->msr.MtrrFix4K_F8000,  WHvX64RegisterMsrMtrrFix4kF8000,  "MSR MTRR_FIX_4K_F8000");
+            /** @todo look for HvX64RegisterIa32MiscEnable and HvX64RegisterIa32FeatureControl? */
+        }
     }
 
Index: /trunk/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp	(revision 92465)
@@ -1702,4 +1702,11 @@
         iReg++;
     }
+    if (fWhat & CPUMCTX_EXTRN_TSC_AUX)
+    {
+        HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+        pInput->Elements[iReg].Name                 = HvX64RegisterTscAux;
+        pInput->Elements[iReg].Value.Reg64          = pCtxMsrs->msr.TscAux;
+        iReg++;
+    }
     if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
     {
@@ -1771,8 +1778,4 @@
         pInput->Elements[iReg].Name                 = HvX64RegisterMtrrFix4kF8000;
         pInput->Elements[iReg].Value.Reg64          = pCtxMsrs->msr.MtrrFix4K_F8000;
-        iReg++;
-        HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
-        pInput->Elements[iReg].Name                 = HvX64RegisterTscAux;
-        pInput->Elements[iReg].Value.Reg64          = pCtxMsrs->msr.TscAux;
         iReg++;
 
Index: /trunk/src/VBox/VMM/VMMR3/NEMR3Native-darwin.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/NEMR3Native-darwin.cpp	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMR3/NEMR3Native-darwin.cpp	(revision 92465)
@@ -2920,10 +2920,10 @@
 
 VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
-                                               void *pvMmio2, uint8_t *pu2State)
+                                               void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
 {
     RT_NOREF(pVM);
 
-    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p\n",
-          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State));
+    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p uNemRange=%#x (%#x)\n",
+          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, puNemRange, *puNemRange));
 
     int rc = VINF_SUCCESS;
@@ -2987,10 +2987,11 @@
 
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
-                                                     uint8_t *pu2State)
-{
-    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
+                                                     uint8_t *pu2State, uint32_t *puNemRange)
+{
+    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags, puNemRange);
 
     Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
-    *pu2State = UINT8_MAX;
+    *pu2State   = UINT8_MAX;
+    *puNemRange = 0;
     return VINF_SUCCESS;
 }
@@ -2998,8 +2999,8 @@
 
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
-                                                    uint32_t fFlags, uint8_t *pu2State)
-{
-    Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p\n",
-          GCPhys, cb, pvPages, fFlags, pu2State));
+                                                    uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p (%d) puNemRange=%p (%#x)\n",
+          GCPhys, cb, pvPages, fFlags, pu2State, *pu2State, puNemRange, *puNemRange));
     *pu2State = UINT8_MAX;
 
@@ -3018,8 +3019,8 @@
         return VERR_NEM_MAP_PAGES_FAILED;
     }
-    RT_NOREF(pVM, fFlags);
+    RT_NOREF(pVM, fFlags, puNemRange);
     return VINF_SUCCESS;
 #else
-    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
+    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags, puNemRange);
     return VERR_NEM_MAP_PAGES_FAILED;
 #endif
Index: /trunk/src/VBox/VMM/VMMR3/NEMR3Native-linux.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/NEMR3Native-linux.cpp	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMR3/NEMR3Native-linux.cpp	(revision 92465)
@@ -126,5 +126,7 @@
         CAP_ENTRY__L(KVM_CAP_INTERNAL_ERROR_DATA),           /* 40 */
 #ifdef __KVM_HAVE_VCPU_EVENTS
-        CAP_ENTRY__L(KVM_CAP_VCPU_EVENTS),
+        CAP_ENTRY_ML(KVM_CAP_VCPU_EVENTS),
+#else
+        CAP_ENTRY_MU(41),
 #endif
         CAP_ENTRY__L(KVM_CAP_S390_PSW),
@@ -144,8 +146,12 @@
         CAP_ENTRY__L(KVM_CAP_ENABLE_CAP),
 #ifdef __KVM_HAVE_XSAVE
-        CAP_ENTRY__L(KVM_CAP_XSAVE),
+        CAP_ENTRY_ML(KVM_CAP_XSAVE),
+#else
+        CAP_ENTRY_MU(55),
 #endif
 #ifdef __KVM_HAVE_XCRS
-        CAP_ENTRY__L(KVM_CAP_XCRS),
+        CAP_ENTRY_ML(KVM_CAP_XCRS),
+#else
+        CAP_ENTRY_MU(56),
 #endif
         CAP_ENTRY__L(KVM_CAP_PPC_GET_PVINFO),
@@ -280,6 +286,6 @@
         CAP_ENTRY__L(KVM_CAP_S390_DIAG318),
         CAP_ENTRY__L(KVM_CAP_STEAL_TIME),
-        CAP_ENTRY__L(KVM_CAP_X86_USER_SPACE_MSR),
-        CAP_ENTRY__L(KVM_CAP_X86_MSR_FILTER),
+        CAP_ENTRY_ML(KVM_CAP_X86_USER_SPACE_MSR),            /* (since 5.10) */
+        CAP_ENTRY_ML(KVM_CAP_X86_MSR_FILTER),
         CAP_ENTRY__L(KVM_CAP_ENFORCE_PV_FEATURE_CPUID),      /* 190 */
         CAP_ENTRY__L(KVM_CAP_SYS_HYPERV_CPUID),
@@ -378,4 +384,12 @@
         rcRet = RTERRINFO_LOG_REL_ADD_F(pErrInfo, VERR_NEM_INIT_FAILED, "Odd KVM_GET_VCPU_MMAP_SIZE value: %#x (%d)", rc, rc);
 
+    /*
+     * Init the slot ID bitmap.
+     */
+    ASMBitSet(&pVM->nem.s.bmSlotIds[0], 0);         /* don't use slot 0 */
+    if (pVM->nem.s.cMaxMemSlots < _32K)
+        ASMBitSetRange(&pVM->nem.s.bmSlotIds[0], pVM->nem.s.cMaxMemSlots, _32K);
+    ASMBitSet(&pVM->nem.s.bmSlotIds[0], _32K - 1);  /* don't use the last slot */
+
     return rcRet;
 }
@@ -411,4 +425,7 @@
         if ((void *)pVCpu->nem.s.pRun == MAP_FAILED)
             return VMSetError(pVM, VERR_NEM_VM_CREATE_FAILED, RT_SRC_POS, "mmap failed for VCpu #%u: %d", idCpu, errno);
+
+        /* We want all x86 registers and events on each exit. */
+        pVCpu->nem.s.pRun->kvm_valid_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS | KVM_SYNC_X86_EVENTS;
     }
     return VINF_SUCCESS;
@@ -553,5 +570,5 @@
     /** @todo */
 
-    return VERR_NOT_IMPLEMENTED;
+    return VINF_SUCCESS;
 }
 
@@ -566,5 +583,37 @@
 int nemR3NativeTerm(PVM pVM)
 {
-    RT_NOREF(pVM);
+    /*
+     * Per-cpu data
+     */
+    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+    {
+        PVMCPU pVCpu = pVM->apCpusR3[idCpu];
+
+        if (pVCpu->nem.s.fdVCpu != -1)
+        {
+            close(pVCpu->nem.s.fdVCpu);
+            pVCpu->nem.s.fdVCpu = -1;
+        }
+        if (pVCpu->nem.s.pRun)
+        {
+            munmap(pVCpu->nem.s.pRun, pVM->nem.s.cbVCpuMmap);
+            pVCpu->nem.s.pRun = NULL;
+        }
+    }
+
+    /*
+     * Global data.
+     */
+    if (pVM->nem.s.fdVm != -1)
+    {
+        close(pVM->nem.s.fdVm);
+        pVM->nem.s.fdVm = -1;
+    }
+
+    if (pVM->nem.s.fdKvm != -1)
+    {
+        close(pVM->nem.s.fdKvm);
+        pVM->nem.s.fdKvm = -1;
+    }
     return VINF_SUCCESS;
 }
@@ -595,9 +644,716 @@
 
 
-VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
-{
-    RT_NOREF(pVM, pVCpu);
+/*********************************************************************************************************************************
+*   Memory management                                                                                                            *
+*********************************************************************************************************************************/
+
+
+/**
+ * Allocates a memory slot ID.
+ *
+ * @returns Slot ID on success, UINT16_MAX on failure.
+ */
+static uint16_t nemR3LnxMemSlotIdAlloc(PVM pVM)
+{
+    /* Use the hint first. */
+    uint16_t idHint = pVM->nem.s.idPrevSlot;
+    if (idHint < _32K - 1)
+    {
+        int32_t idx = ASMBitNextClear(&pVM->nem.s.bmSlotIds, _32K, idHint);
+        Assert(idx < _32K);
+        if (idx > 0 && !ASMAtomicBitTestAndSet(&pVM->nem.s.bmSlotIds, idx))
+            return pVM->nem.s.idPrevSlot = (uint16_t)idx;
+    }
+
+    /*
+     * Search the whole map from the start.
+     */
+    int32_t idx = ASMBitFirstClear(&pVM->nem.s.bmSlotIds, _32K);
+    Assert(idx < _32K);
+    if (idx > 0 && !ASMAtomicBitTestAndSet(&pVM->nem.s.bmSlotIds, idx))
+        return pVM->nem.s.idPrevSlot = (uint16_t)idx;
+
+    Assert(idx < 0 /*shouldn't trigger unless there is a race */);
+    return UINT16_MAX; /* caller is expected to assert. */
+}
+
+
+/**
+ * Frees a memory slot ID
+ */
+static void nemR3LnxMemSlotIdFree(PVM pVM, uint16_t idSlot)
+{
+    if (RT_LIKELY(idSlot < _32K && ASMAtomicBitTestAndClear(&pVM->nem.s.bmSlotIds, idSlot)))
+    { /*likely*/ }
+    else
+        AssertMsgFailed(("idSlot=%u (%#x)\n", idSlot, idSlot));
+}
+
+
+
+VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvR3,
+                                               uint8_t *pu2State, uint32_t *puNemRange)
+{
+    uint16_t idSlot = nemR3LnxMemSlotIdAlloc(pVM);
+    AssertLogRelReturn(idSlot < _32K, VERR_NEM_MAP_PAGES_FAILED);
+
+    Log5(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p pu2State=%p (%d) puNemRange=%p (%d) - idSlot=%#x\n",
+          GCPhys, cb, pvR3, pu2State, *pu2State, puNemRange, *puNemRange, idSlot));
+
+    struct kvm_userspace_memory_region Region;
+    Region.slot             = idSlot;
+    Region.flags            = 0;
+    Region.guest_phys_addr  = GCPhys;
+    Region.memory_size      = cb;
+    Region.userspace_addr   = (uintptr_t)pvR3;
+
+    int rc = ioctl(pVM->nem.s.fdVm, KVM_SET_USER_MEMORY_REGION, &Region);
+    if (rc == 0)
+    {
+        *pu2State   = 0;
+        *puNemRange = idSlot;
+        return VINF_SUCCESS;
+    }
+
+    LogRel(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p, idSlot=%#x failed: %u/%u\n", GCPhys, cb, pvR3, idSlot, rc, errno));
+    nemR3LnxMemSlotIdFree(pVM, idSlot);
+    return VERR_NEM_MAP_PAGES_FAILED;
+}
+
+
+VMMR3_INT_DECL(bool) NEMR3IsMmio2DirtyPageTrackingSupported(PVM pVM)
+{
+    RT_NOREF(pVM);
+    return true;
+}
+
+
+VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
+                                                  void *pvRam, void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p (%d) puNemRange=%p (%#x)\n",
+          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, *pu2State, puNemRange, puNemRange ? *puNemRange : UINT32_MAX));
+    RT_NOREF(pvRam);
+
+    if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
+    {
+        /** @todo implement splitting and whatnot of ranges if we want to be 100%
+         *        conforming (just modify RAM registrations in MM.cpp to test). */
+        AssertLogRelMsgFailedReturn(("%RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p\n", GCPhys, cb, fFlags, pvRam, pvMmio2),
+                                    VERR_NEM_MAP_PAGES_FAILED);
+    }
+
+    /*
+     * Register MMIO2.
+     */
+    if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2)
+    {
+        AssertReturn(pvMmio2, VERR_NEM_MAP_PAGES_FAILED);
+        AssertReturn(puNemRange, VERR_NEM_MAP_PAGES_FAILED);
+
+        uint16_t idSlot = nemR3LnxMemSlotIdAlloc(pVM);
+        AssertLogRelReturn(idSlot < _32K, VERR_NEM_MAP_PAGES_FAILED);
+
+        struct kvm_userspace_memory_region Region;
+        Region.slot             = idSlot;
+        Region.flags            = fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_TRACK_DIRTY_PAGES ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+        Region.guest_phys_addr  = GCPhys;
+        Region.memory_size      = cb;
+        Region.userspace_addr   = (uintptr_t)pvMmio2;
+
+        int rc = ioctl(pVM->nem.s.fdVm, KVM_SET_USER_MEMORY_REGION, &Region);
+        if (rc == 0)
+        {
+            *pu2State   = 0;
+            *puNemRange = idSlot;
+            Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvMmio2=%p - idSlot=%#x\n",
+                  GCPhys, cb, fFlags, pvMmio2, idSlot));
+            return VINF_SUCCESS;
+        }
+
+        nemR3LnxMemSlotIdFree(pVM, idSlot);
+        AssertLogRelMsgFailedReturn(("%RGp LB %RGp fFlags=%#x, pvMmio2=%p, idSlot=%#x failed: %u/%u\n",
+                                     GCPhys, cb, fFlags, pvMmio2, idSlot, errno, rc),
+                                    VERR_NEM_MAP_PAGES_FAILED);
+    }
+
+    /* MMIO, don't care. */
+    *pu2State   = 0;
+    *puNemRange = UINT32_MAX;
+    return VINF_SUCCESS;
+}
+
+
+VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
+                                                 void *pvRam, void *pvMmio2, uint32_t *puNemRange)
+{
+    RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, puNemRange);
+    return VINF_SUCCESS;
+}
+
+
+VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
+                                               void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p puNemRange=%p (%#x)\n",
+          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, puNemRange, *puNemRange));
+    RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State);
+
+    if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE)
+    {
+        /** @todo implement splitting and whatnot of ranges if we want to be 100%
+         *        conforming (just modify RAM registrations in MM.cpp to test). */
+        AssertLogRelMsgFailedReturn(("%RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p\n", GCPhys, cb, fFlags, pvRam, pvMmio2),
+                                    VERR_NEM_UNMAP_PAGES_FAILED);
+    }
+
+    if (fFlags & NEM_NOTIFY_PHYS_MMIO_EX_F_MMIO2)
+    {
+        uint32_t const idSlot = *puNemRange;
+        AssertReturn(idSlot > 0 && idSlot < _32K, VERR_NEM_IPE_4);
+        AssertReturn(ASMBitTest(pVM->nem.s.bmSlotIds, idSlot), VERR_NEM_IPE_4);
+
+        struct kvm_userspace_memory_region Region;
+        Region.slot             = idSlot;
+        Region.flags            = 0;
+        Region.guest_phys_addr  = GCPhys;
+        Region.memory_size      = 0;    /* this deregisters it. */
+        Region.userspace_addr   = (uintptr_t)pvMmio2;
+
+        int rc = ioctl(pVM->nem.s.fdVm, KVM_SET_USER_MEMORY_REGION, &Region);
+        if (rc == 0)
+        {
+            if (pu2State)
+                *pu2State = 0;
+            *puNemRange = UINT32_MAX;
+            nemR3LnxMemSlotIdFree(pVM, idSlot);
+            return VINF_SUCCESS;
+        }
+
+        AssertLogRelMsgFailedReturn(("%RGp LB %RGp fFlags=%#x, pvMmio2=%p, idSlot=%#x failed: %u/%u\n",
+                                     GCPhys, cb, fFlags, pvMmio2, idSlot, errno, rc),
+                                    VERR_NEM_UNMAP_PAGES_FAILED);
+    }
+
+    if (pu2State)
+        *pu2State = UINT8_MAX;
+    return VINF_SUCCESS;
+}
+
+
+VMMR3_INT_DECL(int) NEMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t uNemRange,
+                                                           void *pvBitmap, size_t cbBitmap)
+{
+    AssertReturn(uNemRange > 0 && uNemRange < _32K, VERR_NEM_IPE_4);
+    AssertReturn(ASMBitTest(pVM->nem.s.bmSlotIds, uNemRange), VERR_NEM_IPE_4);
+
+    RT_NOREF(GCPhys, cbBitmap);
+
+    struct kvm_dirty_log DirtyLog;
+    DirtyLog.slot         = uNemRange;
+    DirtyLog.padding1     = 0;
+    DirtyLog.dirty_bitmap = pvBitmap;
+
+    int rc = ioctl(pVM->nem.s.fdVm, KVM_GET_DIRTY_LOG, &DirtyLog);
+    AssertLogRelMsgReturn(rc == 0, ("%RGp LB %RGp idSlot=%#x failed: %u/%u\n", GCPhys, cb, uNemRange, errno, rc),
+                          VERR_NEM_QUERY_DIRTY_BITMAP_FAILED);
+
+    return VINF_SUCCESS;
+}
+
+
+VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
+                                                     uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("NEMR3NotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
+    *pu2State = UINT8_MAX;
+
+    /* Don't support putting ROM where there is already RAM.  For
+       now just shuffle the registrations till it works... */
+    AssertLogRelMsgReturn(!(fFlags & NEM_NOTIFY_PHYS_ROM_F_REPLACE), ("%RGp LB %RGp fFlags=%#x\n", GCPhys, cb, fFlags),
+                          VERR_NEM_MAP_PAGES_FAILED);
+
+    /** @todo figure out how to do shadow ROMs.   */
+
+    /*
+     * We only allocate a slot number here in case we need to use it to
+     * fend off physical handler fun.
+     */
+    uint16_t idSlot = nemR3LnxMemSlotIdAlloc(pVM);
+    AssertLogRelReturn(idSlot < _32K, VERR_NEM_MAP_PAGES_FAILED);
+
+    *pu2State   = 0;
+    *puNemRange = idSlot;
+    Log5(("NEMR3NotifyPhysRomRegisterEarly: %RGp LB %RGp fFlags=%#x pvPages=%p - idSlot=%#x\n",
+          GCPhys, cb, fFlags, pvPages, idSlot));
+    RT_NOREF(GCPhys, cb, fFlags, pvPages);
+    return VINF_SUCCESS;
+}
+
+
+VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
+                                                    uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("NEMR3NotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p (%d) puNemRange=%p (%#x)\n",
+          GCPhys, cb, pvPages, fFlags, pu2State, *pu2State, puNemRange, *puNemRange));
+
+    AssertPtrReturn(pvPages, VERR_NEM_IPE_5);
+
+    uint32_t const idSlot = *puNemRange;
+    AssertReturn(idSlot > 0 && idSlot < _32K, VERR_NEM_IPE_4);
+    AssertReturn(ASMBitTest(pVM->nem.s.bmSlotIds, idSlot), VERR_NEM_IPE_4);
+
+    *pu2State = UINT8_MAX;
+
+    /*
+     * Do the actual setting of the user pages here now that we've
+     * got a valid pvPages (typically isn't available during the early
+     * notification, unless we're replacing RAM).
+     */
+    struct kvm_userspace_memory_region Region;
+    Region.slot             = idSlot;
+    Region.flags            = 0;
+    Region.guest_phys_addr  = GCPhys;
+    Region.memory_size      = cb;
+    Region.userspace_addr   = (uintptr_t)pvPages;
+
+    int rc = ioctl(pVM->nem.s.fdVm, KVM_SET_USER_MEMORY_REGION, &Region);
+    if (rc == 0)
+    {
+        *pu2State   = 0;
+        Log5(("NEMR3NotifyPhysRomRegisterLate: %RGp LB %RGp fFlags=%#x pvPages=%p - idSlot=%#x\n",
+              GCPhys, cb, fFlags, pvPages, idSlot));
+        return VINF_SUCCESS;
+    }
+    AssertLogRelMsgFailedReturn(("%RGp LB %RGp fFlags=%#x, pvPages=%p, idSlot=%#x failed: %u/%u\n",
+                                 GCPhys, cb, fFlags, pvPages, idSlot, errno, rc),
+                                VERR_NEM_MAP_PAGES_FAILED);
+}
+
+
+/**
+ * Called when the A20 state changes.
+ *
+ * @param   pVCpu           The CPU the A20 state changed on.
+ * @param   fEnabled        Whether it was enabled (true) or disabled.
+ */
+VMMR3_INT_DECL(void) NEMR3NotifySetA20(PVMCPU pVCpu, bool fEnabled)
+{
+    Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
+    Assert(VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)));
+    RT_NOREF(pVCpu, fEnabled);
+}
+
+
+VMM_INT_DECL(void) NEMHCNotifyHandlerPhysicalDeregister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb,
+                                                        RTR3PTR pvMemR3, uint8_t *pu2State)
+{
+    Log5(("NEMHCNotifyHandlerPhysicalDeregister: %RGp LB %RGp enmKind=%d pvMemR3=%p pu2State=%p (%d)\n",
+          GCPhys, cb, enmKind, pvMemR3, pu2State, *pu2State));
+
+    *pu2State = UINT8_MAX;
+    RT_NOREF(pVM, enmKind, GCPhys, cb, pvMemR3);
+}
+
+
+void nemHCNativeNotifyHandlerPhysicalRegister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb)
+{
+    Log5(("nemHCNativeNotifyHandlerPhysicalRegister: %RGp LB %RGp enmKind=%d\n", GCPhys, cb, enmKind));
+    RT_NOREF(pVM, enmKind, GCPhys, cb);
+}
+
+
+void nemHCNativeNotifyHandlerPhysicalModify(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld,
+                                            RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fRestoreAsRAM)
+{
+    Log5(("nemHCNativeNotifyHandlerPhysicalModify: %RGp LB %RGp -> %RGp enmKind=%d fRestoreAsRAM=%d\n",
+          GCPhysOld, cb, GCPhysNew, enmKind, fRestoreAsRAM));
+    RT_NOREF(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fRestoreAsRAM);
+}
+
+
+int nemHCNativeNotifyPhysPageAllocated(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, uint32_t fPageProt,
+                                       PGMPAGETYPE enmType, uint8_t *pu2State)
+{
+    Log5(("nemHCNativeNotifyPhysPageAllocated: %RGp HCPhys=%RHp fPageProt=%#x enmType=%d *pu2State=%d\n",
+          GCPhys, HCPhys, fPageProt, enmType, *pu2State));
+    RT_NOREF(pVM, GCPhys, HCPhys, fPageProt, enmType, pu2State);
+    return VINF_SUCCESS;
+}
+
+
+VMM_INT_DECL(void) NEMHCNotifyPhysPageProtChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, RTR3PTR pvR3, uint32_t fPageProt,
+                                                  PGMPAGETYPE enmType, uint8_t *pu2State)
+{
+    Log5(("NEMHCNotifyPhysPageProtChanged: %RGp HCPhys=%RHp fPageProt=%#x enmType=%d *pu2State=%d\n",
+          GCPhys, HCPhys, fPageProt, enmType, *pu2State));
+    Assert(VM_IS_NEM_ENABLED(pVM));
+    RT_NOREF(pVM, GCPhys, HCPhys, pvR3, fPageProt, enmType, pu2State);
+
+}
+
+
+VMM_INT_DECL(void) NEMHCNotifyPhysPageChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhysPrev, RTHCPHYS HCPhysNew,
+                                              RTR3PTR pvNewR3, uint32_t fPageProt, PGMPAGETYPE enmType, uint8_t *pu2State)
+{
+    Log5(("nemHCNativeNotifyPhysPageChanged: %RGp HCPhys=%RHp->%RHp pvNewR3=%p fPageProt=%#x enmType=%d *pu2State=%d\n",
+          GCPhys, HCPhysPrev, HCPhysNew, pvNewR3, fPageProt, enmType, *pu2State));
+    Assert(VM_IS_NEM_ENABLED(pVM));
+    RT_NOREF(pVM, GCPhys, HCPhysPrev, HCPhysNew, pvNewR3, fPageProt, enmType, pu2State);
+}
+
+
+/*********************************************************************************************************************************
+*   CPU State                                                                                                                    *
+*********************************************************************************************************************************/
+
+/**
+ * Worker that imports selected state from KVM.
+ */
+static int nemHCLnxImportState(PVMCPUCC pVCpu, uint64_t fWhat, struct kvm_run *pRun)
+{
+    RT_NOREF(pVCpu, fWhat, pRun);
     return VERR_NOT_IMPLEMENTED;
 }
+
+
+/**
+ * Interface for importing state on demand (used by IEM).
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       The cross context CPU structure.
+ * @param   fWhat       What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMM_INT_DECL(int) NEMImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+    STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatImportOnDemand);
+
+    RT_NOREF(pVCpu, fWhat);
+    return nemHCLnxImportState(pVCpu, fWhat, pVCpu->nem.s.pRun);
+}
+
+
+/**
+ * Exports state to KVM.
+ */
+static int nemHCLnxExportState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, struct kvm_run *pRun)
+{
+    uint64_t const fExtrn = pCtx->fExtrn;
+    Assert((fExtrn & CPUMCTX_EXTRN_ALL) != CPUMCTX_EXTRN_ALL);
+
+    /*
+     * Stuff that goes into kvm_run::s.regs.regs:
+     */
+    if (   (fExtrn & (CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_GPRS_MASK))
+        !=           (CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_GPRS_MASK))
+    {
+        if (!(fExtrn & CPUMCTX_EXTRN_RIP))
+            pRun->s.regs.regs.rip    = pCtx->rip;
+        if (!(fExtrn & CPUMCTX_EXTRN_RFLAGS))
+            pRun->s.regs.regs.rflags = pCtx->rflags.u;
+
+        if (!(fExtrn & CPUMCTX_EXTRN_RAX))
+            pRun->s.regs.regs.rax    = pCtx->rax;
+        if (!(fExtrn & CPUMCTX_EXTRN_RCX))
+            pRun->s.regs.regs.rcx    = pCtx->rcx;
+        if (!(fExtrn & CPUMCTX_EXTRN_RDX))
+            pRun->s.regs.regs.rdx    = pCtx->rdx;
+        if (!(fExtrn & CPUMCTX_EXTRN_RBX))
+            pRun->s.regs.regs.rbx    = pCtx->rbx;
+        if (!(fExtrn & CPUMCTX_EXTRN_RSP))
+            pRun->s.regs.regs.rsp    = pCtx->rsp;
+        if (!(fExtrn & CPUMCTX_EXTRN_RBP))
+            pRun->s.regs.regs.rbp    = pCtx->rbp;
+        if (!(fExtrn & CPUMCTX_EXTRN_RSI))
+            pRun->s.regs.regs.rsi    = pCtx->rsi;
+        if (!(fExtrn & CPUMCTX_EXTRN_RDI))
+            pRun->s.regs.regs.rdi    = pCtx->rdi;
+        if (!(fExtrn & CPUMCTX_EXTRN_R8_R15))
+        {
+            pRun->s.regs.regs.r8     = pCtx->r8;
+            pRun->s.regs.regs.r9     = pCtx->r9;
+            pRun->s.regs.regs.r10    = pCtx->r10;
+            pRun->s.regs.regs.r11    = pCtx->r11;
+            pRun->s.regs.regs.r12    = pCtx->r12;
+            pRun->s.regs.regs.r13    = pCtx->r13;
+            pRun->s.regs.regs.r14    = pCtx->r14;
+            pRun->s.regs.regs.r15    = pCtx->r15;
+        }
+        pRun->kvm_dirty_regs |= KVM_SYNC_X86_REGS;
+    }
+
+    /*
+     * Stuff that goes into kvm_run::s.regs.sregs:
+     */
+    /** @todo apic_base   */
+    if (   (fExtrn & (CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK | CPUMCTX_EXTRN_CR_MASK | CPUMCTX_EXTRN_EFER | CPUMCTX_EXTRN_APIC_TPR))
+        !=           (CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK | CPUMCTX_EXTRN_CR_MASK | CPUMCTX_EXTRN_EFER | CPUMCTX_EXTRN_APIC_TPR))
+    {
+#define NEM_LNX_EXPORT_SEG(a_KvmSeg, a_CtxSeg) do { \
+            (a_KvmSeg).base     = (a_CtxSeg).u64Base; \
+            (a_KvmSeg).limit    = (a_CtxSeg).u32Limit; \
+            (a_KvmSeg).selector = (a_CtxSeg).Sel; \
+            (a_KvmSeg).type     = (a_CtxSeg).Attr.n.u4Type; \
+            (a_KvmSeg).s        = (a_CtxSeg).Attr.n.u1DescType; \
+            (a_KvmSeg).dpl      = (a_CtxSeg).Attr.n.u2Dpl; \
+            (a_KvmSeg).present  = (a_CtxSeg).Attr.n.u1Present; \
+            (a_KvmSeg).avl      = (a_CtxSeg).Attr.n.u1Available; \
+            (a_KvmSeg).l        = (a_CtxSeg).Attr.n.u1Long; \
+            (a_KvmSeg).db       = (a_CtxSeg).Attr.n.u1DefBig; \
+            (a_KvmSeg).g        = (a_CtxSeg).Attr.n.u1Granularity; \
+            (a_KvmSeg).unusable = (a_CtxSeg).Attr.n.u1Unusable; \
+            (a_KvmSeg).padding  = 0; \
+        } while (0)
+
+        if ((fExtrn & CPUMCTX_EXTRN_SREG_MASK) != CPUMCTX_EXTRN_SREG_MASK)
+        {
+            if (!(fExtrn & CPUMCTX_EXTRN_ES))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.es, pCtx->es);
+            if (!(fExtrn & CPUMCTX_EXTRN_CS))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.cs, pCtx->cs);
+            if (!(fExtrn & CPUMCTX_EXTRN_SS))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.ss, pCtx->ss);
+            if (!(fExtrn & CPUMCTX_EXTRN_DS))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.ds, pCtx->ds);
+            if (!(fExtrn & CPUMCTX_EXTRN_FS))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.fs, pCtx->fs);
+            if (!(fExtrn & CPUMCTX_EXTRN_GS))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.gs, pCtx->gs);
+        }
+        if ((fExtrn & CPUMCTX_EXTRN_TABLE_MASK) != CPUMCTX_EXTRN_TABLE_MASK)
+        {
+            if (!(fExtrn & CPUMCTX_EXTRN_GDTR))
+            {
+                pRun->s.regs.sregs.gdt.base  = pCtx->gdtr.pGdt;
+                pRun->s.regs.sregs.gdt.limit = pCtx->gdtr.cbGdt;
+                pRun->s.regs.sregs.gdt.padding[0] = 0;
+                pRun->s.regs.sregs.gdt.padding[1] = 0;
+                pRun->s.regs.sregs.gdt.padding[2] = 0;
+            }
+            if (!(fExtrn & CPUMCTX_EXTRN_IDTR))
+            {
+                pRun->s.regs.sregs.idt.base  = pCtx->idtr.pIdt;
+                pRun->s.regs.sregs.idt.limit = pCtx->idtr.cbIdt;
+                pRun->s.regs.sregs.idt.padding[0] = 0;
+                pRun->s.regs.sregs.idt.padding[1] = 0;
+                pRun->s.regs.sregs.idt.padding[2] = 0;
+            }
+            if (!(fExtrn & CPUMCTX_EXTRN_LDTR))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.ldt, pCtx->ldtr);
+            if (!(fExtrn & CPUMCTX_EXTRN_TR))
+                NEM_LNX_EXPORT_SEG(pRun->s.regs.sregs.tr, pCtx->tr);
+        }
+        if ((fExtrn & CPUMCTX_EXTRN_CR_MASK) != CPUMCTX_EXTRN_CR_MASK)
+        {
+            if (!(fExtrn & CPUMCTX_EXTRN_CR0))
+                pRun->s.regs.sregs.cr0   = pCtx->cr0;
+            if (!(fExtrn & CPUMCTX_EXTRN_CR2))
+                pRun->s.regs.sregs.cr2   = pCtx->cr2;
+            if (!(fExtrn & CPUMCTX_EXTRN_CR3))
+                pRun->s.regs.sregs.cr3   = pCtx->cr3;
+            if (!(fExtrn & CPUMCTX_EXTRN_CR4))
+                pRun->s.regs.sregs.cr4   = pCtx->cr4;
+        }
+        if (!(fExtrn & CPUMCTX_EXTRN_APIC_TPR))
+            pRun->s.regs.sregs.cr8    = CPUMGetGuestCR8(pVCpu);
+        if (!(fExtrn & CPUMCTX_EXTRN_EFER))
+            pRun->s.regs.sregs.efer   = pCtx->msrEFER;
+
+        /** @todo apic_base   */
+        /** @todo interrupt_bitmap - IRQ injection?  */
+        pRun->kvm_dirty_regs |= KVM_SYNC_X86_SREGS;
+    }
+
+    /*
+     * Debug registers.
+     */
+    if ((fExtrn & CPUMCTX_EXTRN_DR_MASK) != CPUMCTX_EXTRN_DR_MASK)
+    {
+        struct kvm_debugregs DbgRegs = {{0}};
+
+        if (fExtrn & CPUMCTX_EXTRN_DR_MASK)
+        {
+            /* Partial debug state, we must get DbgRegs first so we can merge: */
+            int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_GET_DEBUGREGS, &DbgRegs);
+            AssertMsgReturn(rc == 0, ("rc=%d errno=%d\n", rc, errno), VERR_NEM_IPE_3);
+        }
+
+        if (!(fExtrn & CPUMCTX_EXTRN_DR0_DR3))
+        {
+            DbgRegs.db[0] = pCtx->dr[0];
+            DbgRegs.db[1] = pCtx->dr[1];
+            DbgRegs.db[2] = pCtx->dr[2];
+            DbgRegs.db[3] = pCtx->dr[3];
+        }
+        if (!(fExtrn & CPUMCTX_EXTRN_DR6))
+            DbgRegs.dr6 = pCtx->dr[6];
+        if (!(fExtrn & CPUMCTX_EXTRN_DR7))
+            DbgRegs.dr7 = pCtx->dr[7];
+
+        int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_SET_DEBUGREGS, &DbgRegs);
+        AssertMsgReturn(rc == 0, ("rc=%d errno=%d\n", rc, errno), VERR_NEM_IPE_3);
+    }
+
+    /*
+     * FPU, SSE, AVX, ++.
+     */
+    if (   (fExtrn & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX | CPUMCTX_EXTRN_OTHER_XSAVE | CPUMCTX_EXTRN_XCRx))
+        !=           (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX | CPUMCTX_EXTRN_OTHER_XSAVE | CPUMCTX_EXTRN_XCRx))
+    {
+        if (   (fExtrn & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX | CPUMCTX_EXTRN_OTHER_XSAVE))
+            !=           (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX | CPUMCTX_EXTRN_OTHER_XSAVE))
+        {
+            if (fExtrn & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX | CPUMCTX_EXTRN_OTHER_XSAVE))
+            {
+                /* Partial state is annoying as we have to do merging - is this possible at all? */
+                struct kvm_xsave XSave;
+                int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_GET_XSAVE, &XSave);
+                AssertMsgReturn(rc == 0, ("rc=%d errno=%d\n", rc, errno), VERR_NEM_IPE_3);
+
+                if (!(fExtrn & CPUMCTX_EXTRN_X87))
+                    memcpy(&pCtx->XState.x87, &XSave, sizeof(pCtx->XState.x87));
+                if (!(fExtrn & CPUMCTX_EXTRN_SSE_AVX))
+                {
+                    /** @todo    */
+                }
+                if (!(fExtrn & CPUMCTX_EXTRN_OTHER_XSAVE))
+                {
+                    /** @todo   */
+                }
+            }
+
+            int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_SET_XSAVE, &pCtx->XState);
+            AssertMsgReturn(rc == 0, ("rc=%d errno=%d\n", rc, errno), VERR_NEM_IPE_3);
+        }
+
+        if (!(fExtrn & CPUMCTX_EXTRN_XCRx))
+        {
+            struct kvm_xcrs Xcrs =
+            {   /*.nr_xcrs = */ 2,
+                /*.flags = */   0,
+                /*.xcrs= */ {
+                    { /*.xcr =*/ 0, /*.reserved=*/ 0, /*.value=*/ pCtx->aXcr[0] },
+                    { /*.xcr =*/ 1, /*.reserved=*/ 0, /*.value=*/ pCtx->aXcr[1] },
+                }
+            };
+
+            int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_SET_XCRS, &Xcrs);
+            AssertMsgReturn(rc == 0, ("rc=%d errno=%d\n", rc, errno), VERR_NEM_IPE_3);
+        }
+    }
+
+    /*
+     * MSRs.
+     */
+    if (   (fExtrn & (CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS | CPUMCTX_EXTRN_SYSENTER_MSRS | CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
+        !=           (CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS | CPUMCTX_EXTRN_SYSENTER_MSRS | CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
+    {
+        union
+        {
+            struct kvm_msrs Core;
+            uint64_t padding[2 + sizeof(struct kvm_msr_entry) * 32];
+        }                   uBuf;
+        uint32_t            iMsr     = 0;
+        PCPUMCTXMSRS const  pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pVCpu);
+
+#define ADD_MSR(a_Msr, a_uValue) do { \
+            Assert(iMsr < 32); \
+            uBuf.Core.entries[iMsr].index    = (a_Msr); \
+            uBuf.Core.entries[iMsr].reserved = 0; \
+            uBuf.Core.entries[iMsr].data     = (a_uValue); \
+            iMsr += 1; \
+        } while (0)
+
+        if (!(fExtrn & CPUMCTX_EXTRN_KERNEL_GS_BASE))
+            ADD_MSR(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
+        if (!(fExtrn & CPUMCTX_EXTRN_SYSCALL_MSRS))
+        {
+            ADD_MSR(MSR_K6_STAR,    pCtx->msrSTAR);
+            ADD_MSR(MSR_K8_LSTAR,   pCtx->msrLSTAR);
+            ADD_MSR(MSR_K8_CSTAR,   pCtx->msrCSTAR);
+            ADD_MSR(MSR_K8_SF_MASK, pCtx->msrSFMASK);
+        }
+        if (!(fExtrn & CPUMCTX_EXTRN_SYSENTER_MSRS))
+        {
+            ADD_MSR(MSR_IA32_SYSENTER_CS,  pCtx->SysEnter.cs);
+            ADD_MSR(MSR_IA32_SYSENTER_EIP, pCtx->SysEnter.eip);
+            ADD_MSR(MSR_IA32_SYSENTER_ESP, pCtx->SysEnter.esp);
+        }
+        if (!(fExtrn & CPUMCTX_EXTRN_TSC_AUX))
+            ADD_MSR(MSR_K8_TSC_AUX, pCtxMsrs->msr.TscAux);
+        if (!(fExtrn & CPUMCTX_EXTRN_OTHER_MSRS))
+        {
+            ADD_MSR(MSR_IA32_CR_PAT, pCtx->msrPAT);
+            /** @todo What do we _have_ to add here?
+             * We also have: Mtrr*, MiscEnable, FeatureControl. */
+        }
+
+        uBuf.Core.pad   = 0;
+        uBuf.Core.nmsrs = iMsr;
+        int rc = ioctl(pVCpu->nem.s.fdVCpu, KVM_SET_MSRS, &uBuf);
+        AssertMsgReturn(rc == (int)iMsr,
+                        ("rc=%d iMsr=%d (->%#x) errno=%d\n",
+                         rc, iMsr, (uint32_t)rc < iMsr ? uBuf.Core.entries[rc].index : 0, errno),
+                        VERR_NEM_IPE_3);
+    }
+
+    /*
+     * KVM now owns all the state.
+     */
+    pCtx->fExtrn = (fExtrn & ~CPUMCTX_EXTRN_KEEPER_MASK) | CPUMCTX_EXTRN_KEEPER_NEM | CPUMCTX_EXTRN_ALL;
+
+    RT_NOREF(pVM);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Query the CPU tick counter and optionally the TSC_AUX MSR value.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       The cross context CPU structure.
+ * @param   pcTicks     Where to return the CPU tick count.
+ * @param   puAux       Where to return the TSC_AUX register value.
+ */
+VMM_INT_DECL(int) NEMHCQueryCpuTick(PVMCPUCC pVCpu, uint64_t *pcTicks, uint32_t *puAux)
+{
+    STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatQueryCpuTick);
+    // KVM_GET_CLOCK?
+    RT_NOREF(pVCpu, pcTicks, puAux);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Resumes CPU clock (TSC) on all virtual CPUs.
+ *
+ * This is called by TM when the VM is started, restored, resumed or similar.
+ *
+ * @returns VBox status code.
+ * @param   pVM             The cross context VM structure.
+ * @param   pVCpu           The cross context CPU structure of the calling EMT.
+ * @param   uPausedTscValue The TSC value at the time of pausing.
+ */
+VMM_INT_DECL(int) NEMHCResumeCpuTickOnAll(PVMCC pVM, PVMCPUCC pVCpu, uint64_t uPausedTscValue)
+{
+    // KVM_SET_CLOCK?
+    RT_NOREF(pVM, pVCpu, uPausedTscValue);
+    return VINF_SUCCESS;
+}
+
+
+VMM_INT_DECL(uint32_t) NEMHCGetFeatures(PVMCC pVM)
+{
+    RT_NOREF(pVM);
+    return NEM_FEAT_F_NESTED_PAGING
+         | NEM_FEAT_F_FULL_GST_EXEC
+         | NEM_FEAT_F_XSAVE_XRSTOR;
+}
+
+
+
+/*********************************************************************************************************************************
+*   Execution                                                                                                                    *
+*********************************************************************************************************************************/
 
 
@@ -637,203 +1393,394 @@
 
 
-VMMR3_INT_DECL(int) NEMR3NotifyPhysRamRegister(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvR3,
-                                               uint8_t *pu2State, uint32_t *puNemRange)
-{
-    Log5(("NEMR3NotifyPhysRamRegister: %RGp LB %RGp, pvR3=%p pu2State=%p (%d) puNemRange=%p (%d)\n",
-          GCPhys, cb, pvR3, pu2State, pu2State, puNemRange, *puNemRange));
-    *pu2State = UINT8_MAX;
-    RT_NOREF(pVM, GCPhys, cb, pvR3, puNemRange);
+static VBOXSTRICTRC nemHCLnxHandleInterruptFF(PVM pVM, PVMCPU pVCpu)
+{
+    RT_NOREF(pVM, pVCpu);
     return VINF_SUCCESS;
 }
 
 
-VMMR3_INT_DECL(bool) NEMR3IsMmio2DirtyPageTrackingSupported(PVM pVM)
-{
-    RT_NOREF(pVM);
-    return false;
-}
-
-
-VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
-                                                  void *pvRam, void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
-{
-    Log5(("NEMR3NotifyPhysMmioExMapEarly: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p (%d) puNemRange=%p (%#x)\n",
-          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, *pu2State, puNemRange, puNemRange ? *puNemRange : UINT32_MAX));
-    RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, puNemRange);
-    *pu2State = UINT8_MAX;
-    return VINF_SUCCESS;
-}
-
-
-VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExMapLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags,
-                                                 void *pvRam, void *pvMmio2, uint32_t *puNemRange)
-{
-    RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, puNemRange);
-    return VINF_SUCCESS;
-}
-
-
-VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
-                                               void *pvMmio2, uint8_t *pu2State)
-{
-    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p\n",
-          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State));
-    RT_NOREF(pVM, GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State);
-    if (pu2State)
-        *pu2State = UINT8_MAX;
-    return VINF_SUCCESS;
-}
-
-
-VMMR3_INT_DECL(int) NEMR3PhysMmio2QueryAndResetDirtyBitmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t uNemRange,
-                                                           void *pvBitmap, size_t cbBitmap)
-{
-    RT_NOREF(pVM, GCPhys, cb, uNemRange, pvBitmap, cbBitmap);
-    AssertFailed();
+static VBOXSTRICTRC nemHCLnxHandleExitIo(PVMCC pVM, PVMCPUCC pVCpu, struct kvm_run *pRun)
+{
+    /*
+     * Input validation.
+     */
+    Assert(pRun->io.count > 0);
+    Assert(pRun->io.size == 1 || pRun->io.size == 2 || pRun->io.size == 4);
+    Assert(pRun->io.direction == KVM_EXIT_IO_IN || pRun->io.direction == KVM_EXIT_IO_OUT);
+    Assert(pRun->io.data_offset < pVM->nem.s.cbVCpuMmap);
+    Assert(pRun->io.data_offset + pRun->io.size * pRun->io.count <= pVM->nem.s.cbVCpuMmap);
+
+    /*
+     * Do the requested job.
+     */
+    VBOXSTRICTRC    rcStrict;
+    RTPTRUNION      uPtrData;
+    uPtrData.pu8 = (uint8_t *)pRun + pRun->io.data_offset;
+    if (pRun->io.count == 1)
+    {
+        if (pRun->io.direction == KVM_EXIT_IO_IN)
+        {
+            uint32_t uValue = 0;
+            rcStrict = IOMIOPortRead(pVM, pVCpu, pRun->io.port, &uValue, pRun->io.size);
+            Log4(("IOExit/%u: %04x:%08RX64: IN %#x LB %u -> %#x, rcStrict=%Rrc\n",
+                  pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip,
+                  pRun->io.port, pRun->io.size, uValue, VBOXSTRICTRC_VAL(rcStrict) ));
+            if (IOM_SUCCESS(rcStrict))
+            {
+                if (pRun->io.size == 4)
+                    *uPtrData.pu32 = uValue;
+                else if (pRun->io.size == 2)
+                    *uPtrData.pu16 = (uint16_t)uValue;
+                else
+                    *uPtrData.pu8  = (uint8_t)uValue;
+            }
+        }
+        else
+        {
+            uint32_t const uValue = pRun->io.size == 4 ? *uPtrData.pu32
+                                  : pRun->io.size == 2 ? *uPtrData.pu16
+                                  :                      *uPtrData.pu8;
+            rcStrict = IOMIOPortWrite(pVM, pVCpu, pRun->io.port, uValue, pRun->io.size);
+            Log4(("IOExit/%u: %04x:%08RX64: OUT %#x, %#x LB %u rcStrict=%Rrc\n",
+                  pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip,
+                  pRun->io.port, uValue, pRun->io.size, VBOXSTRICTRC_VAL(rcStrict) ));
+        }
+    }
+    else
+    {
+        uint32_t cTransfers = pRun->io.count;
+        if (pRun->io.direction == KVM_EXIT_IO_IN)
+        {
+            rcStrict = IOMIOPortReadString(pVM, pVCpu, pRun->io.port, uPtrData.pv, &cTransfers, pRun->io.size);
+            Log4(("IOExit/%u: %04x:%08RX64: REP INS %#x LB %u * %#x times -> rcStrict=%Rrc cTransfers=%d\n",
+                  pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip,
+                  pRun->io.port, pRun->io.size, pRun->io.count, VBOXSTRICTRC_VAL(rcStrict), cTransfers ));
+        }
+        else
+        {
+            rcStrict = IOMIOPortWriteString(pVM, pVCpu, pRun->io.port, uPtrData.pv, &cTransfers, pRun->io.size);
+            Log4(("IOExit/%u: %04x:%08RX64: REP OUTS %#x LB %u * %#x times -> rcStrict=%Rrc cTransfers=%d\n",
+                  pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip,
+                  pRun->io.port, pRun->io.size, pRun->io.count, VBOXSTRICTRC_VAL(rcStrict), cTransfers ));
+        }
+        Assert(cTransfers == 0);
+    }
+    return rcStrict;
+}
+
+
+static VBOXSTRICTRC nemHCLnxHandleExit(PVMCC pVM, PVMCPUCC pVCpu, struct kvm_run *pRun)
+{
+    switch (pRun->exit_reason)
+    {
+        case KVM_EXIT_EXCEPTION:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_IO:
+            return nemHCLnxHandleExitIo(pVM, pVCpu, pRun);
+
+        case KVM_EXIT_HYPERCALL:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_DEBUG:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_HLT:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_MMIO:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_IRQ_WINDOW_OPEN:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_X86_RDMSR:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_X86_WRMSR:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_INTR: /* EINTR */
+            return VINF_SUCCESS;
+
+        case KVM_EXIT_SET_TPR:
+            AssertFailed();
+            break;
+        case KVM_EXIT_TPR_ACCESS:
+            AssertFailed();
+            break;
+        case KVM_EXIT_NMI:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_SYSTEM_EVENT:
+            AssertFailed();
+            break;
+        case KVM_EXIT_IOAPIC_EOI:
+            AssertFailed();
+            break;
+        case KVM_EXIT_HYPERV:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_DIRTY_RING_FULL:
+            AssertFailed();
+            break;
+        case KVM_EXIT_AP_RESET_HOLD:
+            AssertFailed();
+            break;
+        case KVM_EXIT_X86_BUS_LOCK:
+            AssertFailed();
+            break;
+
+
+        case KVM_EXIT_SHUTDOWN:
+            AssertFailed();
+            break;
+
+        case KVM_EXIT_FAIL_ENTRY:
+            AssertFailed();
+            break;
+        case KVM_EXIT_INTERNAL_ERROR:
+            AssertFailed();
+            break;
+
+        /*
+         * Foreign and unknowns.
+         */
+        case KVM_EXIT_EPR:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_EPR on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_WATCHDOG:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_WATCHDOG on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_ARM_NISV:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_ARM_NISV on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_S390_STSI:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_S390_STSI on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_S390_TSCH:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_S390_TSCH on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_OSI:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_OSI on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_PAPR_HCALL:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_PAPR_HCALL on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_S390_UCONTROL:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_S390_UCONTROL on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_DCR:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_DCR on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_S390_SIEIC:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_S390_SIEIC on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_S390_RESET:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_S390_RESET on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_UNKNOWN:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_UNKNOWN on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        case KVM_EXIT_XEN:
+            AssertLogRelMsgFailedReturn(("KVM_EXIT_XEN on VCpu #%u at %04x:%RX64!\n", pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+        default:
+            AssertLogRelMsgFailedReturn(("Unknown exit reason %u on VCpu #%u at %04x:%RX64!\n", pRun->exit_reason, pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip), VERR_NEM_IPE_1);
+    }
+
+    RT_NOREF(pVM, pVCpu, pRun);
     return VERR_NOT_IMPLEMENTED;
 }
 
 
-VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
-                                                     uint8_t *pu2State)
-{
-    Log5(("NEMR3NotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
-    *pu2State = UINT8_MAX;
-    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
-    return VINF_SUCCESS;
-}
-
-
-VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
-                                                    uint32_t fFlags, uint8_t *pu2State)
-{
-    Log5(("NEMR3NotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p\n",
-          GCPhys, cb, pvPages, fFlags, pu2State));
-    *pu2State = UINT8_MAX;
-    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
-    return VINF_SUCCESS;
-}
-
-
-/**
- * Called when the A20 state changes.
- *
- * @param   pVCpu           The CPU the A20 state changed on.
- * @param   fEnabled        Whether it was enabled (true) or disabled.
- */
-VMMR3_INT_DECL(void) NEMR3NotifySetA20(PVMCPU pVCpu, bool fEnabled)
-{
-    Log(("nemR3NativeNotifySetA20: fEnabled=%RTbool\n", fEnabled));
-    Assert(VM_IS_NEM_ENABLED(pVCpu->CTX_SUFF(pVM)));
-    RT_NOREF(pVCpu, fEnabled);
-}
-
-
-/**
- * Interface for importing state on demand (used by IEM).
- *
- * @returns VBox status code.
- * @param   pVCpu       The cross context CPU structure.
- * @param   fWhat       What to import, CPUMCTX_EXTRN_XXX.
- */
-VMM_INT_DECL(int) NEMImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
-{
-    STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatImportOnDemand);
-
-    RT_NOREF(pVCpu, fWhat);
-    return VERR_NOT_IMPLEMENTED;
-}
-
-
-/**
- * Query the CPU tick counter and optionally the TSC_AUX MSR value.
- *
- * @returns VBox status code.
- * @param   pVCpu       The cross context CPU structure.
- * @param   pcTicks     Where to return the CPU tick count.
- * @param   puAux       Where to return the TSC_AUX register value.
- */
-VMM_INT_DECL(int) NEMHCQueryCpuTick(PVMCPUCC pVCpu, uint64_t *pcTicks, uint32_t *puAux)
-{
-    STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatQueryCpuTick);
-    // KVM_GET_CLOCK?
-    RT_NOREF(pVCpu, pcTicks, puAux);
-    return VERR_NOT_IMPLEMENTED;
-}
-
-
-/**
- * Resumes CPU clock (TSC) on all virtual CPUs.
- *
- * This is called by TM when the VM is started, restored, resumed or similar.
- *
- * @returns VBox status code.
- * @param   pVM             The cross context VM structure.
- * @param   pVCpu           The cross context CPU structure of the calling EMT.
- * @param   uPausedTscValue The TSC value at the time of pausing.
- */
-VMM_INT_DECL(int) NEMHCResumeCpuTickOnAll(PVMCC pVM, PVMCPUCC pVCpu, uint64_t uPausedTscValue)
-{
-    // KVM_SET_CLOCK?
-    RT_NOREF(pVM, pVCpu, uPausedTscValue);
-    return VERR_NOT_IMPLEMENTED;
-}
-
-
-VMM_INT_DECL(void) NEMHCNotifyHandlerPhysicalDeregister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb,
-                                                        RTR3PTR pvMemR3, uint8_t *pu2State)
-{
-    Log5(("NEMHCNotifyHandlerPhysicalDeregister: %RGp LB %RGp enmKind=%d pvMemR3=%p pu2State=%p (%d)\n",
-          GCPhys, cb, enmKind, pvMemR3, pu2State, *pu2State));
-
-    *pu2State = UINT8_MAX;
-    RT_NOREF(pVM, enmKind, GCPhys, cb, pvMemR3);
-}
-
-
-void nemHCNativeNotifyHandlerPhysicalRegister(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhys, RTGCPHYS cb)
-{
-    Log5(("nemHCNativeNotifyHandlerPhysicalRegister: %RGp LB %RGp enmKind=%d\n", GCPhys, cb, enmKind));
-    RT_NOREF(pVM, enmKind, GCPhys, cb);
-}
-
-
-void nemHCNativeNotifyHandlerPhysicalModify(PVMCC pVM, PGMPHYSHANDLERKIND enmKind, RTGCPHYS GCPhysOld,
-                                            RTGCPHYS GCPhysNew, RTGCPHYS cb, bool fRestoreAsRAM)
-{
-    Log5(("nemHCNativeNotifyHandlerPhysicalModify: %RGp LB %RGp -> %RGp enmKind=%d fRestoreAsRAM=%d\n",
-          GCPhysOld, cb, GCPhysNew, enmKind, fRestoreAsRAM));
-    RT_NOREF(pVM, enmKind, GCPhysOld, GCPhysNew, cb, fRestoreAsRAM);
-}
-
-
-int nemHCNativeNotifyPhysPageAllocated(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, uint32_t fPageProt,
-                                       PGMPAGETYPE enmType, uint8_t *pu2State)
-{
-    Log5(("nemHCNativeNotifyPhysPageAllocated: %RGp HCPhys=%RHp fPageProt=%#x enmType=%d *pu2State=%d\n",
-          GCPhys, HCPhys, fPageProt, enmType, *pu2State));
-    RT_NOREF(pVM, GCPhys, HCPhys, fPageProt, enmType, pu2State);
-    return VINF_SUCCESS;
-}
-
-
-VMM_INT_DECL(void) NEMHCNotifyPhysPageProtChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys, RTR3PTR pvR3, uint32_t fPageProt,
-                                                  PGMPAGETYPE enmType, uint8_t *pu2State)
-{
-    Log5(("NEMHCNotifyPhysPageProtChanged: %RGp HCPhys=%RHp fPageProt=%#x enmType=%d *pu2State=%d\n",
-          GCPhys, HCPhys, fPageProt, enmType, *pu2State));
-    Assert(VM_IS_NEM_ENABLED(pVM));
-    RT_NOREF(pVM, GCPhys, HCPhys, pvR3, fPageProt, enmType, pu2State);
-
-}
-
-
-VMM_INT_DECL(void) NEMHCNotifyPhysPageChanged(PVMCC pVM, RTGCPHYS GCPhys, RTHCPHYS HCPhysPrev, RTHCPHYS HCPhysNew,
-                                              RTR3PTR pvNewR3, uint32_t fPageProt, PGMPAGETYPE enmType, uint8_t *pu2State)
-{
-    Log5(("nemHCNativeNotifyPhysPageChanged: %RGp HCPhys=%RHp->%RHp pvNewR3=%p fPageProt=%#x enmType=%d *pu2State=%d\n",
-          GCPhys, HCPhysPrev, HCPhysNew, pvNewR3, fPageProt, enmType, *pu2State));
-    Assert(VM_IS_NEM_ENABLED(pVM));
-    RT_NOREF(pVM, GCPhys, HCPhysPrev, HCPhysNew, pvNewR3, fPageProt, enmType, pu2State);
+VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
+{
+    /*
+     * Try switch to NEM runloop state.
+     */
+    if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED))
+    { /* likely */ }
+    else
+    {
+        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);
+        LogFlow(("NEM/%u: returning immediately because canceled\n", pVCpu->idCpu));
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * The run loop.
+     */
+    struct kvm_run * const  pRun                = pVCpu->nem.s.pRun;
+    const bool              fSingleStepping     = DBGFIsStepping(pVCpu);
+    VBOXSTRICTRC            rcStrict            = VINF_SUCCESS;
+    for (unsigned iLoop = 0;; iLoop++)
+    {
+        /*
+         * Pending interrupts or such?  Need to check and deal with this prior
+         * to the state syncing.
+         */
+        if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_PIC
+                                     | VMCPU_FF_INTERRUPT_NMI  | VMCPU_FF_INTERRUPT_SMI))
+        {
+            /* Try inject interrupt. */
+            rcStrict = nemHCLnxHandleInterruptFF(pVM, pVCpu);
+            if (rcStrict == VINF_SUCCESS)
+            { /* likely */ }
+            else
+            {
+                LogFlow(("NEM/%u: breaking: nemHCLnxHandleInterruptFF -> %Rrc\n", pVCpu->idCpu, VBOXSTRICTRC_VAL(rcStrict) ));
+                STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatBreakOnStatus);
+                break;
+            }
+        }
+
+        /*
+         * Do not execute in KVM if the A20 isn't enabled.
+         */
+        if (PGMPhysIsA20Enabled(pVCpu))
+        { /* likely */ }
+        else
+        {
+            rcStrict = VINF_EM_RESCHEDULE_REM;
+            LogFlow(("NEM/%u: breaking: A20 disabled\n", pVCpu->idCpu));
+            break;
+        }
+
+        /*
+         * Ensure KVM has the whole state.
+         */
+        if (   (pVCpu->cpum.GstCtx.fExtrn & CPUMCTX_EXTRN_ALL)
+            !=                              CPUMCTX_EXTRN_ALL)
+        {
+            int rc2 = nemHCLnxExportState(pVM, pVCpu, &pVCpu->cpum.GstCtx, pRun);
+            AssertRCReturn(rc2, rc2);
+        }
+
+        /*
+         * Poll timers and run for a bit.
+         *
+         * With the VID approach (ring-0 or ring-3) we can specify a timeout here,
+         * so we take the time of the next timer event and use that as a deadline.
+         * The rounding heuristics are "tuned" so that rhel5 (1K timer) will boot fine.
+         */
+        /** @todo See if we cannot optimize this TMTimerPollGIP by only redoing
+         *        the whole polling job when timers have changed... */
+        uint64_t       offDeltaIgnored;
+        uint64_t const nsNextTimerEvt = TMTimerPollGIP(pVM, pVCpu, &offDeltaIgnored); NOREF(nsNextTimerEvt);
+        if (   !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+            && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+        {
+            if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM_WAIT, VMCPUSTATE_STARTED_EXEC_NEM))
+            {
+                LogFlow(("NEM/%u: Entry @ %04x:%08RX64 IF=%d EFL=%#RX64 SS:RSP=%04x:%08RX64 cr0=%RX64\n",
+                         pVCpu->idCpu, pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip,
+                         !!(pRun->s.regs.regs.rflags & X86_EFL_IF), pRun->s.regs.regs.rflags,
+                         pRun->s.regs.sregs.ss.selector, pRun->s.regs.regs.rsp, pRun->s.regs.sregs.cr0));
+                TMNotifyStartOfExecution(pVM, pVCpu);
+
+                int rcLnx = ioctl(pVCpu->nem.s.fdVCpu, KVM_RUN, 0UL);
+
+                VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_WAIT);
+                TMNotifyEndOfExecution(pVM, pVCpu, ASMReadTSC());
+
+                LogFlow(("NEM/%u: Exit  @ %04x:%08RX64 IF=%d EFL=%#RX64 CR8=%#x Reason=%#x IrqReady=%d Flags=%#x\n", pVCpu->idCpu,
+                         pRun->s.regs.sregs.cs.selector, pRun->s.regs.regs.rip, pRun->if_flag,
+                         pRun->s.regs.regs.rflags, pRun->s.regs.sregs.cr8, pRun->exit_reason,
+                         pRun->ready_for_interrupt_injection, pRun->flags));
+                if (RT_LIKELY(rcLnx == 0 || errno == EINTR))
+                {
+                    /*
+                     * Deal with the message.
+                     */
+                    rcStrict = nemHCLnxHandleExit(pVM, pVCpu, pRun);
+                    if (rcStrict == VINF_SUCCESS)
+                    { /* hopefully likely */ }
+                    else
+                    {
+                        LogFlow(("NEM/%u: breaking: nemHCLnxHandleExit -> %Rrc\n", pVCpu->idCpu, VBOXSTRICTRC_VAL(rcStrict) ));
+                        STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatBreakOnStatus);
+                        break;
+                    }
+                }
+                else
+                {
+                    int rc2 = RTErrConvertFromErrno(errno);
+                    AssertLogRelMsgFailedReturn(("KVM_RUN failed: rcLnx=%d errno=%u rc=%Rrc\n", rcLnx, errno, rc2), rc2);
+                }
+
+                /*
+                 * If no relevant FFs are pending, loop.
+                 */
+                if (   !VM_FF_IS_ANY_SET(   pVM,   !fSingleStepping ? VM_FF_HP_R0_PRE_HM_MASK    : VM_FF_HP_R0_PRE_HM_STEP_MASK)
+                    && !VMCPU_FF_IS_ANY_SET(pVCpu, !fSingleStepping ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
+                    continue;
+
+                /** @todo Try to handle pending flags, not just return to EM loops.  Take care
+                 *        not to set important RCs here unless we've handled an exit. */
+                LogFlow(("NEM/%u: breaking: pending FF (%#x / %#RX64)\n",
+                         pVCpu->idCpu, pVM->fGlobalForcedActions, (uint64_t)pVCpu->fLocalForcedActions));
+                STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatBreakOnFFPost);
+            }
+            else
+            {
+                LogFlow(("NEM/%u: breaking: canceled %d (pre exec)\n", pVCpu->idCpu, VMCPU_GET_STATE(pVCpu) ));
+                STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatBreakOnCancel);
+            }
+        }
+        else
+        {
+            LogFlow(("NEM/%u: breaking: pending FF (pre exec)\n", pVCpu->idCpu));
+            STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatBreakOnFFPre);
+        }
+        break;
+    } /* the run loop */
+
+
+    /*
+     * If the CPU is running, make sure to stop it before we try sync back the
+     * state and return to EM.  We don't sync back the whole state if we can help it.
+     */
+    if (!VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM))
+        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);
+
+    if (pVCpu->cpum.GstCtx.fExtrn & CPUMCTX_EXTRN_ALL)
+    {
+        /* Try to anticipate what we might need. */
+        uint64_t fImport = IEM_CPUMCTX_EXTRN_MUST_MASK;
+        if (   (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST)
+            || RT_FAILURE(rcStrict))
+            fImport = CPUMCTX_EXTRN_ALL;
+# ifdef IN_RING0 /* Ring-3 I/O port access optimizations: */
+        else if (   rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
+                 || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
+            fImport = CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS;
+        else if (rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
+            fImport = CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS;
+# endif
+        else if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_APIC
+                                          | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
+            fImport |= IEM_CPUMCTX_EXTRN_XCPT_MASK;
+
+        if (pVCpu->cpum.GstCtx.fExtrn & fImport)
+        {
+            int rc2 = nemHCLnxImportState(pVCpu, fImport, pRun);
+            if (RT_SUCCESS(rc2))
+                pVCpu->cpum.GstCtx.fExtrn &= ~fImport;
+            else if (RT_SUCCESS(rcStrict))
+                rcStrict = rc2;
+            if (!(pVCpu->cpum.GstCtx.fExtrn & CPUMCTX_EXTRN_ALL))
+                pVCpu->cpum.GstCtx.fExtrn = 0;
+            STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatImportOnReturn);
+        }
+        else
+            STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatImportOnReturnSkipped);
+    }
+    else
+    {
+        pVCpu->cpum.GstCtx.fExtrn = 0;
+        STAM_REL_COUNTER_INC(&pVCpu->nem.s.StatImportOnReturnSkipped);
+    }
+
+    LogFlow(("NEM/%u: %04x:%08RX64 efl=%#08RX64 => %Rrc\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+             pVCpu->cpum.GstCtx.rflags, VBOXSTRICTRC_VAL(rcStrict) ));
+    return rcStrict;
 }
 
Index: /trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMR3/NEMR3Native-win.cpp	(revision 92465)
@@ -2114,8 +2114,8 @@
 
 VMMR3_INT_DECL(int) NEMR3NotifyPhysMmioExUnmap(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, uint32_t fFlags, void *pvRam,
-                                               void *pvMmio2, uint8_t *pu2State)
-{
-    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p\n",
-          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State));
+                                               void *pvMmio2, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("NEMR3NotifyPhysMmioExUnmap: %RGp LB %RGp fFlags=%#x pvRam=%p pvMmio2=%p pu2State=%p uNemRange=%#x (%#x)\n",
+          GCPhys, cb, fFlags, pvRam, pvMmio2, pu2State, puNemRange, *puNemRange));
 
     int rc = VINF_SUCCESS;
@@ -2203,8 +2203,9 @@
 
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterEarly(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages, uint32_t fFlags,
-                                                     uint8_t *pu2State)
+                                                     uint8_t *pu2State, uint32_t *puNemRange)
 {
     Log5(("nemR3NativeNotifyPhysRomRegisterEarly: %RGp LB %RGp pvPages=%p fFlags=%#x\n", GCPhys, cb, pvPages, fFlags));
-    *pu2State = UINT8_MAX;
+    *pu2State   = UINT8_MAX;
+    *puNemRange = 0;
 
 #if 0 /* Let's not do this after all.  We'll protection change notifications for each page and if not we'll map them lazily. */
@@ -2242,8 +2243,8 @@
 
 VMMR3_INT_DECL(int)  NEMR3NotifyPhysRomRegisterLate(PVM pVM, RTGCPHYS GCPhys, RTGCPHYS cb, void *pvPages,
-                                                    uint32_t fFlags, uint8_t *pu2State)
-{
-    Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p\n",
-          GCPhys, cb, pvPages, fFlags, pu2State));
+                                                    uint32_t fFlags, uint8_t *pu2State, uint32_t *puNemRange)
+{
+    Log5(("nemR3NativeNotifyPhysRomRegisterLate: %RGp LB %RGp pvPages=%p fFlags=%#x pu2State=%p (%d) puNemRange=%p (%#x)\n",
+          GCPhys, cb, pvPages, fFlags, pu2State, *pu2State, puNemRange, *puNemRange));
     *pu2State = UINT8_MAX;
 
@@ -2265,7 +2266,7 @@
         return VERR_NEM_MAP_PAGES_FAILED;
     }
-    RT_NOREF(fFlags);
+    RT_NOREF(fFlags, puNemRange);
 #else
-    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags);
+    RT_NOREF(pVM, GCPhys, cb, pvPages, fFlags, puNemRange);
 #endif
     return VINF_SUCCESS;
Index: /trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp	(revision 92464)
+++ /trunk/src/VBox/VMM/VMMR3/PGMPhys.cpp	(revision 92465)
@@ -2498,5 +2498,6 @@
                     if (VM_IS_NEM_ENABLED(pVM)) /* Notify REM before we unlink the range. */
                     {
-                        rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, 0 /*fFlags*/, NULL, NULL, NULL);
+                        rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, 0 /*fFlags*/,
+                                                        NULL, NULL, NULL, &pRam->uNemRange);
                         AssertLogRelRCReturn(rc, rc);
                     }
@@ -2546,5 +2547,5 @@
                     rc = NEMR3NotifyPhysMmioExUnmap(pVM, GCPhys, GCPhysLast - GCPhys + 1, NEM_NOTIFY_PHYS_MMIO_EX_F_REPLACE,
                                                     pRam->pvR3 ? (uint8_t *)pRam->pvR3 + GCPhys - pRam->GCPhys : NULL,
-                                                    NULL, &u2State);
+                                                    NULL, &u2State, &pRam->uNemRange);
                     AssertLogRelRCReturn(rc, rc);
                     if (u2State != UINT8_MAX)
@@ -3688,5 +3689,5 @@
                                             pRam->pvR3
                                             ? (uint8_t *)pRam->pvR3 + pFirstMmio->RamRange.GCPhys - pRam->GCPhys : NULL,
-                                            pFirstMmio->pvR3, &u2State);
+                                            pFirstMmio->pvR3, &u2State, &pRam->uNemRange);
             AssertRCStmt(rc, rcRet = rc);
             if (u2State != UINT8_MAX)
@@ -3721,5 +3722,5 @@
                 uint8_t u2State = UINT8_MAX;
                 rc = NEMR3NotifyPhysMmioExUnmap(pVM, pCurMmio->RamRange.GCPhys, pCurMmio->RamRange.cb, fNemFlags,
-                                                NULL, pCurMmio->pvR3, &u2State);
+                                                NULL, pCurMmio->pvR3, &u2State, &pCurMmio->RamRange.uNemRange);
                 AssertRCStmt(rc, rcRet = rc);
                 if (u2State != UINT8_MAX)
@@ -4349,9 +4350,10 @@
                               | (fFlags & PGMPHYS_ROM_FLAGS_SHADOWED ? NEM_NOTIFY_PHYS_ROM_F_SHADOW : 0);
     uint8_t        u2NemState = UINT8_MAX;
+    uint32_t       uNemRange  = 0;
     if (VM_IS_NEM_ENABLED(pVM))
     {
         int rc = NEMR3NotifyPhysRomRegisterEarly(pVM, GCPhys, cPages << PAGE_SHIFT,
                                                 fRamExists ? PGM_RAMRANGE_CALC_PAGE_R3PTR(pRam, GCPhys) : NULL,
-                                                fNemNotify, &u2NemState);
+                                                fNemNotify, &u2NemState, fRamExists ? &pRam->uNemRange : &uNemRange);
         AssertLogRelRCReturn(rc, rc);
     }
@@ -4435,4 +4437,7 @@
                 pRamNew->pvR3          = NULL;
                 pRamNew->paLSPages     = NULL;
+#ifdef VBOX_WITH_NATIVE_NEM
+                pRamNew->uNemRange     = uNemRange;
+#endif
 
                 PPGMPAGE pRamPage = &pRamNew->aPages[idxFirstRamPage];
@@ -4631,5 +4636,6 @@
                             u2NemState = UINT8_MAX;
                             rc = NEMR3NotifyPhysRomRegisterLate(pVM, GCPhys, cb, PGM_RAMRANGE_CALC_PAGE_R3PTR(pRamNew, GCPhys),
-                                                                fNemNotify, &u2NemState);
+                                                                fNemNotify, &u2NemState,
+                                                                fRamExists ? &pRam->uNemRange : &pRamNew->uNemRange);
                             if (u2NemState != UINT8_MAX)
                                 pgmPhysSetNemStateForPages(&pRamNew->aPages[idxFirstRamPage], cPages, u2NemState);
Index: /trunk/src/VBox/VMM/include/NEMInternal.h
===================================================================
--- /trunk/src/VBox/VMM/include/NEMInternal.h	(revision 92464)
+++ /trunk/src/VBox/VMM/include/NEMInternal.h	(revision 92465)
@@ -189,10 +189,15 @@
     int32_t                     fdVm;
 
+    /** KVM_GET_VCPU_MMAP_SIZE. */
+    uint32_t                    cbVCpuMmap;
     /** KVM_CAP_NR_MEMSLOTS. */
     uint32_t                    cMaxMemSlots;
     /** KVM_CAP_X86_ROBUST_SINGLESTEP. */
     bool                        fRobustSingleStep;
-    /** KVM_GET_VCPU_MMAP_SIZE. */
-    uint32_t                    cbVCpuMmap;
+
+    /** Hint where there might be a free slot. */
+    uint16_t                    idPrevSlot;
+    /** Memory slot ID allocation bitmap. */
+    uint64_t                    bmSlotIds[_32K / 8 / sizeof(uint64_t)];
 
 #elif defined(RT_OS_WINDOWS)
@@ -356,9 +361,9 @@
     STAMCOUNTER                 StatCancelChangedState;
     STAMCOUNTER                 StatCancelAlertedThread;
+# endif
     STAMCOUNTER                 StatBreakOnCancel;
     STAMCOUNTER                 StatBreakOnFFPre;
     STAMCOUNTER                 StatBreakOnFFPost;
     STAMCOUNTER                 StatBreakOnStatus;
-# endif
     STAMCOUNTER                 StatImportOnDemand;
     STAMCOUNTER                 StatImportOnReturn;
