Index: /trunk/Config.kmk
===================================================================
--- /trunk/Config.kmk	(revision 68225)
+++ /trunk/Config.kmk	(revision 68226)
@@ -399,6 +399,13 @@
 # Enables the third step using IEM (the interpreter).
 VBOX_WITH_3RD_IEM_STEP = 1
-# Enables nested hardware virtualization support (mainly for IEM)
+# Enables nested hardware virtualization support
 #VBOX_WITH_NESTED_HWVIRT = 1
+# Enables nested hardware virtualization support only in IEM
+#VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM = 1
+ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ ifndef VBOX_WITH_NESTED_HWVIRT
+  VBOX_WITH_NESTED_HWVIRT = 1
+ endif
+endif
 ## @}
 
Index: /trunk/include/VBox/err.h
===================================================================
--- /trunk/include/VBox/err.h	(revision 68225)
+++ /trunk/include/VBox/err.h	(revision 68226)
@@ -2141,4 +2141,6 @@
 /** An operation caused a nested-guest SVM \#VMEXIT. */
 #define VINF_SVM_VMEXIT                              4067
+/** VMRUN emulation succeeded, ready to immediately enter the nested-guest. */
+#define VINF_SVM_VMRUN                               4068
 /** @} */
 
Index: /trunk/include/VBox/vmm/cpum.h
===================================================================
--- /trunk/include/VBox/vmm/cpum.h	(revision 68225)
+++ /trunk/include/VBox/vmm/cpum.h	(revision 68226)
@@ -1201,4 +1201,6 @@
 VMM_INT_DECL(bool)      CPUMCanSvmNstGstTakeVirtIntr(PCCPUMCTX pCtx);
 VMM_INT_DECL(uint8_t)   CPUMGetSvmNstGstInterrupt(PCCPUMCTX pCtx);
+VMM_INT_DECL(void)      CPUMSvmVmExitRestoreHostState(PCPUMCTX pCtx);
+VMM_INT_DECL(void)      CPUMSvmVmRunSaveHostState(PCPUMCTX pCtx, uint8_t cbInstr);
 /** @} */
 
Index: /trunk/include/VBox/vmm/hm.h
===================================================================
--- /trunk/include/VBox/vmm/hm.h	(revision 68225)
+++ /trunk/include/VBox/vmm/hm.h	(revision 68226)
@@ -162,7 +162,4 @@
  * @{
  */
-VMM_INT_DECL(VBOXSTRICTRC)      HMSvmNstGstVmExit(PVMCPU pVCpu, PCPUMCTX pCtx, uint64_t uExitCode, uint64_t uExitInfo1,
-                                                  uint64_t uExitInfo2);
-VMM_INT_DECL(void)              HMVmxNstGstVmExit(PVMCPU pVCpu, uint16_t uBasicExitReason);
 VMM_INT_DECL(VBOXSTRICTRC)      HMSvmVmmcall(PVMCPU pVCpu, PCPUMCTX pCtx, bool *pfRipUpdated);
 VMM_INT_DECL(VBOXSTRICTRC)      HMSvmVmrun(PVMCPU pVCpu, PCPUMCTX pCtx, uint8_t cbInstr, RTGCPHYS GCPhysVmcb);
@@ -175,4 +172,8 @@
 VMM_INT_DECL(VBOXSTRICTRC)      HMSvmNstGstHandleIOIntercept(PVMCPU pVCpu, PCPUMCTX pCtx, PCSVMIOIOEXITINFO pIoExitInfo,
                                                              uint64_t uNextRip);
+VMM_INT_DECL(bool)              HMSvmIsIOInterceptActive(void *pvIoBitmap, uint16_t u16Port, SVMIOIOTYPE enmIoType, uint8_t cbReg,
+                                                         uint8_t cAddrSizeBits, uint8_t iEffSeg, bool fRep, bool fStrIo,
+                                                         PSVMIOIOEXITINFO pIoExitInfo);
+VMM_INT_DECL(void)              HMSvmNstGstVmExitNotify(PVMCPU pVCpu, PSVMVMCB pVmcbNstGst);
 /** @} */
 
Index: /trunk/include/VBox/vmm/hm_svm.h
===================================================================
--- /trunk/include/VBox/vmm/hm_svm.h	(revision 68225)
+++ /trunk/include/VBox/vmm/hm_svm.h	(revision 68226)
@@ -537,4 +537,7 @@
 typedef const SVMEVENT *PCSVMEVENT;
 
+/** Gets the event type given an SVMEVENT parameter. */
+#define SVM_EVENT_GET_TYPE(a_SvmEvent)  (((a_SvmEvent) >> 8) & 7)
+
 /**
  * SVM Interrupt control structure (Virtual Interrupt Control).
@@ -617,4 +620,6 @@
 /** 64-bit address for the IO buffer. */
 #define SVM_IOIO_64_BIT_ADDR            RT_BIT_32(9)
+/** Number of bits to shift right to get the address sizes. */
+#define SVM_IOIO_ADDR_SIZE_SHIFT        7
 /** Mask of all the IO address sizes. */
 #define SVM_IOIO_ADDR_SIZE_MASK         (SVM_IOIO_16_BIT_ADDR | SVM_IOIO_32_BIT_ADDR | SVM_IOIO_64_BIT_ADDR)
@@ -958,4 +963,52 @@
 AssertCompileSize(SVMVMCB, 0x1000);
 
+/** SVM nested-guest VMCB cache.
+ *
+ *  A state structure for holding information across AMD-V VMRUN/\#VMEXIT
+ *  operation during execution of the nested-guest, restored on \#VMEXIT.
+ */
+typedef struct SVMNESTEDVMCBCACHE
+{
+    /** @name Nested-guest VMCB controls.
+     * @{ */
+    /** Cache of CRX read intercepts. */
+    uint16_t            u16InterceptRdCRx;
+    /** Cache of CRX write intercepts. */
+    uint16_t            u16InterceptWrCRx;
+    /** Cache of DRX read intercepts. */
+    uint16_t            u16InterceptRdDRx;
+    /** Cache of DRX write intercepts. */
+    uint16_t            u16InterceptWrDRx;
+    /** Cache of exception intercepts. */
+    uint32_t            u32InterceptXcpt;
+    /** Cache of control intercepts. */
+    uint64_t            u64InterceptCtrl;
+    /** Cache of IOPM nested-guest physical address. */
+    uint64_t            u64IOPMPhysAddr;
+    /** Cache of MSRPM nested-guest physical address. */
+    uint64_t            u64MSRPMPhysAddr;
+    /** Cache of the VMCB clean bits. */
+    uint64_t            u64VmcbCleanBits;
+    /** Cache of V_INTR_MASKING bit. */
+    bool                fVIntrMasking;
+    /** @} */
+
+    /** @name Other miscellaneous state.
+     * @{ */
+    /** Whether the fields above are updated or not. */
+    bool                fValid;
+    /** Whether a VMRUN was just emulated in R0 and the VMCB is up to date. */
+    bool                fVmrunEmulatedInR0;
+    /** Whether the VMCB exit code and info fields are updated during \#VMEXIT
+     *  processing. */
+    bool                fExitCodeAndInfoUpdated;
+    /** @} */
+} SVMNESTEDVMCBCACHE;
+/** Pointer to the SVMNESTEDVMCBCACHE structure. */
+typedef SVMNESTEDVMCBCACHE *PSVMNESTEDVMCBCACHE;
+/** Pointer to a const SVMNESTEDVMCBCACHE structure. */
+typedef const SVMNESTEDVMCBCACHE *PCSVMNESTEDVMCBCACHE;
+/** @} */
+
 #ifdef IN_RING0
 VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt);
Index: /trunk/src/VBox/VMM/Makefile.kmk
===================================================================
--- /trunk/src/VBox/VMM/Makefile.kmk	(revision 68225)
+++ /trunk/src/VBox/VMM/Makefile.kmk	(revision 68226)
@@ -50,5 +50,8 @@
 endif
 ifdef VBOX_WITH_NESTED_HWVIRT
- VMM_COMMON_DEFS += VBOX_WITH_NESTED_HWVIRT VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ VMM_COMMON_DEFS += VBOX_WITH_NESTED_HWVIRT
+ ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+  VMM_COMMON_DEFS += VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ endif
 endif
 #ifdef VBOX_WITH_IEM
Index: /trunk/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp	(revision 68225)
+++ /trunk/src/VBox/VMM/VMMAll/CPUMAllRegs.cpp	(revision 68226)
@@ -2624,2 +2624,67 @@
 }
 
+
+/**
+ * Restores the host-state from the host-state save area as part of a \#VMEXIT.
+ *
+ * @param   pCtx        The guest-CPU context.
+ * @remarks Reloads the state saved earlier by CPUMSvmVmRunSaveHostState().
+ */
+VMM_INT_DECL(void) CPUMSvmVmExitRestoreHostState(PCPUMCTX pCtx)
+{
+    /*
+     * Reload the guest's "host state".
+     */
+    PSVMHOSTSTATE pHostState = &pCtx->hwvirt.svm.HostState;
+    pCtx->es         = pHostState->es;
+    pCtx->cs         = pHostState->cs;
+    pCtx->ss         = pHostState->ss;
+    pCtx->ds         = pHostState->ds;
+    pCtx->gdtr       = pHostState->gdtr;
+    pCtx->idtr       = pHostState->idtr;
+    pCtx->msrEFER    = pHostState->uEferMsr;
+    pCtx->cr0        = pHostState->uCr0 | X86_CR0_PE;
+    pCtx->cr3        = pHostState->uCr3;
+    pCtx->cr4        = pHostState->uCr4;
+    pCtx->rflags     = pHostState->rflags;
+    pCtx->rflags.Bits.u1VM = 0;
+    pCtx->rip        = pHostState->uRip;
+    pCtx->rsp        = pHostState->uRsp;
+    pCtx->rax        = pHostState->uRax;
+    pCtx->dr[7]     &= ~(X86_DR7_ENABLED_MASK | X86_DR7_RAZ_MASK | X86_DR7_MBZ_MASK);
+    pCtx->dr[7]     |= X86_DR7_RA1_MASK;
+
+    /** @todo if RIP is not canonical or outside the CS segment limit, we need to
+     *        raise \#GP(0) in the guest. */
+
+    /** @todo check the loaded host-state for consistency. Figure out what
+     *        exactly this involves? */
+}
+
+
+/**
+ * Saves the host-state to the host-state save area as part of a VMRUN.
+ *
+ * @param   pCtx        The guest-CPU context.
+ * @param   cbInstr     The length of the VMRUN instruction in bytes.
+ * @remarks The saved RIP is the address of the instruction following VMRUN.
+ */
+VMM_INT_DECL(void) CPUMSvmVmRunSaveHostState(PCPUMCTX pCtx, uint8_t cbInstr)
+{
+    PSVMHOSTSTATE pHostState = &pCtx->hwvirt.svm.HostState;
+    pHostState->es       = pCtx->es;
+    pHostState->cs       = pCtx->cs;
+    pHostState->ss       = pCtx->ss;
+    pHostState->ds       = pCtx->ds;
+    pHostState->gdtr     = pCtx->gdtr;
+    pHostState->idtr     = pCtx->idtr;
+    pHostState->uEferMsr = pCtx->msrEFER;
+    pHostState->uCr0     = pCtx->cr0;
+    pHostState->uCr3     = pCtx->cr3;
+    pHostState->uCr4     = pCtx->cr4;
+    pHostState->rflags   = pCtx->rflags;
+    pHostState->uRip     = pCtx->rip + cbInstr;
+    pHostState->uRsp     = pCtx->rsp;
+    pHostState->uRax     = pCtx->rax;
+}
+
Index: /trunk/src/VBox/VMM/VMMAll/HMSVMAll.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/HMSVMAll.cpp	(revision 68225)
+++ /trunk/src/VBox/VMM/VMMAll/HMSVMAll.cpp	(revision 68226)
@@ -258,2 +258,109 @@
 }
 
+
+/**
+ * Determines whether an IOIO intercept is active for the nested-guest or not.
+ *
+ * @param   pvIoBitmap      Pointer to the nested-guest IO bitmap.
+ * @param   u16Port         The IO port being accessed.
+ * @param   enmIoType       The type of IO access.
+ * @param   cbReg           The IO operand size in bytes.
+ * @param   cAddrSizeBits   The address size bits (for 16, 32 or 64).
+ * @param   iEffSeg         The effective segment number.
+ * @param   fRep            Whether this is a repeating IO instruction (REP prefix).
+ * @param   fStrIo          Whether this is a string IO instruction.
+ * @param   pIoExitInfo     Pointer to the SVMIOIOEXITINFO struct to be filled.
+ *                          Optional, can be NULL.
+ */
+VMM_INT_DECL(bool) HMSvmIsIOInterceptActive(void *pvIoBitmap, uint16_t u16Port, SVMIOIOTYPE enmIoType, uint8_t cbReg,
+                                            uint8_t cAddrSizeBits, uint8_t iEffSeg, bool fRep, bool fStrIo,
+                                            PSVMIOIOEXITINFO pIoExitInfo)
+{
+    Assert(cAddrSizeBits == 0 || cAddrSizeBits == 16 || cAddrSizeBits == 32 || cAddrSizeBits == 64);
+    Assert(cbReg == 1 || cbReg == 2 || cbReg == 4 || cbReg == 8);
+
+    /*
+     * The IOPM layout:
+     * Each bit represents one 8-bit port. That makes a total of 0..65535 bits or
+     * two 4K pages.
+     *
+     * For IO instructions that access more than a single byte, the permission bits
+     * for all bytes are checked; if any bit is set to 1, the IO access is intercepted.
+     *
+     * Since it's possible to do a 32-bit IO access at port 65534 (accessing 4 bytes),
+     * we need 3 extra bits beyond the second 4K page.
+     */
+    static const uint16_t s_auSizeMasks[] = { 0, 1, 3, 0, 0xf, 0, 0, 0 };
+
+    uint16_t const offIopm   = u16Port >> 3;
+    uint16_t const fSizeMask = s_auSizeMasks[cbReg & 7];   /* operand size in bytes selects the per-byte mask */
+    uint8_t  const cShift    = u16Port - (offIopm << 3);
+    uint16_t const fIopmMask = (1 << cShift) | (fSizeMask << cShift);
+
+    uint8_t const *pbIopm = (uint8_t *)pvIoBitmap;
+    Assert(pbIopm);
+    pbIopm += offIopm;
+    uint16_t const u16Iopm = *(uint16_t *)pbIopm;
+    if (u16Iopm & fIopmMask)
+    {
+        if (pIoExitInfo)
+        {
+            static const uint32_t s_auIoOpSize[] =
+            { SVM_IOIO_32_BIT_OP, SVM_IOIO_8_BIT_OP, SVM_IOIO_16_BIT_OP, 0, SVM_IOIO_32_BIT_OP, 0, 0, 0 };
+
+            static const uint32_t s_auIoAddrSize[] =
+            { 0, SVM_IOIO_16_BIT_ADDR, SVM_IOIO_32_BIT_ADDR, 0, SVM_IOIO_64_BIT_ADDR, 0, 0, 0 };
+
+            pIoExitInfo->u         = s_auIoOpSize[cbReg & 7];
+            pIoExitInfo->u        |= s_auIoAddrSize[(cAddrSizeBits >> 4) & 7];
+            pIoExitInfo->n.u1STR   = fStrIo;
+            pIoExitInfo->n.u1REP   = fRep;
+            pIoExitInfo->n.u3SEG   = iEffSeg & 7;
+            pIoExitInfo->n.u1Type  = enmIoType;
+            pIoExitInfo->n.u16Port = u16Port;
+        }
+        return true;
+    }
+
+    /** @todo remove later (for debugging as VirtualBox always traps all IO
+     *        intercepts). */
+    AssertMsgFailed(("HMSvmIsIOInterceptActive: We expect an IO intercept here!\n"));
+    return false;
+}
+
+
+/**
+ * Notification callback for when a \#VMEXIT happens outside SVM R0 code (e.g.
+ * in IEM).
+ *
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pVmcbNstGst     Pointer to the nested-guest VM control block.
+ *
+ * @sa      hmR0SvmVmRunCacheVmcb.
+ */
+VMM_INT_DECL(void) HMSvmNstGstVmExitNotify(PVMCPU pVCpu, PSVMVMCB pVmcbNstGst)
+{
+    PSVMVMCBCTRL        pVmcbCtrl        = &pVmcbNstGst->ctrl;
+    PSVMNESTEDVMCBCACHE pNstGstVmcbCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+
+    /*
+     * Restore the nested-guest VMCB fields which have been modified for executing
+     * the nested-guest under SVM R0.
+     */
+    if (pNstGstVmcbCache->fValid)
+    {
+        pVmcbCtrl->u16InterceptRdCRx        = pNstGstVmcbCache->u16InterceptRdCRx;
+        pVmcbCtrl->u16InterceptWrCRx        = pNstGstVmcbCache->u16InterceptWrCRx;
+        pVmcbCtrl->u16InterceptRdDRx        = pNstGstVmcbCache->u16InterceptRdDRx;
+        pVmcbCtrl->u16InterceptWrDRx        = pNstGstVmcbCache->u16InterceptWrDRx;
+        pVmcbCtrl->u32InterceptXcpt         = pNstGstVmcbCache->u32InterceptXcpt;
+        pVmcbCtrl->u64InterceptCtrl         = pNstGstVmcbCache->u64InterceptCtrl;
+        pVmcbCtrl->u64VmcbCleanBits         = pNstGstVmcbCache->u64VmcbCleanBits;
+        pVmcbCtrl->u64IOPMPhysAddr          = pNstGstVmcbCache->u64IOPMPhysAddr;
+        pVmcbCtrl->u64MSRPMPhysAddr         = pNstGstVmcbCache->u64MSRPMPhysAddr;
+        pVmcbCtrl->IntCtrl.n.u1VIntrMasking = pNstGstVmcbCache->fVIntrMasking;
+        pNstGstVmcbCache->fValid = false;
+    }
+    pNstGstVmcbCache->fVmrunEmulatedInR0 = false;
+}
+
Index: /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp	(revision 68225)
+++ /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp	(revision 68226)
@@ -231,5 +231,6 @@
 *   Internal Functions                                                                                                           *
 *********************************************************************************************************************************/
-static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, unsigned uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite);
+static void hmR0SvmSetMsrPermission(PSVMVMCB pVmcb, uint8_t *pbMsrBitmap, unsigned uMsr, SVMMSREXITREAD enmRead,
+                                    SVMMSREXITWRITE enmWrite);
 static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu);
 static void hmR0SvmLeave(PVMCPU pVCpu);
@@ -281,6 +282,9 @@
 /** @} */
 
-DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient);
-
+static int hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient);
+#ifdef VBOX_WITH_NESTED_HWVIRT
+static int hmR0SvmHandleExitNested(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient);
+static int hmR0SvmExecVmexit(PVMCPU pVCpu, PCPUMCTX pCtx);
+#endif
 
 /*********************************************************************************************************************************
@@ -456,5 +460,4 @@
         {
             RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcbHost, false);
-            pVCpu->hm.s.svm.pvVmcbHost       = 0;
             pVCpu->hm.s.svm.HCPhysVmcbHost   = 0;
             pVCpu->hm.s.svm.hMemObjVmcbHost  = NIL_RTR0MEMOBJ;
@@ -464,5 +467,5 @@
         {
             RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcb, false);
-            pVCpu->hm.s.svm.pvVmcb           = 0;
+            pVCpu->hm.s.svm.pVmcb            = NULL;
             pVCpu->hm.s.svm.HCPhysVmcb       = 0;
             pVCpu->hm.s.svm.hMemObjVmcb      = NIL_RTR0MEMOBJ;
@@ -472,5 +475,5 @@
         {
             RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false);
-            pVCpu->hm.s.svm.pvMsrBitmap      = 0;
+            pVCpu->hm.s.svm.pvMsrBitmap      = NULL;
             pVCpu->hm.s.svm.HCPhysMsrBitmap  = 0;
             pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
@@ -525,8 +528,8 @@
             goto failure_cleanup;
 
-        pVCpu->hm.s.svm.pvVmcbHost     = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
+        void *pvVmcbHost               = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
         pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */);
         Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G);
-        ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcbHost);
+        ASMMemZeroPage(pvVmcbHost);
 
         /*
@@ -537,8 +540,8 @@
             goto failure_cleanup;
 
-        pVCpu->hm.s.svm.pvVmcb          = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
+        pVCpu->hm.s.svm.pVmcb           = (PSVMVMCB)RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
         pVCpu->hm.s.svm.HCPhysVmcb      = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */);
         Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G);
-        ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcb);
+        ASMMemZeroPage(pVCpu->hm.s.svm.pVmcb);
 
         /*
@@ -555,5 +558,5 @@
         /* Set all bits to intercept all MSR accesses (changed later on). */
         ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
     }
 
     return VINF_SUCCESS;
@@ -582,9 +585,11 @@
  *
  * @param   pVCpu       The cross context virtual CPU structure.
+ * @param   pCtx        Pointer to the guest CPU or nested-guest CPU context.
  * @param   uMsr        The MSR for which the access permissions are being set.
  * @param   enmRead     MSR read permissions.
  * @param   enmWrite    MSR write permissions.
  */
-static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, unsigned uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite)
+static void hmR0SvmSetMsrPermission(PSVMVMCB pVmcb, uint8_t *pbMsrBitmap, unsigned uMsr, SVMMSREXITREAD enmRead,
+                                    SVMMSREXITWRITE enmWrite)
 {
     uint16_t offMsrpm;
@@ -596,7 +601,5 @@
     Assert(offMsrpm < SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
 
-    uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
     pbMsrBitmap += offMsrpm;
-
     if (enmRead == SVMMSREXIT_INTERCEPT_READ)
         ASMBitSet(pbMsrBitmap, uMsrpmBit);
@@ -609,5 +612,4 @@
         ASMBitClear(pbMsrBitmap, uMsrpmBit + 1);
 
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
     pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
 }
@@ -634,5 +636,5 @@
     {
         PVMCPU   pVCpu = &pVM->aCpus[i];
-        PSVMVMCB pVmcb = (PSVMVMCB)pVM->aCpus[i].hm.s.svm.pvVmcb;
+        PSVMVMCB pVmcb = pVM->aCpus[i].hm.s.svm.pVmcb;
 
         AssertMsgReturn(pVmcb, ("Invalid pVmcb for vcpu[%u]\n", i), VERR_SVM_INVALID_PVMCB);
@@ -767,14 +769,15 @@
          * Don't intercept guest read/write accesses to these MSRs.
          */
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR,          SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_CSTAR,          SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K6_STAR,           SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_SF_MASK,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_FS_BASE,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_GS_BASE,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS,  SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
-        hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_LSTAR,          SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_CSTAR,          SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K6_STAR,           SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_SF_MASK,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_FS_BASE,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_GS_BASE,        SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_IA32_SYSENTER_CS,  SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
     }
 
@@ -803,5 +806,5 @@
         Log4(("SVMR0InvalidatePage %RGv\n", GCVirt));
 
-        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         AssertMsgReturn(pVmcb, ("Invalid pVmcb!\n"), VERR_SVM_INVALID_PVMCB);
 
@@ -829,5 +832,5 @@
 {
     PVM pVM              = pVCpu->CTX_SUFF(pVM);
-    PSVMVMCB pVmcb       = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb       = pVCpu->hm.s.svm.pVmcb;
     PHMGLOBALCPUINFO pCpu = hmR0GetCurrentCpu();
 
@@ -1096,63 +1099,55 @@
 static void hmR0SvmLoadSharedCR0(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
 {
+    uint64_t u64GuestCR0 = pCtx->cr0;
+
+    /* Always enable caching. */
+    u64GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
     /*
-     * Guest CR0.
+     * When Nested Paging is not available use shadow page tables and intercept #PFs (the latter done in SVMR0SetupVM()).
      */
-    PVM pVM = pVCpu->CTX_SUFF(pVM);
-    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
-    {
-        uint64_t u64GuestCR0 = pCtx->cr0;
-
-        /* Always enable caching. */
-        u64GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
-        /*
-         * When Nested Paging is not available use shadow page tables and intercept #PFs (the latter done in SVMR0SetupVM()).
-         */
-        if (!pVM->hm.s.fNestedPaging)
-        {
-            u64GuestCR0 |= X86_CR0_PG;     /* When Nested Paging is not available, use shadow page tables. */
-            u64GuestCR0 |= X86_CR0_WP;     /* Guest CPL 0 writes to its read-only pages should cause a #PF #VMEXIT. */
-        }
-
-        /*
-         * Guest FPU bits.
-         */
-        bool fInterceptNM = false;
-        bool fInterceptMF = false;
-        u64GuestCR0 |= X86_CR0_NE;         /* Use internal x87 FPU exceptions handling rather than external interrupts. */
-        if (CPUMIsGuestFPUStateActive(pVCpu))
-        {
-            /* Catch floating point exceptions if we need to report them to the guest in a different way. */
-            if (!(pCtx->cr0 & X86_CR0_NE))
-            {
-                Log4(("hmR0SvmLoadGuestControlRegs: Intercepting Guest CR0.MP Old-style FPU handling!!!\n"));
-                fInterceptMF = true;
-            }
-        }
-        else
-        {
-            fInterceptNM = true;           /* Guest FPU inactive, #VMEXIT on #NM for lazy FPU loading. */
-            u64GuestCR0 |=  X86_CR0_TS     /* Guest can task switch quickly and do lazy FPU syncing. */
-                          | X86_CR0_MP;    /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
-        }
-
-        /*
-         * Update the exception intercept bitmap.
-         */
-        if (fInterceptNM)
-            hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_NM);
-        else
-            hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_NM);
-
-        if (fInterceptMF)
-            hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_MF);
-        else
-            hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_MF);
-
-        pVmcb->guest.u64CR0 = u64GuestCR0;
-        pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
-        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0);
-    }
+    if (!pVmcb->ctrl.NestedPaging.n.u1NestedPaging)
+    {
+        u64GuestCR0 |= X86_CR0_PG;     /* When Nested Paging is not available, use shadow page tables. */
+        u64GuestCR0 |= X86_CR0_WP;     /* Guest CPL 0 writes to its read-only pages should cause a #PF #VMEXIT. */
+    }
+
+    /*
+     * Guest FPU bits.
+     */
+    bool fInterceptNM = false;
+    bool fInterceptMF = false;
+    u64GuestCR0 |= X86_CR0_NE;         /* Use internal x87 FPU exceptions handling rather than external interrupts. */
+    if (CPUMIsGuestFPUStateActive(pVCpu))
+    {
+        /* Catch floating point exceptions if we need to report them to the guest in a different way. */
+        if (!(pCtx->cr0 & X86_CR0_NE))
+        {
+            Log4(("hmR0SvmLoadGuestControlRegs: Intercepting Guest CR0.MP Old-style FPU handling!!!\n"));
+            fInterceptMF = true;
+        }
+    }
+    else
+    {
+        fInterceptNM = true;           /* Guest FPU inactive, #VMEXIT on #NM for lazy FPU loading. */
+        u64GuestCR0 |=  X86_CR0_TS     /* Guest can task switch quickly and do lazy FPU syncing. */
+                      | X86_CR0_MP;    /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
+    }
+
+    /*
+     * Update the exception intercept bitmap.
+     */
+    if (fInterceptNM)
+        hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_NM);
+    else
+        hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_NM);
+
+    if (fInterceptMF)
+        hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_MF);
+    else
+        hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_MF);
+
+    pVmcb->guest.u64CR0 = u64GuestCR0;
+    pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
 }
 
@@ -1263,4 +1258,56 @@
 
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Loads the nested-guest control registers (CR2, CR3, CR4) into the VMCB.
+ *
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pVmcbNstGst     Pointer to the nested-guest VM control block.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmLoadGuestControlRegsNested(PVMCPU pVCpu, PSVMVMCB pVmcbNstGst, PCPUMCTX pCtx)
+{
+    /*
+     * Guest CR2.
+     */
+    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR2))
+    {
+        pVmcbNstGst->guest.u64CR2 = pCtx->cr2;
+        pVmcbNstGst->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CR2;
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2);
+    }
+
+    /*
+     * Guest CR3.
+     */
+    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3))
+    {
+        Assert(!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmNestedPaging);
+        pVmcbNstGst->guest.u64CR3 = pCtx->cr3;
+        pVmcbNstGst->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3);
+    }
+
+    /*
+     * Guest CR4.
+     * ASSUMES this is done everytime we get in from ring-3! (XCR0)
+     */
+    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4))
+    {
+        Assert(!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmNestedPaging);
+        pVmcbNstGst->guest.u64CR4 = pCtx->cr4;
+        pVmcbNstGst->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+
+        /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+nested-guest XCR0. */
+        pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4);
+    }
+}
+#endif
+
+
 /**
  * Loads the guest segment registers into the VMCB.
@@ -1367,5 +1414,4 @@
         }
     }
-
 
     /** @todo The following are used in 64-bit only (SYSCALL/SYSRET) but they might
@@ -1380,6 +1426,6 @@
 
 /**
- * Loads the guest state into the VMCB and programs the necessary intercepts
- * accordingly.
+ * Loads the guest (or nested-guest) debug state into the VMCB and programs the
+ * necessary intercepts accordingly.
  *
  * @param   pVCpu       The cross context virtual CPU structure.
@@ -1392,8 +1438,8 @@
 static void hmR0SvmLoadSharedDebugState(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
 {
-    if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
-        return;
-    Assert((pCtx->dr[6] & X86_DR6_RA1_MASK) == X86_DR6_RA1_MASK); Assert((pCtx->dr[6] & X86_DR6_RAZ_MASK) == 0);
-    Assert((pCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); Assert((pCtx->dr[7] & X86_DR7_RAZ_MASK) == 0);
+    Assert((pCtx->dr[6] & X86_DR6_RA1_MASK) == X86_DR6_RA1_MASK);
+    Assert((pCtx->dr[6] & X86_DR6_RAZ_MASK) == 0);
+    Assert((pCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
+    Assert((pCtx->dr[7] & X86_DR7_RAZ_MASK) == 0);
 
     bool fInterceptMovDRx = false;
@@ -1536,8 +1582,27 @@
         }
     }
-
-    HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG);
-}
-
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Loads the nested-guest APIC state (currently just the TPR).
+ *
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pVmcbNstGst     Pointer to the nested-guest VM control block.
+ */
+static void hmR0SvmLoadGuestApicStateNested(PVMCPU pVCpu, PSVMVMCB pVmcbNstGst)
+{
+    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE))
+    {
+        /* Always enable V_INTR_MASKING as we do not want to allow access to the physical APIC TPR. */
+        pVmcbNstGst->ctrl.IntCtrl.n.u1VIntrMasking = 1;
+        pVCpu->hm.s.svm.fSyncVTpr = false;
+        pVmcbNstGst->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_TPR;
+
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_SVM_GUEST_APIC_STATE);
+    }
+}
+#endif
 
 /**
@@ -1573,11 +1638,12 @@
         {
             pCtx->msrLSTAR = u8Tpr;
+            uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
 
             /* If there are interrupts pending, intercept LSTAR writes, otherwise don't intercept reads or writes. */
             if (fPendingIntr)
-                hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
+                hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
             else
             {
-                hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+                hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
                 pVCpu->hm.s.svm.fSyncVTpr = true;
             }
@@ -1607,14 +1673,12 @@
 
 /**
- * Loads the exception interrupts required for guest execution in the VMCB.
- *
- * @returns VBox status code.
+ * Loads the exception interrupts required for guest (or nested-guest) execution in
+ * the VMCB.
+ *
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pVmcb       Pointer to the VM control block.
- * @param   pCtx        Pointer to the guest-CPU context.
- */
-static int hmR0SvmLoadGuestXcptIntercepts(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
-{
-    NOREF(pCtx);
+ */
+static void hmR0SvmLoadGuestXcptIntercepts(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
     if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS))
     {
@@ -1634,6 +1698,32 @@
         HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
     }
-    return VINF_SUCCESS;
-}
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Loads the intercepts required for nested-guest execution in the VMCB.
+ *
+ * This merges the guest and nested-guest intercepts in a way that if the outer
+ * guest intercepts an exception we need to intercept it in the nested-guest as
+ * well and handle it accordingly.
+ *
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pVmcb           Pointer to the VM control block.
+ * @param   pVmcbNstGst     Pointer to the nested-guest VM control block.
+ */
+static void hmR0SvmMergeIntercepts(PVMCPU pVCpu, PCSVMVMCB pVmcb, PSVMVMCB pVmcbNstGst)
+{
+    RT_NOREF(pVCpu);
+#if 0
+    pVmcbNstGst->ctrl.u16InterceptRdCRx |= pVmcb->ctrl.u16InterceptRdCRx;
+    pVmcbNstGst->ctrl.u16InterceptWrCRx |= pVmcb->ctrl.u16InterceptWrCRx;
+    pVmcbNstGst->ctrl.u16InterceptRdDRx |= pVmcb->ctrl.u16InterceptRdDRx;
+    pVmcbNstGst->ctrl.u16InterceptWrDRx |= pVmcb->ctrl.u16InterceptWrDRx;
+#endif
+    pVmcbNstGst->ctrl.u32InterceptXcpt  |= pVmcb->ctrl.u32InterceptXcpt;
+    pVmcbNstGst->ctrl.u64InterceptCtrl  |= pVmcb->ctrl.u64InterceptCtrl;
+}
+#endif
 
 
@@ -1643,11 +1733,10 @@
  * @returns VBox status code.
  * @param   pVCpu   The cross context virtual CPU structure.
- * @param   pCtx    Pointer to the guest-CPU context.
  *
  * @remarks No-long-jump zone!!!
  */
-static int hmR0SvmSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pCtx)
-{
-    if (CPUMIsGuestInLongModeEx(pCtx))
+static int hmR0SvmSetupVMRunHandler(PVMCPU pVCpu)
+{
+    if (CPUMIsGuestInLongMode(pVCpu))
     {
 #ifndef VBOX_ENABLE_64_BITS_GUESTS
@@ -1800,5 +1889,5 @@
 static int hmR0SvmLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
 {
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
     AssertMsgReturn(pVmcb, ("Invalid pVmcb\n"), VERR_SVM_INVALID_PVMCB);
 
@@ -1819,8 +1908,7 @@
     AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
 
-    rc = hmR0SvmLoadGuestXcptIntercepts(pVCpu, pVmcb, pCtx);
-    AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestXcptIntercepts! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
-
-    rc = hmR0SvmSetupVMRunHandler(pVCpu, pCtx);
+    hmR0SvmLoadGuestXcptIntercepts(pVCpu, pVmcb);
+
+    rc = hmR0SvmSetupVMRunHandler(pVCpu);
     AssertLogRelMsgRCReturn(rc, ("hmR0SvmSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
 
@@ -1849,6 +1937,69 @@
 
 
-/**
- * Loads the state shared between the host and guest into the
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Loads the nested-guest state into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure.
+ * @param   pCtx        Pointer to the guest-CPU context.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmLoadGuestStateNested(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+    STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x);
+
+    PSVMVMCB pVmcbNstGst = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    Assert(pVmcbNstGst);
+
+    /*
+     * If we just emulated VMRUN, the VMCB is already in-sync with the guest-CPU context.
+     */
+    if (!pVCpu->hm.s.svm.NstGstVmcbCache.fVmrunEmulatedInR0)
+    {
+        hmR0SvmLoadGuestControlRegsNested(pVCpu, pVmcbNstGst, pCtx);
+        hmR0SvmLoadGuestSegmentRegs(pVCpu, pVmcbNstGst, pCtx);
+        hmR0SvmLoadGuestMsrs(pVCpu, pVmcbNstGst, pCtx);
+
+        pVmcbNstGst->guest.u64RIP    = pCtx->rip;
+        pVmcbNstGst->guest.u64RSP    = pCtx->rsp;
+        pVmcbNstGst->guest.u64RFlags = pCtx->eflags.u32;
+        pVmcbNstGst->guest.u64RAX    = pCtx->rax;
+    }
+
+    hmR0SvmLoadGuestApicStateNested(pVCpu, pVmcbNstGst);
+    hmR0SvmLoadGuestXcptIntercepts(pVCpu, pVmcbNstGst);
+
+    int rc = hmR0SvmSetupVMRunHandler(pVCpu);
+    AssertRCReturn(rc, rc);
+
+    /* Clear any unused and reserved bits. */
+    HMCPU_CF_CLEAR(pVCpu,   HM_CHANGED_GUEST_RIP                  /* Unused (loaded unconditionally). */
+                          | HM_CHANGED_GUEST_RSP
+                          | HM_CHANGED_GUEST_RFLAGS
+                          | HM_CHANGED_GUEST_SYSENTER_CS_MSR
+                          | HM_CHANGED_GUEST_SYSENTER_EIP_MSR
+                          | HM_CHANGED_GUEST_SYSENTER_ESP_MSR
+                          | HM_CHANGED_GUEST_LAZY_MSRS            /* Unused. */
+                          | HM_CHANGED_SVM_RESERVED1              /* Reserved. */
+                          | HM_CHANGED_SVM_RESERVED2
+                          | HM_CHANGED_SVM_RESERVED3
+                          | HM_CHANGED_SVM_RESERVED4);
+
+    /* All the guest state bits should be loaded except maybe the host context and/or shared host/guest bits. */
+    AssertMsg(   !HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST)
+              ||  HMCPU_CF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE),
+               ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
+
+    Log4(("Load: CS:RIP=%04x:%RX64 EFL=%#x SS:RSP=%04x:%RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->eflags.u, pCtx->ss.Sel, pCtx->rsp));
+    STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x);
+    return rc;
+}
+#endif
+
+
+/**
+ * Loads the state shared between the host and guest or nested-guest into the
  * VMCB.
  *
@@ -1865,8 +2016,20 @@
 
     if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
+    {
+#ifdef VBOX_WITH_NESTED_HWVIRT
+        /* We use nested-guest CR0 unmodified, hence nothing to do here. */
+        if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+            hmR0SvmLoadSharedCR0(pVCpu, pVmcb, pCtx);
+#else
         hmR0SvmLoadSharedCR0(pVCpu, pVmcb, pCtx);
+#endif
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0);
+    }
 
     if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
+    {
         hmR0SvmLoadSharedDebugState(pVCpu, pVmcb, pCtx);
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG);
+    }
 
     /* Unused on AMD-V. */
@@ -1879,7 +2042,8 @@
 
 /**
- * Saves the entire guest state from the VMCB into the
- * guest-CPU context. Currently there is no residual state left in the CPU that
- * is not updated in the VMCB.
+ * Saves the guest (or nested-guest) state from the VMCB into the guest-CPU context.
+ *
+ * Currently there is no residual state left in the CPU that is not updated in the
+ * VMCB.
  *
  * @returns VBox status code.
@@ -1888,10 +2052,9 @@
  *                          out-of-sync. Make sure to update the required fields
  *                          before using them.
- */
-static void hmR0SvmSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
+ * @param   pVmcb           Pointer to the VM control block.
+ */
+static void hmR0SvmSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PCSVMVMCB pVmcb)
 {
     Assert(VMMRZCallRing3IsEnabled(pVCpu));
-
-    PSVMVMCB pVmcb        = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
 
     pMixedCtx->rip        = pVmcb->guest.u64RIP;
@@ -1912,4 +2075,16 @@
      */
     pMixedCtx->cr2        = pVmcb->guest.u64CR2;
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    /*
+     * The nested hypervisor might not be intercepting these control registers, so sync them here.
+     */
+    if (CPUMIsGuestInSvmNestedHwVirtMode(pMixedCtx))
+    {
+        pMixedCtx->cr3        = pVmcb->guest.u64CR3;
+        pMixedCtx->cr4        = pVmcb->guest.u64CR4;
+        pMixedCtx->cr0        = pVmcb->guest.u64CR0;
+    }
+#endif
 
     /*
@@ -2021,9 +2196,12 @@
      * This is done as the very last step of syncing the guest state, as PGMUpdateCR3() may cause longjmp's to ring-3.
      */
-    if (   pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pMixedCtx) || !pVmcb->ctrl.NestedPaging.n.u1NestedPaging);
+#endif
+    if (   pVmcb->ctrl.NestedPaging.n.u1NestedPaging
         && pMixedCtx->cr3 != pVmcb->guest.u64CR3)
     {
         CPUMSetGuestCR3(pVCpu, pVmcb->guest.u64CR3);
-        PGMUpdateCR3(pVCpu, pVmcb->guest.u64CR3);
+        PGMUpdateCR3(pVCpu,    pVmcb->guest.u64CR3);
     }
 }
@@ -2059,5 +2237,5 @@
     if (CPUMIsHyperDebugStateActive(pVCpu))
     {
-        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         Assert(pVmcb->ctrl.u16InterceptRdDRx == 0xffff);
         Assert(pVmcb->ctrl.u16InterceptWrDRx == 0xffff);
@@ -2238,4 +2416,8 @@
         CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
     }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    pVCpu->hm.s.svm.NstGstVmcbCache.fVmrunEmulatedInR0 = false;
+#endif
 
     /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
@@ -2270,11 +2452,11 @@
  * @param   pVM         The cross context VM structure.
  * @param   pVCpu       The cross context virtual CPU structure.
+ * @param   pVmcb       Pointer to the VM control block.
  *
  * @remarks No-long-jump zone!!!
  */
-static void hmR0SvmUpdateTscOffsetting(PVM pVM, PVMCPU pVCpu)
-{
-    bool     fParavirtTsc;
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+static void hmR0SvmUpdateTscOffsetting(PVM pVM, PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+    bool fParavirtTsc;
     bool fCanUseRealTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &pVmcb->ctrl.u64TSCOffset, &fParavirtTsc);
     if (fCanUseRealTsc)
@@ -2469,7 +2651,8 @@
 
 /**
- * Gets the guest's interrupt-shadow.
- *
- * @returns The guest's interrupt-shadow.
+ * Checks if the guest (or nested-guest) has an interrupt shadow active right
+ * now.
+ *
+ * @returns true if the interrupt shadow is active, false otherwise.
  * @param   pVCpu   The cross context virtual CPU structure.
  * @param   pCtx    Pointer to the guest-CPU context.
@@ -2478,5 +2661,5 @@
  * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag.
  */
-DECLINLINE(uint32_t) hmR0SvmGetGuestIntrShadow(PVMCPU pVCpu, PCPUMCTX pCtx)
+DECLINLINE(bool) hmR0SvmIsIntrShadowActive(PVMCPU pVCpu, PCPUMCTX pCtx)
 {
     /*
@@ -2484,5 +2667,4 @@
      * inhibit interrupts or clear any existing interrupt-inhibition.
      */
-    uint32_t uIntrState = 0;
     if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
     {
@@ -2494,9 +2676,9 @@
              */
             VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
-        }
-        else
-            uIntrState = SVM_INTERRUPT_SHADOW_ACTIVE;
-    }
-    return uIntrState;
+            return false;
+        }
+        return true;
+    }
+    return false;
 }
 
@@ -2578,34 +2760,37 @@
 }
 
-
-/**
- * Evaluates the event to be delivered to the guest and sets it as the pending
- * event.
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Evaluates the event to be delivered to the nested-guest and sets it as the
+ * pending event.
  *
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pCtx        Pointer to the guest-CPU context.
  */
-static void hmR0SvmEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx)
-{
+static void hmR0SvmEvaluatePendingEventNested(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+    Log4Func(("\n"));
+
     Assert(!pVCpu->hm.s.Event.fPending);
-    Log4Func(("\n"));
-
-    bool const fIntShadow = RT_BOOL(hmR0SvmGetGuestIntrShadow(pVCpu, pCtx));
-    bool const fBlockInt  = !(pCtx->eflags.u32 & X86_EFL_IF);
-    bool const fBlockNmi  = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS);
-    PSVMVMCB pVmcb        = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
-
-    if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
-        APICUpdatePendingInterrupts(pVCpu);
+    Assert(pCtx->hwvirt.svm.fGif);
+
+    PSVMVMCB pVmcbNstGst  = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
 
     SVMEVENT Event;
     Event.u = 0;
-                                                              /** @todo SMI. SMIs take priority over NMIs. */
-    if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI))   /* NMI. NMIs take priority over regular interrupts . */
-    {
+    bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu, pCtx);
+
+    /** @todo SMI. SMIs take priority over NMIs. */
+    /*
+     * Check if the nested-guest can receive NMIs.
+     * NMIs are higher priority than regular interrupts.
+     */
+    if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI))
+    {
+        bool const fBlockNmi = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS);
         if (fBlockNmi)
-            hmR0SvmSetIretIntercept(pVmcb);
+            hmR0SvmSetIretIntercept(pVmcbNstGst);
         else if (fIntShadow)
-            hmR0SvmSetVirtIntrIntercept(pVmcb);
+            hmR0SvmSetVirtIntrIntercept(pVmcbNstGst);
         else
         {
@@ -2617,16 +2802,20 @@
 
             hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
-            hmR0SvmSetIretIntercept(pVmcb);
+            hmR0SvmSetIretIntercept(pVmcbNstGst);
             VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
-        }
-    }
-    else if (   VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
-             && !pVCpu->hm.s.fSingleInstruction)
-    {
-        /*
-         * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt() returns
-         * a valid interrupt we -must- deliver the interrupt. We can no longer re-request it from the APIC.
-         */
-        if (   !fBlockInt
+            return;
+        }
+    }
+
+    /*
+     * Check if the nested-guest can receive external interrupts (PIC/APIC).
+     */
+    if (   VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+        && !pVCpu->hm.s.fSingleInstruction)
+    {
+        /* Note: it is critical we call CPUMCanSvmNstGstTakePhysIntr -before- modifying the nested-guest's V_INTR_MASKING
+           bit, currently it gets modified in hmR0SvmLoadGuestApicStateNested. */
+        bool const fIntEnabled = CPUMCanSvmNstGstTakePhysIntr(pCtx);
+        if (    fIntEnabled
             && !fIntShadow)
         {
@@ -2656,4 +2845,102 @@
         }
         else
+            hmR0SvmSetVirtIntrIntercept(pVmcbNstGst);
+    }
+    /*
+     * Check if the nested-guest can receive virtual interrupts.
+     */
+    else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST))
+    {
+        bool const fIntEnabled = CPUMCanSvmNstGstTakeVirtIntr(pCtx);
+        if (fIntEnabled)
+        {
+            uint8_t const u8Interrupt = CPUMGetSvmNstGstInterrupt(pCtx);
+            Log4(("Injecting virtual interrupt u8Interrupt=%#x\n", u8Interrupt));
+
+            Event.n.u1Valid  = 1;
+            Event.n.u8Vector = u8Interrupt;
+            Event.n.u3Type   = SVM_EVENT_EXTERNAL_IRQ;
+
+            VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
+            hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+        }
+    }
+}
+#endif
+
+/**
+ * Evaluates the event to be delivered to the guest and sets it as the pending
+ * event.
+ *
+ * @param   pVCpu       The cross context virtual CPU structure.
+ * @param   pCtx        Pointer to the guest-CPU context.
+ */
+static void hmR0SvmEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+    Assert(!pVCpu->hm.s.Event.fPending);
+    Log4Func(("\n"));
+
+    bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu, pCtx);
+    bool const fBlockInt  = !(pCtx->eflags.u32 & X86_EFL_IF);
+    bool const fBlockNmi  = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS);
+    PSVMVMCB pVmcb        = pVCpu->hm.s.svm.pVmcb;
+
+    SVMEVENT Event;
+    Event.u = 0;
+                                                              /** @todo SMI. SMIs take priority over NMIs. */
+    if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI))   /* NMI. NMIs take priority over regular interrupts. */
+    {
+        if (fBlockNmi)
+            hmR0SvmSetIretIntercept(pVmcb);
+        else if (fIntShadow)
+            hmR0SvmSetVirtIntrIntercept(pVmcb);
+        else
+        {
+            Log4(("Pending NMI\n"));
+
+            Event.n.u1Valid  = 1;
+            Event.n.u8Vector = X86_XCPT_NMI;
+            Event.n.u3Type   = SVM_EVENT_NMI;
+
+            hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+            hmR0SvmSetIretIntercept(pVmcb);
+            VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+        }
+    }
+    else if (   VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+             && !pVCpu->hm.s.fSingleInstruction)
+    {
+        /*
+         * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt() returns
+         * a valid interrupt we -must- deliver the interrupt. We can no longer re-request it from the APIC.
+         */
+        if (   !fBlockInt
+            && !fIntShadow)
+        {
+            uint8_t u8Interrupt;
+            int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+            if (RT_SUCCESS(rc))
+            {
+                Log4(("Injecting external interrupt u8Interrupt=%#x\n", u8Interrupt));
+
+                Event.n.u1Valid  = 1;
+                Event.n.u8Vector = u8Interrupt;
+                Event.n.u3Type   = SVM_EVENT_EXTERNAL_IRQ;
+
+                hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+            }
+            else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+            {
+                /*
+                 * AMD-V has no TPR thresholding feature. We avoid delivering the
+                 * TPR-masked interrupt here; the TPR will be updated
+                 * always via hmR0SvmLoadGuestState() -> hmR0SvmLoadGuestApicState().
+                 */
+                STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+            }
+            else
+                STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+        }
+        else
             hmR0SvmSetVirtIntrIntercept(pVmcb);
     }
@@ -2667,13 +2954,13 @@
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pCtx        Pointer to the guest-CPU context.
- */
-static void hmR0SvmInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx)
+ * @param   pVmcb       Pointer to the VM control block.
+ */
+static void hmR0SvmInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMVMCB pVmcb)
 {
     Assert(!TRPMHasTrap(pVCpu));
     Assert(!VMMRZCallRing3IsEnabled(pVCpu));
 
-    bool const fIntShadow = RT_BOOL(hmR0SvmGetGuestIntrShadow(pVCpu, pCtx));
+    bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu, pCtx);
     bool const fBlockInt  = !(pCtx->eflags.u32 & X86_EFL_IF);
-    PSVMVMCB pVmcb        = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
 
     if (pVCpu->hm.s.Event.fPending)                                /* First, inject any pending HM events. */
@@ -2681,6 +2968,6 @@
         SVMEVENT Event;
         Event.u = pVCpu->hm.s.Event.u64IntInfo;
+
         Assert(Event.n.u1Valid);
-#ifdef VBOX_STRICT
         if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ)
         {
@@ -2690,16 +2977,7 @@
         else if (Event.n.u3Type == SVM_EVENT_NMI)
             Assert(!fIntShadow);
-#endif
-
-#ifndef RT_OS_WINDOWS
-        /* Temporary test for returning guru, later make this function return void as before. */
-        if (   Event.n.u3Type == SVM_EVENT_EXCEPTION
-            && Event.n.u8Vector == X86_XCPT_PF)
-        {
-            AssertRelease(pCtx->cr2 == pVCpu->hm.s.Event.GCPtrFaultAddress);
-        }
-#endif
-
-        Log4(("Injecting pending HM event.\n"));
+        NOREF(fBlockInt);
+
+        Log4(("Injecting pending HM event\n"));
         hmR0SvmInjectEventVmcb(pVCpu, pVmcb, pCtx, &Event);
         pVCpu->hm.s.Event.fPending = false;
@@ -2715,5 +2993,4 @@
     /* Update the guest interrupt shadow in the VMCB. */
     pVmcb->ctrl.u64IntShadow = !!fIntShadow;
-    NOREF(fBlockInt);
 }
 
@@ -2733,5 +3010,5 @@
     NOREF(pCtx);
     HMSVM_ASSERT_PREEMPT_SAFE();
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
 
     if (rcVMRun == VERR_SVM_INVALID_GUEST_STATE)
@@ -2889,4 +3166,8 @@
     Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
 
+    /* Update pending interrupts into the APIC's IRR. */
+    if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+        APICUpdatePendingInterrupts(pVCpu);
+
     if (   VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction
                             ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
@@ -2943,15 +3224,7 @@
 
 
-/**
- * Does the preparations before executing guest code in AMD-V.
- *
- * This may cause longjmps to ring-3 and may even result in rescheduling to the
- * recompiler. We must be cautious what we do here regarding committing
- * guest-state information into the VMCB assuming we assuredly execute the guest
- * in AMD-V. If we fall back to the recompiler after updating the VMCB and
- * clearing the common-state (TRPM/forceflags), we must undo those changes so
- * that the recompiler can (and should) use them when it resumes guest
- * execution. Otherwise such operations must be done when we can no longer
- * exit to ring-3.
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Does the preparations before executing nested-guest code in AMD-V.
  *
  * @returns VBox status code (informational status codes included).
@@ -2963,11 +3236,13 @@
  * @param   pCtx            Pointer to the guest-CPU context.
  * @param   pSvmTransient   Pointer to the SVM transient structure.
- */
-static int hmR0SvmPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+ *
+ * @remarks Same caveats regarding longjumps as hmR0SvmPreRunGuest applies.
+ * @sa      hmR0SvmPreRunGuest.
+ */
+static int hmR0SvmPreRunGuestNested(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
 {
     HMSVM_ASSERT_PREEMPT_SAFE();
 
-#if defined(VBOX_WITH_NESTED_HWVIRT) && defined(VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM)
-    /* Nested Hw. virt through SVM R0 execution is not yet implemented, IEM only, we shouldn't get here. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
     if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
     {
@@ -2985,5 +3260,5 @@
         hmR0SvmTrpmTrapToPendingEvent(pVCpu);
     else if (!pVCpu->hm.s.Event.fPending)
-        hmR0SvmEvaluatePendingEvent(pVCpu, pCtx);
+        hmR0SvmEvaluatePendingEventNested(pVCpu, pCtx);
 
     /*
@@ -2992,31 +3267,21 @@
      * NB: If we could continue a task switch exit we wouldn't need to do this.
      */
-    if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending && (((pVCpu->hm.s.Event.u64IntInfo >> 8) & 7) == SVM_EVENT_NMI)))
-        if (RT_UNLIKELY(!pVM->hm.s.svm.u32Features))
-            return VINF_EM_RAW_INJECT_TRPM_EVENT;
-
-#ifdef HMSVM_SYNC_FULL_GUEST_STATE
-    HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
-#endif
-
-    /* Load the guest bits that are not shared with the host in any way since we can longjmp or get preempted. */
-    rc = hmR0SvmLoadGuestState(pVM, pVCpu, pCtx);
+    if (RT_UNLIKELY(   !pVM->hm.s.svm.u32Features
+                    &&  pVCpu->hm.s.Event.fPending
+                    &&  SVM_EVENT_GET_TYPE(pVCpu->hm.s.Event.u64IntInfo) == SVM_EVENT_NMI))
+    {
+        return VINF_EM_RAW_INJECT_TRPM_EVENT;
+    }
+
+    /*
+     * Load the nested-guest state. We can optimize this later to be avoided when VMRUN is
+     * just emulated in hmR0SvmExecVmrun since the VMCB is already set up by the nested-hypervisor.
+     * We currently do this because we may prematurely return to ring-3 before executing the
+     * nested-guest and doing it here is simpler.
+     */
+    rc = hmR0SvmLoadGuestStateNested(pVCpu, pCtx);
     AssertRCReturn(rc, rc);
+    /** @todo Get new STAM counter for this? */
     STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
-
-    /*
-     * If we're not intercepting TPR changes in the guest, save the guest TPR before the world-switch
-     * so we can update it on the way back if the guest changed the TPR.
-     */
-    if (pVCpu->hm.s.svm.fSyncVTpr)
-    {
-        if (pVM->hm.s.fTPRPatchingActive)
-            pSvmTransient->u8GuestTpr = pCtx->msrLSTAR;
-        else
-        {
-            PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
-            pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR;
-        }
-    }
 
     /*
@@ -3077,10 +3342,22 @@
     return VINF_SUCCESS;
 }
-
-
-/**
- * Prepares to run guest code in AMD-V and we've committed to doing so. This
- * means there is no backing out to ring-3 or anywhere else at this
- * point.
+#endif
+
+
+/**
+ * Does the preparations before executing guest code in AMD-V.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler. We must be cautious what we do here regarding committing
+ * guest-state information into the VMCB assuming we assuredly execute the guest
+ * in AMD-V. If we fall back to the recompiler after updating the VMCB and
+ * clearing the common-state (TRPM/forceflags), we must undo those changes so
+ * that the recompiler can (and should) use them when it resumes guest
+ * execution. Otherwise such operations must be done when we can no longer
+ * exit to ring-3.
+ *
+ * @returns VBox status code (informational status codes included).
+ * @retval VINF_SUCCESS if we can proceed with running the guest.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
  *
  * @param   pVM             The cross context VM structure.
@@ -3088,9 +3365,136 @@
  * @param   pCtx            Pointer to the guest-CPU context.
  * @param   pSvmTransient   Pointer to the SVM transient structure.
+ */
+static int hmR0SvmPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+{
+    HMSVM_ASSERT_PREEMPT_SAFE();
+    Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pCtx));
+
+#if defined(VBOX_WITH_NESTED_HWVIRT) && defined(VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM)
+
+    /* IEM only for executing nested guest, we shouldn't get here. */
+    if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+    {
+        Log2(("hmR0SvmPreRunGuest: Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+        return VINF_EM_RESCHEDULE_REM;
+    }
+#endif
+
+    /* Check force flag actions that might require us to go back to ring-3. */
+    int rc = hmR0SvmCheckForceFlags(pVM, pVCpu, pCtx);
+    if (rc != VINF_SUCCESS)
+        return rc;
+
+    if (TRPMHasTrap(pVCpu))
+        hmR0SvmTrpmTrapToPendingEvent(pVCpu);
+    else if (!pVCpu->hm.s.Event.fPending)
+        hmR0SvmEvaluatePendingEvent(pVCpu, pCtx);
+
+    /*
+     * On the oldest AMD-V systems, we may not get enough information to reinject an NMI.
+     * Just do it in software, see @bugref{8411}.
+     * NB: If we could continue a task switch exit we wouldn't need to do this.
+     */
+    if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending && (((pVCpu->hm.s.Event.u64IntInfo >> 8) & 7) == SVM_EVENT_NMI)))
+        if (RT_UNLIKELY(!pVM->hm.s.svm.u32Features))
+            return VINF_EM_RAW_INJECT_TRPM_EVENT;
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+    HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
+#endif
+
+    /* Load the guest bits that are not shared with the host in any way since we can longjmp or get preempted. */
+    rc = hmR0SvmLoadGuestState(pVM, pVCpu, pCtx);
+    AssertRCReturn(rc, rc);
+    STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
+
+    /*
+     * If we're not intercepting TPR changes in the guest, save the guest TPR before the world-switch
+     * so we can update it on the way back if the guest changed the TPR.
+     */
+    if (pVCpu->hm.s.svm.fSyncVTpr)
+    {
+        if (pVM->hm.s.fTPRPatchingActive)
+            pSvmTransient->u8GuestTpr = pCtx->msrLSTAR;
+        else
+        {
+            PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+            pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR;
+        }
+    }
+
+    /*
+     * No longjmps to ring-3 from this point on!!!
+     * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
+     * This also disables flushing of the R0-logger instance (if any).
+     */
+    VMMRZCallRing3Disable(pVCpu);
+
+    /*
+     * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.)
+     * when thread-context hooks aren't used and we've been running with preemption disabled for a while.
+     *
+     * We need to check for force-flags that could've possible been altered since we last checked them (e.g.
+     * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}).
+     *
+     * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before
+     * executing guest code.
+     */
+    pSvmTransient->fEFlags = ASMIntDisableFlags();
+    if (   VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+        || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+    {
+        ASMSetFlags(pSvmTransient->fEFlags);
+        VMMRZCallRing3Enable(pVCpu);
+        STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+        return VINF_EM_RAW_TO_R3;
+    }
+    if (RTThreadPreemptIsPending(NIL_RTTHREAD))
+    {
+        ASMSetFlags(pSvmTransient->fEFlags);
+        VMMRZCallRing3Enable(pVCpu);
+        STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
+        return VINF_EM_RAW_INTERRUPT;
+    }
+
+    /*
+     * If we are injecting an NMI, we must set VMCPU_FF_BLOCK_NMIS only when we are going to execute
+     * guest code for certain (no exits to ring-3). Otherwise, we could re-read the flag on re-entry into
+     * AMD-V and conclude that NMI inhibition is active when we have not even delivered the NMI.
+     *
+     * With VT-x, this is handled by the Guest interruptibility information VMCS field which will set the
+     * VMCS field after actually delivering the NMI which we read on VM-exit to determine the state.
+     */
+    if (pVCpu->hm.s.Event.fPending)
+    {
+        SVMEVENT Event;
+        Event.u = pVCpu->hm.s.Event.u64IntInfo;
+        if (    Event.n.u1Valid
+            &&  Event.n.u3Type == SVM_EVENT_NMI
+            &&  Event.n.u8Vector == X86_XCPT_NMI
+            && !VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
+        {
+            VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Prepares to run nested-guest code in AMD-V and we've committed to doing so. This
+ * means there is no backing out to ring-3 or anywhere else at this point.
+ *
+ * @param   pVM             The cross context VM structure.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ * @param   pSvmTransient   Pointer to the SVM transient structure.
  *
  * @remarks Called with preemption disabled.
  * @remarks No-long-jump zone!!!
  */
-static void hmR0SvmPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+static void hmR0SvmPreRunGuestCommittedNested(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
 {
     Assert(!VMMRZCallRing3IsEnabled(pVCpu));
@@ -3101,5 +3505,6 @@
     VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);            /* Indicate the start of guest execution. */
 
-    hmR0SvmInjectPendingEvent(pVCpu, pCtx);
+    PSVMVMCB pVmcbNstGst = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    hmR0SvmInjectPendingEvent(pVCpu, pCtx, pVmcbNstGst);
 
     if (   pVCpu->hm.s.fPreloadGuestFpu
@@ -3110,8 +3515,8 @@
     }
 
-    /* Load the state shared between host and guest (FPU, debug). */
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    /* Load the state shared between host and nested-guest (FPU, debug). */
     if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE))
-        hmR0SvmLoadSharedState(pVCpu, pVmcb, pCtx);
+        hmR0SvmLoadSharedState(pVCpu, pVmcbNstGst, pCtx);
+
     HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT);             /* Preemption might set this, nothing to do on AMD-V. */
     AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
@@ -3122,5 +3527,5 @@
         || idCurrentCpu != pVCpu->hm.s.idLastCpu)
     {
-        hmR0SvmUpdateTscOffsetting(pVM, pVCpu);
+        hmR0SvmUpdateTscOffsetting(pVM, pVCpu, pVmcbNstGst);
         pSvmTransient->fUpdateTscOffsetting = false;
     }
@@ -3128,5 +3533,5 @@
     /* If we've migrating CPUs, mark the VMCB Clean bits as dirty. */
     if (idCurrentCpu != pVCpu->hm.s.idLastCpu)
-        pVmcb->ctrl.u64VmcbCleanBits = 0;
+        pVmcbNstGst->ctrl.u64VmcbCleanBits = 0;
 
     /* Store status of the shared guest-host state at the time of VMRUN. */
@@ -3145,7 +3550,6 @@
     pSvmTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu);
 
-    /* Flush the appropriate tagged-TLB entries. */
+    /* The TLB flushing would've already been setup by the nested-hypervisor. */
     ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);    /* Used for TLB flushing, set this across the world switch. */
-    hmR0SvmFlushTaggedTlb(pVCpu);
     Assert(hmR0GetCurrentCpu()->idCpu == pVCpu->hm.s.idLastCpu);
 
@@ -3161,8 +3565,9 @@
      * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc).
      */
+    uint8_t *pbMsrBitmap = (uint8_t *)pCtx->hwvirt.svm.CTX_SUFF(pvMsrBitmap);
     if (    (pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
-        && !(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSCP))
-    {
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        && !(pVmcbNstGst->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSCP))
+    {
+        hmR0SvmSetMsrPermission(pVmcbNstGst, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
         pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
         uint64_t u64GuestTscAux = CPUMR0GetGuestTscAux(pVCpu);
@@ -3173,5 +3578,116 @@
     else
     {
-        hmR0SvmSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE);
+        hmR0SvmSetMsrPermission(pVmcbNstGst, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE);
+        pSvmTransient->fRestoreTscAuxMsr = false;
+    }
+
+    /*
+     * If VMCB Clean bits isn't supported by the CPU or exposed by the guest,
+     * mark all state-bits as dirty indicating to the CPU to re-load from VMCB.
+     */
+    if (   !(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN)
+        || !(pVM->cpum.ro.GuestFeatures.fSvmVmcbClean))
+        pVmcbNstGst->ctrl.u64VmcbCleanBits = 0;
+}
+#endif
+
+
+/**
+ * Prepares to run guest code in AMD-V and we've committed to doing so. This
+ * means there is no backing out to ring-3 or anywhere else at this
+ * point.
+ *
+ * @param   pVM             The cross context VM structure.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ * @param   pSvmTransient   Pointer to the SVM transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+{
+    Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+    Assert(VMMR0IsLogFlushDisabled(pVCpu));
+    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+    VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+    VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);            /* Indicate the start of guest execution. */
+
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+    hmR0SvmInjectPendingEvent(pVCpu, pCtx, pVmcb);
+
+    if (   pVCpu->hm.s.fPreloadGuestFpu
+        && !CPUMIsGuestFPUStateActive(pVCpu))
+    {
+        CPUMR0LoadGuestFPU(pVM, pVCpu); /* (Ignore rc, no need to set HM_CHANGED_HOST_CONTEXT for SVM.) */
+        HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
+    }
+
+    /* Load the state shared between host and guest (FPU, debug). */
+    if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE))
+        hmR0SvmLoadSharedState(pVCpu, pVmcb, pCtx);
+
+    HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT);             /* Preemption might set this, nothing to do on AMD-V. */
+    AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
+
+    /* Setup TSC offsetting. */
+    RTCPUID idCurrentCpu = hmR0GetCurrentCpu()->idCpu;
+    if (   pSvmTransient->fUpdateTscOffsetting
+        || idCurrentCpu != pVCpu->hm.s.idLastCpu)
+    {
+        hmR0SvmUpdateTscOffsetting(pVM, pVCpu, pVmcb);
+        pSvmTransient->fUpdateTscOffsetting = false;
+    }
+
+    /* If we've migrating CPUs, mark the VMCB Clean bits as dirty. */
+    if (idCurrentCpu != pVCpu->hm.s.idLastCpu)
+        pVmcb->ctrl.u64VmcbCleanBits = 0;
+
+    /* Store status of the shared guest-host state at the time of VMRUN. */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+    if (CPUMIsGuestInLongModeEx(pCtx))
+    {
+        pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
+        pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
+    }
+    else
+#endif
+    {
+        pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+        pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+    }
+    pSvmTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu);
+
+    /* Flush the appropriate tagged-TLB entries. */
+    ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);    /* Used for TLB flushing, set this across the world switch. */
+    hmR0SvmFlushTaggedTlb(pVCpu);
+    Assert(hmR0GetCurrentCpu()->idCpu == pVCpu->hm.s.idLastCpu);
+
+    STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+    TMNotifyStartOfExecution(pVCpu);                            /* Finally, notify TM to resume its clocks as we're about
+                                                                   to start executing. */
+
+    /*
+     * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
+     * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}.
+     *
+     * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc).
+     */
+    uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+    if (    (pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
+        && !(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSCP))
+    {
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+        pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
+        uint64_t u64GuestTscAux = CPUMR0GetGuestTscAux(pVCpu);
+        if (u64GuestTscAux != pVCpu->hm.s.u64HostTscAux)
+            ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux);
+        pSvmTransient->fRestoreTscAuxMsr = true;
+    }
+    else
+    {
+        hmR0SvmSetMsrPermission(pVmcb, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE);
         pSvmTransient->fRestoreTscAuxMsr = false;
     }
@@ -3209,4 +3725,72 @@
 
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Performs some essential restoration of state after running nested-guest code in
+ * AMD-V.
+ *
+ * @param   pVM             The cross context VM structure.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pMixedCtx       Pointer to the nested-guest-CPU context. The data may be
+ *                          out-of-sync. Make sure to update the required fields
+ *                          before using them.
+ * @param   pSvmTransient   Pointer to the SVM transient structure.
+ * @param   rcVMRun         Return code of VMRUN.
+ *
+ * @remarks Called with interrupts disabled.
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ *          unconditionally when it is safe to do so.
+ */
+static void hmR0SvmPostRunGuestNested(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient, int rcVMRun)
+{
+    RT_NOREF(pVM);
+    Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+    ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);   /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+    ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);            /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+
+    /* TSC read must be done early for maximum accuracy. */
+    PSVMVMCB     pVmcbNstGst     = pMixedCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+    if (!(pVmcbNstGstCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSC))
+        TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVmcbNstGstCtrl->u64TSCOffset);
+
+    /* Restore the host's TSC_AUX MSR if we passed through the guest's value (set up
+       by the pre-run committed code when RDTSCP isn't intercepted). */
+    if (pSvmTransient->fRestoreTscAuxMsr)
+    {
+        uint64_t u64GuestTscAuxMsr = ASMRdMsr(MSR_K8_TSC_AUX);
+        CPUMR0SetGuestTscAux(pVCpu, u64GuestTscAuxMsr);
+        if (u64GuestTscAuxMsr != pVCpu->hm.s.u64HostTscAux)
+            ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
+    }
+
+    STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
+    TMNotifyEndOfExecution(pVCpu);                              /* Notify TM that the guest is no longer running. */
+    VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+    Assert(!(ASMGetFlags() & X86_EFL_IF));
+    ASMSetFlags(pSvmTransient->fEFlags);                        /* Enable interrupts. */
+    VMMRZCallRing3Enable(pVCpu);                                /* It is now safe to do longjmps to ring-3!!! */
+
+    /* Mark the VMCB-state cache as unmodified by VMM. */
+    pVmcbNstGstCtrl->u64VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL;
+
+    /* If VMRUN failed, we can bail out early. This does -not- cover SVM_EXIT_INVALID. */
+    if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
+    {
+        Log4(("VMRUN failure: rcVMRun=%Rrc\n", rcVMRun));
+        return;
+    }
+
+    pSvmTransient->u64ExitCode  = pVmcbNstGstCtrl->u64ExitCode; /* Save the #VMEXIT reason. */
+    HMCPU_EXIT_HISTORY_ADD(pVCpu, pVmcbNstGstCtrl->u64ExitCode);/* Update the #VMEXIT history array. */
+    pSvmTransient->fVectoringDoublePF = false;                  /* Vectoring double page-fault needs to be determined later. */
+    pSvmTransient->fVectoringPF       = false;                  /* Vectoring page-fault needs to be determined later. */
+
+    /* TPR syncing is not used for nested-guests here, hence the assertion. */
+    Assert(!pVCpu->hm.s.svm.fSyncVTpr);
+    hmR0SvmSaveGuestState(pVCpu, pMixedCtx, pVmcbNstGst);       /* Save the nested-guest state from the VMCB to the
+                                                                   guest-CPU context. */
+}
+#endif
+
 /**
  * Performs some essential restoration of state after running guest code in
@@ -3232,5 +3816,5 @@
     ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);            /* Initialized in vmR3CreateUVM(): used for EMT poking. */
 
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb =pVCpu->hm.s.svm.pVmcb;
     pVmcb->ctrl.u64VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL;        /* Mark the VMCB-state cache as unmodified by VMM. */
 
@@ -3267,5 +3851,5 @@
     pSvmTransient->fVectoringPF = false;                        /* Vectoring page-fault needs to be determined later. */
 
-    hmR0SvmSaveGuestState(pVCpu, pMixedCtx);                    /* Save the guest state from the VMCB to the guest-CPU context. */
+    hmR0SvmSaveGuestState(pVCpu, pMixedCtx, pVmcb);             /* Save the guest state from the VMCB to the guest-CPU context. */
 
     if (RT_LIKELY(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID))
@@ -3299,13 +3883,17 @@
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pCtx        Pointer to the guest-CPU context.
- */
-static int hmR0SvmRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
-{
+ * @param   pcLoops     Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t *pcLoops)
+{
+    uint32_t const cMaxResumeLoops = pVM->hm.s.cMaxResumeLoops;
+    Assert(pcLoops);
+    Assert(*pcLoops <= cMaxResumeLoops);
+
     SVMTRANSIENT SvmTransient;
     SvmTransient.fUpdateTscOffsetting = true;
-    uint32_t cLoops = 0;
-    int      rc     = VERR_INTERNAL_ERROR_5;
-
-    for (;; cLoops++)
+
+    int rc = VERR_INTERNAL_ERROR_5;
+    for (;;)
     {
         Assert(!HMR0SuspendPending());
@@ -3344,10 +3932,10 @@
         HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
         STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
-        VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb);
+        VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
         rc = hmR0SvmHandleExit(pVCpu, pCtx, &SvmTransient);
         STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
         if (rc != VINF_SUCCESS)
             break;
-        if (cLoops > pVM->hm.s.cMaxResumeLoops)
+        if (++(*pcLoops) >= cMaxResumeLoops)
         {
             STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
@@ -3369,20 +3957,25 @@
  * @param   pVCpu       The cross context virtual CPU structure.
  * @param   pCtx        Pointer to the guest-CPU context.
- */
-static int hmR0SvmRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
-{
+ * @param   pcLoops     Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t *pcLoops)
+{
+    uint32_t const cMaxResumeLoops = pVM->hm.s.cMaxResumeLoops;
+    Assert(pcLoops);
+    Assert(*pcLoops <= cMaxResumeLoops);
+
     SVMTRANSIENT SvmTransient;
     SvmTransient.fUpdateTscOffsetting = true;
-    uint32_t cLoops  = 0;
-    int      rc      = VERR_INTERNAL_ERROR_5;
+
     uint16_t uCsStart  = pCtx->cs.Sel;
     uint64_t uRipStart = pCtx->rip;
 
-    for (;; cLoops++)
+    int rc = VERR_INTERNAL_ERROR_5;
+    for (;;)
     {
         Assert(!HMR0SuspendPending());
         AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
                   ("Illegal migration! Entered on CPU %u Current %u cLoops=%u\n", (unsigned)pVCpu->hm.s.idEnteredCpu,
-                  (unsigned)RTMpCpuId(), cLoops));
+                  (unsigned)RTMpCpuId(), *pcLoops));
 
         /* Preparatory work for running guest code, this may force us to return
@@ -3422,10 +4015,10 @@
         HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
         STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
-        VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb);
+        VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
         rc = hmR0SvmHandleExit(pVCpu, pCtx, &SvmTransient);
         STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
         if (rc != VINF_SUCCESS)
             break;
-        if (cLoops > pVM->hm.s.cMaxResumeLoops)
+        if (++(*pcLoops) >= cMaxResumeLoops)
         {
             STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
@@ -3460,4 +4053,83 @@
 }
 
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Runs the nested-guest code using AMD-V.
+ *
+ * @returns VBox status code.
+ * @param   pVM         The cross context VM structure.
+ * @param   pVCpu       The cross context virtual CPU structure.
+ * @param   pCtx        Pointer to the guest-CPU context.
+ * @param   pcLoops     Pointer to the number of executed loops. If we're switching
+ *                      from the guest-code execution loop to this nested-guest
+ *                      execution loop pass the remainder value, else pass 0.
+ */
+static int hmR0SvmRunGuestCodeNested(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t *pcLoops)
+{
+    Assert(CPUMIsGuestInSvmNestedHwVirtMode(pCtx));
+    Assert(pcLoops);
+    Assert(*pcLoops <= pVM->hm.s.cMaxResumeLoops);
+
+    SVMTRANSIENT SvmTransient;
+    SvmTransient.fUpdateTscOffsetting = true;
+
+    int rc = VERR_INTERNAL_ERROR_4;
+    for (;;)
+    {
+        Assert(!HMR0SuspendPending());
+        HMSVM_ASSERT_CPU_SAFE();
+
+        /* Preparatory work for running nested-guest code, this may force us to return
+           to ring-3.  This bugger disables interrupts on VINF_SUCCESS! */
+        STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+        rc = hmR0SvmPreRunGuestNested(pVM, pVCpu, pCtx, &SvmTransient);
+        if (rc != VINF_SUCCESS)
+            break;
+
+        /*
+         * No longjmps to ring-3 from this point on!!!
+         * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
+         * This also disables flushing of the R0-logger instance (if any).
+         */
+        hmR0SvmPreRunGuestCommittedNested(pVM, pVCpu, pCtx, &SvmTransient);
+        rc = hmR0SvmRunGuest(pVM, pVCpu, pCtx);
+
+        /* Restore any residual host-state and save any bits shared between host
+           and guest into the guest-CPU state.  Re-enables interrupts! */
+        hmR0SvmPostRunGuestNested(pVM, pVCpu, pCtx, &SvmTransient, rc);
+
+        if (RT_UNLIKELY(   rc != VINF_SUCCESS                               /* Check for VMRUN errors. */
+                        || SvmTransient.u64ExitCode == SVM_EXIT_INVALID))   /* Check for invalid guest-state errors. */
+        {
+            if (rc == VINF_SUCCESS)
+                rc = VERR_SVM_INVALID_GUEST_STATE;
+            STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
+            hmR0SvmReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
+            break;
+        }
+
+        /* Handle the #VMEXIT. */
+        HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+        STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
+        VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
+        rc = hmR0SvmHandleExitNested(pVCpu, pCtx, &SvmTransient);
+        STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
+        if (rc != VINF_SUCCESS)
+            break;
+        /* The loop budget is shared with the outer guest-execution loop via pcLoops,
+           so a guest that keeps VMRUN-ing cannot starve ring-3. */
+        if (++(*pcLoops) >= pVM->hm.s.cMaxResumeLoops)
+        {
+            STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+            rc = VINF_EM_RAW_INTERRUPT;
+            break;
+        }
+
+        /** @todo handle single-stepping   */
+    }
+
+    STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+    return rc;
+}
+#endif
+
 
 /**
@@ -3475,9 +4147,26 @@
     VMMRZCallRing3SetNotification(pVCpu, hmR0SvmCallRing3Callback, pCtx);
 
-    int rc;
-    if (!pVCpu->hm.s.fSingleInstruction)
-        rc = hmR0SvmRunGuestCodeNormal(pVM, pVCpu, pCtx);
+    uint32_t cLoops = 0;
+    int      rc;
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+#endif
+    {
+        if (!pVCpu->hm.s.fSingleInstruction)
+            rc = hmR0SvmRunGuestCodeNormal(pVM, pVCpu, pCtx, &cLoops);
+        else
+            rc = hmR0SvmRunGuestCodeStep(pVM, pVCpu, pCtx, &cLoops);
+    }
+#ifdef VBOX_WITH_NESTED_HWVIRT
     else
-        rc = hmR0SvmRunGuestCodeStep(pVM, pVCpu, pCtx);
+    {
+        rc = VINF_SVM_VMRUN;
+    }
+
+    /* Re-check the nested-guest condition here as we may be transitioning from the normal
+       execution loop into the nested-guest. */
+    if (rc == VINF_SVM_VMRUN)
+        rc = hmR0SvmRunGuestCodeNested(pVM, pVCpu, pCtx, &cLoops);
+#endif
 
     if (rc == VERR_EM_INTERPRETER)
@@ -3493,6 +4182,8 @@
 
 
-/**
- * Handles a \#VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID).
+#ifdef VBOX_WITH_NESTED_HWVIRT
+/**
+ * Handles a nested-guest \#VMEXIT (for all EXITCODE values except
+ * SVM_EXIT_INVALID).
  *
  * @returns VBox status code (informational status codes included).
@@ -3501,24 +4192,46 @@
  * @param   pSvmTransient   Pointer to the SVM transient structure.
  */
-DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+static int hmR0SvmHandleExitNested(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
 {
     Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
     Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
 
-    /*
-     * The ordering of the case labels is based on most-frequently-occurring #VMEXITs for most guests under
-     * normal workloads (for some definition of "normal").
-     */
-    uint32_t u32ExitCode = pSvmTransient->u64ExitCode;
+    PSVMVMCB            pVmcbNstGst      = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    PSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
     switch (pSvmTransient->u64ExitCode)
     {
-        case SVM_EXIT_NPF:
-            return hmR0SvmExitNestedPF(pVCpu, pCtx, pSvmTransient);
+        //case SVM_EXIT_NPF:
+        {
+            /** @todo. */
+            break;
+        }
 
         case SVM_EXIT_IOIO:
+        {
+            /*
+             * Figure out if the IO port access is intercepted by the nested-guest. If not,
+             * we pass it to the outer guest.
+             */
+            if (pVmcbNstGstCache->u64InterceptCtrl & SVM_CTRL_INTERCEPT_IOIO_PROT)
+            {
+                void *pvIoBitmap = pCtx->hwvirt.svm.CTX_SUFF(pvIoBitmap);
+                SVMIOIOEXITINFO IoExitInfo;
+                IoExitInfo.u = (uint32_t)pVmcbNstGst->ctrl.u64ExitInfo1;
+                bool const fIntercept = HMSvmIsIOInterceptActive(pvIoBitmap, IoExitInfo.n.u16Port,
+                                                                 (SVMIOIOTYPE)IoExitInfo.n.u1Type,
+                                                                 (IoExitInfo.u >> SVM_IOIO_OP_SIZE_SHIFT) & 7,
+                                                                 (IoExitInfo.u >> SVM_IOIO_ADDR_SIZE_SHIFT) << 4,
+                                                                 IoExitInfo.n.u3SEG, IoExitInfo.n.u1REP, IoExitInfo.n.u1STR,
+                                                                 NULL /* pIoExitInfo */);
+                if (fIntercept)
+                    return hmR0SvmExecVmexit(pVCpu, pCtx);
+            }
             return hmR0SvmExitIOInstr(pVCpu, pCtx, pSvmTransient);
+        }
 
         case SVM_EXIT_RDTSC:
+        {
             return hmR0SvmExitRdtsc(pVCpu, pCtx, pSvmTransient);
+        }
 
         case SVM_EXIT_RDTSCP:
@@ -3686,5 +4399,255 @@
                      *        HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY here! */
 
-                    PSVMVMCB pVmcb   = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+                    PSVMVMCB pVmcb   = pVCpu->hm.s.svm.pVmcb;
+                    SVMEVENT Event;
+                    Event.u          = 0;
+                    Event.n.u1Valid  = 1;
+                    Event.n.u3Type   = SVM_EVENT_EXCEPTION;
+                    Event.n.u8Vector = pSvmTransient->u64ExitCode - SVM_EXIT_EXCEPTION_0;
+
+                    switch (Event.n.u8Vector)
+                    {
+                        case X86_XCPT_DE:
+                            STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
+                            break;
+
+                        case X86_XCPT_NP:
+                            Event.n.u1ErrorCodeValid    = 1;
+                            Event.n.u32ErrorCode        = pVmcb->ctrl.u64ExitInfo1;
+                            STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
+                            break;
+
+                        case X86_XCPT_SS:
+                            Event.n.u1ErrorCodeValid    = 1;
+                            Event.n.u32ErrorCode        = pVmcb->ctrl.u64ExitInfo1;
+                            STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
+                            break;
+
+                        case X86_XCPT_GP:
+                            Event.n.u1ErrorCodeValid    = 1;
+                            Event.n.u32ErrorCode        = pVmcb->ctrl.u64ExitInfo1;
+                            STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
+                            break;
+
+                        default:
+                            AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit caused by exception %#x\n", Event.n.u8Vector));
+                            pVCpu->hm.s.u32HMError = Event.n.u8Vector;
+                            return VERR_SVM_UNEXPECTED_XCPT_EXIT;
+                    }
+
+                    Log4(("#Xcpt: Vector=%#x at CS:RIP=%04x:%RGv\n", Event.n.u8Vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
+                    hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+                    return VINF_SUCCESS;
+                }
+#endif  /* HMSVM_ALWAYS_TRAP_ALL_XCPTS */
+
+                default:
+                {
+                    AssertMsgFailed(("hmR0SvmHandleExit: Unknown exit code %#x\n", pSvmTransient->u64ExitCode));
+                    pVCpu->hm.s.u32HMError = pSvmTransient->u64ExitCode;
+                    return VERR_SVM_UNKNOWN_EXIT;
+                }
+            }
+        }
+    }
+    /* not reached */
+}
+#endif
+
+
+/**
+ * Handles a guest \#VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID).
+ *
+ * @returns VBox status code (informational status codes included).
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ * @param   pSvmTransient   Pointer to the SVM transient structure.
+ */
+static int hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
+{
+    Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
+    Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
+
+    /*
+     * The ordering of the case labels is based on most-frequently-occurring #VMEXITs for most guests under
+     * normal workloads (for some definition of "normal").
+     */
+    uint32_t u32ExitCode = pSvmTransient->u64ExitCode;
+    switch (pSvmTransient->u64ExitCode)
+    {
+        case SVM_EXIT_NPF:
+            return hmR0SvmExitNestedPF(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_IOIO:
+            return hmR0SvmExitIOInstr(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_RDTSC:
+            return hmR0SvmExitRdtsc(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_RDTSCP:
+            return hmR0SvmExitRdtscp(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_CPUID:
+            return hmR0SvmExitCpuid(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_14:  /* X86_XCPT_PF */
+            return hmR0SvmExitXcptPF(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_7:   /* X86_XCPT_NM */
+            return hmR0SvmExitXcptNM(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_6:   /* X86_XCPT_UD */
+            return hmR0SvmExitXcptUD(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_16:  /* X86_XCPT_MF */
+            return hmR0SvmExitXcptMF(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_1:   /* X86_XCPT_DB */
+            return hmR0SvmExitXcptDB(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_17:  /* X86_XCPT_AC */
+            return hmR0SvmExitXcptAC(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_EXCEPTION_3:   /* X86_XCPT_BP */
+            return hmR0SvmExitXcptBP(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_MONITOR:
+            return hmR0SvmExitMonitor(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_MWAIT:
+            return hmR0SvmExitMwait(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_HLT:
+            return hmR0SvmExitHlt(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_READ_CR0:
+        case SVM_EXIT_READ_CR3:
+        case SVM_EXIT_READ_CR4:
+            return hmR0SvmExitReadCRx(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_WRITE_CR0:
+        case SVM_EXIT_WRITE_CR3:
+        case SVM_EXIT_WRITE_CR4:
+        case SVM_EXIT_WRITE_CR8:
+            return hmR0SvmExitWriteCRx(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_PAUSE:
+            return hmR0SvmExitPause(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_VMMCALL:
+            return hmR0SvmExitVmmCall(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_VINTR:
+            return hmR0SvmExitVIntr(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_INTR:
+        case SVM_EXIT_FERR_FREEZE:
+        case SVM_EXIT_NMI:
+            return hmR0SvmExitIntr(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_MSR:
+            return hmR0SvmExitMsr(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_INVLPG:
+            return hmR0SvmExitInvlpg(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_WBINVD:
+            return hmR0SvmExitWbinvd(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_INVD:
+            return hmR0SvmExitInvd(pVCpu, pCtx, pSvmTransient);
+
+        case SVM_EXIT_RDPMC:
+            return hmR0SvmExitRdpmc(pVCpu, pCtx, pSvmTransient);
+
+        default:
+        {
+            switch (pSvmTransient->u64ExitCode)
+            {
+                case SVM_EXIT_READ_DR0:     case SVM_EXIT_READ_DR1:     case SVM_EXIT_READ_DR2:     case SVM_EXIT_READ_DR3:
+                case SVM_EXIT_READ_DR6:     case SVM_EXIT_READ_DR7:     case SVM_EXIT_READ_DR8:     case SVM_EXIT_READ_DR9:
+                case SVM_EXIT_READ_DR10:    case SVM_EXIT_READ_DR11:    case SVM_EXIT_READ_DR12:    case SVM_EXIT_READ_DR13:
+                case SVM_EXIT_READ_DR14:    case SVM_EXIT_READ_DR15:
+                    return hmR0SvmExitReadDRx(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_WRITE_DR0:    case SVM_EXIT_WRITE_DR1:    case SVM_EXIT_WRITE_DR2:    case SVM_EXIT_WRITE_DR3:
+                case SVM_EXIT_WRITE_DR6:    case SVM_EXIT_WRITE_DR7:    case SVM_EXIT_WRITE_DR8:    case SVM_EXIT_WRITE_DR9:
+                case SVM_EXIT_WRITE_DR10:   case SVM_EXIT_WRITE_DR11:   case SVM_EXIT_WRITE_DR12:   case SVM_EXIT_WRITE_DR13:
+                case SVM_EXIT_WRITE_DR14:   case SVM_EXIT_WRITE_DR15:
+                    return hmR0SvmExitWriteDRx(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_XSETBV:
+                    return hmR0SvmExitXsetbv(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_TASK_SWITCH:
+                    return hmR0SvmExitTaskSwitch(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_IRET:
+                    return hmR0SvmExitIret(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_SHUTDOWN:
+                    return hmR0SvmExitShutdown(pVCpu, pCtx, pSvmTransient);
+
+                case SVM_EXIT_SMI:
+                case SVM_EXIT_INIT:
+                {
+                    /*
+                     * We don't intercept SMIs. As for INIT signals, it really shouldn't ever happen here. If it ever does,
+                     * we want to know about it so log the exit code and bail.
+                     */
+                    AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit %#RX32\n", (uint32_t)pSvmTransient->u64ExitCode));
+                    pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode;
+                    return VERR_SVM_UNEXPECTED_EXIT;
+                }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT
+                case SVM_EXIT_CLGI:     return hmR0SvmExitClgi(pVCpu, pCtx, pSvmTransient);
+                case SVM_EXIT_STGI:     return hmR0SvmExitStgi(pVCpu, pCtx, pSvmTransient);
+                case SVM_EXIT_VMLOAD:   return hmR0SvmExitVmload(pVCpu, pCtx, pSvmTransient);
+                case SVM_EXIT_VMSAVE:   return hmR0SvmExitVmsave(pVCpu, pCtx, pSvmTransient);
+                case SVM_EXIT_INVLPGA:  return hmR0SvmExitInvlpga(pVCpu, pCtx, pSvmTransient);
+                case SVM_EXIT_VMRUN:    return hmR0SvmExitVmrun(pVCpu, pCtx, pSvmTransient);
+#else
+                case SVM_EXIT_CLGI:
+                case SVM_EXIT_STGI:
+                case SVM_EXIT_VMLOAD:
+                case SVM_EXIT_VMSAVE:
+                case SVM_EXIT_INVLPGA:
+                case SVM_EXIT_VMRUN:
+#endif
+                case SVM_EXIT_RSM:
+                case SVM_EXIT_SKINIT:
+                    return hmR0SvmExitSetPendingXcptUD(pVCpu, pCtx, pSvmTransient);
+
+#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+                case SVM_EXIT_EXCEPTION_0:             /* X86_XCPT_DE */
+                /*   SVM_EXIT_EXCEPTION_1: */          /* X86_XCPT_DB - Handled above. */
+                case SVM_EXIT_EXCEPTION_2:             /* X86_XCPT_NMI */
+                /*   SVM_EXIT_EXCEPTION_3: */          /* X86_XCPT_BP - Handled above. */
+                case SVM_EXIT_EXCEPTION_4:             /* X86_XCPT_OF */
+                case SVM_EXIT_EXCEPTION_5:             /* X86_XCPT_BR */
+                /*   SVM_EXIT_EXCEPTION_6: */          /* X86_XCPT_UD - Handled above. */
+                /*   SVM_EXIT_EXCEPTION_7: */          /* X86_XCPT_NM - Handled above. */
+                case SVM_EXIT_EXCEPTION_8:             /* X86_XCPT_DF */
+                case SVM_EXIT_EXCEPTION_9:             /* X86_XCPT_CO_SEG_OVERRUN */
+                case SVM_EXIT_EXCEPTION_10:            /* X86_XCPT_TS */
+                case SVM_EXIT_EXCEPTION_11:            /* X86_XCPT_NP */
+                case SVM_EXIT_EXCEPTION_12:            /* X86_XCPT_SS */
+                case SVM_EXIT_EXCEPTION_13:            /* X86_XCPT_GP */
+                /*   SVM_EXIT_EXCEPTION_14: */         /* X86_XCPT_PF - Handled above. */
+                case SVM_EXIT_EXCEPTION_15:            /* Reserved. */
+                /*   SVM_EXIT_EXCEPTION_16: */         /* X86_XCPT_MF - Handled above. */
+                /*   SVM_EXIT_EXCEPTION_17: */         /* X86_XCPT_AC - Handled above. */
+                case SVM_EXIT_EXCEPTION_18:            /* X86_XCPT_MC */
+                case SVM_EXIT_EXCEPTION_19:            /* X86_XCPT_XF */
+                case SVM_EXIT_EXCEPTION_20: case SVM_EXIT_EXCEPTION_21: case SVM_EXIT_EXCEPTION_22:
+                case SVM_EXIT_EXCEPTION_23: case SVM_EXIT_EXCEPTION_24: case SVM_EXIT_EXCEPTION_25:
+                case SVM_EXIT_EXCEPTION_26: case SVM_EXIT_EXCEPTION_27: case SVM_EXIT_EXCEPTION_28:
+                case SVM_EXIT_EXCEPTION_29: case SVM_EXIT_EXCEPTION_30: case SVM_EXIT_EXCEPTION_31:
+                {
+                    /** @todo r=ramshankar; We should be doing
+                     *        HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY here! */
+
+                    PSVMVMCB pVmcb   = pVCpu->hm.s.svm.pVmcb;
                     SVMEVENT Event;
                     Event.u          = 0;
@@ -4061,5 +5024,5 @@
 {
     int rc = VINF_SUCCESS;
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
 
     Log4(("EXITINTINFO: Pending vectoring event %#RX64 Valid=%RTbool ErrValid=%RTbool Err=%#RX32 Type=%u Vector=%u\n",
@@ -4358,5 +5321,5 @@
     if (pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
     {
-        PCSVMVMCB pVmcb = (PCSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         Assert(pVmcb->ctrl.u64NextRIP);
         AssertRelease(pVmcb->ctrl.u64NextRIP - pCtx->rip == cb);    /* temporary, remove later */
@@ -4369,5 +5332,5 @@
 }
 
-/* Currently only used by nested hw.virt instructions, so ifdef'd as such, otherwise compilers start whining. */
+
 #ifdef VBOX_WITH_NESTED_HWVIRT
 /**
@@ -4384,5 +5347,5 @@
     if (pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
     {
-        PCSVMVMCB pVmcb = (PCSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pCtx->rip;
         Assert(cbInstr == cbLikely);
@@ -4393,4 +5356,5 @@
 #endif
 
+
 /**
  * Advances the guest RIP by the number of bytes specified in @a cb. This does
@@ -4407,4 +5371,379 @@
 }
 #undef HMSVM_UPDATE_INTR_SHADOW
+
+
+#if defined(VBOX_WITH_NESTED_HWVIRT) && !defined(VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM)
+/**
+ * Merges the guest MSR permission bitmap into the nested-guest MSR permission
+ * bitmap.
+ *
+ * @param   pVCpu               The cross context virtual CPU structure.
+ * @param   pvMsrBitmap         Pointer to the guest MSRPM bitmap.
+ * @param   pvNstGstMsrBitmap   Pointer to the nested-guest MSRPM bitmap.
+ */
+static void hmR0SvmMergeMsrpmBitmap(PVMCPU pVCpu, const void *pvMsrBitmap, void *pvNstGstMsrBitmap)
+{
+    RT_NOREF(pVCpu);
+    uint64_t const *puChunk       = (uint64_t const *)pvMsrBitmap;      /* Const-correct: the guest bitmap is only read here. */
+    uint64_t       *puNstGstChunk = (uint64_t *)pvNstGstMsrBitmap;
+    uint32_t const cbChunks       = SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT;
+    uint32_t const cChunks        = cbChunks / sizeof(*puChunk);
+    Assert(cbChunks % sizeof(*puChunk) == 0);
+
+    for (uint32_t idxChunk = 0, offChunk = 0;
+          idxChunk < cChunks;
+          idxChunk++, offChunk += sizeof(*puChunk))
+    {
+        /* Leave reserved offsets (1800h+) untouched (as all bits set, see SVMR0InitVM). */
+        if (offChunk >= 0x1800)
+            break;
+        puNstGstChunk[idxChunk] |= puChunk[idxChunk];                   /* Set bit == intercept; OR-merge so guest intercepts are preserved. */
+    }
+}
+
+
+/**
+ * Performs a \#VMEXIT that happens during VMRUN emulation in hmR0SvmExecVmrun.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ */
+static int hmR0SvmExecVmexit(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+    /*
+     * Disable the global interrupt flag to not cause any interrupts or NMIs
+     * in the guest.
+     */
+    pCtx->hwvirt.svm.fGif = 0;              /* GIF stays clear until the guest's #VMEXIT handler runs STGI. */
+
+    /*
+     * Restore the guest's "host" state.
+     */
+    CPUMSvmVmExitRestoreHostState(pCtx);
+
+    /*
+     * Restore the guest's force-flags.
+     */
+    if (pCtx->hwvirt.fLocalForcedActions)
+    {
+        VMCPU_FF_SET(pVCpu, pCtx->hwvirt.fLocalForcedActions);
+        pCtx->hwvirt.fLocalForcedActions = 0;
+    }
+
+    /*
+     * Restore the modifications we did to the nested-guest VMCB in order
+     * to execute the nested-guest in SVM R0.
+     */
+    PSVMVMCB pVmcbNstGst = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    HMSvmNstGstVmExitNotify(pVCpu, pVmcbNstGst);
+
+    /*
+     * Write the nested-guest VMCB back to nested-guest memory.
+     */
+    RTGCPHYS const GCPhysVmcb = pCtx->hwvirt.svm.GCPhysVmcb;
+    int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), GCPhysVmcb, pVmcbNstGst, sizeof(*pVmcbNstGst));
+
+    /*
+     * Clear our cache of the nested-guest VMCB controls.
+     */
+    PSVMVMCBCTRL pVmcbCtrl = &pVmcbNstGst->ctrl;
+    memset(pVmcbCtrl, 0, sizeof(*pVmcbCtrl));   /* Deliberately cleared even if the write-back failed: we leave nested mode either way. */
+    Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pCtx));
+
+    if (RT_SUCCESS(rc))
+        return VINF_SVM_VMEXIT;                 /* Informational status: caller resumes executing the outer guest. */
+
+    Log(("hmR0SvmExecVmexit: Failed to write guest-VMCB at %#RGp\n", GCPhysVmcb));
+    return rc;
+}
+
+
+/**
+ * Caches the nested-guest VMCB fields before we modify them for executing the
+ * nested-guest under SVM R0.
+ *
+ * @param   pCtx            Pointer to the guest-CPU context.
+ */
+static void hmR0SvmVmRunCacheVmcb(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+    PSVMVMCB            pVmcbNstGst      = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    PSVMVMCBCTRL        pVmcbNstGstCtrl  = &pVmcbNstGst->ctrl;
+    PSVMNESTEDVMCBCACHE pNstGstVmcbCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+
+    pNstGstVmcbCache->u16InterceptRdCRx = pVmcbNstGstCtrl->u16InterceptRdCRx;
+    pNstGstVmcbCache->u16InterceptWrCRx = pVmcbNstGstCtrl->u16InterceptWrCRx;
+    pNstGstVmcbCache->u16InterceptRdDRx = pVmcbNstGstCtrl->u16InterceptRdDRx;
+    pNstGstVmcbCache->u16InterceptWrDRx = pVmcbNstGstCtrl->u16InterceptWrDRx; /* Fix: was assigned to u16InterceptWrCRx, clobbering the CRx write intercepts cached above and leaving WrDRx uncached (corrupting the VMCB on restore). */
+    pNstGstVmcbCache->u32InterceptXcpt  = pVmcbNstGstCtrl->u32InterceptXcpt;
+    pNstGstVmcbCache->u64InterceptCtrl  = pVmcbNstGstCtrl->u64InterceptCtrl;
+    pNstGstVmcbCache->u64IOPMPhysAddr   = pVmcbNstGstCtrl->u64IOPMPhysAddr;
+    pNstGstVmcbCache->u64MSRPMPhysAddr  = pVmcbNstGstCtrl->u64MSRPMPhysAddr;
+    pNstGstVmcbCache->u64VmcbCleanBits  = pVmcbNstGstCtrl->u64VmcbCleanBits;
+    pNstGstVmcbCache->fVIntrMasking     = pVmcbNstGstCtrl->IntCtrl.n.u1VIntrMasking;
+    pNstGstVmcbCache->fValid            = true;
+}
+
+
+/**
+ * Setup execution of the nested-guest in SVM R0.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ * @param   GCPhysVmcb      The nested-guest physical address of its VMCB.
+ * @param   cbInstr         Length of the VMRUN instruction in bytes.
+ */
+static int hmR0SvmExecVmrun(PVMCPU pVCpu, PCPUMCTX pCtx, RTGCPHYS GCPhysVmcb, uint8_t cbInstr)
+{
+    Assert(CPUMGetGuestCPL(pVCpu) == 0);
+    Assert(!pVCpu->hm.s.svm.NstGstVmcbCache.fVmrunEmulatedInR0);
+
+    /*
+     * Cache the physical address of the VMCB for #VMEXIT exceptions.
+     */
+    pCtx->hwvirt.svm.GCPhysVmcb = GCPhysVmcb;
+
+    /*
+     * Save the "host" (guest-state) so that when we do a #VMEXIT we can restore the guest-state.
+     *
+     * The real host-state shall be saved/restored by the physical CPU once it executes VMRUN
+     * with the nested-guest VMCB.
+     */
+    CPUMSvmVmRunSaveHostState(pCtx, cbInstr);
+
+    /*
+     * Read the nested-guest VMCB state.
+     */
+    PVM pVM = pVCpu->CTX_SUFF(pVM);
+    int rc = PGMPhysSimpleReadGCPhys(pVM, pCtx->hwvirt.svm.CTX_SUFF(pVmcb), GCPhysVmcb, sizeof(SVMVMCB));
+    if (RT_SUCCESS(rc))
+    {
+        PSVMVMCB          pVmcbNstGst      = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+        PSVMVMCBCTRL      pVmcbNstGstCtrl  = &pVmcbNstGst->ctrl;
+        PSVMVMCBSTATESAVE pVmcbNstGstState = &pVmcbNstGst->guest;
+
+        /*
+         * Validate nested-guest state and controls.
+         * The rest shall be done by the physical CPU.
+         */
+        /* VMRUN must always be intercepted. */
+        if (!CPUMIsGuestSvmCtrlInterceptSet(pCtx, SVM_CTRL_INTERCEPT_VMRUN))
+        {
+            Log(("hmR0SvmExecVmrun: VMRUN instruction not intercepted -> #VMEXIT\n"));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+
+        /* Nested paging. */
+        if (    pVmcbNstGstCtrl->NestedPaging.n.u1NestedPaging
+            && !pVM->cpum.ro.GuestFeatures.fSvmNestedPaging)
+        {
+            Log(("hmR0SvmExecVmrun: Nested paging not supported -> #VMEXIT\n"));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+        /** @todo When implementing nested-paging for the nested-guest don't forget to
+         *        adjust/check PAT MSR. */
+
+        /* AVIC. */
+        if (    pVmcbNstGstCtrl->IntCtrl.n.u1AvicEnable
+            && !pVM->cpum.ro.GuestFeatures.fSvmAvic)
+        {
+            Log(("hmR0SvmExecVmrun: AVIC not supported -> #VMEXIT\n"));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+
+        /* Last branch record (LBR) virtualization. */
+        if (    (pVmcbNstGstCtrl->u64LBRVirt & SVM_LBR_VIRT_ENABLE)
+            && !pVM->cpum.ro.GuestFeatures.fSvmLbrVirt)
+        {
+            Log(("hmR0SvmExecVmrun: LBR virtualization not supported -> #VMEXIT\n"));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+
+        /*
+         * MSR permission bitmap (MSRPM).
+         */
+        RTGCPHYS const GCPhysMsrBitmap = pVmcbNstGstCtrl->u64MSRPMPhysAddr;
+        Assert(pCtx->hwvirt.svm.CTX_SUFF(pvMsrBitmap));
+        rc = PGMPhysSimpleReadGCPhys(pVM, pCtx->hwvirt.svm.CTX_SUFF(pvMsrBitmap), GCPhysMsrBitmap,
+                                     SVM_MSRPM_PAGES * X86_PAGE_4K_SIZE);
+        if (RT_FAILURE(rc))
+        {
+            Log(("hmR0SvmExecVmrun: Failed reading the MSR permission bitmap at %#RGp. rc=%Rrc\n", GCPhysMsrBitmap, rc));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+
+        /*
+         * MSR permission bitmap physical address translation (GC phys -> HC phys).
+         */
+        RTHCPHYS HCPhysNstGstMsrpm;
+        rc = PGMPhysGCPhys2HCPhys(pVM, pVmcbNstGstCtrl->u64MSRPMPhysAddr, &HCPhysNstGstMsrpm);
+        if (RT_FAILURE(rc))
+        {
+            Log(("hmR0SvmExecVmrun: Failed to translate the MSR permission bitmap at %#RGp. rc=%Rrc\n", GCPhysMsrBitmap, rc));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+
+        /*
+         * EFER MSR.
+         */
+        uint64_t uValidEfer;
+        rc = CPUMQueryValidatedGuestEfer(pVM, pVmcbNstGstState->u64CR0, pVmcbNstGstState->u64EFER, pVmcbNstGstState->u64EFER,
+                                         &uValidEfer);
+        if (RT_FAILURE(rc))
+        {
+            Log(("hmR0SvmExecVmrun: EFER invalid uOldEfer=%#RX64 -> #VMEXIT\n", pVmcbNstGstState->u64EFER));
+            pVmcbNstGstCtrl->u64ExitCode = SVM_EXIT_INVALID;
+            return hmR0SvmExecVmexit(pVCpu, pCtx);
+        }
+        bool const fLongModeEnabled         = RT_BOOL(uValidEfer & MSR_K6_EFER_LME);
+        bool const fPaging                  = RT_BOOL(pVmcbNstGstState->u64CR0 & X86_CR0_PG);
+        bool const fLongModeWithPaging      = fLongModeEnabled && fPaging;
+        /* Adjust EFER.LMA (this is normally done by the CPU when system software writes CR0) and update it. */
+        if (fLongModeWithPaging)
+            uValidEfer |= MSR_K6_EFER_LMA;
+
+        /*
+         * Cache the nested-guest VMCB fields before we start modifying them below.
+         */
+        hmR0SvmVmRunCacheVmcb(pVCpu, pCtx);
+
+        /*
+         * The IOPM of the nested-guest can be ignored because the guest always
+         * intercepts all IO port accesses. Thus, we'll swap to the guest IOPM rather
+         * than the nested-guest one and swap it back on the #VMEXIT.
+         */
+        pVmcbNstGstCtrl->u64IOPMPhysAddr  = g_HCPhysIOBitmap;
+
+        /*
+         * Load the host-physical address into the MSRPM rather than the nested-guest
+         * physical address.
+         */
+        pVmcbNstGstCtrl->u64MSRPMPhysAddr = HCPhysNstGstMsrpm; /* NOTE(review): the merge below lands in our R0 copy, not this guest page -- confirm the VMCB shouldn't point at the copy's HC phys instead. */
+
+        /*
+         * Merge the guest MSR permission bitmap in to the nested-guest one.
+         *
+         * Note the assumption here is that our MSRPM is set up only once in SVMR0SetupVM
+         * In hmR0SvmPreRunGuestCommittedNested we directly update the nested-guest one.
+         * Hence it can be done once here during VMRUN.
+         */
+        hmR0SvmMergeMsrpmBitmap(pVCpu, pVCpu->hm.s.svm.pvMsrBitmap, pCtx->hwvirt.svm.CTX_SUFF(pvMsrBitmap));
+
+        /*
+         * Merge the guest exception intercepts in to the nested-guest ones.
+         */
+        {
+            PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+            hmR0SvmMergeIntercepts(pVCpu, pVmcb, pVmcbNstGst);
+        }
+
+        /*
+         * Check for pending virtual interrupts.
+         */
+        if (pVmcbNstGstCtrl->IntCtrl.n.u1VIrqPending)
+            VMCPU_FF_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
+        else
+            Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST));
+
+        /*
+         * Preserve the required force-flags.
+         *
+         * We only preserve the force-flags that would affect the execution of the
+         * nested-guest (or the guest).
+         *
+         *   - VMCPU_FF_INHIBIT_INTERRUPTS need -not- be preserved as it's for a single
+         *     instruction which is this VMRUN instruction itself.
+         *
+         *   - VMCPU_FF_BLOCK_NMIS needs to be preserved as it blocks NMI until the
+         *     execution of a subsequent IRET instruction in the guest.
+         *
+         *   - The remaining FFs (e.g. timers) can stay in place so that we will be
+         *     able to generate interrupts that should cause #VMEXITs for the
+         *     nested-guest.
+         */
+        pCtx->hwvirt.fLocalForcedActions = pVCpu->fLocalForcedActions & VMCPU_FF_BLOCK_NMIS;
+
+        /*
+         * Interrupt shadow.
+         */
+        if (pVmcbNstGstCtrl->u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE)
+        {
+            LogFlow(("hmR0SvmExecVmrun: setting interrupt shadow. inhibit PC=%#RX64\n", pVmcbNstGstState->u64RIP));
+            /** @todo will this cause trouble if the nested-guest is 64-bit but the guest is 32-bit? */
+            EMSetInhibitInterruptsPC(pVCpu, pVmcbNstGstState->u64RIP);
+        }
+
+        /*
+         * Load the guest-CPU state.
+         * Skip CPL adjustments (will be done by the hardware).
+         */
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbNstGstState, ES, es);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbNstGstState, CS, cs);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbNstGstState, SS, ss);
+        HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbNstGstState, DS, ds);
+        pCtx->gdtr.cbGdt   = pVmcbNstGstState->GDTR.u32Limit;
+        pCtx->gdtr.pGdt    = pVmcbNstGstState->GDTR.u64Base;
+        pCtx->idtr.cbIdt   = pVmcbNstGstState->IDTR.u32Limit;
+        pCtx->idtr.pIdt    = pVmcbNstGstState->IDTR.u64Base;
+        pCtx->cr0          = pVmcbNstGstState->u64CR0;
+        pCtx->cr4          = pVmcbNstGstState->u64CR4;
+        pCtx->cr3          = pVmcbNstGstState->u64CR3;
+        pCtx->cr2          = pVmcbNstGstState->u64CR2;
+        pCtx->dr[6]        = pVmcbNstGstState->u64DR6;
+        pCtx->dr[7]        = pVmcbNstGstState->u64DR7;
+        pCtx->rflags.u64   = pVmcbNstGstState->u64RFlags;
+        pCtx->rax          = pVmcbNstGstState->u64RAX;
+        pCtx->rsp          = pVmcbNstGstState->u64RSP;
+        pCtx->rip          = pVmcbNstGstState->u64RIP;
+        pCtx->msrEFER      = uValidEfer;
+
+        /* Mask DR6, DR7 bits mandatory set/clear bits. */
+        pCtx->dr[6] &= ~(X86_DR6_RAZ_MASK | X86_DR6_MBZ_MASK);
+        pCtx->dr[6] |= X86_DR6_RA1_MASK;
+        pCtx->dr[7] &= ~(X86_DR7_RAZ_MASK | X86_DR7_MBZ_MASK);
+        pCtx->dr[7] |= X86_DR7_RA1_MASK;
+
+        /*
+         * VMRUN loads a subset of the guest-CPU state (see above) and nothing else. Ensure
+         * hmR0SvmLoadGuestStateNested doesn't need to load anything back to the VMCB cache
+         * as we go straight into executing the nested-guest.
+         *
+         * If we fall back to ring-3 we would have to re-load things from the guest-CPU
+         * state into the VMCB as we are unsure what state we're in (e.g., VMRUN ends up
+         * getting executed in IEM along with a handful of nested-guest instructions and
+         * we have to continue executing the nested-guest in R0 since IEM doesn't know
+         * about this VMCB cache which is in HM).
+         */
+        PSVMNESTEDVMCBCACHE pNstGstVmcbCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+        pNstGstVmcbCache->fVmrunEmulatedInR0 = true;
+        HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_ALL_GUEST);
+        HMCPU_CF_SET(pVCpu,   HM_CHANGED_HOST_GUEST_SHARED_STATE);
+
+        /*
+         * Clear global interrupt flags to allow interrupts and NMIs in the guest.
+         */
+        pCtx->hwvirt.svm.fGif = 1;
+
+        /*
+         * Inform PGM about paging mode changes.
+         * We include X86_CR0_PE because PGM doesn't handle paged-real mode yet.
+         */
+        /** @todo What about informing PGM about CR0.WP? */
+        PGMFlushTLB(pVCpu, pCtx->cr3, true /* fGlobal */);
+
+        rc = PGMChangeMode(pVCpu, pVmcbNstGstState->u64CR0 | X86_CR0_PE, pVmcbNstGstState->u64CR4, pCtx->msrEFER); /* Reuse outer rc; the previous 'int rc' shadowed it. */
+        return rc;
+    }
+
+    return rc;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT && !VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM */
 
 
@@ -4577,5 +5916,5 @@
     {
         Assert(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
-        PCSVMVMCB pVmcb = (PCSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         uint8_t const cbInstr   = pVmcb->ctrl.u64NextRIP - pCtx->rip;
         RTGCPTR const GCPtrPage = pVmcb->ctrl.u64ExitInfo1;
@@ -4688,5 +6027,5 @@
     {
         Assert(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
-        PCSVMVMCB pVmcb = (PCSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
         if (fMovCRx)
@@ -4728,5 +6067,5 @@
     {
         Assert(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
-        PCSVMVMCB pVmcb = (PCSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
         if (fMovCRx)
@@ -4801,5 +6140,5 @@
 {
     HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
     PVM      pVM   = pVCpu->CTX_SUFF(pVM);
 
@@ -4928,5 +6267,5 @@
 
         /* Don't intercept DRx read and writes. */
-        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+        PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
         pVmcb->ctrl.u16InterceptRdDRx = 0;
         pVmcb->ctrl.u16InterceptWrDRx = 0;
@@ -5015,6 +6354,6 @@
     Log4(("hmR0SvmExitIOInstr: CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
 
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
     PVM      pVM   = pVCpu->CTX_SUFF(pVM);
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
 
     /* Refer AMD spec. 15.10.2 "IN and OUT Behaviour" and Figure 15-2. "EXITINFO1 for IOIO Intercept" for the format. */
@@ -5232,5 +6571,5 @@
 
     /* See AMD spec. 15.25.6 "Nested versus Guest Page Faults, Fault Ordering" for VMCB details for #NPF. */
-    PSVMVMCB pVmcb           = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb           = pVCpu->hm.s.svm.pVmcb;
     uint32_t u32ErrCode      = pVmcb->ctrl.u64ExitInfo1;
     RTGCPHYS GCPhysFaultAddr = pVmcb->ctrl.u64ExitInfo2;
@@ -5336,5 +6675,5 @@
     HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
 
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
     pVmcb->ctrl.IntCtrl.n.u1VIrqPending = 0;  /* No virtual interrupts pending, we'll inject the current one/NMI before reentry. */
     pVmcb->ctrl.IntCtrl.n.u8VIntrVector = 0;
@@ -5435,5 +6774,5 @@
 
     /* Indicate that we no longer need to #VMEXIT when the guest is ready to receive NMIs, it is now ready. */
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
     hmR0SvmClearIretIntercept(pVmcb);
 
@@ -5454,5 +6793,5 @@
 
     /* See AMD spec. 15.12.15 "#PF (Page Fault)". */
-    PSVMVMCB    pVmcb         = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
+    PSVMVMCB    pVmcb         = pVCpu->hm.s.svm.pVmcb;
     uint32_t    u32ErrCode    = pVmcb->ctrl.u64ExitInfo1;
     RTGCUINTPTR uFaultAddress = pVmcb->ctrl.u64ExitInfo2;
@@ -5572,6 +6911,6 @@
 
     /* Paranoia; Ensure we cannot be called as a result of event delivery. */
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; NOREF(pVmcb);
-    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid);
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
 
     /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
@@ -5626,6 +6965,6 @@
 
     /* Paranoia; Ensure we cannot be called as a result of event delivery. */
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; NOREF(pVmcb);
-    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid);
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid);  NOREF(pVmcb);
 
     int rc = VERR_SVM_UNEXPECTED_XCPT_EXIT;
@@ -5670,6 +7009,6 @@
 
     /* Paranoia; Ensure we cannot be called as a result of event delivery. */
-    PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb; NOREF(pVmcb);
-    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid);
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+    Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
 
     STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
@@ -5718,6 +7057,6 @@
     /* This can be a fault-type #DB (instruction breakpoint) or a trap-type #DB (data breakpoint). However, for both cases
        DR6 and DR7 are updated to what the exception handler expects. See AMD spec. 15.12.2 "#DB (Debug)". */
-    PSVMVMCB    pVmcb   = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
-    PVM         pVM     = pVCpu->CTX_SUFF(pVM);
+    PVM      pVM   = pVCpu->CTX_SUFF(pVM);
+    PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
     int rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pVmcb->guest.u64DR6, pVCpu->hm.s.fSingleInstruction);
     if (rc == VINF_EM_RAW_GUEST_TRAP)
@@ -5878,6 +7217,13 @@
     /** @todo Stat. */
     /* STAM_COUNTER_INC(&pVCpu->hm.s.StatExitVmrun); */
+    VBOXSTRICTRC rcStrict;
     uint8_t const cbInstr = hmR0SvmGetInstrLengthHwAssist(pVCpu, pCtx, 3);
-    VBOXSTRICTRC rcStrict = IEMExecDecodedVmrun(pVCpu, cbInstr);
+#if defined(VBOX_WITH_NESTED_HWVIRT) && defined(VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM)
+    rcStrict = IEMExecDecodedVmrun(pVCpu, cbInstr);
+#else
+    rcStrict = hmR0SvmExecVmrun(pVCpu, pCtx, pCtx->rax, cbInstr);
+    if (rcStrict == VINF_SUCCESS)
+        rcStrict = VINF_SVM_VMRUN;
+#endif
     return VBOXSTRICTRC_VAL(rcStrict);
 }
Index: /trunk/src/VBox/VMM/include/HMInternal.h
===================================================================
--- /trunk/src/VBox/VMM/include/HMInternal.h	(revision 68225)
+++ /trunk/src/VBox/VMM/include/HMInternal.h	(revision 68226)
@@ -26,4 +26,5 @@
 #include <VBox/vmm/hm.h>
 #include <VBox/vmm/hm_vmx.h>
+#include <VBox/vmm/hm_svm.h>
 #include <VBox/vmm/pgm.h>
 #include <VBox/vmm/cpum.h>
@@ -867,6 +868,6 @@
         /** R0 memory object for the host VMCB which holds additional host-state. */
         RTR0MEMOBJ                  hMemObjVmcbHost;
-        /** Virtual address of the host VMCB which holds additional host-state. */
-        R0PTRTYPE(void *)           pvVmcbHost;
+        /** Padding. */
+        R0PTRTYPE(void *)           pvPadding;
 
         /** Physical address of the guest VMCB. */
@@ -874,6 +875,6 @@
         /** R0 memory object for the guest VMCB. */
         RTR0MEMOBJ                  hMemObjVmcb;
-        /** Virtual address of the guest VMCB. */
-        R0PTRTYPE(void *)           pvVmcb;
+        /** Pointer to the guest VMCB. */
+        R0PTRTYPE(PSVMVMCB)         pVmcb;
 
         /** Physical address of the MSR bitmap (8 KB). */
@@ -881,5 +882,5 @@
         /** R0 memory object for the MSR bitmap (8 KB). */
         RTR0MEMOBJ                  hMemObjMsrBitmap;
-        /** Virtual address of the MSR bitmap. */
+        /** Pointer to the MSR bitmap. */
         R0PTRTYPE(void *)           pvMsrBitmap;
 
@@ -888,4 +889,8 @@
         bool                        fSyncVTpr;
         uint8_t                     u8Alignment0[7];
+
+        /** Cache of the nested-guest's VMCB fields that we modify in order to run the
+         *  nested-guest using AMD-V. This will be restored on \#VMEXIT. */
+        SVMNESTEDVMCBCACHE          NstGstVmcbCache;
     } svm;
 
