Index: /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp	(revision 71503)
+++ /trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp	(revision 71504)
@@ -952,10 +952,6 @@
                                        | SVM_CTRL_INTERCEPT_VMMCALL;
 
-        /*
-         * CR0, CR4 reads/writes must be intercepted, as our shadow values may differ from the guest's.
-         * These interceptions might be relaxed later during VM execution if the conditions allow.
-         */
-        pVmcb->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);
-        pVmcb->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);
+        /* CR4 writes must always be intercepted for tracking PGM mode changes. */
+        pVmcb->ctrl.u16InterceptWrCRx = RT_BIT(4);
 
         /* Intercept all DRx reads and writes by default. Changed later on. */
@@ -1085,4 +1081,23 @@
 #endif
     return pVCpu->hm.s.svm.pVmcb;
+}
+
+
+/**
+ * Gets a pointer to the nested-guest VMCB cache.
+ *
+ * @returns Pointer to the nested-guest VMCB cache.
+ * @param   pVCpu           The cross context virtual CPU structure.
+ * @param   pCtx            Pointer to the guest-CPU context.
+ */
+DECLINLINE(PSVMNESTEDVMCBCACHE) hmR0SvmGetNestedVmcbCache(PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT
+    Assert(pCtx->hwvirt.svm.fHMCachedVmcb); RT_NOREF(pCtx);
+    return &pVCpu->hm.s.svm.NstGstVmcbCache;
+#else
+    RT_NOREF2(pVCpu, pCtx);
+    return NULL;
+#endif
 }
 
@@ -1393,6 +1408,5 @@
         if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
         {
-            Assert(pCtx->hwvirt.svm.fHMCachedVmcb); NOREF(pCtx);
-            PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+            PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu, pCtx);
             fRemoveXcpt = !(pVmcbNstGstCache->u32InterceptXcpt & RT_BIT(u32Xcpt));
         }
@@ -1475,6 +1489,5 @@
         {
             /* If the nested-hypervisor intercepts CR0 reads/writes, we need to continue intercepting them. */
-            PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
-            Assert(pCtx->hwvirt.svm.fHMCachedVmcb);
+            PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu, pCtx);
             pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx       & ~RT_BIT(0))
                                           | (pVmcbNstGstCache->u16InterceptRdCRx &  RT_BIT(0));
@@ -1559,6 +1572,5 @@
     if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4))
     {
-        uint64_t u64GuestCR4 = pCtx->cr4;
-        Assert(RT_HI_U32(u64GuestCR4) == 0);
+        uint64_t uShadowCr4 = pCtx->cr4;
         if (!pVM->hm.s.fNestedPaging)
         {
@@ -1571,5 +1583,5 @@
 
                 case PGMMODE_32_BIT:        /* 32-bit paging. */
-                    u64GuestCR4 &= ~X86_CR4_PAE;
+                    uShadowCr4 &= ~X86_CR4_PAE;
                     break;
 
@@ -1577,5 +1589,5 @@
                 case PGMMODE_PAE_NX:        /* PAE paging with NX enabled. */
                     /** Must use PAE paging as we could use physical memory > 4 GB */
-                    u64GuestCR4 |= X86_CR4_PAE;
+                    uShadowCr4 |= X86_CR4_PAE;
                     break;
 
@@ -1595,9 +1607,30 @@
         }
 
-        pVmcb->guest.u64CR4 = u64GuestCR4;
-        pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
-
         /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
-        pVCpu->hm.s.fLoadSaveGuestXcr0 = (u64GuestCR4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+        pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+        /* Avoid intercepting CR4 reads if the guest and shadow CR4 values are identical. */
+        if (uShadowCr4 == pCtx->cr4)
+        {
+            if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+                pVmcb->ctrl.u16InterceptRdCRx &= ~RT_BIT(4);
+            else
+            {
+                /* If the nested-hypervisor intercepts CR4 reads, we need to continue intercepting them. */
+                PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu, pCtx);
+                pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx       & ~RT_BIT(4))
+                                              | (pVmcbNstGstCache->u16InterceptRdCRx &  RT_BIT(4));
+            }
+        }
+        else
+            pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(4);
+
+        /* CR4 writes are always intercepted (both guest, nested-guest) for tracking PGM mode changes. */
+        Assert(pVmcb->ctrl.u16InterceptWrCRx & RT_BIT(4));
+
+        /* Update VMCB with the shadow CR4 and the appropriate VMCB clean bits. */
+        Assert(RT_HI_U32(uShadowCr4) == 0);
+        pVmcb->guest.u64CR4 = uShadowCr4;
+        pVmcb->ctrl.u32VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_CRX_EFER | HMSVM_VMCB_CLEAN_INTERCEPTS);
 
         HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4);
@@ -2021,7 +2054,6 @@
         pVmcbNstGst->ctrl.u16InterceptWrCRx |= pVmcb->ctrl.u16InterceptWrCRx;
 
-        /* Always intercept CR0, CR4 reads and writes as we alter them. */
-        pVmcbNstGst->ctrl.u16InterceptRdCRx |= RT_BIT(0) | RT_BIT(4);
-        pVmcbNstGst->ctrl.u16InterceptWrCRx |= RT_BIT(0) | RT_BIT(4);
+        /* Always intercept CR4 writes for tracking PGM mode changes. */
+        pVmcbNstGst->ctrl.u16InterceptWrCRx |= RT_BIT(4);
 
         /* Without nested paging, intercept CR3 reads and writes as we load shadow page tables. */
@@ -2973,6 +3005,5 @@
 
     PSVMVMCBCTRL         pVmcbNstGstCtrl  = &pVmcbNstGst->ctrl;
-    PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
-    Assert(pCtx->hwvirt.svm.fHMCachedVmcb); RT_NOREF(pCtx);
+    PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu, pCtx);
 
     /*
@@ -4278,5 +4309,6 @@
     RTCPUID idCurrentCpu = hmR0GetCurrentCpu()->idCpu;
     if (   pSvmTransient->fUpdateTscOffsetting
-        || idCurrentCpu != pVCpu->hm.s.idLastCpu)
+        || idCurrentCpu != pVCpu->hm.s.idLastCpu)   /** @todo is this correct for nested-guests where
+                                                              nested-VCPU<->physical-CPU mapping doesn't exist? */
     {
         hmR0SvmUpdateTscOffsettingNested(pVM, pVCpu, pCtx, pVmcbNstGst);
@@ -4535,7 +4567,7 @@
 
     /* TSC read must be done early for maximum accuracy. */
-    PSVMVMCB             pVmcbNstGst     = pMixedCtx->hwvirt.svm.CTX_SUFF(pVmcb);
-    PSVMVMCBCTRL         pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
-    PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+    PSVMVMCB             pVmcbNstGst      = pMixedCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+    PSVMVMCBCTRL         pVmcbNstGstCtrl  = &pVmcbNstGst->ctrl;
+    PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu, pMixedCtx);
     if (!(pVmcbNstGstCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSC))
     {
