Index: /trunk/src/VBox/VMM/PGMInternal.h
===================================================================
--- /trunk/src/VBox/VMM/PGMInternal.h	(revision 22599)
+++ /trunk/src/VBox/VMM/PGMInternal.h	(revision 22600)
@@ -343,5 +343,5 @@
 #endif
 
-/** @def PGM_INVL_PG
+/** @def PGM_INVL_PG_ALL_VCPU
  * Invalidates a page on all VCPUs
  *
@@ -350,9 +350,9 @@
  */
 #ifdef IN_RC
-# define PGM_INVL_ALL_VCPU_PG(pVM, GCVirt)      ASMInvalidatePage((void *)(GCVirt))
+# define PGM_INVL_PG_ALL_VCPU(pVM, GCVirt)      ASMInvalidatePage((void *)(GCVirt))
 #elif defined(IN_RING0)
-# define PGM_INVL_ALL_VCPU_PG(pVM, GCVirt)      HWACCMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt))
+# define PGM_INVL_PG_ALL_VCPU(pVM, GCVirt)      HWACCMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt))
 #else
-# define PGM_INVL_ALL_VCPU_PG(pVM, GCVirt)      HWACCMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt))
+# define PGM_INVL_PG_ALL_VCPU(pVM, GCVirt)      HWACCMInvalidatePageOnAllVCpus(pVM, (RTGCPTR)(GCVirt))
 #endif
 
@@ -1680,4 +1680,5 @@
 #ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
     uint32_t            idxDirty;
+    RTGCPTR             pvDirtyFault;
 #else
     uint32_t            bPadding2;
Index: /trunk/src/VBox/VMM/VMMAll/PGMAllBth.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/PGMAllBth.h	(revision 22599)
+++ /trunk/src/VBox/VMM/VMMAll/PGMAllBth.h	(revision 22600)
@@ -838,5 +838,11 @@
                        /*
                         * Page was successfully synced, return to guest.
+                        * First invalidate the page as it might be in the TLB.
                         */
+#   if PGM_SHW_TYPE == PGM_TYPE_EPT
+                        HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)pvFault);
+#   else
+                        PGM_INVL_PG_ALL_VCPU(pVM, pvFault);
+#   endif
 #   ifdef VBOX_STRICT
                         RTGCPHYS GCPhys;
@@ -1483,5 +1489,5 @@
             PPGMPOOL pPool = pVM->pgm.s.CTX_SUFF(pPool);
             PX86PTPAE pGstPT;
-            
+
             pGstPT = (PX86PTPAE)&pPool->aDirtyPages[pShwPage->idxDirty][0];
             pGstPT->a[iPTDst].u = PteSrc.u;
Index: /trunk/src/VBox/VMM/VMMAll/PGMAllHandler.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/PGMAllHandler.cpp	(revision 22599)
+++ /trunk/src/VBox/VMM/VMMAll/PGMAllHandler.cpp	(revision 22600)
@@ -926,7 +926,4 @@
             PGM_PAGE_SET_HNDL_PHYS_STATE(pPage, PGM_PAGE_HNDL_PHYS_STATE_DISABLED);
             pgmUnlock(pVM);
-#ifndef IN_RC
-            HWACCMInvalidatePhysPage(pVM, GCPhysPage);
-#endif
             return VINF_SUCCESS;
         }
@@ -1048,7 +1045,4 @@
 
             pgmUnlock(pVM);
-#ifndef IN_RC
-            HWACCMInvalidatePhysPage(pVM, GCPhysPage);
-#endif
             return VINF_SUCCESS;
         }
@@ -1150,7 +1144,4 @@
             LogFlow(("PGMHandlerPhysicalPageAliasHC: => %R[pgmpage]\n", pPage));
             pgmUnlock(pVM);
-#ifndef IN_RC
-            HWACCMInvalidatePhysPage(pVM, GCPhysPage);
-#endif
             return VINF_SUCCESS;
         }
Index: /trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp	(revision 22599)
+++ /trunk/src/VBox/VMM/VMMAll/PGMAllPool.cpp	(revision 22600)
@@ -1171,5 +1171,5 @@
         pPage->pvLastAccessHandlerFault = pvFault;
         pPage->cLastAccessHandlerCount  = pVCpu->pgm.s.cPoolAccessHandler;
-        if (pPage->cModifications > cMaxModifications)
+        if (pPage->cModifications >= cMaxModifications)
         {
             STAM_COUNTER_INC(&pPool->CTX_MID_Z(StatMonitor,FlushReinit));
@@ -1185,4 +1185,5 @@
      */
     bool fReused = false;
+    bool fNotReusedNotForking = false;
     if (    (   pPage->cModifications < cMaxModifications   /** @todo #define */ /** @todo need to check that it's not mapping EIP. */ /** @todo adjust this! */
              || pgmPoolIsPageLocked(&pVM->pgm.s, pPage)
@@ -1276,4 +1277,5 @@
         Log4(("pgmPoolAccessHandler: eax=%#x ecx=%#x edi=%#x esi=%#x rip=%RGv opcode=%d prefix=%#x\n",
               pRegFrame->eax, pRegFrame->ecx, pRegFrame->edi, pRegFrame->esi, (RTGCPTR)pRegFrame->rip, pDis->pCurInstr->opcode, pDis->prefix));
+        fNotReusedNotForking = true;
     }
 
@@ -1282,8 +1284,12 @@
      * leads to pgm pool trashing and an excessive amount of write faults due to page monitoring.
      */
-    if (    !fReused
+    if (    pPage->cModifications >= cMaxModifications
         &&  !fForcedFlush
         &&  pPage->enmKind == PGMPOOLKIND_PAE_PT_FOR_PAE_PT
-        &&  pPage->cModifications >= cMaxModifications)
+        &&  (   fNotReusedNotForking 
+             || (   !pgmPoolMonitorIsReused(pVM, pVCpu, pRegFrame, pDis, pvFault)
+                 && !pgmPoolMonitorIsForking(pPool, pDis, GCPhysFault & PAGE_OFFSET_MASK))
+            )
+       )
     {
         Assert(!pgmPoolIsPageLocked(&pVM->pgm.s, pPage));
@@ -1319,20 +1325,26 @@
         }
 
-        /* Temporarily allow write access to the page table again. */
-        rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
-        if (rc == VINF_SUCCESS)
-        {
-            rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
-            AssertMsg(rc == VINF_SUCCESS 
-                      /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
-                      ||  rc == VERR_PAGE_TABLE_NOT_PRESENT 
-                      ||  rc == VERR_PAGE_NOT_PRESENT, 
-                      ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
-
-            pgmPoolAddDirtyPage(pVM, pPool, pPage);
- 
-            STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
-            pgmUnlock(pVM);
-            return rc;
+        /* The flushing above might fail for locked pages, so double check. */
+        if (    pPage->iMonitoredNext == NIL_PGMPOOL_IDX
+            &&  pPage->iMonitoredPrev == NIL_PGMPOOL_IDX)
+        {
+            /* Temporarily allow write access to the page table again. */
+            rc = PGMHandlerPhysicalPageTempOff(pVM, pPage->GCPhys, pPage->GCPhys);
+            if (rc == VINF_SUCCESS)
+            {
+                rc = PGMShwModifyPage(pVCpu, pvFault, 1, X86_PTE_RW, ~(uint64_t)X86_PTE_RW);
+                AssertMsg(rc == VINF_SUCCESS 
+                        /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
+                        ||  rc == VERR_PAGE_TABLE_NOT_PRESENT 
+                        ||  rc == VERR_PAGE_NOT_PRESENT, 
+                        ("PGMShwModifyPage -> GCPtr=%RGv rc=%d\n", pvFault, rc));
+
+                pgmPoolAddDirtyPage(pVM, pPool, pPage);
+                pPage->pvDirtyFault = pvFault;
+     
+                STAM_PROFILE_STOP(&pVM->pgm.s.CTX_SUFF(pPool)->CTX_SUFF_Z(StatMonitor), a);
+                pgmUnlock(pVM);
+                return rc;
+            }
         }
     }
@@ -1359,4 +1371,25 @@
 # ifdef PGMPOOL_WITH_OPTIMIZED_DIRTY_PT
 /**
+ * Asserts that each present shadow PTE holds the address PGMPhysGCPhys2HCPhys yields for the matching guest PTE (PAE / PAE).
+ *
+ * @param   pPool       The pool; only used here to obtain the VM handle.
+ * @param   pPage       The page (not referenced by this function).
+ * @param   pShwPT      The shadow page table (mapping of the page).
+ * @param   pGstPT      The guest page table.
+ */
+DECLINLINE(void) pgmPoolTrackCheckPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT)
+{
+    for (unsigned i = 0; i < RT_ELEMENTS(pShwPT->a); i++)
+    {
+        if (pShwPT->a[i].n.u1Present) /* Entries not present in the shadow table are skipped. */
+        {
+            RTHCPHYS HCPhys = -1; /* All-ones start value; presumably so a failed lookup cannot match by accident - confirm. */
+            int rc = PGMPhysGCPhys2HCPhys(pPool->CTX_SUFF(pVM), pGstPT->a[i].u & X86_PTE_PAE_PG_MASK, &HCPhys);
+            AssertMsg(rc == VINF_SUCCESS && (pShwPT->a[i].u & X86_PTE_PAE_PG_MASK) == HCPhys, ("rc=%d guest %RX64 shw=%RX64 vs %RHp\n", rc, pGstPT->a[i].u, pShwPT->a[i].u, HCPhys));
+        }
+    }
+}
+
+/**
  * Clear references to guest physical memory in a PAE / PAE page table.
  *
@@ -1366,5 +1399,5 @@
  * @param   pShwPT      The shadow page table (mapping of the page).
  * @param   pGstPT      The guest page table.
- * @param   pGstPT      The old cached guest page table.
+ * @param   pOldGstPT   The old cached guest page table.
  */
 DECLINLINE(unsigned) pgmPoolTrackFlushPTPaePae(PPGMPOOL pPool, PPGMPOOLPAGE pPage, PX86PTPAE pShwPT, PCX86PTPAE pGstPT, PCX86PTPAE pOldGstPT)
@@ -1446,4 +1479,14 @@
     pPage->fDirty         = false;
 
+#ifdef VBOX_STRICT
+    uint64_t fFlags = 0;
+    rc = PGMShwGetPage(VMMGetCpu(pVM), pPage->pvDirtyFault, &fFlags, NULL);
+    AssertMsg(      (rc == VINF_SUCCESS && !(fFlags & X86_PTE_RW))
+                /* In the SMP case the page table might be removed while we wait for the PGM lock in the trap handler. */
+                ||  rc == VERR_PAGE_TABLE_NOT_PRESENT 
+                ||  rc == VERR_PAGE_NOT_PRESENT, 
+                ("PGMShwGetPage -> GCPtr=%RGv rc=%d flags=%RX64\n", pPage->pvDirtyFault, rc, fFlags));
+#endif
+
     /* This page is likely to be modified again, so reduce the nr of modifications just a bit here. */
     Assert(pPage->cModifications);
@@ -1463,4 +1506,5 @@
 }
 
+# ifndef IN_RING3
 /**
  * Add a new dirty page
@@ -1476,7 +1520,5 @@
     Assert(PGMIsLocked(pVM));
     AssertCompile(RT_ELEMENTS(pPool->aIdxDirtyPages) == 8 || RT_ELEMENTS(pPool->aIdxDirtyPages) == 16);
-
-    if (pPage->fDirty)
-        return;
+    Assert(!pPage->fDirty);
 
     idxFree = pPool->idxFreeDirtyPage;
@@ -1489,13 +1531,16 @@
     AssertMsg(pPool->aIdxDirtyPages[idxFree] == NIL_PGMPOOL_IDX, ("idxFree=%d cDirtyPages=%d\n", idxFree, pPool->cDirtyPages));
 
+    Log(("Add dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
+
     /* Make a copy of the guest page table as we require valid GCPhys addresses when removing
      * references to physical pages. (the HCPhys linear lookup is *extremely* expensive!)
      */
+    void *pvShw = PGMPOOL_PAGE_2_LOCKED_PTR(pPool->CTX_SUFF(pVM), pPage);
     void *pvGst;
     int rc = PGM_GCPHYS_2_PTR(pPool->CTX_SUFF(pVM), pPage->GCPhys, &pvGst); AssertReleaseRC(rc);
     memcpy(&pPool->aDirtyPages[idxFree][0], pvGst, PAGE_SIZE);
+    pgmPoolTrackCheckPTPaePae(pPool, pPage, (PX86PTPAE)pvShw, (PCX86PTPAE)pvGst);
 
     STAM_COUNTER_INC(&pPool->StatDirtyPage);
-    Log(("Mark dirty page %RGp (slot=%d)\n", pPage->GCPhys, idxFree));
     pPage->fDirty                  = true;
     pPage->idxDirty                = idxFree;
@@ -1523,5 +1568,5 @@
     return;
 }
-
+# endif /* !IN_RING3 */
 
 /**
@@ -3143,5 +3188,5 @@
     {
 # ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
-        /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kill the pool otherwise. */
+        /* Start a subset here because pgmPoolTrackFlushGCPhysPTsSlow kills the pool otherwise. */
         uint32_t iPrevSubset = PGMDynMapPushAutoSubset(pVCpu);
 # endif
Index: /trunk/src/VBox/VMM/VMMAll/PGMAllShw.h
===================================================================
--- /trunk/src/VBox/VMM/VMMAll/PGMAllShw.h	(revision 22599)
+++ /trunk/src/VBox/VMM/VMMAll/PGMAllShw.h	(revision 22600)
@@ -367,5 +367,5 @@
                 HWACCMInvalidatePhysPage(pVM, (RTGCPHYS)GCPtr);
 # else
-                PGM_INVL_ALL_VCPU_PG(pVM, GCPtr);
+                PGM_INVL_PG_ALL_VCPU(pVM, GCPtr);
 # endif
             }
