Index: /trunk/include/VBox/err.h
===================================================================
--- /trunk/include/VBox/err.h	(revision 36983)
+++ /trunk/include/VBox/err.h	(revision 36984)
@@ -1503,7 +1503,9 @@
 /** The chunk to be unmapped isn't actually mapped into the process. */
 #define VERR_GMM_CHUNK_NOT_MAPPED                   (-3813)
+/** The chunk has been mapped too many times already (impossible). */
+#define VERR_GMM_TOO_MANY_CHUNK_MAPPINGS            (-3814)
 /** The reservation or reservation update was declined - too many VMs, too
  * little memory, and/or too low GMM configuration. */
-#define VERR_GMM_MEMORY_RESERVATION_DECLINED        (-3814)
+#define VERR_GMM_MEMORY_RESERVATION_DECLINED        (-3815)
 /** @} */
 
Index: /trunk/src/VBox/VMM/VMMR0/GMMR0.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR0/GMMR0.cpp	(revision 36983)
+++ /trunk/src/VBox/VMM/VMMR0/GMMR0.cpp	(revision 36984)
@@ -162,8 +162,10 @@
 #include <iprt/asm.h>
 #include <iprt/avl.h>
+#include <iprt/list.h>
 #include <iprt/mem.h>
 #include <iprt/memobj.h>
 #include <iprt/semaphore.h>
 #include <iprt/string.h>
+#include <iprt/time.h>
 
 
@@ -397,4 +399,6 @@
      * chunks with no free pages. */
     PGMMCHUNKFREESET    pSet;
+    /** List node in the chunk list (GMM::ChunkList).  */
+    RTLISTNODE          ListNode;
     /** Pointer to an array of mappings. */
     PGMMCHUNKMAP        paMappings;
@@ -465,4 +469,7 @@
     /** The number of free pages in the set. */
     uint64_t            cFreePages;
+    /** The generation ID for the set.  This is incremented whenever
+     *  something is linked or unlinked from this set. */
+    uint64_t            idGeneration;
     /** Chunks ordered by increasing number of free pages. */
     PGMMCHUNK           apLists[GMM_CHUNK_FREE_SET_LISTS];
@@ -477,7 +484,13 @@
     /** Magic / eye catcher. GMM_MAGIC */
     uint32_t            u32Magic;
+    /** The number of threads waiting on the mutex. */
+    uint32_t            cMtxContenders;
     /** The fast mutex protecting the GMM.
      * More fine grained locking can be implemented later if necessary. */
-    RTSEMFASTMUTEX      Mtx;
+    RTSEMFASTMUTEX      hMtx;
+#ifdef VBOX_STRICT
+    /** The current mutex owner. */
+    RTNATIVETHREAD      hMtxOwner;
+#endif
     /** The chunk tree. */
     PAVLU32NODECORE     pChunks;
@@ -492,4 +505,11 @@
     /** @todo separate trees for distinctly different guest OSes. */
     PAVLGCPTRNODECORE   pGlobalSharedModuleTree;
+
+    /** The fast mutex protecting the GMM cleanup.
+     * This serializes VMs cleaning up their memory, so that we can
+     * safely leave the primary mutex (hMtx). */
+    RTSEMFASTMUTEX      hMtxCleanup;
+    /** The chunk list.  For simplifying the cleanup process. */
+    RTLISTNODE          ChunkList;
 
     /** The maximum number of pages we're allowed to allocate.
@@ -528,4 +548,7 @@
     uint16_t            cRegisteredVMs;
 
+    /** The number of freed chunks ever.  This is used as a list generation to
+     *  avoid restarting the cleanup scanning when the list wasn't modified. */
+    uint32_t            cFreedChunks;
     /** The previous allocated Chunk ID.
      * Used as a hint to avoid scanning the whole bitmap. */
@@ -540,5 +563,5 @@
 
 /** The value of GMM::u32Magic (Katsuhiro Otomo). */
-#define GMM_MAGIC       0x19540414
+#define GMM_MAGIC       UINT32_C(0x19540414)
 
 
@@ -623,6 +646,5 @@
 *******************************************************************************/
 static DECLCALLBACK(int)     gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
-static DECLCALLBACK(int)     gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGMM);
-static DECLCALLBACK(int)     gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM);
+static bool                  gmmR0CleanupVMScanChunk(PGVM pGVM, PGMMCHUNK pChunk);
 /*static*/ DECLCALLBACK(int) gmmR0CleanupVMDestroyChunk(PAVLU32NODECORE pNode, void *pvGVM);
 DECLINLINE(void)             gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
@@ -632,4 +654,5 @@
 static void                  gmmR0FreeSharedPage(PGMM pGMM, uint32_t idPage, PGMMPAGE pPage);
 static int                   gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+static void                  gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
 
 
@@ -648,57 +671,66 @@
 
     /*
-     * Allocate the instance data and the lock(s).
+     * Allocate the instance data and the locks.
      */
     PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
     if (!pGMM)
         return VERR_NO_MEMORY;
+
     pGMM->u32Magic = GMM_MAGIC;
     for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
         pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
+    RTListInit(&pGMM->ChunkList);
     ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
 
-    int rc = RTSemFastMutexCreate(&pGMM->Mtx);
+    int rc = RTSemFastMutexCreate(&pGMM->hMtx);
     if (RT_SUCCESS(rc))
     {
-        /*
-         * Check and see if RTR0MemObjAllocPhysNC works.
-         */
+        rc = RTSemFastMutexCreate(&pGMM->hMtxCleanup);
+        if (RT_SUCCESS(rc))
+        {
+            /*
+             * Check and see if RTR0MemObjAllocPhysNC works.
+             */
 #if 0 /* later, see #3170. */
-        RTR0MEMOBJ MemObj;
-        rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
-        if (RT_SUCCESS(rc))
-        {
-            rc = RTR0MemObjFree(MemObj, true);
-            AssertRC(rc);
-        }
-        else if (rc == VERR_NOT_SUPPORTED)
-            pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
-        else
-            SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
+            RTR0MEMOBJ MemObj;
+            rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
+            if (RT_SUCCESS(rc))
+            {
+                rc = RTR0MemObjFree(MemObj, true);
+                AssertRC(rc);
+            }
+            else if (rc == VERR_NOT_SUPPORTED)
+                pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
+            else
+                SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
 #else
 # if defined(RT_OS_WINDOWS) || (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
-        pGMM->fLegacyAllocationMode = false;
+            pGMM->fLegacyAllocationMode = false;
 #  if ARCH_BITS == 32
-        /* Don't reuse possibly partial chunks because of the virtual address space limitation. */
-        pGMM->fBoundMemoryMode = true;
+            /* Don't reuse possibly partial chunks because of the virtual address space limitation. */
+            pGMM->fBoundMemoryMode = true;
 #  else
-        pGMM->fBoundMemoryMode = false;
+            pGMM->fBoundMemoryMode = false;
 #  endif
 # else
-        pGMM->fLegacyAllocationMode = true;
-        pGMM->fBoundMemoryMode = true;
+            pGMM->fLegacyAllocationMode = true;
+            pGMM->fBoundMemoryMode = true;
 # endif
 #endif
 
-        /*
-         * Query system page count and guess a reasonable cMaxPages value.
-         */
-        pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
-
-        g_pGMM = pGMM;
-        LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
-        return VINF_SUCCESS;
-    }
-
+            /*
+             * Query system page count and guess a reasonable cMaxPages value.
+             */
+            pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
+
+            g_pGMM = pGMM;
+            LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
+            return VINF_SUCCESS;
+        }
+
+        RTSemFastMutexDestroy(pGMM->hMtx);
+    }
+
+    pGMM->u32Magic = 0;
     RTMemFree(pGMM);
     SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
@@ -731,7 +763,9 @@
     /* Destroy the fundamentals. */
     g_pGMM = NULL;
-    pGMM->u32Magic++;
-    RTSemFastMutexDestroy(pGMM->Mtx);
-    pGMM->Mtx = NIL_RTSEMFASTMUTEX;
+    pGMM->u32Magic    = ~GMM_MAGIC;
+    RTSemFastMutexDestroy(pGMM->hMtx);
+    pGMM->hMtx        = NIL_RTSEMFASTMUTEX;
+    RTSemFastMutexDestroy(pGMM->hMtxCleanup);
+    pGMM->hMtxCleanup = NIL_RTSEMFASTMUTEX;
 
     /* free any chunks still hanging around. */
@@ -798,4 +832,86 @@
 
 /**
+ * Acquires the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param   pGMM        Pointer to the GMM instance.
+ */
+static int gmmR0MutexAcquire(PGMM pGMM)
+{
+    ASMAtomicIncU32(&pGMM->cMtxContenders);
+    int rc = RTSemFastMutexRequest(pGMM->hMtx);
+    ASMAtomicDecU32(&pGMM->cMtxContenders);
+    AssertRC(rc);
+#ifdef VBOX_STRICT
+    pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+    return rc;
+}
+
+
+/**
+ * Releases the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRelease.
+ * @param   pGMM        Pointer to the GMM instance.
+ */
+static int gmmR0MutexRelease(PGMM pGMM)
+{
+#ifdef VBOX_STRICT
+    pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+    int rc = RTSemFastMutexRelease(pGMM->hMtx);
+    AssertRC(rc);
+    return rc;
+}
+
+
+/**
+ * Yields the GMM giant lock if there is contention and a certain minimum time
+ * has elapsed since we took it.
+ *
+ * @returns @c true if the mutex was yielded, @c false if not.
+ * @param   pGMM            Pointer to the GMM instance.
+ * @param   puLockNanoTS    Where the lock acquisition time stamp is kept
+ *                          (in/out).
+ */
+static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
+{
+    /*
+     * If nobody is contending the mutex, don't bother checking the time.
+     */
+    if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
+        return false;
+
+    /*
+     * Don't yield if we haven't executed for at least 2 milliseconds.
+     */
+    uint64_t uNanoNow = RTTimeSystemNanoTS();
+    if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
+        return false;
+
+    /*
+     * Yield the mutex.
+     */
+#ifdef VBOX_STRICT
+    pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+    ASMAtomicIncU32(&pGMM->cMtxContenders);
+    int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
+
+    RTThreadYield();
+
+    int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
+    *puLockNanoTS = RTTimeSystemNanoTS();
+    ASMAtomicDecU32(&pGMM->cMtxContenders);
+#ifdef VBOX_STRICT
+    pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+
+    return true;
+}
+
+
+/**
  * Cleans up when a VM is terminating.
  *
@@ -809,12 +925,15 @@
     GMM_GET_VALID_INSTANCE_VOID(pGMM);
 
-    int rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+#ifdef VBOX_WITH_PAGE_SHARING
+    /*
+     * Clean up all registered shared modules first.
+     */
+    gmmR0SharedModuleCleanup(pGMM, pGVM);
+#endif
+
+    int rc = RTSemFastMutexRequest(pGMM->hMtxCleanup); AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
+    uint64_t uLockNanoTS = RTTimeSystemNanoTS();
     GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
-
-#ifdef VBOX_WITH_PAGE_SHARING
-    /* Clean up all registered shared modules. */
-    RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
-#endif
 
     /*
@@ -865,31 +984,71 @@
             /*
              * Walk the entire pool looking for pages that belong to this VM
-             * and left over mappings. (This'll only catch private pages, shared
-             * pages will be 'left behind'.)
+             * and left over mappings.  (This'll only catch private pages,
+             * shared pages will be 'left behind'.)
              */
-            /** @todo this might be kind of expensive with a lot of VMs and
-             *   memory hanging around... */
-            uint64_t cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
-            RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0CleanupVMScanChunk, pGVM);
+            uint64_t    cPrivatePages = pGVM->gmm.s.cPrivatePages; /* save */
+
+            unsigned    iCountDown = 64;
+            bool        fRedoFromStart;
+            PGMMCHUNK   pChunk;
+            do
+            {
+                fRedoFromStart = false;
+                RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
+                {
+                    if (   !gmmR0CleanupVMScanChunk(pGVM, pChunk)
+                        || iCountDown != 0)
+                        iCountDown--;
+                    else
+                    {
+                        iCountDown = 64;
+                        uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
+                        fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
+                                      && pGMM->cFreedChunks != cFreeChunksOld;
+                        if (fRedoFromStart)
+                            break;
+                    }
+                }
+            } while (fRedoFromStart);
+
             if (pGVM->gmm.s.cPrivatePages)
                 SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.cPrivatePages);
+
             pGMM->cAllocatedPages -= cPrivatePages;
 
-            /* free empty chunks. */
-            if (cPrivatePages)
+            /*
+             * Free empty chunks.
+             */
+            do
             {
-                PGMMCHUNK pCur = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
-                while (pCur)
+                iCountDown = 10240;
+                pChunk = pGMM->Private.apLists[RT_ELEMENTS(pGMM->Private.apLists) - 1];
+                while (pChunk)
                 {
-                    PGMMCHUNK pNext = pCur->pFreeNext;
-                    if (    pCur->cFree == GMM_CHUNK_NUM_PAGES
+                    PGMMCHUNK pNext = pChunk->pFreeNext;
+                    if (    pChunk->cFree == GMM_CHUNK_NUM_PAGES
                         &&  (   !pGMM->fBoundMemoryMode
-                             || pCur->hGVM == pGVM->hSelf))
-                        gmmR0FreeChunk(pGMM, pGVM, pCur);
-                    pCur = pNext;
+                             || pChunk->hGVM == pGVM->hSelf))
+                    {
+                        gmmR0FreeChunk(pGMM, pGVM, pChunk);
+                        iCountDown = 1;
+                    }
+                    pChunk = pNext;
+
+                    if (--iCountDown == 0)
+                    {
+                        uint64_t const idGenerationOld = pGMM->Private.idGeneration;
+                        fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
+                                      && pGMM->Private.idGeneration != idGenerationOld;
+                        if (fRedoFromStart)
+                            break;
+                        iCountDown = 10240;
+                    }
                 }
-            }
-
-            /* account for shared pages that weren't freed. */
+            } while (fRedoFromStart);
+
+            /*
+             * Account for shared pages that weren't freed.
+             */
             if (pGVM->gmm.s.cSharedPages)
             {
@@ -899,5 +1058,7 @@
             }
 
-            /* Clean up balloon statistics in case the VM process crashed. */
+            /*
+             * Clean up balloon statistics in case the VM process crashed.
+             */
             Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.cBalloonedPages);
             pGMM->cBalloonedPages -= pGVM->gmm.s.cBalloonedPages;
@@ -926,5 +1087,6 @@
 
     GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
+    RTSemFastMutexRelease(pGMM->hMtxCleanup);
 
     LogFlow(("GMMR0CleanupVM: returns\n"));
@@ -933,15 +1095,12 @@
 
 /**
- * RTAvlU32DoWithAll callback.
- *
- * @returns 0
- * @param   pNode   The node to search.
- * @param   pvGVM   Pointer to the shared VM structure.
- */
-static DECLCALLBACK(int) gmmR0CleanupVMScanChunk(PAVLU32NODECORE pNode, void *pvGVM)
-{
-    PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
-    PGVM pGVM = (PGVM)pvGVM;
-
+ * Scan one chunk for private pages belonging to the specified VM.
+ *
+ * @returns @c true if a mapping was found (and freed), @c false if not.
+ * @param   pGVM        The global VM handle.
+ * @param   pChunk      The chunk to scan.
+ */
+static bool gmmR0CleanupVMScanChunk(PGVM pGVM, PGMMCHUNK pChunk)
+{
     /*
      * Look for pages belonging to the VM.
@@ -1007,16 +1166,20 @@
 
     /*
-     * Look for the mapping belonging to the terminating VM.
-     */
-    for (unsigned i = 0; i < pChunk->cMappings; i++)
+     * Look for a mapping belonging to the terminating VM.
+     */
+    unsigned cMappings = pChunk->cMappings;
+    bool fMappingFreed = true;
+    for (unsigned i = 0; i < cMappings; i++)
         if (pChunk->paMappings[i].pGVM == pGVM)
         {
             RTR0MEMOBJ MemObj = pChunk->paMappings[i].MapObj;
 
-            pChunk->cMappings--;
-            if (i < pChunk->cMappings)
-                 pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
-            pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
-            pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
+            cMappings--;
+            if (i < cMappings)
+                 pChunk->paMappings[i] = pChunk->paMappings[cMappings];
+            pChunk->paMappings[cMappings].pGVM   = NULL;
+            pChunk->paMappings[cMappings].MapObj = NIL_RTR0MEMOBJ;
+            Assert(pChunk->cMappings - 1U == cMappings);
+            pChunk->cMappings = cMappings;
 
             int rc = RTR0MemObjFree(MemObj, false /* fFreeMappings (NA) */);
@@ -1027,4 +1190,5 @@
                 AssertRC(rc);
             }
+            fMappingFreed = true;
             break;
         }
@@ -1050,5 +1214,5 @@
     }
 
-    return 0;
+    return fMappingFreed;
 }
 
@@ -1146,6 +1310,5 @@
     AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -1180,5 +1343,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
     return rc;
@@ -1242,6 +1405,5 @@
     AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -1275,5 +1437,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
     return rc;
@@ -1449,4 +1611,5 @@
     {
         pSet->cFreePages -= pChunk->cFree;
+        pSet->idGeneration++;
 
         PGMMCHUNK pPrev = pChunk->pFreePrev;
@@ -1497,4 +1660,5 @@
 
         pSet->cFreePages += pChunk->cFree;
+        pSet->idGeneration++;
     }
 }
@@ -1614,4 +1778,5 @@
             {
                 pGMM->cChunks++;
+                RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
                 gmmR0LinkChunk(pChunk, pSet);
                 LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
@@ -1662,5 +1827,5 @@
 
     /* Leave the lock temporarily as the allocation might take long. */
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     if (enmChunkType == GMMCHUNKTYPE_NON_CONTINUOUS)
         rc = RTR0MemObjAllocPhysNC(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
@@ -1668,6 +1833,5 @@
         rc = RTR0MemObjAllocPhysEx(&MemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
 
-    /* Grab the lock again. */
-    int rc2 = RTSemFastMutexRequest(pGMM->Mtx);
+    int rc2 = gmmR0MutexAcquire(pGMM);
     AssertRCReturn(rc2, rc2);
 
@@ -2079,6 +2243,5 @@
     }
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -2198,5 +2361,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
     return rc;
@@ -2258,6 +2421,5 @@
     }
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -2287,5 +2449,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
     return rc;
@@ -2317,4 +2479,5 @@
     return GMMR0AllocatePages(pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
 }
+
 
 /**
@@ -2360,6 +2523,5 @@
     *pIdPage = NIL_GMM_PAGEID;
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRCReturn(rc, rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -2372,5 +2534,5 @@
             Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
                  pGVM->gmm.s.Reserved.cBasePages, pGVM->gmm.s.Allocated.cBasePages, cPages));
-            RTSemFastMutexRelease(pGMM->Mtx);
+            gmmR0MutexRelease(pGMM);
             return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
         }
@@ -2380,5 +2542,5 @@
         if (RT_FAILURE(rc))
         {
-            RTSemFastMutexRelease(pGMM->Mtx);
+            gmmR0MutexRelease(pGMM);
             return rc;
         }
@@ -2406,5 +2568,5 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
     return rc;
@@ -2438,6 +2600,5 @@
         return VERR_NOT_SUPPORTED;
 
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -2447,24 +2608,24 @@
         {
             Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Allocated.cBasePages, cPages));
-            RTSemFastMutexRelease(pGMM->Mtx);
+            gmmR0MutexRelease(pGMM);
             return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
         }
 
         PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
-        if (    RT_LIKELY(pPage)
-            &&  RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
-        {
-                PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
-                Assert(pChunk);
-                Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
-                Assert(pChunk->cPrivate > 0);
-
-                /* Release the memory immediately. */
-                gmmR0FreeChunk(pGMM, NULL, pChunk);
-
-                /* Update accounting. */
-                pGVM->gmm.s.Allocated.cBasePages -= cPages;
-                pGVM->gmm.s.cPrivatePages        -= cPages;
-                pGMM->cAllocatedPages            -= cPages;
+        if (RT_LIKELY(   pPage
+                      && GMM_PAGE_IS_PRIVATE(pPage)))
+        {
+            PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+            Assert(pChunk);
+            Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+            Assert(pChunk->cPrivate > 0);
+
+            /* Release the memory immediately. */
+            gmmR0FreeChunk(pGMM, NULL, pChunk);
+
+            /* Update accounting. */
+            pGVM->gmm.s.Allocated.cBasePages -= cPages;
+            pGVM->gmm.s.cPrivatePages        -= cPages;
+            pGMM->cAllocatedPages            -= cPages;
         }
         else
@@ -2474,5 +2635,5 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
     return rc;
@@ -2502,4 +2663,5 @@
 }
 
+
 /**
  * Frees a chunk, giving it back to the host OS.
@@ -2546,4 +2708,6 @@
              */
             gmmR0UnlinkChunk(pChunk);
+
+            RTListNodeRemove(&pChunk->ListNode);
 
             PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
@@ -2570,4 +2734,6 @@
 
             RTMemFree(pChunk);
+
+            pGMM->cFreedChunks++;
         }
         else
@@ -2658,4 +2824,5 @@
 
 #ifdef VBOX_WITH_PAGE_SHARING
+
 /**
  * Converts a private page to a shared page, the page is known to exist and be valid and such.
@@ -2688,4 +2855,5 @@
 }
 
+
 /**
  * Increase the use count of a shared page, the page is known to exist and be valid and such.
@@ -2706,5 +2874,6 @@
     pGVM->gmm.s.Allocated.cBasePages++;
 }
-#endif
+
+#endif /* VBOX_WITH_PAGE_SHARING */
 
 /**
@@ -2727,4 +2896,5 @@
     gmmR0FreePageWorker(pGMM, pChunk, idPage, pPage);
 }
+
 
 /**
@@ -2808,7 +2978,5 @@
                 Assert(pPage->Shared.cRefs);
                 if (!--pPage->Shared.cRefs)
-                {
                     gmmR0FreeSharedPage(pGMM, idPage, pPage);
-                }
                 else
                 {
@@ -2894,6 +3062,5 @@
      * Take the semaphore and call the worker function.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -2903,5 +3070,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
     return rc;
@@ -2978,6 +3145,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -3071,5 +3237,5 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
     return rc;
@@ -3164,6 +3330,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -3176,8 +3341,9 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
     return rc;
 }
+
 
 /**
@@ -3196,5 +3362,6 @@
          * Find the mapping and try unmapping it.
          */
-        for (uint32_t i = 0; i < pChunk->cMappings; i++)
+        uint32_t cMappings = pChunk->cMappings;
+        for (uint32_t i = 0; i < cMappings; i++)
         {
             Assert(pChunk->paMappings[i].pGVM && pChunk->paMappings[i].MapObj != NIL_RTR0MEMOBJ);
@@ -3206,9 +3373,11 @@
                 {
                     /* update the record. */
-                    pChunk->cMappings--;
-                    if (i < pChunk->cMappings)
-                        pChunk->paMappings[i] = pChunk->paMappings[pChunk->cMappings];
-                    pChunk->paMappings[pChunk->cMappings].MapObj = NIL_RTR0MEMOBJ;
-                    pChunk->paMappings[pChunk->cMappings].pGVM = NULL;
+                    cMappings--;
+                    if (i < cMappings)
+                        pChunk->paMappings[i] = pChunk->paMappings[cMappings];
+                    pChunk->paMappings[cMappings].MapObj = NIL_RTR0MEMOBJ;
+                    pChunk->paMappings[cMappings].pGVM   = NULL;
+                    Assert(pChunk->cMappings - 1U == cMappings);
+                    pChunk->cMappings = cMappings;
                 }
                 return rc;
@@ -3237,4 +3406,6 @@
 static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
 {
+    Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+
     /*
      * If we're in legacy mode this is simple.
@@ -3279,16 +3450,22 @@
     {
         /* reallocate the array? assumes few users per chunk (usually one). */
-        if (   pChunk->cMappings <= 3
-            || (pChunk->cMappings & 3) == 0)
-        {
-            unsigned cNewSize = pChunk->cMappings <= 3
-                              ? pChunk->cMappings + 1
-                              : pChunk->cMappings + 4;
+        unsigned iMapping = pChunk->cMappings;
+        if (   iMapping <= 3
+            || (iMapping & 3) == 0)
+        {
+            unsigned cNewSize = iMapping <= 3
+                              ? iMapping + 1
+                              : iMapping + 4;
             Assert(cNewSize < 4 || RT_ALIGN_32(cNewSize, 4) == cNewSize);
+            if (RT_UNLIKELY(cNewSize > UINT16_MAX))
+            {
+                rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
+                return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
+            }
+
             void *pvMappings = RTMemRealloc(pChunk->paMappings, cNewSize * sizeof(pChunk->paMappings[0]));
             if (RT_UNLIKELY(!pvMappings))
             {
-                rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */);
-                AssertRC(rc);
+                rc = RTR0MemObjFree(MapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
                 return VERR_NO_MEMORY;
             }
@@ -3297,7 +3474,8 @@
 
         /* insert new entry */
-        pChunk->paMappings[pChunk->cMappings].MapObj = MapObj;
-        pChunk->paMappings[pChunk->cMappings].pGVM   = pGVM;
-        pChunk->cMappings++;
+        pChunk->paMappings[iMapping].MapObj = MapObj;
+        pChunk->paMappings[iMapping].pGVM   = pGVM;
+        Assert(pChunk->cMappings == iMapping);
+        pChunk->cMappings = iMapping + 1;
 
         *ppvR3 = RTR0MemObjAddressR3(MapObj);
@@ -3387,6 +3565,5 @@
      * it it's limits, so, no problem here.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -3424,5 +3601,5 @@
     else
         rc = VERR_INTERNAL_ERROR_5;
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
 
     LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
@@ -3491,6 +3668,5 @@
     {
         /* Grab the lock. */
-        rc = RTSemFastMutexRequest(pGMM->Mtx);
-        AssertRC(rc);
+        rc = gmmR0MutexAcquire(pGMM);
         if (RT_SUCCESS(rc))
         {
@@ -3499,5 +3675,5 @@
              */
             rc = gmmR0RegisterChunk(pGMM, &pGMM->Private, MemObj, pGVM->hSelf, GMMCHUNKTYPE_NON_CONTINUOUS);
-            RTSemFastMutexRelease(pGMM->Mtx);
+            gmmR0MutexRelease(pGMM);
         }
 
@@ -3572,6 +3748,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -3713,5 +3888,5 @@
 
 end:
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     return rc;
 #else
@@ -3772,6 +3947,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -3829,5 +4003,5 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     return rc;
 #else
@@ -4027,8 +4201,9 @@
 }
 
-/**
- * RTAvlU32Destroy callback.
- *
- * @returns 0
+
+/**
+ * RTAvlGCPtrDestroy callback.
+ *
+ * @returns 0 or VERR_INTERNAL_ERROR.
  * @param   pNode   The node to destroy.
  * @param   pvGVM   The GVM handle.
@@ -4036,8 +4211,6 @@
 static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvGVM)
 {
-    PGVM pGVM = (PGVM)pvGVM;
-    PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
-    PGMM pGMM;
-    GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
+    PGVM                    pGVM   = (PGVM)pvGVM;
+    PGMMSHAREDMODULEPERVM   pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
 
     Assert(pRecVM->pGlobalModule || pRecVM->fCollision);
@@ -4045,5 +4218,5 @@
     {
         PGMMSHAREDMODULE pRec = pRecVM->pGlobalModule;
-        Assert(pRec);
+        AssertPtr(pRec);
         Assert(pRec->cUsers);
 
@@ -4052,9 +4225,11 @@
         if (pRec->cUsers == 0)
         {
-            for (unsigned i = 0; i < pRec->cRegions; i++)
+            for (uint32_t i = 0; i < pRec->cRegions; i++)
                 if (pRec->aRegions[i].paHCPhysPageID)
                     RTMemFree(pRec->aRegions[i].paHCPhysPageID);
 
             /* Remove from the tree and free memory. */
+            PGMM pGMM;
+            GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
             RTAvlGCPtrRemove(&pGMM->pGlobalSharedModuleTree, pRec->Core.Key);
             RTMemFree(pRec);
@@ -4063,4 +4238,24 @@
     RTMemFree(pRecVM);
     return 0;
+}
+
+
+/**
+ * Used by GMMR0CleanupVM to clean up shared modules.
+ *
+ * This is called without taking the GMM lock so that it can be yielded as
+ * needed here.
+ *
+ * @param   pGMM                The GMM handle.
+ * @param   pGVM                The global VM handle.
+ */
+static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
+{
+    gmmR0MutexAcquire(pGMM);
+    GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
+
+    RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, pGVM);
+
+    gmmR0MutexRelease(pGMM);
 }
 
@@ -4090,6 +4285,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -4103,5 +4297,5 @@
         rc = VERR_INTERNAL_ERROR_5;
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     return rc;
 #else
@@ -4159,6 +4353,5 @@
      * Take the semaphore and do some more validations.
      */
-    int rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
     if (!GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
         rc = VERR_INTERNAL_ERROR_5;
@@ -4183,5 +4376,5 @@
     GMM_GET_VALID_INSTANCE(pGMM, VERR_INTERNAL_ERROR);
 
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     return VINF_SUCCESS;
 }
@@ -4213,6 +4406,5 @@
      * Take the semaphore and do some more validations.
      */
-    rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    gmmR0MutexAcquire(pGMM);
 # endif
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
@@ -4237,5 +4429,5 @@
 
 # ifndef DEBUG_sandervl
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
 # endif
     return rc;
@@ -4326,6 +4518,5 @@
      * Take the semaphore and do some more validations.
      */
-    int rc = RTSemFastMutexRequest(pGMM->Mtx);
-    AssertRC(rc);
+    int rc = gmmR0MutexAcquire(pGMM);
     if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
     {
@@ -4372,5 +4563,5 @@
 
 end:
-    RTSemFastMutexRelease(pGMM->Mtx);
+    gmmR0MutexRelease(pGMM);
     return rc;
 }
