Index: /trunk/src/VBox/VMM/VMMR3/VM.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/VM.cpp	(revision 66095)
+++ /trunk/src/VBox/VMM/VMMR3/VM.cpp	(revision 66096)
@@ -2551,43 +2551,55 @@
      * wait for them to complete.
      */
-    /* Signal them. */
+    /* Signal them - in reverse order since EMT(0) waits for the others. */
     ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true);
     if (pUVM->pVM)
         VM_FF_SET(pUVM->pVM, VM_FF_CHECK_VM_STATE); /* Can't hurt... */
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
+    VMCPUID iCpu = pUVM->cCpus;
+    while (iCpu-- > 0)
     {
         VMR3NotifyGlobalFFU(pUVM, VMNOTIFYFF_FLAGS_DONE_REM);
-        RTSemEventSignal(pUVM->aCpus[i].vm.s.EventSemWait);
-    }
-
-    /* Wait for them. */
-    uint64_t    NanoTS = RTTimeNanoTS();
-    RTTHREAD    hSelf  = RTThreadSelf();
+        RTSemEventSignal(pUVM->aCpus[iCpu].vm.s.EventSemWait);
+    }
+
+    /* Wait for EMT(0), it in turn waits for the rest. */
     ASMAtomicUoWriteBool(&pUVM->vm.s.fTerminateEMT, true);
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
-    {
-        RTTHREAD hThread = pUVM->aCpus[i].vm.s.ThreadEMT;
-        if (    hThread != NIL_RTTHREAD
-            &&  hThread != hSelf)
+
+    RTTHREAD const hSelf = RTThreadSelf();
+    RTTHREAD hThread = pUVM->aCpus[0].vm.s.ThreadEMT;
+    if (   hThread != NIL_RTTHREAD
+        && hThread != hSelf)
+    {
+        int rc2 = RTThreadWait(hThread, RT_MAX(cMilliesEMTWait, 2000), NULL);
+        if (rc2 == VERR_TIMEOUT) /* avoid the assertion when debugging. */
+            rc2 = RTThreadWait(hThread, 1000, NULL);
+        AssertLogRelMsgRC(rc2, ("iCpu=0 rc=%Rrc\n", rc2));
+        if (RT_SUCCESS(rc2))
+            pUVM->aCpus[0].vm.s.ThreadEMT = NIL_RTTHREAD;
+    }
+
+    /* Just in case we're in a weird failure situation w/o EMT(0) to do the
+       waiting, wait for the other EMTs too. */
+    for (iCpu = 1; iCpu < pUVM->cCpus; iCpu++)
+    {
+        ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread);
+        if (hThread != NIL_RTTHREAD)
         {
-            uint64_t cMilliesElapsed = (RTTimeNanoTS() - NanoTS) / 1000000;
-            int rc2 = RTThreadWait(hThread,
-                                   cMilliesElapsed < cMilliesEMTWait
-                                   ? RT_MAX(cMilliesEMTWait - cMilliesElapsed, 2000)
-                                   : 2000,
-                                   NULL);
-            if (rc2 == VERR_TIMEOUT) /* avoid the assertion when debugging. */
-                rc2 = RTThreadWait(hThread, 1000, NULL);
-            AssertLogRelMsgRC(rc2, ("i=%u rc=%Rrc\n", i, rc2));
-            if (RT_SUCCESS(rc2))
-                pUVM->aCpus[0].vm.s.ThreadEMT = NIL_RTTHREAD;
+            if (hThread != hSelf)
+            {
+                int rc2 = RTThreadWait(hThread, 250 /*ms*/, NULL);
+                AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2));
+                if (RT_SUCCESS(rc2))
+                    continue;
+            }
+            pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread;
         }
     }
 
     /* Cleanup the semaphores. */
-    for (VMCPUID i = 0; i < pUVM->cCpus; i++)
-    {
-        RTSemEventDestroy(pUVM->aCpus[i].vm.s.EventSemWait);
-        pUVM->aCpus[i].vm.s.EventSemWait = NIL_RTSEMEVENT;
+    iCpu = pUVM->cCpus;
+    while (iCpu-- > 0)
+    {
+        RTSemEventDestroy(pUVM->aCpus[iCpu].vm.s.EventSemWait);
+        pUVM->aCpus[iCpu].vm.s.EventSemWait = NIL_RTSEMEVENT;
     }
 
Index: /trunk/src/VBox/VMM/VMMR3/VMEmt.cpp
===================================================================
--- /trunk/src/VBox/VMM/VMMR3/VMEmt.cpp	(revision 66095)
+++ /trunk/src/VBox/VMM/VMMR3/VMEmt.cpp	(revision 66096)
@@ -249,13 +249,27 @@
     Log(("vmR3EmulationThread: Terminating emulation thread! Thread=%#x pUVM=%p rc=%Rrc enmBefore=%d enmVMState=%d\n",
          hThreadSelf, pUVM, rc, enmBefore, pUVM->pVM ? pUVM->pVM->enmVMState : VMSTATE_TERMINATED));
+    PVM pVM;
     if (   idCpu == 0
-        && pUVM->pVM)
-    {
-        PVM pVM = pUVM->pVM;
+        && (pVM = pUVM->pVM) != NULL)
+    {
+        /* Wait up to 5 seconds for each other EMT to terminate before we destroy the VM (see vmR3DestroyVM). */
+        for (VMCPUID iCpu = 1; iCpu < pUVM->cCpus; iCpu++)
+        {
+            RTTHREAD hThread;
+            ASMAtomicXchgHandle(&pUVM->aCpus[iCpu].vm.s.ThreadEMT, NIL_RTTHREAD, &hThread); /* take the handle, leave NIL behind */
+            if (hThread != NIL_RTTHREAD)
+            {
+                int rc2 = RTThreadWait(hThread, 5 * RT_MS_1SEC, NULL); /* timeout is in milliseconds, not nanoseconds */
+                AssertLogRelMsgRC(rc2, ("iCpu=%u rc=%Rrc\n", iCpu, rc2));
+                if (RT_FAILURE(rc2))
+                    pUVM->aCpus[iCpu].vm.s.ThreadEMT = hThread; /* wait failed: put the handle back */
+            }
+        }
+
+        /* Switch to the terminated state, clearing the VM pointer and finally destroy the VM. */
         vmR3SetTerminated(pVM);
+
         pUVM->pVM = NULL;
 
-        /** @todo SMP: This isn't 100% safe. We should wait for the other
-         *        threads to finish before destroy the VM. */
         int rc2 = SUPR3CallVMMR0Ex(pVM->pVMR0, 0 /*idCpu*/, VMMR0_DO_GVMM_DESTROY_VM, 0, NULL);
         AssertLogRelRC(rc2);
