Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/common/wddm/VBoxMPIf.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/common/wddm/VBoxMPIf.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/common/wddm/VBoxMPIf.h	(revision 37626)
@@ -33,5 +33,29 @@
 
 /* One would increase this whenever definitions in this file are changed */
-#define VBOXVIDEOIF_VERSION 9
+#define VBOXVIDEOIF_VERSION 10
+
+#define VBOXWDDM_NODE_ID_SYSTEM           0 /* node ids are the values the context creation code stores in NodeOrdinal */
+#define VBOXWDDM_NODE_ID_3D               (VBOXWDDM_NODE_ID_SYSTEM) /* 3D contexts use the system node */
+#define VBOXWDDM_NODE_ID_3D_KMT           (VBOXWDDM_NODE_ID_3D) /* KMT contexts use the same node as 3D */
+#define VBOXWDDM_NODE_ID_2D_VIDEO         (VBOXWDDM_NODE_ID_3D_KMT+1) /* a node of its own, directly after the 3D ones */
+#define VBOXWDDM_NUM_NODES                (VBOXWDDM_NODE_ID_2D_VIDEO+1) /* total number of nodes (ids are 0-based) */
+/* Engine ids (the values stored in EngineAffinity) are derived from the node layout above. */
+#define VBOXWDDM_ENGINE_ID_SYSTEM         0
+#if (VBOXWDDM_NODE_ID_3D == VBOXWDDM_NODE_ID_SYSTEM)
+# define VBOXWDDM_ENGINE_ID_3D            (VBOXWDDM_ENGINE_ID_SYSTEM+1) /* shares the system node: use the engine after the system one */
+#else
+# define VBOXWDDM_ENGINE_ID_3D            0 /* own node: first engine */
+#endif
+#if (VBOXWDDM_NODE_ID_3D_KMT == VBOXWDDM_NODE_ID_3D)
+# define VBOXWDDM_ENGINE_ID_3D_KMT     VBOXWDDM_ENGINE_ID_3D /* shares the 3D node: same engine as 3D */
+#else
+# define VBOXWDDM_ENGINE_ID_3D_KMT     0 /* own node: first engine */
+#endif
+#if (VBOXWDDM_NODE_ID_2D_VIDEO == VBOXWDDM_NODE_ID_3D)
+# define VBOXWDDM_ENGINE_ID_2D_VIDEO       VBOXWDDM_ENGINE_ID_3D /* shares the 3D node: same engine as 3D */
+#else
+# define VBOXWDDM_ENGINE_ID_2D_VIDEO       0 /* own node: first engine */
+#endif
+
 
 /* create allocation func */
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispCm.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispCm.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispCm.cpp	(revision 37626)
@@ -119,6 +119,14 @@
     Info.u64UmInfo = (uint64_t)pContext;
 
-    pContext->ContextInfo.NodeOrdinal = 0;
-    pContext->ContextInfo.EngineAffinity = 0;
+    if (VBOXDISPMODE_IS_3D(pDevice->pAdapter))
+    {
+        pContext->ContextInfo.NodeOrdinal = VBOXWDDM_NODE_ID_3D;
+        pContext->ContextInfo.EngineAffinity = VBOXWDDM_ENGINE_ID_3D;
+    }
+    else
+    {
+        pContext->ContextInfo.NodeOrdinal = VBOXWDDM_NODE_ID_2D_VIDEO;
+        pContext->ContextInfo.EngineAffinity = VBOXWDDM_ENGINE_ID_2D_VIDEO;
+    }
     pContext->ContextInfo.Flags.Value = 0;
     pContext->ContextInfo.pPrivateDriverData = &Info;
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispD3D.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispD3D.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispD3D.cpp	(revision 37626)
@@ -890,4 +890,173 @@
 }
 #endif
+
+typedef struct VBOXWDDMDISP_NSCADD /* cursor state kept while allocation references are added to one command batch */
+{
+    VOID* pvCommandBuffer; /* current position in the command buffer, advanced as space is consumed */
+    UINT cbCommandBuffer; /* bytes still available in the command buffer */
+    D3DDDI_ALLOCATIONLIST* pAllocationList; /* next free allocation list entry */
+    UINT cAllocationList; /* allocation list entries still free */
+    D3DDDI_PATCHLOCATIONLIST* pPatchLocationList; /* next free patch location list entry */
+    UINT cPatchLocationList; /* patch location list entries still free */
+    UINT cAllocations; /* number of allocations added so far */
+}VBOXWDDMDISP_NSCADD, *PVBOXWDDMDISP_NSCADD;
+
+static HRESULT vboxWddmNSCAddAlloc(PVBOXWDDMDISP_NSCADD pData, PVBOXWDDMDISP_ALLOCATION pAlloc, BOOL bWrite)
+{
+    /* Appends pAlloc to the lists tracked by pData; S_FALSE means nothing was added because there is no room left. */
+    if (!pData->cAllocationList || !pData->cPatchLocationList || pData->cbCommandBuffer <= 4)
+        return S_FALSE;
+
+    /* Fill the next free allocation list entry. */
+    D3DDDI_ALLOCATIONLIST *pAllocEntry = pData->pAllocationList;
+    memset(pAllocEntry, 0, sizeof (*pAllocEntry));
+    pAllocEntry->hAllocation = pAlloc->hAllocation;
+    if (bWrite)
+        pAllocEntry->WriteOperation = 1;
+
+    /* Fill the matching patch location entry; each allocation is given its own 4-byte offset. */
+    D3DDDI_PATCHLOCATIONLIST *pPatchEntry = pData->pPatchLocationList;
+    memset(pPatchEntry, 0, sizeof (*pPatchEntry));
+    pPatchEntry->PatchOffset = pData->cAllocations * 4;
+    pPatchEntry->AllocationIndex = pData->cAllocations;
+
+    /* Move every cursor past what was just consumed and update the remaining-space counters. */
+    pData->pAllocationList = pAllocEntry + 1;
+    pData->pPatchLocationList = pPatchEntry + 1;
+    pData->pvCommandBuffer = (VOID*)(((uint8_t*)pData->pvCommandBuffer) + 4);
+    pData->cbCommandBuffer -= 4;
+    --pData->cAllocationList;
+    --pData->cPatchLocationList;
+    ++pData->cAllocations;
+    return S_OK;
+}
+
+static HRESULT vboxWddmDalNotifyChange(PVBOXWDDMDISP_DEVICE pDevice)
+{
+    /* Submits every allocation on pDevice->DirtyAllocList to the runtime, batched into NOP commands (pfnRenderCb).
+     * Returns S_OK on success, E_FAIL if the command buffer cannot hold the header, else the pfnRenderCb status. */
+    VBOXWDDMDISP_NSCADD NscAdd;
+    BOOL bReinitRenderData = TRUE;
+    HRESULT hr = S_OK;
+
+    do
+    {
+        if (bReinitRenderData)
+        {
+            NscAdd.pvCommandBuffer = pDevice->DefaultContext.ContextInfo.pCommandBuffer;
+            NscAdd.cbCommandBuffer = pDevice->DefaultContext.ContextInfo.CommandBufferSize;
+            NscAdd.pAllocationList = pDevice->DefaultContext.ContextInfo.pAllocationList;
+            NscAdd.cAllocationList = pDevice->DefaultContext.ContextInfo.AllocationListSize;
+            NscAdd.pPatchLocationList = pDevice->DefaultContext.ContextInfo.pPatchLocationList;
+            NscAdd.cPatchLocationList = pDevice->DefaultContext.ContextInfo.PatchLocationListSize;
+            NscAdd.cAllocations = 0;
+            Assert(NscAdd.cbCommandBuffer >= sizeof (VBOXWDDM_DMA_PRIVATEDATA_BASEHDR));
+            if (NscAdd.cbCommandBuffer < sizeof (VBOXWDDM_DMA_PRIVATEDATA_BASEHDR))
+                return E_FAIL;
+
+            /* each batch starts with a NOP command header */
+            PVBOXWDDM_DMA_PRIVATEDATA_BASEHDR pHdr = (PVBOXWDDM_DMA_PRIVATEDATA_BASEHDR)NscAdd.pvCommandBuffer;
+            pHdr->enmCmd = VBOXVDMACMD_TYPE_DMA_NOP;
+            NscAdd.pvCommandBuffer = (VOID*)(((uint8_t*)NscAdd.pvCommandBuffer) + sizeof (*pHdr));
+            NscAdd.cbCommandBuffer -= sizeof (*pHdr);
+            bReinitRenderData = FALSE;
+        }
+
+        EnterCriticalSection(&pDevice->DirtyAllocListLock);
+        PVBOXWDDMDISP_ALLOCATION pAlloc = RTListGetFirst(&pDevice->DirtyAllocList, VBOXWDDMDISP_ALLOCATION, DirtyAllocListEntry);
+        if (pAlloc)
+        {
+            HRESULT tmpHr = vboxWddmNSCAddAlloc(&NscAdd, pAlloc, TRUE);
+            Assert(tmpHr == S_OK || tmpHr == S_FALSE);
+            if (tmpHr == S_OK)
+            {
+                RTListNodeRemove(&pAlloc->DirtyAllocListEntry);
+                LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+                continue;
+            }
+            /* no room left in this batch: submit it, pAlloc stays on the list for the next one */
+            LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+        }
+        else
+        {
+            LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+            if (!NscAdd.cAllocations)
+                break; /* list is empty and no batch is pending */
+        }
+
+        D3DDDICB_RENDER RenderData = {0};
+        RenderData.CommandLength = pDevice->DefaultContext.ContextInfo.CommandBufferSize - NscAdd.cbCommandBuffer;
+        Assert(RenderData.CommandLength);
+        Assert(RenderData.CommandLength < UINT32_MAX/2);
+        RenderData.CommandOffset = 0;
+        RenderData.NumAllocations = pDevice->DefaultContext.ContextInfo.AllocationListSize - NscAdd.cAllocationList;
+        Assert(RenderData.NumAllocations == NscAdd.cAllocations);
+        RenderData.NumPatchLocations = pDevice->DefaultContext.ContextInfo.PatchLocationListSize - NscAdd.cPatchLocationList;
+        Assert(RenderData.NumPatchLocations == NscAdd.cAllocations);
+//        RenderData.NewCommandBufferSize = sizeof (VBOXVDMACMD) + 4 * (100);
+//        RenderData.NewAllocationListSize = 100;
+//        RenderData.NewPatchLocationListSize = 100;
+        RenderData.hContext = pDevice->DefaultContext.ContextInfo.hContext;
+
+        hr = pDevice->RtCallbacks.pfnRenderCb(pDevice->hDevice, &RenderData);
+        Assert(hr == S_OK);
+        if (hr != S_OK)
+            break; /* hr is returned below so the caller can see that the submission failed */
+        /* take over the buffers reported back in RenderData for the next batch */
+        pDevice->DefaultContext.ContextInfo.CommandBufferSize = RenderData.NewCommandBufferSize;
+        pDevice->DefaultContext.ContextInfo.pCommandBuffer = RenderData.pNewCommandBuffer;
+        pDevice->DefaultContext.ContextInfo.AllocationListSize = RenderData.NewAllocationListSize;
+        pDevice->DefaultContext.ContextInfo.pAllocationList = RenderData.pNewAllocationList;
+        pDevice->DefaultContext.ContextInfo.PatchLocationListSize = RenderData.NewPatchLocationListSize;
+        pDevice->DefaultContext.ContextInfo.pPatchLocationList = RenderData.pNewPatchLocationList;
+        bReinitRenderData = TRUE;
+    } while (1);
+
+    return hr;
+}
+
+static BOOLEAN vboxWddmDalCheckAdd(PVBOXWDDMDISP_DEVICE pDevice, PVBOXWDDMDISP_ALLOCATION pAlloc)
+{
+    /* Puts pAlloc on the device's dirty allocation list when its resource is flagged as shared.
+     * Returns TRUE for shared resources (whether newly queued or already queued), FALSE otherwise. */
+    if (pAlloc->pRc->RcDesc.fFlags.SharedResource)
+    {
+        EnterCriticalSection(&pDevice->DirtyAllocListLock);
+        /* a NULL pNext is treated as "not linked yet", so an allocation is appended at most once */
+        if (pAlloc->DirtyAllocListEntry.pNext == NULL)
+            RTListAppend(&pDevice->DirtyAllocList, &pAlloc->DirtyAllocListEntry);
+        LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+        return TRUE;
+    }
+
+    Assert(!pAlloc->DirtyAllocListEntry.pNext);
+    return FALSE;
+}
+
+static VOID vboxWddmDalCheckAddRts(PVBOXWDDMDISP_DEVICE pDevice)
+{
+    /* Calls vboxWddmDalCheckAdd for every non-NULL entry among the first cRTs slots of pDevice->apRTs. */
+    for (UINT iRt = 0; iRt < pDevice->cRTs; ++iRt)
+    {
+        PVBOXWDDMDISP_ALLOCATION pRtAlloc = pDevice->apRTs[iRt];
+        if (pRtAlloc)
+            vboxWddmDalCheckAdd(pDevice, pRtAlloc);
+    }
+}
+
+static BOOLEAN vboxWddmDalCheckRemove(PVBOXWDDMDISP_DEVICE pDevice, PVBOXWDDMDISP_ALLOCATION pAlloc)
+{
+    /* Unlinks pAlloc from the device's dirty allocation list if it is linked; returns TRUE when it was. */
+    EnterCriticalSection(&pDevice->DirtyAllocListLock);
+
+    /* a non-NULL pNext is treated as "currently on the list" */
+    const BOOLEAN fWasDirty = pAlloc->DirtyAllocListEntry.pNext ? TRUE : FALSE;
+    if (fWasDirty)
+        RTListNodeRemove(&pAlloc->DirtyAllocListEntry);
+
+    LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+
+    return fWasDirty;
+}
 
 #ifdef VBOX_WITH_VIDEOHWACCEL
@@ -3514,211 +3683,4 @@
 
 //        vboxVDbgMpPrintF((pDevice, __FUNCTION__": DrawPrimitive\n"));
-#if 0
-        IDirect3DVertexDeclaration9* pDecl;
-        hr = pDevice9If->GetVertexDeclaration(&pDecl);
-        Assert(hr == S_OK);
-        if (hr == S_OK)
-        {
-            Assert(pDecl);
-            D3DVERTEXELEMENT9 aDecls9[MAXD3DDECLLENGTH];
-            UINT cDecls9 = 0;
-            hr = pDecl->GetDeclaration(aDecls9, &cDecls9);
-            Assert(hr == S_OK);
-            if (hr == S_OK)
-            {
-                Assert(cDecls9);
-                for (UINT i = 0; i < cDecls9 - 1 /* the last one is D3DDECL_END */; ++i)
-                {
-                    D3DVERTEXELEMENT9 *pDecl9 = &aDecls9[i];
-                    Assert(pDecl9->Stream < RT_ELEMENTS(pDevice->aStreamSourceUm) || pDecl9->Stream == 0xff);
-                    if (pDecl9->Stream != 0xff)
-                    {
-                        PVBOXWDDMDISP_STREAMSOURCEUM pStrSrc = &pDevice->aStreamSourceUm[pDecl9->Stream];
-                        if (pStrSrc->pvBuffer)
-                        {
-                            WORD iStream = pDecl9->Stream;
-                            D3DVERTEXELEMENT9 *pLastCDecl9 = pDecl9;
-                            for (UINT j = i+1; j < cDecls9 - 1 /* the last one is D3DDECL_END */; ++j)
-                            {
-                                pDecl9 = &aDecls9[j];
-                                if (iStream == pDecl9->Stream)
-                                {
-                                    pDecl9->Stream = 0xff; /* mark as done */
-                                    Assert(pDecl9->Offset != pLastCDecl9->Offset);
-                                    if (pDecl9->Offset > pLastCDecl9->Offset)
-                                        pLastCDecl9 = pDecl9;
-                                }
-                            }
-                            /* vertex size is MAX(all Offset's) + sizeof (data_type with MAX offset) + stride*/
-                            UINT cbVertex = pLastCDecl9->Offset + pStrSrc->cbStride;
-                            UINT cbType;
-                            switch (pLastCDecl9->Type)
-                            {
-                                case D3DDECLTYPE_FLOAT1:
-                                    cbType = sizeof (float);
-                                    break;
-                                case D3DDECLTYPE_FLOAT2:
-                                    cbType = sizeof (float) * 2;
-                                    break;
-                                case D3DDECLTYPE_FLOAT3:
-                                    cbType = sizeof (float) * 3;
-                                    break;
-                                case D3DDECLTYPE_FLOAT4:
-                                    cbType = sizeof (float) * 4;
-                                    break;
-                                case D3DDECLTYPE_D3DCOLOR:
-                                    cbType = 4;
-                                    break;
-                                case D3DDECLTYPE_UBYTE4:
-                                    cbType = 4;
-                                    break;
-                                case D3DDECLTYPE_SHORT2:
-                                    cbType = sizeof (short) * 2;
-                                    break;
-                                case D3DDECLTYPE_SHORT4:
-                                    cbType = sizeof (short) * 4;
-                                    break;
-                                case D3DDECLTYPE_UBYTE4N:
-                                    cbType = 4;
-                                    break;
-                                case D3DDECLTYPE_SHORT2N:
-                                    cbType = sizeof (short) * 2;
-                                    break;
-                                case D3DDECLTYPE_SHORT4N:
-                                    cbType = sizeof (short) * 4;
-                                    break;
-                                case D3DDECLTYPE_USHORT2N:
-                                    cbType = sizeof (short) * 2;
-                                    break;
-                                case D3DDECLTYPE_USHORT4N:
-                                    cbType = sizeof (short) * 4;
-                                    break;
-                                case D3DDECLTYPE_UDEC3:
-                                    cbType = sizeof (signed) * 3;
-                                    break;
-                                case D3DDECLTYPE_DEC3N:
-                                    cbType = sizeof (unsigned) * 3;
-                                    break;
-                                case D3DDECLTYPE_FLOAT16_2:
-                                    cbType = 2 * 2;
-                                    break;
-                                case D3DDECLTYPE_FLOAT16_4:
-                                    cbType = 2 * 4;
-                                    break;
-                                default:
-                                    Assert(0);
-                                    cbType = 1;
-                            }
-                            cbVertex += cbType;
-
-                            UINT cVertexes;
-                            switch (pData->PrimitiveType)
-                            {
-                                case D3DPT_POINTLIST:
-                                    cVertexes = pData->PrimitiveCount;
-                                    break;
-                                case D3DPT_LINELIST:
-                                    cVertexes = pData->PrimitiveCount * 2;
-                                    break;
-                                case D3DPT_LINESTRIP:
-                                    cVertexes = pData->PrimitiveCount + 1;
-                                    break;
-                                case D3DPT_TRIANGLELIST:
-                                    cVertexes = pData->PrimitiveCount * 3;
-                                    break;
-                                case D3DPT_TRIANGLESTRIP:
-                                    cVertexes = pData->PrimitiveCount + 2;
-                                    break;
-                                case D3DPT_TRIANGLEFAN:
-                                    cVertexes = pData->PrimitiveCount + 2;
-                                    break;
-                                default:
-                                    Assert(0);
-                                    cVertexes = pData->PrimitiveCount;
-                            }
-                            UINT cbVertexes = cVertexes * cbVertex;
-                            IDirect3DVertexBuffer9 *pCurVb = NULL, *pVb = NULL;
-                            UINT cbOffset;
-                            UINT cbStride;
-                            hr = pDevice9If->GetStreamSource(iStream, &pCurVb, &cbOffset, &cbStride);
-                            Assert(hr == S_OK);
-                            if (hr == S_OK)
-                            {
-                                if (pCurVb)
-                                {
-                                    if (cbStride == pStrSrc->cbStride)
-                                    {
-                                        /* ensure our data feets in the buffer */
-                                        D3DVERTEXBUFFER_DESC Desc;
-                                        hr = pCurVb->GetDesc(&Desc);
-                                        Assert(hr == S_OK);
-                                        if (hr == S_OK)
-                                        {
-                                            if (Desc.Size >= cbVertexes)
-                                                pVb = pCurVb;
-                                        }
-                                    }
-                                }
-                            }
-                            else
-                            {
-                                pCurVb = NULL;
-                            }
-
-                            if (!pVb)
-                            {
-                                hr = pDevice9If->CreateVertexBuffer(cbVertexes,
-                                        0, /* DWORD Usage */
-                                        0, /* DWORD FVF */
-                                        D3DPOOL_DEFAULT, /* D3DPOOL Pool */
-                                        &pVb,
-                                        NULL /*HANDLE* pSharedHandle*/);
-                                Assert(hr == S_OK);
-                                if (hr == S_OK)
-                                {
-                                    hr = pDevice9If->SetStreamSource(iStream, pVb, 0, pStrSrc->cbStride);
-                                    Assert(hr == S_OK);
-                                    if (hr == S_OK)
-                                    {
-                                        if (pCurVb)
-                                            pCurVb->Release();
-                                    }
-                                    else
-                                    {
-                                        pVb->Release();
-                                        pVb = NULL;
-                                    }
-                                }
-                            }
-
-                            if (pVb)
-                            {
-                                Assert(hr == S_OK);
-                                VOID *pvData;
-                                hr = pVb->Lock(0, /* UINT OffsetToLock */
-                                        cbVertexes,
-                                        &pvData,
-                                        D3DLOCK_DISCARD);
-                                Assert(hr == S_OK);
-                                if (hr == S_OK)
-                                {
-                                    memcpy (pvData, ((uint8_t*)pStrSrc->pvBuffer) + pData->VStart * cbVertex, cbVertexes);
-                                    HRESULT tmpHr = pVb->Unlock();
-                                    Assert(tmpHr == S_OK);
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            if (hr == S_OK)
-            {
-                hr = pDevice9If->DrawPrimitive(pData->PrimitiveType,
-                        0 /* <- since we use our own StreamSource buffer which has data at the very beginning*/,
-                        pData->PrimitiveCount);
-                Assert(hr == S_OK);
-            }
-        }
-#endif
     }
 
@@ -3898,10 +3860,10 @@
 {
     VBOXDISPPROFILE_FUNCTION_DDI_PROLOGUE();
+    vboxVDbgPrintF(("==> "__FUNCTION__", hDevice(0x%p)\n", hDevice));
+    PVBOXWDDMDISP_DEVICE pDevice = (PVBOXWDDMDISP_DEVICE)hDevice;
+    Assert(pDevice);
+    VBOXDISPCRHGSMI_SCOPE_SET_DEV(pDevice);
+    Assert(0);
     vboxVDbgPrintF(("<== "__FUNCTION__", hDevice(0x%p)\n", hDevice));
-    PVBOXWDDMDISP_DEVICE pDevice = (PVBOXWDDMDISP_DEVICE)hDevice;
-    Assert(pDevice);
-    VBOXDISPCRHGSMI_SCOPE_SET_DEV(pDevice);
-    Assert(0);
-    vboxVDbgPrintF(("==> "__FUNCTION__", hDevice(0x%p)\n", hDevice));
     return E_FAIL;
 }
@@ -3991,4 +3953,9 @@
             pDstSurfIf->Release();
         }
+    }
+
+    for (UINT i = 0; i < pDstRc->cAllocations; ++i)
+    {
+        PVBOXWDDMDISP_ALLOCATION pDAlloc = &pDstRc->aAllocations[i];
     }
 
@@ -5521,8 +5488,5 @@
             }
 
-            EnterCriticalSection(&pDevice->DirtyAllocListLock);
-            if (pAlloc->DirtyAllocListEntry.pNext)
-                RTListNodeRemove(&pAlloc->DirtyAllocListEntry);
-            LeaveCriticalSection(&pDevice->DirtyAllocListLock);
+            vboxWddmDalCheckRemove(pDevice, pAlloc);
         }
     }
@@ -5657,5 +5621,4 @@
 #ifdef VBOXWDDM_TEST_UHGSMI
         {
-//            Assert(0);
             static uint32_t cCals = 100000;
             static uint32_t cbData = 8 * 1024 * 1024;
@@ -5663,6 +5626,4 @@
             int rc = vboxUhgsmiTst(&pDevice->Uhgsmi.Base, cbData, cCals, &TimeMs);
             uint32_t cCPS = (((uint64_t)cCals) * 1000ULL)/TimeMs;
-//            Assert(0);
-//            vboxVDbgDoMpPrintF(pDevice, "Time : %I64u ms, calls: %d, cps: %d\n", TimeMs, cCals, cCPS);
         }
 #endif
@@ -5778,129 +5739,4 @@
 }
 
-typedef struct VBOXWDDMDISP_NSCADD
-{
-    VOID* pvCommandBuffer;
-    UINT cbCommandBuffer;
-    D3DDDI_ALLOCATIONLIST* pAllocationList;
-    UINT cAllocationList;
-    D3DDDI_PATCHLOCATIONLIST* pPatchLocationList;
-    UINT cPatchLocationList;
-    UINT cAllocations;
-}VBOXWDDMDISP_NSCADD, *PVBOXWDDMDISP_NSCADD;
-
-static HRESULT vboxWddmNSCAddAlloc(PVBOXWDDMDISP_NSCADD pData, PVBOXWDDMDISP_ALLOCATION pAlloc, BOOL bWrite)
-{
-    HRESULT hr = S_OK;
-    if (pData->cAllocationList && pData->cPatchLocationList && pData->cbCommandBuffer > 4)
-    {
-        memset(pData->pAllocationList, 0, sizeof (D3DDDI_ALLOCATIONLIST));
-        pData->pAllocationList[0].hAllocation = pAlloc->hAllocation;
-        if (bWrite)
-            pData->pAllocationList[0].WriteOperation = 1;
-
-        memset(pData->pPatchLocationList, 0, sizeof (D3DDDI_PATCHLOCATIONLIST));
-        pData->pPatchLocationList[0].PatchOffset = pData->cAllocations*4;
-        pData->pPatchLocationList[0].AllocationIndex = pData->cAllocations;
-
-        pData->cbCommandBuffer -= 4;
-        --pData->cAllocationList;
-        --pData->cPatchLocationList;
-        ++pData->cAllocations;
-
-        ++pData->pAllocationList;
-        ++pData->pPatchLocationList;
-        pData->pvCommandBuffer = (VOID*)(((uint8_t*)pData->pvCommandBuffer) + 4);
-
-    }
-    else
-        hr = S_FALSE;
-
-    return hr;
-}
-
-static HRESULT vboxWddmNotifySharedChange(PVBOXWDDMDISP_DEVICE pDevice)
-{
-    VBOXWDDMDISP_NSCADD NscAdd;
-    BOOL bReinitRenderData = TRUE;
-
-    do
-    {
-        if (bReinitRenderData)
-        {
-            NscAdd.pvCommandBuffer = pDevice->DefaultContext.ContextInfo.pCommandBuffer;
-            NscAdd.cbCommandBuffer = pDevice->DefaultContext.ContextInfo.CommandBufferSize;
-            NscAdd.pAllocationList = pDevice->DefaultContext.ContextInfo.pAllocationList;
-            NscAdd.cAllocationList = pDevice->DefaultContext.ContextInfo.AllocationListSize;
-            NscAdd.pPatchLocationList = pDevice->DefaultContext.ContextInfo.pPatchLocationList;
-            NscAdd.cPatchLocationList = pDevice->DefaultContext.ContextInfo.PatchLocationListSize;
-            NscAdd.cAllocations = 0;
-            Assert(NscAdd.cbCommandBuffer >= sizeof (VBOXWDDM_DMA_PRIVATEDATA_BASEHDR));
-            if (NscAdd.cbCommandBuffer < sizeof (VBOXWDDM_DMA_PRIVATEDATA_BASEHDR))
-                return E_FAIL;
-
-            PVBOXWDDM_DMA_PRIVATEDATA_BASEHDR pHdr = (PVBOXWDDM_DMA_PRIVATEDATA_BASEHDR)NscAdd.pvCommandBuffer;
-            pHdr->enmCmd = VBOXVDMACMD_TYPE_DMA_NOP;
-            NscAdd.pvCommandBuffer = (VOID*)(((uint8_t*)NscAdd.pvCommandBuffer) + sizeof (*pHdr));
-            NscAdd.cbCommandBuffer -= sizeof (*pHdr);
-            bReinitRenderData = FALSE;
-        }
-
-        EnterCriticalSection(&pDevice->DirtyAllocListLock);
-
-        PVBOXWDDMDISP_ALLOCATION pAlloc = RTListGetFirst(&pDevice->DirtyAllocList, VBOXWDDMDISP_ALLOCATION, DirtyAllocListEntry);
-        if (pAlloc)
-        {
-            HRESULT tmpHr = vboxWddmNSCAddAlloc(&NscAdd, pAlloc, TRUE);
-            Assert(tmpHr == S_OK || tmpHr == S_FALSE);
-            if (tmpHr == S_OK)
-            {
-                RTListNodeRemove(&pAlloc->DirtyAllocListEntry);
-                LeaveCriticalSection(&pDevice->DirtyAllocListLock);
-                continue;
-            }
-
-            LeaveCriticalSection(&pDevice->DirtyAllocListLock);
-
-        }
-        else
-        {
-            LeaveCriticalSection(&pDevice->DirtyAllocListLock);
-            if (!NscAdd.cAllocations)
-                break;
-        }
-
-        D3DDDICB_RENDER RenderData = {0};
-        RenderData.CommandLength = pDevice->DefaultContext.ContextInfo.CommandBufferSize - NscAdd.cbCommandBuffer;
-        Assert(RenderData.CommandLength);
-        Assert(RenderData.CommandLength < UINT32_MAX/2);
-        RenderData.CommandOffset = 0;
-        RenderData.NumAllocations = pDevice->DefaultContext.ContextInfo.AllocationListSize - NscAdd.cAllocationList;
-        Assert(RenderData.NumAllocations == NscAdd.cAllocations);
-        RenderData.NumPatchLocations = pDevice->DefaultContext.ContextInfo.PatchLocationListSize - NscAdd.cPatchLocationList;
-        Assert(RenderData.NumPatchLocations == NscAdd.cAllocations);
-//        RenderData.NewCommandBufferSize = sizeof (VBOXVDMACMD) + 4 * (100);
-//        RenderData.NewAllocationListSize = 100;
-//        RenderData.NewPatchLocationListSize = 100;
-        RenderData.hContext = pDevice->DefaultContext.ContextInfo.hContext;
-
-        HRESULT hr = pDevice->RtCallbacks.pfnRenderCb(pDevice->hDevice, &RenderData);
-        Assert(hr == S_OK);
-        if (hr == S_OK)
-        {
-            pDevice->DefaultContext.ContextInfo.CommandBufferSize = RenderData.NewCommandBufferSize;
-            pDevice->DefaultContext.ContextInfo.pCommandBuffer = RenderData.pNewCommandBuffer;
-            pDevice->DefaultContext.ContextInfo.AllocationListSize = RenderData.NewAllocationListSize;
-            pDevice->DefaultContext.ContextInfo.pAllocationList = RenderData.pNewAllocationList;
-            pDevice->DefaultContext.ContextInfo.PatchLocationListSize = RenderData.NewPatchLocationListSize;
-            pDevice->DefaultContext.ContextInfo.pPatchLocationList = RenderData.pNewPatchLocationList;
-            bReinitRenderData = TRUE;
-        }
-        else
-            break;
-    } while (1);
-
-    return S_OK;
-}
-
 static HRESULT APIENTRY vboxWddmDDevFlush(HANDLE hDevice)
 {
@@ -5913,26 +5749,9 @@
     if (VBOXDISPMODE_IS_3D(pDevice->pAdapter))
     {
-//        Assert(pDevice->cScreens);
-//        UINT cProcessed = 0;
-//        for (UINT i = 0; cProcessed < pDevice->cScreens && i < RT_ELEMENTS(pDevice->aScreens); ++i)
-//        {
-//            PVBOXWDDMDISP_SCREEN pScreen = &pDevice->aScreens[i];
-//            if (pScreen->pDevice9If)
-//            {
-//                ++cProcessed;
-////                if (pScreen->pRenderTargetRc->cAllocations == 1)
-////                {
-////                    hr = pScreen->pDevice9If->Present(NULL, NULL, NULL, NULL);
-////                    Assert(hr == S_OK);
-////                }
-////                else
-                {
-                    hr = pDevice->pAdapter->D3D.pfnVBoxWineExD3DDev9Flush((IDirect3DDevice9Ex*)pDevice->pDevice9If);
-                    Assert(hr == S_OK);
-                }
-//            }
-//        }
-
-        vboxWddmNotifySharedChange(pDevice);
+
+        hr = pDevice->pAdapter->D3D.pfnVBoxWineExD3DDev9Flush((IDirect3DDevice9Ex*)pDevice->pDevice9If);
+        Assert(hr == S_OK);
+
+        vboxWddmDalNotifyChange(pDevice);
     }
     vboxVDbgPrintF(("<== "__FUNCTION__", hDevice(0x%p), hr(0x%x)\n", hDevice, hr));
@@ -6491,12 +6310,6 @@
 #endif
 
-    if (pDstRc->RcDesc.fFlags.SharedResource)
-    {
-        PVBOXWDDMDISP_ALLOCATION pAlloc = &pDstRc->aAllocations[pData->DstSubResourceIndex];
-        EnterCriticalSection(&pDevice->DirtyAllocListLock);
-        if (!pAlloc->DirtyAllocListEntry.pNext)
-            RTListAppend(&pDevice->DirtyAllocList, &pAlloc->DirtyAllocListEntry);
-        LeaveCriticalSection(&pDevice->DirtyAllocListLock);
-    }
+    PVBOXWDDMDISP_ALLOCATION pDAlloc = &pDstRc->aAllocations[pData->DstSubResourceIndex];
+    vboxWddmDalCheckAdd(pDevice, pDAlloc);
 
     vboxVDbgPrintF(("<== "__FUNCTION__", hDevice(0x%p), hr(0x%x)\n", hDevice, hr));
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispKmt.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispKmt.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/disp/wddm/VBoxDispKmt.cpp	(revision 37626)
@@ -259,6 +259,6 @@
     D3DKMT_CREATECONTEXT ContextData = {0};
     ContextData.hDevice = pDevice->hDevice;
-    ContextData.NodeOrdinal = 0;
-    ContextData.EngineAffinity = 0;
+    ContextData.NodeOrdinal = VBOXWDDM_NODE_ID_3D_KMT;
+    ContextData.EngineAffinity = VBOXWDDM_ENGINE_ID_3D_KMT;
     ContextData.pPrivateDriverData = &Info;
     ContextData.PrivateDriverDataSize = sizeof (Info);
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/Makefile.kmk
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/Makefile.kmk	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/Makefile.kmk	(revision 37626)
@@ -83,5 +83,4 @@
   VBoxVideoWddm_DEFS     += VBOX_WITH_CRHGSMI
  endif
-
  ifdef DEBUG_misha
   VBoxVideoWddm_DEFS       += LOG_ENABLED
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/common/VBoxMPDevExt.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/common/VBoxMPDevExt.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/common/VBoxMPDevExt.h	(revision 37626)
@@ -88,5 +88,6 @@
    /* hgsmi allocation manager */
    VBOXVIDEOCM_ALLOC_MGR AllocMgr;
-   VBOXVDMADDI_CMD_QUEUE DdiCmdQueue;
+   VBOXVDMADDI_NODE aNodes[VBOXWDDM_NUM_NODES];
+   LIST_ENTRY DpcCmdQueue;
    LIST_ENTRY SwapchainList3D;
    /* mutex for context list operations */
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.cpp	(revision 37626)
@@ -1260,5 +1260,5 @@
         {
             PVBOXVDMADDI_CMD pDdiCmd = VBOXVDMADDI_CMD_FROM_BUF_DR(pDr);
-            vboxVdmaDdiCmdInit(pDdiCmd, 0, NULL, vboxVideoAMgrAllocSubmitCompletion, pDr);
+            vboxVdmaDdiCmdInit(pDdiCmd, 0, 0, vboxVideoAMgrAllocSubmitCompletion, pDr);
             /* mark command as submitted & invisible for the dx runtime since dx did not originate it */
             vboxVdmaDdiCmdSubmittedNotDx(pDdiCmd);
@@ -1381,2 +1381,16 @@
 }
 
+VOID vboxWddmCounterU32Wait(uint32_t volatile * pu32, uint32_t u32Val) /* polls *pu32 until it equals u32Val, sleeping between reads; returns nothing */
+{
+    LARGE_INTEGER Interval;
+    Interval.QuadPart = -(int64_t) 2 /* ms */ * 10000; /* evaluates to -20000; KeDelayExecutionThread treats a negative value as a relative time in 100ns units */
+    uint32_t u32CurVal;
+
+    while ((u32CurVal = ASMAtomicReadU32(pu32)) != u32Val) /* the counter is re-read atomically on every iteration */
+    {
+        Assert(u32CurVal >= u32Val); /* asserts the counter has not dropped below the awaited value */
+        Assert(u32CurVal < UINT32_MAX/2); /* presumably catches a counter that was decremented past zero and wrapped - confirm */
+
+        KeDelayExecutionThread(KernelMode, FALSE, &Interval); /* non-alertable wait before the next check */
+    }
+}
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPMisc.h	(revision 37626)
@@ -148,3 +148,6 @@
 #endif
 
+
+VOID vboxWddmCounterU32Wait(uint32_t volatile * pu32, uint32_t u32Val);
+
 #endif /* #ifndef ___VBoxMPMisc_h__ */
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPTypes.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPTypes.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPTypes.h	(revision 37626)
@@ -116,4 +116,5 @@
     struct VBOXWDDM_SWAPCHAIN *pSwapchain;
     VBOXWDDM_ALLOC_TYPE enmType;
+    volatile uint32_t cRefs;
 //    VBOXWDDM_ALLOCUSAGE_TYPE enmCurrentUsage;
     D3DDDI_RESOURCEFLAGS fRcFlags;
@@ -123,4 +124,5 @@
     VBOXVHWA_SURFHANDLE hHostHandle;
 #endif
+    BOOLEAN fDeleted;
     BOOLEAN bVisible;
     BOOLEAN bAssigned;
@@ -141,5 +143,7 @@
 {
     uint32_t fFlags;
+    volatile uint32_t cRefs;
     VBOXWDDM_RC_DESC RcDesc;
+    BOOLEAN fDeleted;
     uint32_t cAllocations;
     VBOXWDDM_ALLOCATION aAllocations[1];
@@ -190,5 +194,4 @@
 typedef struct VBOXWDDM_CONTEXT
 {
-//    LIST_ENTRY ListEntry;
     struct VBOXWDDM_DEVICE * pDevice;
     HANDLE hContext;
@@ -196,5 +199,4 @@
     UINT  NodeOrdinal;
     UINT  EngineAffinity;
-//    UINT uLastCompletedCmdFenceId;
     VBOXWDDM_HTABLE Swapchains;
     VBOXVIDEOCM_CTX CmContext;
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVbva.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVbva.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVbva.h	(revision 37626)
@@ -51,4 +51,5 @@
 #define VBOXVBVA_OP_WITHLOCK_ATDPC(_op, _pdext, _psrc, _arg) \
         do { \
+            Assert(KeGetCurrentIrql() == DISPATCH_LEVEL); \
             KeAcquireSpinLockAtDpcLevel(&(_psrc)->Vbva.Lock);  \
             VBOXVBVA_OP(_op, _pdext, _psrc, _arg);        \
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.cpp	(revision 37626)
@@ -213,18 +213,68 @@
 }
 
-PVBOXVDMAPIPE_CMD_DR vboxVdmaGgCmdCreate(PVBOXVDMAGG pVdma, VBOXVDMAPIPE_CMD_TYPE enmType, uint32_t cbCmd)
-{
-    PVBOXVDMAPIPE_CMD_DR pHdr = (PVBOXVDMAPIPE_CMD_DR)vboxWddmMemAllocZero(cbCmd);
-    Assert(pHdr);
-    if (pHdr)
-    {
-        pHdr->enmType = enmType;
-        return pHdr;
-    }
-    return NULL;
-}
-
-void vboxVdmaGgCmdDestroy(PVBOXVDMAPIPE_CMD_DR pDr)
-{
+PVBOXVDMAPIPE_CMD_DR vboxVdmaGgCmdCreate(PVBOXMP_DEVEXT pDevExt, VBOXVDMAPIPE_CMD_TYPE enmType, uint32_t cbCmd) /* allocates a pipe command of cbCmd bytes and stores enmType in it; returns NULL when the allocation fails */
+{
+    PVBOXVDMAPIPE_CMD_DR pHdr;
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+    if (enmType == VBOXVDMAPIPE_CMD_TYPE_DMACMD) /* in this build DMA commands are carved out of a command-buffer descriptor instead of plain memory */
+    {
+        UINT cbAlloc = VBOXVDMACMD_SIZE_FROMBODYSIZE(cbCmd); /* cbCmd is passed as the body size of the wrapping VBOXVDMACMD */
+        VBOXVDMACBUF_DR* pDr = vboxVdmaCBufDrCreate(&pDevExt->u.primary.Vdma, cbAlloc); /* NOTE(review): nothing visible here clears the body, unlike the AllocZero path below - confirm vboxVdmaCBufDrCreate zero-fills */
+        if (!pDr)
+        {
+            WARN(("dr allocation failed"));
+            return NULL;
+        }
+        pDr->fFlags = VBOXVDMACBUF_FLAG_BUF_FOLLOWS_DR;
+        pDr->cbBuf = VBOXVDMACMD_HEADER_SIZE(); /* only the command header size is recorded as the buffer length, not cbAlloc */
+        pDr->rc = VINF_SUCCESS;
+
+
+        PVBOXVDMACMD pDmaHdr = VBOXVDMACBUF_DR_TAIL(pDr, VBOXVDMACMD);
+        pDmaHdr->enmType = VBOXVDMACMD_TYPE_DMA_NOP; /* the wrapping command is initialized as a NOP */
+        pDmaHdr->u32CmdSpecific = 0;
+
+        pHdr = VBOXVDMACMD_BODY(pDmaHdr, VBOXVDMAPIPE_CMD_DR); /* the returned pipe command is the body of the wrapping command */
+    }
+    else
+#endif
+    {
+        pHdr = (PVBOXVDMAPIPE_CMD_DR)vboxWddmMemAllocZero(cbCmd); /* all other command types (and all types without IRQ completion) use a plain allocation */
+        if (!pHdr)
+        {
+            WARN(("cmd allocation failed"));
+            return NULL;
+        }
+    }
+    pHdr->enmType = enmType; /* common tail for both allocation paths */
+    return pHdr;
+}
+
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+DECLINLINE(VBOXVDMACBUF_DR*) vboxVdmaGgCmdDmaGetDr(PVBOXVDMAPIPE_CMD_DMACMD pDr) /* maps a DMA pipe command back to the command-buffer descriptor it is embedded in */
+{
+    VBOXVDMACMD* pDmaCmd = VBOXVDMACMD_FROM_BODY(pDr); /* step 1: command body -> wrapping VBOXVDMACMD */
+    VBOXVDMACBUF_DR* pDmaDr = VBOXVDMACBUF_DR_FROM_TAIL(pDmaCmd); /* step 2: descriptor tail -> descriptor */
+    return pDmaDr;
+}
+
+DECLINLINE(PVBOXVDMADDI_CMD) vboxVdmaGgCmdDmaGetDdiCmd(PVBOXVDMAPIPE_CMD_DMACMD pDr) /* returns the DDI command associated with the descriptor that holds this DMA pipe command */
+{
+    VBOXVDMACBUF_DR* pDmaDr = vboxVdmaGgCmdDmaGetDr(pDr);
+    return VBOXVDMADDI_CMD_FROM_BUF_DR(pDmaDr); /* the DDI command is derived from the descriptor, not from the pipe command itself */
+}
+
+#endif
+
+void vboxVdmaGgCmdDestroy(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DR pDr) /* releases a pipe command through whichever allocator its type implies */
+{
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+    if (pDr->enmType == VBOXVDMAPIPE_CMD_TYPE_DMACMD) /* DMA commands are part of a command-buffer descriptor, so the descriptor is what gets freed */
+    {
+        VBOXVDMACBUF_DR* pDmaDr = vboxVdmaGgCmdDmaGetDr((PVBOXVDMAPIPE_CMD_DMACMD)pDr);
+        vboxVdmaCBufDrFree(&pDevExt->u.primary.Vdma, pDmaDr);
+        return; /* must not fall through to vboxWddmMemFree: pDr is not a standalone allocation here */
+    }
+#endif
     vboxWddmMemFree(pDr);
 }
@@ -232,5 +282,5 @@
 DECLCALLBACK(VOID) vboxVdmaGgDdiCmdDestroy(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, PVOID pvContext)
 {
-    vboxVdmaGgCmdDestroy((PVBOXVDMAPIPE_CMD_DR)pvContext);
+    vboxVdmaGgCmdDestroy(pDevExt, (PVBOXVDMAPIPE_CMD_DR)pvContext); /* pvContext carries the pipe command to free; pCmd itself is not used by this callback */
 }
 
@@ -528,9 +578,7 @@
 }
 
-static NTSTATUS vboxVdmaGgDmaColorFill(PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCF)
+static NTSTATUS vboxVdmaGgDmaColorFill(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCF)
 {
     NTSTATUS Status = STATUS_UNSUCCESSFUL;
-    PVBOXWDDM_CONTEXT pContext = pCF->Hdr.DdiCmd.pContext;
-    PVBOXMP_DEVEXT pDevExt = pContext->pDevice->pAdapter;
     Assert (pDevExt->pvVisibleVram);
     if (pDevExt->pvVisibleVram)
@@ -593,5 +641,13 @@
                     {
                         PVBOXWDDM_SOURCE pSource = &pDevExt->aSources[pCF->ClrFill.Alloc.pAlloc->SurfDesc.VidPnSourceId];
-                        VBOXVBVA_OP_WITHLOCK(ReportDirtyRect, pDevExt, pSource, &UnionRect);
+                        uint32_t cUnlockedVBVADisabled = ASMAtomicReadU32(&pDevExt->cUnlockedVBVADisabled);
+                        if (!cUnlockedVBVADisabled)
+                        {
+                            VBOXVBVA_OP(ReportDirtyRect, pDevExt, pSource, &UnionRect);
+                        }
+                        else
+                        {
+                            VBOXVBVA_OP_WITHLOCK(ReportDirtyRect, pDevExt, pSource, &UnionRect);
+                        }
                     }
                 }
@@ -682,5 +738,5 @@
  * @return on success the number of bytes the command contained, otherwise - VERR_xxx error code
  */
-static NTSTATUS vboxVdmaGgDmaBlt(PVBOXVDMAPIPE_CMD_DMACMD_BLT pBlt)
+static NTSTATUS vboxVdmaGgDmaBlt(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD_BLT pBlt)
 {
     /* we do not support stretching for now */
@@ -694,5 +750,4 @@
 
     NTSTATUS Status = STATUS_SUCCESS;
-    PVBOXMP_DEVEXT pDevExt = pBlt->Hdr.pDevExt;
 
     if (pBlt->Blt.DstRects.UpdateRects.cRects)
@@ -734,9 +789,8 @@
 }
 
-static NTSTATUS vboxVdmaGgDmaCmdProcess(VBOXVDMAPIPE_CMD_DMACMD *pDmaCmd)
-{
-    PVBOXMP_DEVEXT pDevExt = pDmaCmd->pDevExt;
-    PVBOXWDDM_CONTEXT pContext = pDmaCmd->DdiCmd.pContext;
+static NTSTATUS vboxVdmaGgDmaCmdProcessFast(PVBOXMP_DEVEXT pDevExt, VBOXVDMAPIPE_CMD_DMACMD *pDmaCmd)
+{
     NTSTATUS Status = STATUS_SUCCESS;
+    PVBOXWDDM_CONTEXT pContext = pDmaCmd->pContext;
     DXGK_INTERRUPT_TYPE enmComplType = DXGK_INTERRUPT_DMA_COMPLETED;
     switch (pDmaCmd->enmCmd)
@@ -763,45 +817,37 @@
                                 Assert(pContext->enmType == VBOXWDDM_CONTEXT_TYPE_SYSTEM);
 
-                                if (pBlt->Hdr.fFlags.b2DRelated || pBlt->Hdr.fFlags.b3DRelated)
+                                if (pBlt->Hdr.fFlags.b2DRelated)
                                 {
-                                    POINT pos;
-                                    BOOLEAN bPosMoved = FALSE;
-                                    if (pBlt->Hdr.fFlags.b3DRelated)
+                                    RECT OverlayUnionRect;
+                                    RECT UpdateRect;
+                                    UpdateRect = pBlt->Blt.DstRects.UpdateRects.aRects[0];
+                                    for (UINT i = 1; i < pBlt->Blt.DstRects.UpdateRects.cRects; ++i)
                                     {
-                                        pos = pSource->VScreenPos;
-                                        if (pos.x || pos.y)
-                                        {
-                                            vboxWddmBltPipeRectsTranslate(&pBlt->Blt.DstRects, pos.x, pos.y);
-                                            bPosMoved = TRUE;
-                                        }
-                                        Status = vboxVdmaGgDirtyRectsProcess(pDevExt, pContext, NULL, &pBlt->Blt.DstRects);
-                                        Assert(Status == STATUS_SUCCESS);
+                                        vboxWddmRectUnite(&UpdateRect, &pBlt->Blt.DstRects.UpdateRects.aRects[i]);
                                     }
-
-
-                                    if (pBlt->Hdr.fFlags.b2DRelated)
+                                    vboxVhwaHlpOverlayDstRectUnion(pDevExt, pDstAlloc->SurfDesc.VidPnSourceId, &OverlayUnionRect);
+                                    Assert(pBlt->Blt.DstRects.ContextRect.left == 0); /* <-| otherwise we would probably need to translate the UpdateRects to left;top first??*/
+                                    Assert(pBlt->Blt.DstRects.ContextRect.top == 0); /* <--| */
+                                    vboxVdmaDirtyRectsCalcIntersection(&OverlayUnionRect, &pBlt->Blt.DstRects.UpdateRects, &pBlt->Blt.DstRects.UpdateRects);
+                                    if (pBlt->Blt.DstRects.UpdateRects.cRects)
                                     {
-                                        if (bPosMoved)
-                                        {
-                                            vboxWddmBltPipeRectsTranslate(&pBlt->Blt.DstRects, -pos.x, -pos.y);
-                                        }
-
-                                        RECT OverlayUnionRect;
-                                        RECT UpdateRect;
-                                        UpdateRect = pBlt->Blt.DstRects.UpdateRects.aRects[0];
-                                        for (UINT i = 1; i < pBlt->Blt.DstRects.UpdateRects.cRects; ++i)
-                                        {
-                                            vboxWddmRectUnite(&UpdateRect, &pBlt->Blt.DstRects.UpdateRects.aRects[i]);
-                                        }
-                                        vboxVhwaHlpOverlayDstRectUnion(pDevExt, pDstAlloc->SurfDesc.VidPnSourceId, &OverlayUnionRect);
-                                        Assert(pBlt->Blt.DstRects.ContextRect.left == 0); /* <-| otherwise we would probably need to translate the UpdateRects to left;top first??*/
-                                        Assert(pBlt->Blt.DstRects.ContextRect.top == 0); /* <--| */
-                                        vboxVdmaDirtyRectsCalcIntersection(&OverlayUnionRect, &pBlt->Blt.DstRects.UpdateRects, &pBlt->Blt.DstRects.UpdateRects);
-                                        if (pBlt->Blt.DstRects.UpdateRects.cRects)
-                                        {
-                                            vboxVdmaGgDmaBlt(pBlt);
-                                        }
+                                        vboxVdmaGgDmaBlt(pDevExt, pBlt);
+                                    }
+
+                                    uint32_t cUnlockedVBVADisabled = ASMAtomicReadU32(&pDevExt->cUnlockedVBVADisabled);
+                                    if (!cUnlockedVBVADisabled)
+                                    {
+                                        VBOXVBVA_OP(ReportDirtyRect, pDevExt, pSource, &UpdateRect);
+                                    }
+                                    else
+                                    {
                                         VBOXVBVA_OP_WITHLOCK(ReportDirtyRect, pDevExt, pSource, &UpdateRect);
                                     }
+
+                                }
+
+                                if (pBlt->Hdr.fFlags.b3DRelated)
+                                {
+                                    Status = STATUS_MORE_PROCESSING_REQUIRED;
                                 }
 
@@ -816,12 +862,5 @@
                                     if (pBlt->Hdr.fFlags.b3DRelated)
                                     {
-                                        PVBOXWDDM_SWAPCHAIN pSwapchain;
-                                        pSwapchain = vboxWddmSwapchainRetainByAlloc(pDevExt, pSrcAlloc);
-                                        if (pSwapchain)
-                                        {
-                                            Status = vboxVdmaGgDirtyRectsProcess(pDevExt, pContext, pSwapchain, &pBlt->Blt.DstRects);
-                                            Assert(Status == STATUS_SUCCESS);
-                                            vboxWddmSwapchainRelease(pSwapchain);
-                                        }
+                                        Status = STATUS_MORE_PROCESSING_REQUIRED;
                                     }
                                 }
@@ -846,5 +885,5 @@
                         if (pBlt->Blt.DstRects.UpdateRects.cRects)
                         {
-                            vboxVdmaGgDmaBlt(pBlt);
+                            vboxVdmaGgDmaBlt(pDevExt, pBlt);
                         }
                     }
@@ -859,4 +898,9 @@
             }
 
+            if (Status == STATUS_MORE_PROCESSING_REQUIRED)
+            {
+                vboxWddmAllocationRetain(pDstAlloc);
+                vboxWddmAllocationRetain(pSrcAlloc);
+            }
             break;
         }
@@ -865,5 +909,124 @@
             PVBOXVDMAPIPE_CMD_DMACMD_FLIP pFlip = (PVBOXVDMAPIPE_CMD_DMACMD_FLIP)pDmaCmd;
             Assert(pFlip->Hdr.fFlags.b3DRelated);
-            Assert(!pFlip->Hdr.fFlags.bDecVBVAUnlock);
+            Assert(!pFlip->Hdr.fFlags.b2DRelated);
+            if (pFlip->Hdr.fFlags.b3DRelated)
+            {
+                Status = STATUS_MORE_PROCESSING_REQUIRED;
+                vboxWddmAllocationRetain(pFlip->Flip.Alloc.pAlloc);
+            }
+
+            break;
+        }
+        case VBOXVDMACMD_TYPE_DMA_PRESENT_CLRFILL:
+        {
+            PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCF = (PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL)pDmaCmd;
+            Assert(pCF->Hdr.fFlags.b2DRelated);
+            Assert(!pCF->Hdr.fFlags.b3DRelated);
+            Status = vboxVdmaGgDmaColorFill(pDevExt, pCF);
+            Assert(Status == STATUS_SUCCESS);
+            break;
+        }
+        default:
+            Assert(0);
+            break;
+    }
+
+    NTSTATUS tmpStatus = vboxVdmaGgCmdDmaNotifyCompleted(pDevExt, pDmaCmd, enmComplType);
+    Assert(tmpStatus == STATUS_SUCCESS);
+    if (Status != STATUS_MORE_PROCESSING_REQUIRED)
+    {
+        vboxVdmaGgCmdDestroy(pDevExt, &pDmaCmd->Hdr);
+    }
+
+    return Status;
+}
+
+static NTSTATUS vboxVdmaGgDmaCmdProcessSlow(PVBOXMP_DEVEXT pDevExt, VBOXVDMAPIPE_CMD_DMACMD *pDmaCmd)
+{
+    NTSTATUS Status = STATUS_SUCCESS;
+    PVBOXWDDM_CONTEXT pContext = pDmaCmd->pContext;
+    DXGK_INTERRUPT_TYPE enmComplType = DXGK_INTERRUPT_DMA_COMPLETED;
+    switch (pDmaCmd->enmCmd)
+    {
+        case VBOXVDMACMD_TYPE_DMA_PRESENT_BLT:
+        {
+            PVBOXVDMAPIPE_CMD_DMACMD_BLT pBlt = (PVBOXVDMAPIPE_CMD_DMACMD_BLT)pDmaCmd;
+            PVBOXWDDM_ALLOCATION pDstAlloc = pBlt->Blt.DstAlloc.pAlloc;
+            PVBOXWDDM_ALLOCATION pSrcAlloc = pBlt->Blt.SrcAlloc.pAlloc;
+            BOOLEAN bComplete = TRUE;
+            switch (pDstAlloc->enmType)
+            {
+                case VBOXWDDM_ALLOC_TYPE_STD_SHAREDPRIMARYSURFACE:
+                case VBOXWDDM_ALLOC_TYPE_UMD_RC_GENERIC:
+                {
+                    if (pDstAlloc->bAssigned)
+                    {
+                        VBOXWDDM_SOURCE *pSource = &pDevExt->aSources[pDstAlloc->SurfDesc.VidPnSourceId];
+                        Assert(pSource->pPrimaryAllocation == pDstAlloc);
+                        switch (pSrcAlloc->enmType)
+                        {
+                            case VBOXWDDM_ALLOC_TYPE_STD_SHADOWSURFACE:
+                            {
+                                Assert(pContext->enmType == VBOXWDDM_CONTEXT_TYPE_SYSTEM);
+
+                                if (pBlt->Hdr.fFlags.b3DRelated)
+                                {
+                                    POINT pos = pSource->VScreenPos;
+                                    if (pos.x || pos.y)
+                                    {
+                                        vboxWddmBltPipeRectsTranslate(&pBlt->Blt.DstRects, pos.x, pos.y);
+                                    }
+
+                                    Status = vboxVdmaGgDirtyRectsProcess(pDevExt, pContext, NULL, &pBlt->Blt.DstRects);
+                                    Assert(Status == STATUS_SUCCESS);
+                                }
+
+                                break;
+                            }
+                            case VBOXWDDM_ALLOC_TYPE_UMD_RC_GENERIC:
+                            {
+                                Assert(pContext->enmType == VBOXWDDM_CONTEXT_TYPE_CUSTOM_3D);
+                                Assert(pSrcAlloc->fRcFlags.RenderTarget);
+                                if (pSrcAlloc->fRcFlags.RenderTarget)
+                                {
+                                    if (pBlt->Hdr.fFlags.b3DRelated)
+                                    {
+                                        POINT pos = pSource->VScreenPos;
+                                        if (pos.x || pos.y)
+                                        {
+                                            vboxWddmBltPipeRectsTranslate(&pBlt->Blt.DstRects, pos.x, pos.y);
+                                        }
+
+                                        PVBOXWDDM_SWAPCHAIN pSwapchain;
+                                        pSwapchain = vboxWddmSwapchainRetainByAlloc(pDevExt, pSrcAlloc);
+                                        if (pSwapchain)
+                                        {
+                                            Status = vboxVdmaGgDirtyRectsProcess(pDevExt, pContext, pSwapchain, &pBlt->Blt.DstRects);
+                                            Assert(Status == STATUS_SUCCESS);
+                                            vboxWddmSwapchainRelease(pSwapchain);
+                                        }
+                                    }
+                                }
+                                break;
+                            }
+                            default:
+                                AssertBreakpoint();
+                                break;
+                        }
+                    }
+                    break;
+                }
+                default:
+                    Assert(0);
+            }
+
+            vboxWddmAllocationRelease(pDstAlloc);
+            vboxWddmAllocationRelease(pSrcAlloc);
+            break;
+        }
+        case VBOXVDMACMD_TYPE_DMA_PRESENT_FLIP:
+        {
+            PVBOXVDMAPIPE_CMD_DMACMD_FLIP pFlip = (PVBOXVDMAPIPE_CMD_DMACMD_FLIP)pDmaCmd;
+            Assert(pFlip->Hdr.fFlags.b3DRelated);
             Assert(!pFlip->Hdr.fFlags.b2DRelated);
             PVBOXWDDM_ALLOCATION pAlloc = pFlip->Flip.Alloc.pAlloc;
@@ -889,14 +1052,6 @@
             }
 
-            break;
-        }
-        case VBOXVDMACMD_TYPE_DMA_PRESENT_CLRFILL:
-        {
-            PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCF = (PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL)pDmaCmd;
-            Assert(pCF->Hdr.fFlags.b2DRelated);
-            Assert(pCF->Hdr.fFlags.bDecVBVAUnlock);
-            Assert(!pCF->Hdr.fFlags.b3DRelated);
-            Status = vboxVdmaGgDmaColorFill(pCF);
-            Assert(Status == STATUS_SUCCESS);
+            vboxWddmAllocationRelease(pAlloc);
+
             break;
         }
@@ -906,12 +1061,5 @@
     }
 
-    if (pDmaCmd->fFlags.bDecVBVAUnlock)
-    {
-        uint32_t cNew = ASMAtomicDecU32(&pDevExt->cUnlockedVBVADisabled);
-        Assert(cNew < UINT32_MAX/2);
-    }
-
-    Status = vboxVdmaDdiCmdCompleted(pDevExt, &pDevExt->DdiCmdQueue, &pDmaCmd->DdiCmd, enmComplType);
-    Assert(Status == STATUS_SUCCESS);
+    vboxVdmaGgCmdDestroy(pDevExt, &pDmaCmd->Hdr);
 
     return Status;
@@ -920,5 +1068,6 @@
 static VOID vboxVdmaGgWorkerThread(PVOID pvUser)
 {
-    PVBOXVDMAGG pVdma = (PVBOXVDMAGG)pvUser;
+    PVBOXMP_DEVEXT pDevExt = (PVBOXMP_DEVEXT)pvUser;
+    PVBOXVDMAGG pVdma = &pDevExt->u.primary.Vdma.DmaGg;
 
     NTSTATUS Status = vboxVdmaPipeSvrOpen(&pVdma->CmdPipe);
@@ -940,18 +1089,8 @@
                     switch (pDr->enmType)
                     {
-#if 0
-                        case VBOXVDMAPIPE_CMD_TYPE_RECTSINFO:
-                        {
-                            PVBOXVDMAPIPE_CMD_RECTSINFO pRects = (PVBOXVDMAPIPE_CMD_RECTSINFO)pDr;
-                            Status = vboxVdmaGgDirtyRectsProcess(pRects);
-                            Assert(Status == STATUS_SUCCESS);
-                            vboxVdmaGgCmdDestroy(pDr);
-                            break;
-                        }
-#endif
                         case VBOXVDMAPIPE_CMD_TYPE_DMACMD:
                         {
                             PVBOXVDMAPIPE_CMD_DMACMD pDmaCmd = (PVBOXVDMAPIPE_CMD_DMACMD)pDr;
-                            Status = vboxVdmaGgDmaCmdProcess(pDmaCmd);
+                            Status = vboxVdmaGgDmaCmdProcessSlow(pDevExt, pDmaCmd);
                             Assert(Status == STATUS_SUCCESS);
                         } break;
@@ -959,7 +1098,7 @@
                         {
                             PVBOXVDMAPIPE_CMD_RECTSINFO pRects = (PVBOXVDMAPIPE_CMD_RECTSINFO)pDr;
-                            Status = vboxVdmaGgDirtyRectsProcess(pRects->pDevExt, pRects->pContext, pRects->pSwapchain, &pRects->ContextsRects);
+                            Status = vboxVdmaGgDirtyRectsProcess(pDevExt, pRects->pContext, pRects->pSwapchain, &pRects->ContextsRects);
                             Assert(Status == STATUS_SUCCESS);
-                            vboxVdmaGgCmdDestroy(pDr);
+                            vboxVdmaGgCmdDestroy(pDevExt, pDr);
                             break;
                         }
@@ -979,11 +1118,12 @@
 }
 
-NTSTATUS vboxVdmaGgConstruct(PVBOXVDMAGG pVdma)
-{
+NTSTATUS vboxVdmaGgConstruct(PVBOXMP_DEVEXT pDevExt)
+{
+    PVBOXVDMAGG pVdma = &pDevExt->u.primary.Vdma.DmaGg;
     NTSTATUS Status = vboxVdmaPipeConstruct(&pVdma->CmdPipe);
     Assert(Status == STATUS_SUCCESS);
     if (Status == STATUS_SUCCESS)
     {
-        Status = vboxVdmaGgThreadCreate(&pVdma->pThread, vboxVdmaGgWorkerThread, pVdma);
+        Status = vboxVdmaGgThreadCreate(&pVdma->pThread, vboxVdmaGgWorkerThread, pDevExt);
         Assert(Status == STATUS_SUCCESS);
         if (Status == STATUS_SUCCESS)
@@ -999,6 +1139,7 @@
 }
 
-NTSTATUS vboxVdmaGgDestruct(PVBOXVDMAGG pVdma)
-{
+NTSTATUS vboxVdmaGgDestruct(PVBOXMP_DEVEXT pDevExt)
+{
+    PVBOXVDMAGG pVdma = &pDevExt->u.primary.Vdma.DmaGg;
     /* this informs the server thread that it should complete all current commands and exit */
     NTSTATUS Status = vboxVdmaPipeCltClose(&pVdma->CmdPipe);
@@ -1018,7 +1159,62 @@
 }
 
-NTSTATUS vboxVdmaGgCmdSubmit(PVBOXVDMAGG pVdma, PVBOXVDMAPIPE_CMD_DR pCmd)
-{
-    return vboxVdmaPipeCltCmdPut(&pVdma->CmdPipe, &pCmd->PipeHdr);
+NTSTATUS vboxVdmaGgCmdSubmit(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DR pCmd) /* handles DMA commands inline where possible and puts everything else on the command pipe; returns the fast-path status or the pipe-put status */
+{
+    switch (pCmd->enmType)
+    {
+        case VBOXVDMAPIPE_CMD_TYPE_DMACMD:
+        {
+            PVBOXVDMAPIPE_CMD_DMACMD pDmaCmd = (PVBOXVDMAPIPE_CMD_DMACMD)pCmd;
+            NTSTATUS Status = vboxVdmaGgDmaCmdProcessFast(pDevExt, pDmaCmd); /* fast part runs in the caller's context */
+            if (Status == STATUS_MORE_PROCESSING_REQUIRED) /* the fast part left work behind: leave the switch and queue the command */
+                break;
+            return Status; /* any other status is returned as-is and the command is not queued */
+        }
+        default: /* every non-DMA command type goes straight to the pipe */
+            break;
+    }
+    return vboxVdmaPipeCltCmdPut(&pDevExt->u.primary.Vdma.DmaGg.CmdPipe, &pCmd->PipeHdr);
+}
+
+NTSTATUS vboxVdmaGgCmdDmaNotifySubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD pCmd) /* passes the DDI command belonging to pCmd to vboxVdmaDdiCmdSubmitted and returns its status */
+{
+    PVBOXVDMADDI_CMD pDdiCmd;
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+    pDdiCmd = vboxVdmaGgCmdDmaGetDdiCmd(pCmd); /* with IRQ completion the DDI command is located via the command-buffer descriptor */
+#else
+    pDdiCmd = &pCmd->DdiCmd; /* otherwise it is a member of the pipe command */
+#endif
+    NTSTATUS Status = vboxVdmaDdiCmdSubmitted(pDevExt, pDdiCmd);
+    Assert(Status == STATUS_SUCCESS);
+    return Status;
+}
+
+NTSTATUS vboxVdmaGgCmdDmaNotifyCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD pCmd, DXGK_INTERRUPT_TYPE enmComplType) /* signals completion of a DMA pipe command; the mechanism depends on VBOX_WDDM_IRQ_COMPLETION */
+{
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+    VBOXVDMACBUF_DR* pDr = vboxVdmaGgCmdDmaGetDr(pCmd); /* enmComplType is not used in this branch */
+    int rc = vboxVdmaCBufDrSubmit(pDevExt, &pDevExt->u.primary.Vdma, pDr); /* completion is triggered by submitting the wrapping descriptor */
+    Assert(rc == VINF_SUCCESS);
+    if (RT_SUCCESS(rc))
+    {
+        return STATUS_SUCCESS;
+    }
+    return STATUS_UNSUCCESSFUL; /* the specific VBox error code is collapsed into a generic NTSTATUS failure */
+#else
+    return vboxVdmaDdiCmdCompleted(pDevExt, &pCmd->DdiCmd, enmComplType); /* without IRQ completion the embedded DDI command is completed directly */
+#endif
+}
+
+VOID vboxVdmaGgCmdDmaNotifyInit(PVBOXVDMAPIPE_CMD_DMACMD pCmd, /* initializes the DDI command that belongs to pCmd */
+        uint32_t u32NodeOrdinal, uint32_t u32FenceId, /* node ordinal and fence id, forwarded unchanged to vboxVdmaDdiCmdInit */
+        PFNVBOXVDMADDICMDCOMPLETE_DPC pfnComplete, PVOID pvComplete) /* completion callback and its context, forwarded unchanged */
+{
+    PVBOXVDMADDI_CMD pDdiCmd;
+#ifdef VBOX_WDDM_IRQ_COMPLETION
+    pDdiCmd = vboxVdmaGgCmdDmaGetDdiCmd(pCmd); /* with IRQ completion the DDI command is located via the command-buffer descriptor */
+#else
+    pDdiCmd = &pCmd->DdiCmd; /* otherwise it is a member of the pipe command */
+#endif
+    vboxVdmaDdiCmdInit(pDdiCmd, u32NodeOrdinal, u32FenceId, pfnComplete, pvComplete);
 }
 
@@ -1156,5 +1352,5 @@
 #endif
         {
-            NTSTATUS Status = vboxVdmaGgConstruct(&pInfo->DmaGg);
+            NTSTATUS Status = vboxVdmaGgConstruct(pDevExt);
             Assert(Status == STATUS_SUCCESS);
             if (Status == STATUS_SUCCESS)
@@ -1231,5 +1427,5 @@
 {
     int rc = VINF_SUCCESS;
-    NTSTATUS Status = vboxVdmaGgDestruct(&pInfo->DmaGg);
+    NTSTATUS Status = vboxVdmaGgDestruct(pDevExt);
     Assert(Status == STATUS_SUCCESS);
     if (Status == STATUS_SUCCESS)
@@ -1304,5 +1500,5 @@
     }
 
-    if (vboxVdmaDdiCmdCompletedIrq(pDevExt, &pDevExt->DdiCmdQueue, VBOXVDMADDI_CMD_FROM_BUF_DR(pDr), enmComplType))
+    if (vboxVdmaDdiCmdCompletedIrq(pDevExt, VBOXVDMADDI_CMD_FROM_BUF_DR(pDr), enmComplType))
     {
         pDevExt->bNotifyDxDpc = TRUE;
@@ -1383,6 +1579,18 @@
 
 /* ddi dma command queue */
-DECLINLINE(BOOLEAN) vboxVdmaDdiCmdCanComplete(PVBOXVDMADDI_CMD_QUEUE pQueue)
-{
+
+VOID vboxVdmaDdiCmdGetCompletedListIsr(PVBOXMP_DEVEXT pDevExt, LIST_ENTRY *pList) /* hands the device-wide DpcCmdQueue over to pList */
+{
+    vboxVideoLeDetach(&pDevExt->DpcCmdQueue, pList); /* presumably moves all entries to pList and leaves DpcCmdQueue empty - confirm against vboxVideoLeDetach */
+}
+
+BOOLEAN vboxVdmaDdiCmdIsCompletedListEmptyIsr(PVBOXMP_DEVEXT pDevExt) /* returns TRUE when the device-wide DpcCmdQueue holds no entries */
+{
+    return IsListEmpty(&pDevExt->DpcCmdQueue); /* no locking is done here; NOTE(review): the Isr suffix suggests the caller must already be synchronized with the ISR - confirm */
+}
+
+DECLINLINE(BOOLEAN) vboxVdmaDdiCmdCanComplete(PVBOXMP_DEVEXT pDevExt, UINT u32NodeOrdinal) /* TRUE when the given node's command queue currently has no queued commands */
+{
+    PVBOXVDMADDI_CMD_QUEUE pQueue = &pDevExt->aNodes[u32NodeOrdinal].CmdQueue; /* queues are per node; u32NodeOrdinal is used as an index without a range check */
     return ASMAtomicUoReadU32(&pQueue->cQueuedCmds) == 0;
 }
@@ -1393,6 +1601,7 @@
 }
 
-static VOID vboxVdmaDdiCmdNotifyCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
-{
+static VOID vboxVdmaDdiCmdNotifyCompletedIrq(PVBOXMP_DEVEXT pDevExt, UINT u32NodeOrdinal, UINT u32FenceId, DXGK_INTERRUPT_TYPE enmComplType)
+{
+    PVBOXVDMADDI_NODE pNode = &pDevExt->aNodes[u32NodeOrdinal];
     DXGKARGCB_NOTIFY_INTERRUPT_DATA notify;
     memset(&notify, 0, sizeof(DXGKARGCB_NOTIFY_INTERRUPT_DATA));
@@ -1401,31 +1610,15 @@
         case DXGK_INTERRUPT_DMA_COMPLETED:
             notify.InterruptType = DXGK_INTERRUPT_DMA_COMPLETED;
-            notify.DmaCompleted.SubmissionFenceId = pCmd->u32FenceId;
-//            if (pCmd->pContext)
-//            {
-//                notify.DmaCompleted.NodeOrdinal = pCmd->pContext->NodeOrdinal;
-//                pCmd->pContext->uLastCompletedCmdFenceId = pCmd->u32FenceId;
-//            }
-//            else
-            {
-                pDevExt->u.primary.Vdma.uLastCompletedPagingBufferCmdFenceId = pCmd->u32FenceId;
-            }
-
-            InsertTailList(&pQueue->DpcCmdQueue, &pCmd->QueueEntry);
-
-            break;
+            notify.DmaCompleted.SubmissionFenceId = u32FenceId;
+            notify.DmaCompleted.NodeOrdinal = u32NodeOrdinal;
+            pNode->uLastCompletedFenceId = u32FenceId;
+            break;
+
         case DXGK_INTERRUPT_DMA_PREEMPTED:
             Assert(0);
             notify.InterruptType = DXGK_INTERRUPT_DMA_PREEMPTED;
-            notify.DmaPreempted.PreemptionFenceId = pCmd->u32FenceId;
-//            if (pCmd->pContext)
-//            {
-//                notify.DmaPreempted.LastCompletedFenceId = pCmd->pContext->uLastCompletedCmdFenceId;
-//                notify.DmaPreempted.NodeOrdinal = pCmd->pContext->NodeOrdinal;
-//            }
-//            else
-            {
-                notify.DmaPreempted.LastCompletedFenceId = pDevExt->u.primary.Vdma.uLastCompletedPagingBufferCmdFenceId;
-            }
+            notify.DmaPreempted.PreemptionFenceId = u32FenceId;
+            notify.DmaPreempted.NodeOrdinal = u32NodeOrdinal;
+            notify.DmaPreempted.LastCompletedFenceId = pNode->uLastCompletedFenceId;
             break;
 
@@ -1433,11 +1626,9 @@
             Assert(0);
             notify.InterruptType = DXGK_INTERRUPT_DMA_FAULTED;
-            notify.DmaFaulted.FaultedFenceId = pCmd->u32FenceId;
+            notify.DmaFaulted.FaultedFenceId = u32FenceId;
             notify.DmaFaulted.Status = STATUS_UNSUCCESSFUL; /* @todo: better status ? */
-            if (pCmd->pContext)
-            {
-                notify.DmaFaulted.NodeOrdinal = pCmd->pContext->NodeOrdinal;
-            }
-            break;
+            notify.DmaFaulted.NodeOrdinal = u32NodeOrdinal;
+            break;
+
         default:
             Assert(0);
@@ -1448,31 +1639,53 @@
 }
 
-DECLINLINE(VOID) vboxVdmaDdiCmdDequeueIrq(PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd)
-{
+/* Forwards the command's node ordinal, fence id and completion type to vboxVdmaDdiCmdNotifyCompletedIrq, then queues DMA_COMPLETED commands on pDevExt->DpcCmdQueue. */
+static VOID vboxVdmaDdiCmdProcessCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
+{
+    vboxVdmaDdiCmdNotifyCompletedIrq(pDevExt, pCmd->u32NodeOrdinal, pCmd->u32FenceId, enmComplType);
+    if (DXGK_INTERRUPT_DMA_COMPLETED == enmComplType)
+    {
+        /* the only completion type that is appended to the device-wide DPC list */
+        InsertTailList(&pDevExt->DpcCmdQueue, &pCmd->QueueEntry);
+        return;
+    }
+    /* every other completion type is unexpected here and leaves the command unqueued */
+    AssertFailed();
+}
+
+DECLINLINE(VOID) vboxVdmaDdiCmdDequeueIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd)
+{
+    PVBOXVDMADDI_CMD_QUEUE pQueue = &pDevExt->aNodes[pCmd->u32NodeOrdinal].CmdQueue; /* queue of the node the command was initialized for; NOTE(review): u32NodeOrdinal is used as an index without a range check here */
     ASMAtomicDecU32(&pQueue->cQueuedCmds);
     RemoveEntryList(&pCmd->QueueEntry);
 }
 
-DECLINLINE(VOID) vboxVdmaDdiCmdEnqueueIrq(PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd)
-{
+DECLINLINE(VOID) vboxVdmaDdiCmdEnqueueIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd)
+{
+    PVBOXVDMADDI_CMD_QUEUE pQueue = &pDevExt->aNodes[pCmd->u32NodeOrdinal].CmdQueue; /* the command is appended to the queue of its own node; NOTE(review): u32NodeOrdinal is used as an index without a range check here */
     ASMAtomicIncU32(&pQueue->cQueuedCmds);
     InsertTailList(&pQueue->CmdQueue, &pCmd->QueueEntry);
 }
 
-VOID vboxVdmaDdiQueueInit(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue)
-{
-    pQueue->cQueuedCmds = 0;
-    InitializeListHead(&pQueue->CmdQueue);
-    InitializeListHead(&pQueue->DpcCmdQueue);
-}
-
-BOOLEAN vboxVdmaDdiCmdCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
+/* Resets the device-wide DPC command list and, for every entry of pDevExt->aNodes, the last completed fence id and the command queue. */
+VOID vboxVdmaDdiNodesInit(PVBOXMP_DEVEXT pDevExt)
+{
+    InitializeListHead(&pDevExt->DpcCmdQueue);
+    for (UINT iNode = 0; iNode < RT_ELEMENTS(pDevExt->aNodes); ++iNode)
+    {
+        pDevExt->aNodes[iNode].uLastCompletedFenceId = 0;
+        pDevExt->aNodes[iNode].CmdQueue.cQueuedCmds = 0;
+        InitializeListHead(&pDevExt->aNodes[iNode].CmdQueue.CmdQueue);
+    }
+}
+
+BOOLEAN vboxVdmaDdiCmdCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
 {
     if (VBOXVDMADDI_STATE_NOT_DX_CMD == pCmd->enmState)
     {
-        InsertTailList(&pQueue->DpcCmdQueue, &pCmd->QueueEntry);
+        InsertTailList(&pDevExt->DpcCmdQueue, &pCmd->QueueEntry);
         return FALSE;
     }
 
+    PVBOXVDMADDI_CMD_QUEUE pQueue = &pDevExt->aNodes[pCmd->u32NodeOrdinal].CmdQueue;
     BOOLEAN bQueued = pCmd->enmState > VBOXVDMADDI_STATE_NOT_QUEUED;
     BOOLEAN bComplete = FALSE;
@@ -1484,5 +1697,5 @@
         if (pQueue->CmdQueue.Flink == &pCmd->QueueEntry)
         {
-            vboxVdmaDdiCmdDequeueIrq(pQueue, pCmd);
+            vboxVdmaDdiCmdDequeueIrq(pDevExt, pCmd);
             bComplete = TRUE;
         }
@@ -1494,10 +1707,10 @@
     else
     {
-        vboxVdmaDdiCmdEnqueueIrq(pQueue, pCmd);
+        vboxVdmaDdiCmdEnqueueIrq(pDevExt, pCmd);
     }
 
     if (bComplete)
     {
-        vboxVdmaDdiCmdNotifyCompletedIrq(pDevExt, pQueue, pCmd, enmComplType);
+        vboxVdmaDdiCmdProcessCompletedIrq(pDevExt, pCmd, enmComplType);
 
         while (!IsListEmpty(&pQueue->CmdQueue))
@@ -1506,6 +1719,6 @@
             if (pCmd->enmState == VBOXVDMADDI_STATE_COMPLETED)
             {
-                vboxVdmaDdiCmdDequeueIrq(pQueue, pCmd);
-                vboxVdmaDdiCmdNotifyCompletedIrq(pDevExt, pQueue, pCmd, pCmd->enmComplType);
+                vboxVdmaDdiCmdDequeueIrq(pDevExt, pCmd);
+                vboxVdmaDdiCmdProcessCompletedIrq(pDevExt, pCmd, pCmd->enmComplType);
             }
             else
@@ -1522,5 +1735,5 @@
 }
 
-VOID vboxVdmaDdiCmdSubmittedIrq(PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd)
+VOID vboxVdmaDdiCmdSubmittedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd)
 {
     BOOLEAN bQueued = pCmd->enmState >= VBOXVDMADDI_STATE_PENDING;
@@ -1528,5 +1741,5 @@
     pCmd->enmState = VBOXVDMADDI_STATE_SUBMITTED;
     if (!bQueued)
-        vboxVdmaDdiCmdEnqueueIrq(pQueue, pCmd);
+        vboxVdmaDdiCmdEnqueueIrq(pDevExt, pCmd);
 }
 
@@ -1534,5 +1747,4 @@
 {
     PVBOXMP_DEVEXT pDevExt;
-    PVBOXVDMADDI_CMD_QUEUE pQueue;
     PVBOXVDMADDI_CMD pCmd;
     DXGK_INTERRUPT_TYPE enmComplType;
@@ -1542,15 +1754,21 @@
 {
     PVBOXVDMADDI_CMD_COMPLETED_CB pdc = (PVBOXVDMADDI_CMD_COMPLETED_CB)Context;
-    BOOLEAN bNeedDpc = vboxVdmaDdiCmdCompletedIrq(pdc->pDevExt, pdc->pQueue, pdc->pCmd, pdc->enmComplType);
-    pdc->pDevExt->bNotifyDxDpc |= bNeedDpc;
+    PVBOXMP_DEVEXT pDevExt = pdc->pDevExt;
+    BOOLEAN bNeedDpc = vboxVdmaDdiCmdCompletedIrq(pDevExt, pdc->pCmd, pdc->enmComplType);
+    pDevExt->bNotifyDxDpc |= bNeedDpc;
+
+    if (bNeedDpc)
+    {
+        BOOLEAN bRc = pDevExt->u.primary.DxgkInterface.DxgkCbQueueDpc(pDevExt->u.primary.DxgkInterface.DeviceHandle);
+        Assert(bRc);
+    }
 
     return bNeedDpc;
 }
 
-NTSTATUS vboxVdmaDdiCmdCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
+NTSTATUS vboxVdmaDdiCmdCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType)
 {
     VBOXVDMADDI_CMD_COMPLETED_CB context;
     context.pDevExt = pDevExt;
-    context.pQueue = pQueue;
     context.pCmd = pCmd;
     context.enmComplType = enmComplType;
@@ -1563,9 +1781,4 @@
             &bNeedDps);
     Assert(Status == STATUS_SUCCESS);
-    if (Status == STATUS_SUCCESS && bNeedDps)
-    {
-        BOOLEAN bRc = pDevExt->u.primary.DxgkInterface.DxgkCbQueueDpc(pDevExt->u.primary.DxgkInterface.DeviceHandle);
-        Assert(bRc);
-    }
     return Status;
 }
@@ -1573,6 +1786,5 @@
 typedef struct VBOXVDMADDI_CMD_SUBMITTED_CB
 {
-//    PVBOXMP_DEVEXT pDevExt;
-    PVBOXVDMADDI_CMD_QUEUE pQueue;
+    PVBOXMP_DEVEXT pDevExt; /* device extension passed on to vboxVdmaDdiCmdSubmittedIrq together with pCmd */
     PVBOXVDMADDI_CMD pCmd;
 } VBOXVDMADDI_CMD_SUBMITTED_CB, *PVBOXVDMADDI_CMD_SUBMITTED_CB;
@@ -1581,13 +1793,13 @@
 {
     PVBOXVDMADDI_CMD_SUBMITTED_CB pdc = (PVBOXVDMADDI_CMD_SUBMITTED_CB)Context;
-    vboxVdmaDdiCmdSubmittedIrq(pdc->pQueue, pdc->pCmd);
+    vboxVdmaDdiCmdSubmittedIrq(pdc->pDevExt, pdc->pCmd);
 
     return FALSE;
 }
 
-NTSTATUS vboxVdmaDdiCmdSubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd)
+NTSTATUS vboxVdmaDdiCmdSubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd)
 {
     VBOXVDMADDI_CMD_SUBMITTED_CB context;
-    context.pQueue = pQueue;
+    context.pDevExt = pDevExt;
     context.pCmd = pCmd;
     BOOLEAN bRc;
@@ -1605,5 +1817,5 @@
 {
     PVBOXMP_DEVEXT pDevExt;
-    PVBOXWDDM_CONTEXT pContext;
+    UINT u32NodeOrdinal;
     uint32_t u32FenceId;
 } VBOXVDMADDI_CMD_COMPLETE_CB, *PVBOXVDMADDI_CMD_COMPLETE_CB;
@@ -1613,13 +1825,6 @@
     PVBOXVDMADDI_CMD_COMPLETE_CB pdc = (PVBOXVDMADDI_CMD_COMPLETE_CB)Context;
     PVBOXMP_DEVEXT pDevExt = pdc->pDevExt;
-    DXGKARGCB_NOTIFY_INTERRUPT_DATA notify;
-    memset(&notify, 0, sizeof(DXGKARGCB_NOTIFY_INTERRUPT_DATA));
-
-    notify.InterruptType = DXGK_INTERRUPT_DMA_COMPLETED;
-    notify.DmaCompleted.SubmissionFenceId = pdc->u32FenceId;
-    notify.DmaCompleted.NodeOrdinal = pdc->pContext->NodeOrdinal;
-    notify.DmaCompleted.EngineOrdinal = 0;
-
-    pDevExt->u.primary.DxgkInterface.DxgkCbNotifyInterrupt(pDevExt->u.primary.DxgkInterface.DeviceHandle, &notify);
+
+    vboxVdmaDdiCmdNotifyCompletedIrq(pDevExt, pdc->u32NodeOrdinal, pdc->u32FenceId, DXGK_INTERRUPT_DMA_COMPLETED);
 
     pDevExt->bNotifyDxDpc = TRUE;
@@ -1629,9 +1834,9 @@
 }
 
-static NTSTATUS vboxVdmaDdiCmdFenceNotifyComplete(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_CONTEXT pContext, uint32_t u32FenceId)
+static NTSTATUS vboxVdmaDdiCmdFenceNotifyComplete(PVBOXMP_DEVEXT pDevExt, uint32_t u32NodeOrdinal, uint32_t u32FenceId)
 {
     VBOXVDMADDI_CMD_COMPLETE_CB context;
     context.pDevExt = pDevExt;
-    context.pContext = pContext;
+    context.u32NodeOrdinal = u32NodeOrdinal;
     context.u32FenceId = u32FenceId;
     BOOLEAN bRet;
@@ -1646,8 +1851,8 @@
 }
 
-NTSTATUS vboxVdmaDdiCmdFenceComplete(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_CONTEXT pContext, uint32_t u32FenceId, DXGK_INTERRUPT_TYPE enmComplType)
-{
-    if (vboxVdmaDdiCmdCanComplete(&pDevExt->DdiCmdQueue))
-        return vboxVdmaDdiCmdFenceNotifyComplete(pDevExt, pContext, u32FenceId);
+NTSTATUS vboxVdmaDdiCmdFenceComplete(PVBOXMP_DEVEXT pDevExt, uint32_t u32NodeOrdinal, uint32_t u32FenceId, DXGK_INTERRUPT_TYPE enmComplType)
+{
+    if (vboxVdmaDdiCmdCanComplete(pDevExt, u32NodeOrdinal))
+        return vboxVdmaDdiCmdFenceNotifyComplete(pDevExt, u32NodeOrdinal, u32FenceId);
 
     PVBOXVDMADDI_CMD pCmd = (PVBOXVDMADDI_CMD)vboxWddmMemAlloc(sizeof (VBOXVDMADDI_CMD));
@@ -1655,6 +1860,6 @@
     if (pCmd)
     {
-        vboxVdmaDdiCmdInit(pCmd, u32FenceId, pContext, vboxVdmaDdiCmdCompletionCbFree, NULL);
-        NTSTATUS Status = vboxVdmaDdiCmdCompleted(pDevExt, &pDevExt->DdiCmdQueue, pCmd, enmComplType);
+        vboxVdmaDdiCmdInit(pCmd, u32NodeOrdinal, u32FenceId, vboxVdmaDdiCmdCompletionCbFree, NULL);
+        NTSTATUS Status = vboxVdmaDdiCmdCompleted(pDevExt, pCmd, enmComplType);
         Assert(Status == STATUS_SUCCESS);
         if (Status == STATUS_SUCCESS)
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPVdma.h	(revision 37626)
@@ -25,4 +25,6 @@
 #include <VBox/HGSMI/HGSMI.h>
 
+typedef struct _VBOXMP_DEVEXT *PVBOXMP_DEVEXT;
+
 /* ddi dma command queue handling */
 typedef enum
@@ -44,7 +46,7 @@
     LIST_ENTRY QueueEntry;
     VBOXVDMADDI_STATE enmState;
+    uint32_t u32NodeOrdinal;
     uint32_t u32FenceId;
     DXGK_INTERRUPT_TYPE enmComplType;
-    PVBOXWDDM_CONTEXT pContext;
     PFNVBOXVDMADDICMDCOMPLETE_DPC pfnComplete;
     PVOID pvComplete;
@@ -55,16 +57,21 @@
     volatile uint32_t cQueuedCmds;
     LIST_ENTRY CmdQueue;
-    LIST_ENTRY DpcCmdQueue;
 } VBOXVDMADDI_CMD_QUEUE, *PVBOXVDMADDI_CMD_QUEUE;
 
-VOID vboxVdmaDdiQueueInit(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue);
-BOOLEAN vboxVdmaDdiCmdCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType);
-VOID vboxVdmaDdiCmdSubmittedIrq(PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd);
-
-NTSTATUS vboxVdmaDdiCmdCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType);
-NTSTATUS vboxVdmaDdiCmdSubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, PVBOXVDMADDI_CMD pCmd);
+typedef struct VBOXVDMADDI_NODE /* DDI command bookkeeping kept per node; presumably the element type of pDevExt->aNodes -- confirm in the device extension header */
+{
+    VBOXVDMADDI_CMD_QUEUE CmdQueue; /* commands queued for this node, with their count in cQueuedCmds */
+    UINT uLastCompletedFenceId; /* zeroed by vboxVdmaDdiNodesInit; set on DXGK_INTERRUPT_DMA_COMPLETED and reported as DmaPreempted.LastCompletedFenceId */
+} VBOXVDMADDI_NODE, *PVBOXVDMADDI_NODE;
+
+VOID vboxVdmaDdiNodesInit(PVBOXMP_DEVEXT pDevExt);
+BOOLEAN vboxVdmaDdiCmdCompletedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType);
+VOID vboxVdmaDdiCmdSubmittedIrq(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd);
+
+NTSTATUS vboxVdmaDdiCmdCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, DXGK_INTERRUPT_TYPE enmComplType);
+NTSTATUS vboxVdmaDdiCmdSubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd);
 
 DECLINLINE(VOID) vboxVdmaDdiCmdInit(PVBOXVDMADDI_CMD pCmd,
-        uint32_t u32FenceId, PVBOXWDDM_CONTEXT pContext,
+        uint32_t u32NodeOrdinal, uint32_t u32FenceId,
         PFNVBOXVDMADDICMDCOMPLETE_DPC pfnComplete, PVOID pvComplete)
 {
@@ -72,6 +79,6 @@
     pCmd->QueueEntry.Flink = NULL;
     pCmd->enmState = VBOXVDMADDI_STATE_NOT_QUEUED;
+    pCmd->u32NodeOrdinal = u32NodeOrdinal;
     pCmd->u32FenceId = u32FenceId;
-    pCmd->pContext = pContext;
     pCmd->pfnComplete = pfnComplete;
     pCmd->pvComplete = pvComplete;
@@ -87,21 +94,15 @@
 }
 
-NTSTATUS vboxVdmaDdiCmdFenceComplete(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_CONTEXT pContext, uint32_t u32FenceId, DXGK_INTERRUPT_TYPE enmComplType);
+NTSTATUS vboxVdmaDdiCmdFenceComplete(PVBOXMP_DEVEXT pDevExt, uint32_t u32NodeOrdinal, uint32_t u32FenceId, DXGK_INTERRUPT_TYPE enmComplType);
 
 DECLCALLBACK(VOID) vboxVdmaDdiCmdCompletionCbFree(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD pCmd, PVOID pvContext);
 
-DECLINLINE(VOID) vboxVdmaDdiCmdGetCompletedListIsr(PVBOXVDMADDI_CMD_QUEUE pQueue, LIST_ENTRY *pList)
-{
-    vboxVideoLeDetach(&pQueue->DpcCmdQueue, pList);
-}
-
-DECLINLINE(BOOLEAN) vboxVdmaDdiCmdIsCompletedListEmptyIsr(PVBOXVDMADDI_CMD_QUEUE pQueue)
-{
-    return IsListEmpty(&pQueue->DpcCmdQueue);
-}
+VOID vboxVdmaDdiCmdGetCompletedListIsr(PVBOXMP_DEVEXT pDevExt, LIST_ENTRY *pList);
+
+BOOLEAN vboxVdmaDdiCmdIsCompletedListEmptyIsr(PVBOXMP_DEVEXT pDevExt);
 
 #define VBOXVDMADDI_CMD_FROM_ENTRY(_pEntry) ((PVBOXVDMADDI_CMD)(((uint8_t*)(_pEntry)) - RT_OFFSETOF(VBOXVDMADDI_CMD, QueueEntry)))
 
-DECLINLINE(VOID) vboxVdmaDdiCmdHandleCompletedList(PVBOXMP_DEVEXT pDevExt, PVBOXVDMADDI_CMD_QUEUE pQueue, LIST_ENTRY *pList)
+DECLINLINE(VOID) vboxVdmaDdiCmdHandleCompletedList(PVBOXMP_DEVEXT pDevExt, LIST_ENTRY *pList)
 {
     LIST_ENTRY *pEntry = pList->Flink;
@@ -188,5 +189,4 @@
 {
     VBOXVDMAPIPE_CMD_DR Hdr;
-    PVBOXMP_DEVEXT pDevExt;
     PVBOXWDDM_CONTEXT pContext;
     struct VBOXWDDM_SWAPCHAIN *pSwapchain;
@@ -202,6 +202,5 @@
             UINT b2DRelated     : 1;
             UINT b3DRelated     : 1;
-            UINT bDecVBVAUnlock : 1;
-            UINT Reserved       : 29;
+            UINT Reserved       : 30;
         };
         UINT Value;
@@ -211,6 +210,8 @@
 {
     VBOXVDMAPIPE_CMD_DR Hdr;
+#ifndef VBOX_WDDM_IRQ_COMPLETION
     VBOXVDMADDI_CMD DdiCmd;
-    PVBOXMP_DEVEXT pDevExt;
+#endif
+    PVBOXWDDM_CONTEXT pContext;
     VBOXVDMACMD_TYPE enmCmd;
     VBOXVDMAPIPE_FLAGS_DMACMD fFlags;
@@ -309,9 +310,15 @@
 
 #endif
-NTSTATUS vboxVdmaGgCmdSubmit(PVBOXVDMAGG pVdma, PVBOXVDMAPIPE_CMD_DR pCmd);
-PVBOXVDMAPIPE_CMD_DR vboxVdmaGgCmdCreate(PVBOXVDMAGG pVdma, VBOXVDMAPIPE_CMD_TYPE enmType, uint32_t cbCmd);
-void vboxVdmaGgCmdDestroy(PVBOXVDMAPIPE_CMD_DR pDr);
+NTSTATUS vboxVdmaGgCmdSubmit(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DR pCmd);
+PVBOXVDMAPIPE_CMD_DR vboxVdmaGgCmdCreate(PVBOXMP_DEVEXT pDevExt, VBOXVDMAPIPE_CMD_TYPE enmType, uint32_t cbCmd);
+void vboxVdmaGgCmdDestroy(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DR pDr);
 
 NTSTATUS vboxVdmaPostHideSwapchain(PVBOXWDDM_SWAPCHAIN pSwapchain);
+
+NTSTATUS vboxVdmaGgCmdDmaNotifyCompleted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD pCmd, DXGK_INTERRUPT_TYPE enmComplType);
+NTSTATUS vboxVdmaGgCmdDmaNotifySubmitted(PVBOXMP_DEVEXT pDevExt, PVBOXVDMAPIPE_CMD_DMACMD pCmd);
+VOID vboxVdmaGgCmdDmaNotifyInit(PVBOXVDMAPIPE_CMD_DMACMD pCmd,
+        uint32_t u32NodeOrdinal, uint32_t u32FenceId,
+        PFNVBOXVDMADDICMDCOMPLETE_DPC pfnComplete, PVOID pvComplete);
 
 #define VBOXVDMAPIPE_CMD_DR_FROM_DDI_CMD(_pCmd) ((PVBOXVDMAPIPE_CMD_DR)(((uint8_t*)(_pCmd)) - RT_OFFSETOF(VBOXVDMAPIPE_CMD_DR, DdiCmd)))
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.cpp	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.cpp	(revision 37626)
@@ -487,5 +487,5 @@
         Ctx.pDr = pDr;
         Ctx.pEvent = &Event;
-        vboxVdmaDdiCmdInit(pDdiCmd, 0, NULL, vboxWddmChildStatusReportCompletion, &Ctx);
+        vboxVdmaDdiCmdInit(pDdiCmd, 0, 0, vboxWddmChildStatusReportCompletion, &Ctx);
         /* mark command as submitted & invisible for the dx runtime since dx did not originate it */
         vboxVdmaDdiCmdSubmittedNotDx(pDdiCmd);
@@ -946,5 +946,5 @@
                     LOG(("sources(%d), children(%d)", *NumberOfVideoPresentSources, *NumberOfChildren));
 
-                    vboxVdmaDdiQueueInit(pContext, &pContext->DdiCmdQueue);
+                    vboxVdmaDdiNodesInit(pContext);
                     vboxVideoCmInit(&pContext->CmMgr);
                     InitializeListHead(&pContext->SwapchainList3D);
@@ -1242,5 +1242,5 @@
         }
 
-        bNeedDpc |= !vboxVdmaDdiCmdIsCompletedListEmptyIsr(&pDevExt->DdiCmdQueue);
+        bNeedDpc |= !vboxVdmaDdiCmdIsCompletedListEmptyIsr(pDevExt);
 
         if (pDevExt->bNotifyDxDpc)
@@ -1306,5 +1306,5 @@
     vboxSHGSMIListDetach2List(&pdc->pDevExt->VhwaCmdList, &pdc->data.VhwaCmdList);
 #endif
-    vboxVdmaDdiCmdGetCompletedListIsr(&pdc->pDevExt->DdiCmdQueue, &pdc->data.CompletedDdiCmdQueue);
+    vboxVdmaDdiCmdGetCompletedListIsr(pdc->pDevExt, &pdc->data.CompletedDdiCmdQueue);
 
     pdc->data.bNotifyDpc = pdc->pDevExt->bNotifyDxDpc;
@@ -1359,5 +1359,5 @@
 #endif
 
-    vboxVdmaDdiCmdHandleCompletedList(pDevExt, &pDevExt->DdiCmdQueue, &context.data.CompletedDdiCmdQueue);
+    vboxVdmaDdiCmdHandleCompletedList(pDevExt, &context.data.CompletedDdiCmdQueue);
 
 //    LOGF(("LEAVE, context(0x%p)", MiniportDeviceContext));
@@ -1602,5 +1602,5 @@
             pCaps->MemoryManagementCaps.PagingNode = 0;
             /* @todo: this correlates with pCaps->SchedulingCaps.MultiEngineAware */
-            pCaps->GpuEngineTopology.NbAsymetricProcessingNodes = 1;
+            pCaps->GpuEngineTopology.NbAsymetricProcessingNodes = VBOXWDDM_NUM_NODES;
 
             break;
@@ -1729,29 +1729,42 @@
 }
 
-PVBOXWDDM_ALLOCATION vboxWddmAllocationCreateFromResource(PVBOXWDDM_RESOURCE pResource, uint32_t iIndex)
-{
-    PVBOXWDDM_ALLOCATION pAllocation = NULL;
-    if (pResource)
-    {
-        Assert(iIndex < pResource->cAllocations);
-        if (iIndex < pResource->cAllocations)
-        {
-            pAllocation = &pResource->aAllocations[iIndex];
-            memset(pAllocation, 0, sizeof (VBOXWDDM_ALLOCATION));
-        }
-    }
-    else
-        pAllocation = (PVBOXWDDM_ALLOCATION)vboxWddmMemAllocZero(sizeof (VBOXWDDM_ALLOCATION));
-
-    if (pAllocation)
-    {
-        if (pResource)
-        {
-            pAllocation->pResource = pResource;
-            pAllocation->iIndex = iIndex;
-        }
-    }
-
-    return pAllocation;
+PVBOXWDDM_RESOURCE vboxWddmResourceCreate(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_RCINFO pRcInfo)
+{   /* Allocates a resource sized for pRcInfo->cAllocInfos trailing allocations and copies flags/descriptor from pRcInfo; returns NULL on allocation failure. pDevExt is not used. */
+    PVBOXWDDM_RESOURCE pRc = (PVBOXWDDM_RESOURCE)vboxWddmMemAllocZero(RT_OFFSETOF(VBOXWDDM_RESOURCE, aAllocations[pRcInfo->cAllocInfos]));
+    if (pRc)
+    {
+        pRc->cRefs = 1; /* the creator's reference */
+        pRc->cAllocations = pRcInfo->cAllocInfos;
+        pRc->fFlags = pRcInfo->fFlags;
+        pRc->RcDesc = pRcInfo->RcDesc;
+    }
+    else
+        AssertFailed();
+    return pRc;
+}
+
+VOID vboxWddmResourceRetain(PVBOXWDDM_RESOURCE pResource)
+{
+    ASMAtomicIncU32(&pResource->cRefs); /* takes one more reference; paired with vboxWddmResourceRelease */
+}
+
+static VOID vboxWddmResourceDestroy(PVBOXWDDM_RESOURCE pResource)
+{
+    vboxWddmMemFree(pResource); /* frees the resource block; called by vboxWddmResourceRelease when the count drops to zero */
+}
+
+VOID vboxWddmResourceWaitDereference(PVBOXWDDM_RESOURCE pResource)
+{
+    vboxWddmCounterU32Wait(&pResource->cRefs, 1); /* presumably blocks until cRefs is down to 1, i.e. only the owner's reference is left -- confirm against vboxWddmCounterU32Wait */
+}
+
+VOID vboxWddmResourceRelease(PVBOXWDDM_RESOURCE pResource)
+{
+    /* drops one reference; the resource is destroyed once the count reaches zero */
+    uint32_t cRefs = ASMAtomicDecU32(&pResource->cRefs);
+    Assert(cRefs < UINT32_MAX/2);
+    if (cRefs != 0)
+        return;
+    vboxWddmResourceDestroy(pResource);
 }
 
@@ -1762,4 +1775,5 @@
     {
         Assert(&pResource->aAllocations[pAllocation->iIndex] == pAllocation);
+        vboxWddmResourceRelease(pResource);
     }
     else
@@ -1769,8 +1783,6 @@
 }
 
-NTSTATUS vboxWddmDestroyAllocation(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_ALLOCATION pAllocation)
-{
-    PAGED_CODE();
-
+VOID vboxWddmAllocationCleanup(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_ALLOCATION pAllocation)
+{
     switch (pAllocation->enmType)
     {
@@ -1797,14 +1809,4 @@
         }
 #endif
-//#ifdef VBOX_WITH_VIDEOHWACCEL
-//        case VBOXWDDM_ALLOC_TYPE_UMD_RC_GENERIC:
-//        {
-//            if (pAllocation->fRcFlags.Overlay)
-//            {
-//                vboxVhwaHlpDestroyOverlay(pDevExt, pAllocation);
-//            }
-//            break;
-//        }
-//#endif
         case VBOXWDDM_ALLOC_TYPE_UMD_HGSMI_BUFFER:
         {
@@ -1825,11 +1827,48 @@
         vboxWddmSwapchainRelease(pSwapchain);
     }
+}
+
+VOID vboxWddmAllocationDestroy(PVBOXWDDM_ALLOCATION pAllocation)
+{
+    PAGED_CODE();
 
     vboxWddmAllocationDeleteFromResource(pAllocation->pResource, pAllocation);
-
-    return STATUS_SUCCESS;
-}
-
-NTSTATUS vboxWddmCreateAllocation(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_RESOURCE pResource, uint32_t iIndex, DXGK_ALLOCATIONINFO* pAllocationInfo)
+}
+
+/* Returns the allocation slot iIndex embedded in pResource (zeroed and bound to the resource),
+ * or a standalone allocation obtained from vboxWddmMemAllocZero when pResource is NULL.
+ * Returns NULL when iIndex is out of range or the standalone allocation fails. */
+PVBOXWDDM_ALLOCATION vboxWddmAllocationCreateFromResource(PVBOXWDDM_RESOURCE pResource, uint32_t iIndex)
+{
+    PVBOXWDDM_ALLOCATION pAllocation = NULL;
+    if (pResource)
+    {
+        Assert(iIndex < pResource->cAllocations);
+        if (iIndex < pResource->cAllocations)
+        {
+            pAllocation = &pResource->aAllocations[iIndex];
+            memset(pAllocation, 0, sizeof (VBOXWDDM_ALLOCATION));
+            pAllocation->pResource = pResource;
+            pAllocation->iIndex = iIndex;
+            /* Take the resource reference only when an allocation is actually handed out:
+             * it is dropped again by vboxWddmAllocationDeleteFromResource, which never runs
+             * for a NULL result, so retaining on the failure path would leak the resource. */
+            vboxWddmResourceRetain(pResource);
+        }
+    }
+    else
+    {
+        pAllocation = (PVBOXWDDM_ALLOCATION)vboxWddmMemAllocZero(sizeof (VBOXWDDM_ALLOCATION));
+    }
+    return pAllocation;
+}
+
+VOID vboxWddmAllocationWaitDereference(PVBOXWDDM_ALLOCATION pAllocation)
+{
+    vboxWddmCounterU32Wait(&pAllocation->cRefs, 1); /* presumably blocks until cRefs is down to 1, i.e. only the owner's reference is left -- confirm against vboxWddmCounterU32Wait */
+}
+
+
+NTSTATUS vboxWddmAllocationCreate(PVBOXMP_DEVEXT pDevExt, PVBOXWDDM_RESOURCE pResource, uint32_t iIndex, DXGK_ALLOCATIONINFO* pAllocationInfo)
 {
     PAGED_CODE();
@@ -1862,4 +1901,5 @@
             pAllocation->enmType = pAllocInfo->enmType;
             pAllocation->offVram = VBOXVIDEOOFFSET_VOID;
+            pAllocation->cRefs = 1;
             pAllocation->bVisible = FALSE;
             pAllocation->bAssigned = FALSE;
@@ -2036,4 +2076,5 @@
             if (pResource)
             {
+                pResource->cRefs = 1;
                 pResource->cAllocations = pRcInfo->cAllocInfos;
                 pResource->fFlags = pRcInfo->fFlags;
@@ -2053,13 +2094,14 @@
         for (UINT i = 0; i < pCreateAllocation->NumAllocations; ++i)
         {
-            Status = vboxWddmCreateAllocation(pDevExt, pResource, i, &pCreateAllocation->pAllocationInfo[i]);
+            Status = vboxWddmAllocationCreate(pDevExt, pResource, i, &pCreateAllocation->pAllocationInfo[i]);
             Assert(Status == STATUS_SUCCESS);
             if (Status != STATUS_SUCCESS)
             {
-                LOGREL(("ERROR: vboxWddmCreateAllocation error (0x%x)", Status));
+                LOGREL(("ERROR: vboxWddmAllocationCreate error (0x%x)", Status));
                 /* note: i-th allocation is expected to be cleared in a fail handling code above */
                 for (UINT j = 0; j < i; ++j)
                 {
-                    vboxWddmDestroyAllocation(pDevExt, (PVBOXWDDM_ALLOCATION)pCreateAllocation->pAllocationInfo[j].hAllocation);
+                    vboxWddmAllocationCleanup(pDevExt, (PVBOXWDDM_ALLOCATION)pCreateAllocation->pAllocationInfo[j].hAllocation);
+                    vboxWddmAllocationRelease((PVBOXWDDM_ALLOCATION)pCreateAllocation->pAllocationInfo[j].hAllocation);
                 }
             }
@@ -2068,5 +2110,5 @@
         pCreateAllocation->hResource = pResource;
         if (pResource && Status != STATUS_SUCCESS)
-            vboxWddmMemFree(pResource);
+            vboxWddmResourceRelease(pResource);
     }
     LOGF(("LEAVE, status(0x%x), context(0x%x)", Status, hAdapter));
@@ -2091,4 +2133,5 @@
 
     PVBOXWDDM_RESOURCE pRc = (PVBOXWDDM_RESOURCE)pDestroyAllocation->hResource;
+    PVBOXMP_DEVEXT pDevExt = (PVBOXMP_DEVEXT)hAdapter;
 
     if (pRc)
@@ -2101,10 +2144,15 @@
         PVBOXWDDM_ALLOCATION pAlloc = (PVBOXWDDM_ALLOCATION)pDestroyAllocation->pAllocationList[i];
         Assert(pAlloc->pResource == pRc);
-        vboxWddmDestroyAllocation((PVBOXMP_DEVEXT)hAdapter, pAlloc);
+        /* wait until all current allocation-related ops have completed */
+        vboxWddmAllocationWaitDereference(pAlloc);
+        vboxWddmAllocationCleanup(pDevExt, pAlloc);
+        vboxWddmAllocationRelease(pAlloc);
     }
 
     if (pRc)
     {
-        vboxWddmMemFree(pRc);
+        /* wait until all current resource-related ops have completed */
+        vboxWddmResourceWaitDereference(pRc);
+        vboxWddmResourceRelease(pRc);
     }
 
@@ -2483,28 +2531,18 @@
 static NTSTATUS vboxWddmSubmitCmd(PVBOXMP_DEVEXT pDevExt, VBOXVDMAPIPE_CMD_DMACMD *pCmd)
 {
-    NTSTATUS Status = vboxVdmaDdiCmdSubmitted(pDevExt, &pDevExt->DdiCmdQueue, &pCmd->DdiCmd);
+    NTSTATUS Status = vboxVdmaGgCmdDmaNotifySubmitted(pDevExt, pCmd);
     Assert(Status == STATUS_SUCCESS);
     if (Status == STATUS_SUCCESS)
     {
-        if (pCmd->fFlags.bDecVBVAUnlock)
-        {
-            uint32_t cNew = ASMAtomicIncU32(&pDevExt->cUnlockedVBVADisabled);
-            Assert(cNew < UINT32_MAX/2);
-        }
-        NTSTATUS submStatus = vboxVdmaGgCmdSubmit(&pDevExt->u.primary.Vdma.DmaGg, &pCmd->Hdr);
+        NTSTATUS submStatus = vboxVdmaGgCmdSubmit(pDevExt, &pCmd->Hdr);
         Assert(submStatus == STATUS_SUCCESS);
         if (submStatus != STATUS_SUCCESS)
         {
-            if (pCmd->fFlags.bDecVBVAUnlock)
-            {
-                uint32_t cNew = ASMAtomicDecU32(&pDevExt->cUnlockedVBVADisabled);
-                Assert(cNew < UINT32_MAX/2);
-            }
-            vboxVdmaDdiCmdCompleted(pDevExt, &pDevExt->DdiCmdQueue, &pCmd->DdiCmd, DXGK_INTERRUPT_DMA_FAULTED);
+            vboxVdmaGgCmdDmaNotifyCompleted(pDevExt, pCmd, DXGK_INTERRUPT_DMA_FAULTED);
         }
     }
     else
     {
-        vboxVdmaGgCmdDestroy(&pCmd->Hdr);
+        vboxVdmaGgCmdDestroy(pDevExt, &pCmd->Hdr);
     }
     return Status;
@@ -2514,13 +2552,12 @@
 {
     NTSTATUS Status = STATUS_SUCCESS;
-
-    PVBOXVDMAPIPE_CMD_DMACMD_BLT pBltCmd = (PVBOXVDMAPIPE_CMD_DMACMD_BLT)vboxVdmaGgCmdCreate(&pDevExt->u.primary.Vdma.DmaGg, VBOXVDMAPIPE_CMD_TYPE_DMACMD, RT_OFFSETOF(VBOXVDMAPIPE_CMD_DMACMD_BLT, Blt.DstRects.UpdateRects.aRects[pBlt->Blt.DstRects.UpdateRects.cRects]));
+    PVBOXVDMAPIPE_CMD_DMACMD_BLT pBltCmd = (PVBOXVDMAPIPE_CMD_DMACMD_BLT)vboxVdmaGgCmdCreate(pDevExt, VBOXVDMAPIPE_CMD_TYPE_DMACMD, RT_OFFSETOF(VBOXVDMAPIPE_CMD_DMACMD_BLT, Blt.DstRects.UpdateRects.aRects[pBlt->Blt.DstRects.UpdateRects.cRects]));
     Assert(pBltCmd);
     if (pBltCmd)
     {
         VBOXWDDM_SOURCE *pSource = &pDevExt->aSources[pBlt->Blt.DstAlloc.srcId];
-        vboxVdmaDdiCmdInit(&pBltCmd->Hdr.DdiCmd, u32FenceId, pContext, vboxVdmaGgDdiCmdDestroy, pBltCmd);
-        pBltCmd->Hdr.pDevExt = pDevExt;
+        vboxVdmaGgCmdDmaNotifyInit(&pBltCmd->Hdr, pContext->NodeOrdinal, u32FenceId, NULL, NULL);
         pBltCmd->Hdr.fFlags = fBltFlags;
+        pBltCmd->Hdr.pContext = pContext;
         pBltCmd->Hdr.enmCmd = VBOXVDMACMD_TYPE_DMA_PRESENT_BLT;
         memcpy(&pBltCmd->Blt, &pBlt->Blt, RT_OFFSETOF(VBOXVDMA_BLT, DstRects.UpdateRects.aRects[pBlt->Blt.DstRects.UpdateRects.cRects]));
@@ -2529,7 +2566,6 @@
     else
     {
-        Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, u32FenceId, DXGK_INTERRUPT_DMA_FAULTED);
-    }
-
+        Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, u32FenceId, DXGK_INTERRUPT_DMA_FAULTED);
+    }
     return Status;
 }
@@ -2627,5 +2663,5 @@
             /* get DPC data at IRQL */
 
-            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
+            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
             break;
         }
@@ -2675,8 +2711,9 @@
                                     uint32_t cUnlockedVBVADisabled = ASMAtomicReadU32(&pDevExt->cUnlockedVBVADisabled);
                                     if (!cUnlockedVBVADisabled)
+                                    {
                                         VBOXVBVA_OP(ReportDirtyRect, pDevExt, pSource, &rect);
+                                    }
                                     else
                                     {
-                                        Assert(KeGetCurrentIrql() == DISPATCH_LEVEL);
                                         VBOXVBVA_OP_WITHLOCK_ATDPC(ReportDirtyRect, pDevExt, pSource, &rect);
                                     }
@@ -2685,5 +2722,4 @@
                                 {
                                     fBltFlags.b2DRelated = 1;
-                                    fBltFlags.bDecVBVAUnlock = 1;
                                 }
 
@@ -2748,5 +2784,5 @@
             if (bComplete)
             {
-                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
+                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
             }
             break;
@@ -2787,6 +2823,6 @@
 
             PVBOXVDMADDI_CMD pDdiCmd = VBOXVDMADDI_CMD_FROM_BUF_DR(pDr);
-            vboxVdmaDdiCmdInit(pDdiCmd, pSubmitCommand->SubmissionFenceId, pContext, vboxWddmDmaCompleteChromiumCmd, pDr);
-            NTSTATUS Status = vboxVdmaDdiCmdSubmitted(pDevExt, &pDevExt->DdiCmdQueue, pDdiCmd);
+            vboxVdmaDdiCmdInit(pDdiCmd, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, vboxWddmDmaCompleteChromiumCmd, pDr);
+            NTSTATUS Status = vboxVdmaDdiCmdSubmitted(pDevExt, pDdiCmd);
             Assert(Status == STATUS_SUCCESS);
             if (Status == STATUS_SUCCESS)
@@ -2800,5 +2836,5 @@
             }
 #else
-            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
+            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
             Assert(Status == STATUS_SUCCESS);
 #endif
@@ -2808,14 +2844,14 @@
         {
             VBOXWDDM_DMA_PRIVATEDATA_FLIP *pFlip = (VBOXWDDM_DMA_PRIVATEDATA_FLIP*)pPrivateDataBase;
-            PVBOXVDMAPIPE_CMD_DMACMD_FLIP pFlipCmd = (PVBOXVDMAPIPE_CMD_DMACMD_FLIP)vboxVdmaGgCmdCreate(
-                    &pDevExt->u.primary.Vdma.DmaGg, VBOXVDMAPIPE_CMD_TYPE_DMACMD, sizeof (VBOXVDMAPIPE_CMD_DMACMD_FLIP));
+            PVBOXVDMAPIPE_CMD_DMACMD_FLIP pFlipCmd = (PVBOXVDMAPIPE_CMD_DMACMD_FLIP)vboxVdmaGgCmdCreate(pDevExt,
+                    VBOXVDMAPIPE_CMD_TYPE_DMACMD, sizeof (VBOXVDMAPIPE_CMD_DMACMD_FLIP));
             Assert(pFlipCmd);
             if (pFlipCmd)
             {
                 VBOXWDDM_SOURCE *pSource = &pDevExt->aSources[pFlip->Flip.Alloc.srcId];
-                vboxVdmaDdiCmdInit(&pFlipCmd->Hdr.DdiCmd, pSubmitCommand->SubmissionFenceId, pContext, vboxVdmaGgDdiCmdDestroy, pFlipCmd);
-                pFlipCmd->Hdr.pDevExt = pDevExt;
+                vboxVdmaGgCmdDmaNotifyInit(&pFlipCmd->Hdr, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, NULL, NULL);
                 pFlipCmd->Hdr.fFlags.Value = 0;
                 pFlipCmd->Hdr.fFlags.b3DRelated = 1;
+                pFlipCmd->Hdr.pContext = pContext;
                 pFlipCmd->Hdr.enmCmd = VBOXVDMACMD_TYPE_DMA_PRESENT_FLIP;
                 memcpy(&pFlipCmd->Flip, &pFlip->Flip, sizeof (pFlipCmd->Flip));
@@ -2825,5 +2861,5 @@
             else
             {
-                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_FAULTED);
+                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_FAULTED);
                 Assert(Status == STATUS_SUCCESS);
             }
@@ -2833,17 +2869,14 @@
         {
             PVBOXWDDM_DMA_PRIVATEDATA_CLRFILL pCF = (PVBOXWDDM_DMA_PRIVATEDATA_CLRFILL)pPrivateDataBase;
-            PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCFCmd = (PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL)vboxVdmaGgCmdCreate(
-                    &pDevExt->u.primary.Vdma.DmaGg, VBOXVDMAPIPE_CMD_TYPE_DMACMD,
-                    RT_OFFSETOF(VBOXVDMAPIPE_CMD_DMACMD_CLRFILL, ClrFill.Rects.aRects[pCF->ClrFill.Rects.cRects]));
+            PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL pCFCmd = (PVBOXVDMAPIPE_CMD_DMACMD_CLRFILL)vboxVdmaGgCmdCreate(pDevExt,
+                    VBOXVDMAPIPE_CMD_TYPE_DMACMD, RT_OFFSETOF(VBOXVDMAPIPE_CMD_DMACMD_CLRFILL, ClrFill.Rects.aRects[pCF->ClrFill.Rects.cRects]));
             Assert(pCFCmd);
             if (pCFCmd)
             {
 //                VBOXWDDM_SOURCE *pSource = &pDevExt->aSources[pFlip->Flip.Alloc.srcId];
-                vboxVdmaDdiCmdInit(&pCFCmd->Hdr.DdiCmd, pSubmitCommand->SubmissionFenceId, pContext, vboxVdmaGgDdiCmdDestroy, pCFCmd);
-                pCFCmd->Hdr.pDevExt = pDevExt;
-                pCFCmd->Hdr.pDevExt = pDevExt;
+                vboxVdmaGgCmdDmaNotifyInit(&pCFCmd->Hdr, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, NULL, NULL);
                 pCFCmd->Hdr.fFlags.Value = 0;
                 pCFCmd->Hdr.fFlags.b2DRelated = 1;
-                pCFCmd->Hdr.fFlags.bDecVBVAUnlock = 1;
+                pCFCmd->Hdr.pContext = pContext;
                 pCFCmd->Hdr.enmCmd = VBOXVDMACMD_TYPE_DMA_PRESENT_CLRFILL;
                 memcpy(&pCFCmd->ClrFill, &pCF->ClrFill, RT_OFFSETOF(VBOXVDMA_CLRFILL, Rects.aRects[pCF->ClrFill.Rects.cRects]));
@@ -2853,5 +2886,5 @@
             else
             {
-                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_FAULTED);
+                Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_FAULTED);
                 Assert(Status == STATUS_SUCCESS);
             }
@@ -2861,5 +2894,5 @@
         case VBOXVDMACMD_TYPE_DMA_NOP:
         {
-            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
+            Status = vboxVdmaDdiCmdFenceComplete(pDevExt, pContext->NodeOrdinal, pSubmitCommand->SubmissionFenceId, DXGK_INTERRUPT_DMA_COMPLETED);
             Assert(Status == STATUS_SUCCESS);
             break;
@@ -2906,5 +2939,5 @@
     LOGF(("ENTER, hAdapter(0x%x)", hAdapter));
 
-//    AssertBreakpoint();
+    AssertFailed();
     /* @todo: fixme: implement */
 
@@ -5318,4 +5351,10 @@
     vboxVDbgBreakFv();
 
+    if (pCreateContext->NodeOrdinal >= VBOXWDDM_NUM_NODES)
+    {
+        WARN(("Invalid NodeOrdinal (%d), expected to be less that (%d)\n", pCreateContext->NodeOrdinal, VBOXWDDM_NUM_NODES));
+        return STATUS_INVALID_PARAMETER;
+    }
+
     NTSTATUS Status = STATUS_SUCCESS;
     PVBOXWDDM_DEVICE pDevice = (PVBOXWDDM_DEVICE)hDevice;
@@ -5521,4 +5560,8 @@
     }
 */
+
+#ifdef DEBUG_misha
+    RTLogGroupSettings(0, "+default.e.l.f.l2.l3");
+#endif
 
     LOGREL(("Built %s %s", __DATE__, __TIME__));
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.h	(revision 37625)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Video/mp/wddm/VBoxMPWddm.h	(revision 37626)
@@ -62,4 +62,22 @@
 }
 
+VOID vboxWddmAllocationDestroy(PVBOXWDDM_ALLOCATION pAllocation);
+
+DECLINLINE(VOID) vboxWddmAllocationRelease(PVBOXWDDM_ALLOCATION pAllocation)
+{   /* Drops one reference from pAllocation; the allocation is destroyed once the counter reaches zero. */
+    uint32_t cRefs = ASMAtomicDecU32(&pAllocation->cRefs); /* cRefs holds the value returned by the atomic decrement */
+    Assert(cRefs < UINT32_MAX/2); /* presumably catches a release without a matching retain wrapping the counter - TODO confirm */
+    if (!cRefs)
+    {
+        vboxWddmAllocationDestroy(pAllocation); /* no references remain: hand the allocation to the destroy routine */
+    }
+}
+
+DECLINLINE(VOID) vboxWddmAllocationRetain(PVBOXWDDM_ALLOCATION pAllocation)
+{   /* Takes one additional reference on pAllocation by atomically incrementing its cRefs counter. */
+    ASMAtomicIncU32(&pAllocation->cRefs); /* counterpart of the decrement in vboxWddmAllocationRelease; the return value is ignored */
+}
+
+
 #define VBOXWDDMENTRY_2_SWAPCHAIN(_pE) ((PVBOXWDDM_SWAPCHAIN)((uint8_t*)(_pE) - RT_OFFSETOF(VBOXWDDM_SWAPCHAIN, DevExtListEntry)))
 
