Index: /trunk/include/VBox/VBoxVideo.h
===================================================================
--- /trunk/include/VBox/VBoxVideo.h	(revision 27382)
+++ /trunk/include/VBox/VBoxVideo.h	(revision 27383)
@@ -1164,4 +1164,5 @@
      * VERR_xxx         - on error */
     int32_t  rc;
+    uint64_t u64GuestContext;
     union
     {
@@ -1171,5 +1172,5 @@
 } VBOXVDMACBUF_DR, *PVBOXVDMACBUF_DR;
 
-#define VBOXVDMACBUF_DR_TAIL(_pCmd, _t) ( (_t)(((uint8_t*)(_pCmd)) + sizeof (VBOXVDMACBUF_DR)) )
+#define VBOXVDMACBUF_DR_TAIL(_pCmd, _t) ( (_t*)(((uint8_t*)(_pCmd)) + sizeof (VBOXVDMACBUF_DR)) ) /* yields a _t* to the first byte after the VBOXVDMACBUF_DR at _pCmd; pass the pointee type as _t, not a pointer type */
 
 typedef struct VBOXVDMACMD
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.cpp	(revision 27382)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.cpp	(revision 27383)
@@ -165,37 +165,15 @@
 }
 
-PVBOXVDMACBUF_DR vboxVdmaCBufDrCreate (PVBOXVDMAINFO pInfo, PVBOXVDMACMDBUF_INFO pBufInfo)
-{
-    PVBOXVDMACBUF_DR pCmdDr;
-
-    if (pBufInfo->fFlags & VBOXVDMACBUF_FLAG_BUF_FOLLOWS_DR)
-    {
-        /* data info is a pointer to the buffer to be coppied and included in the command */
-        pCmdDr = (PVBOXVDMACBUF_DR)VBoxSHGSMICommandAlloc (&pInfo->CmdHeap, sizeof (VBOXVDMACBUF_DR) + pBufInfo->cbBuf, HGSMI_CH_VBVA, VBVA_VDMA_CMD);
-        Assert(pCmdDr);
-        if (!pCmdDr)
-            return NULL;
-
-        void * pvData = VBOXVDMACBUF_DR_TAIL(pCmdDr, void*);
-        memcpy(pvData, (void*)pBufInfo->Location.pvBuf, pBufInfo->cbBuf);
-    }
-    else
-    {
-        pCmdDr = (PVBOXVDMACBUF_DR)VBoxSHGSMICommandAlloc (&pInfo->CmdHeap, sizeof (VBOXVDMACBUF_DR), HGSMI_CH_VBVA, VBVA_VDMA_CMD);
-        Assert(pCmdDr);
-        if (!pCmdDr)
-            return NULL;
-
-        if (!(pBufInfo->fFlags & VBOXVDMACBUF_FLAG_BUF_VRAM_OFFSET))
-            pCmdDr->Location.phBuf = pBufInfo->Location.phBuf;
-        else
-            pCmdDr->Location.offVramBuf = pBufInfo->Location.offVramBuf;
-    }
-
-    pCmdDr->fFlags = pBufInfo->fFlags;
-    pCmdDr->cbBuf = pBufInfo->cbBuf;
-    pCmdDr->u32FenceId = pBufInfo->u32FenceId;
-
-    return pCmdDr;
+/* Allocates a VBOXVDMACBUF_DR followed by cbTrailingData bytes from pInfo->CmdHeap and zero-fills all of it; returns NULL if the allocation fails. */
+PVBOXVDMACBUF_DR vboxVdmaCBufDrCreate (PVBOXVDMAINFO pInfo, uint32_t cbTrailingData)
+{
+    const uint32_t cbTotal = sizeof (VBOXVDMACBUF_DR) + cbTrailingData;
+    PVBOXVDMACBUF_DR pNewDr = (PVBOXVDMACBUF_DR)VBoxSHGSMICommandAlloc (&pInfo->CmdHeap, cbTotal, HGSMI_CH_VBVA, VBVA_VDMA_CMD);
+    Assert(pNewDr);
+    if (pNewDr)
+        memset (pNewDr, 0, cbTotal); /* callers rely on the fields they do not set (e.g. fFlags, u64GuestContext) being zero */
+    else
+        drprintf((__FUNCTION__": VBoxSHGSMICommandAlloc returned NULL\n"));
+    return pNewDr;
 }
 
@@ -218,11 +196,18 @@
     memset(&notify, 0, sizeof(DXGKARGCB_NOTIFY_INTERRUPT_DATA));
 
+    PVBOXWDDM_CONTEXT pContext = (PVBOXWDDM_CONTEXT)pDr->u64GuestContext;
+
     if (RT_SUCCESS(pDr->rc))
     {
         notify.InterruptType = DXGK_INTERRUPT_DMA_COMPLETED;
         notify.DmaCompleted.SubmissionFenceId = pDr->u32FenceId;
-        notify.DmaCompleted.NodeOrdinal = 0; /* @todo: ? */
-        notify.DmaCompleted.EngineOrdinal = 0; /* @todo: ? */
-        pVdma->uLastCompletedCmdFenceId = pDr->u32FenceId;
+        if (pContext)
+        {
+            notify.DmaCompleted.NodeOrdinal = pContext->NodeOrdinal;
+            notify.DmaCompleted.EngineOrdinal = 0;
+            pContext->uLastCompletedCmdFenceId = pDr->u32FenceId;
+        }
+        else
+            pVdma->uLastCompletedPagingBufferCmdFenceId = pDr->u32FenceId;
         pDevExt->bSetNotifyDxDpc = TRUE;
     }
@@ -231,7 +216,13 @@
         notify.InterruptType = DXGK_INTERRUPT_DMA_PREEMPTED;
         notify.DmaPreempted.PreemptionFenceId = pDr->u32FenceId;
-        notify.DmaPreempted.LastCompletedFenceId = pVdma->uLastCompletedCmdFenceId;
-        notify.DmaPreempted.NodeOrdinal = 0; /* @todo: ? */
-        notify.DmaPreempted.EngineOrdinal = 0; /* @todo: ? */
+        if (pContext)
+        {
+            notify.DmaPreempted.LastCompletedFenceId = pContext->uLastCompletedCmdFenceId;
+            notify.DmaPreempted.NodeOrdinal = pContext->NodeOrdinal;
+            notify.DmaPreempted.EngineOrdinal = 0;
+        }
+        else
+            notify.DmaPreempted.LastCompletedFenceId = pVdma->uLastCompletedPagingBufferCmdFenceId;
+
         pDevExt->bSetNotifyDxDpc = TRUE;
     }
@@ -242,6 +233,9 @@
         notify.DmaFaulted.FaultedFenceId = pDr->u32FenceId;
         notify.DmaFaulted.Status = STATUS_UNSUCCESSFUL; /* @todo: better status ? */
-        notify.DmaFaulted.NodeOrdinal = 0; /* @todo: ? */
-        notify.DmaFaulted.EngineOrdinal = 0; /* @todo: ? */
+        if (pContext)
+        {
+            notify.DmaFaulted.NodeOrdinal = pContext->NodeOrdinal;
+            notify.DmaFaulted.EngineOrdinal = 0;
+        }
         pDevExt->bSetNotifyDxDpc = TRUE;
     }
@@ -258,16 +252,2 @@
     VBoxSHGSMICommandSubmitAsynchIrq (&pInfo->CmdHeap, pDr, vboxVdmaCBufDrCompletionIrq, pDevExt, VBOXSHGSMI_FLAG_GH_ASYNCH_FORCE);
 }
-
-int vboxVdmaCBufSubmit (PDEVICE_EXTENSION pDevExt, PVBOXVDMAINFO pInfo, PVBOXVDMACMDBUF_INFO pBufInfo)
-{
-    dfprintf((__FUNCTION__"\n"));
-
-    PVBOXVDMACBUF_DR pdr = vboxVdmaCBufDrCreate (pInfo, pBufInfo);
-    if (!pdr)
-        return VERR_OUT_OF_RESOURCES;
-
-    vboxVdmaCBufDrSubmit (pDevExt, pInfo, pdr);
-
-    return VINF_SUCCESS;
-}
-
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.h	(revision 27382)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoVdma.h	(revision 27383)
@@ -24,20 +24,7 @@
 {
     HGSMIHEAP CmdHeap;
-    UINT      uLastCompletedCmdFenceId;
+    UINT      uLastCompletedPagingBufferCmdFenceId;
     BOOL      fEnabled;
 } VBOXVDMAINFO, *PVBOXVDMAINFO;
-
-typedef struct VBOXVDMACMDBUF_INFO
-{
-    uint32_t fFlags;
-    uint32_t cbBuf;
-    union
-    {
-        RTGCPHYS phBuf;
-        ULONG offVramBuf;
-        void *pvBuf;
-    } Location;
-    uint32_t u32FenceId;
-} VBOXVDMACMDBUF_INFO, *PVBOXVDMACMDBUF_INFO;
 
 int vboxVdmaCreate (struct _DEVICE_EXTENSION* pDevExt, VBOXVDMAINFO *pInfo, ULONG offBuffer, ULONG cbBuffer);
@@ -46,5 +33,7 @@
 int vboxVdmaFlush (struct _DEVICE_EXTENSION* pDevExt, PVBOXVDMAINFO pInfo);
 int vboxVdmaDestroy (struct _DEVICE_EXTENSION* pDevExt, PVBOXVDMAINFO pInfo);
-int vboxVdmaCBufSubmit (struct _DEVICE_EXTENSION* pDevExt, PVBOXVDMAINFO pInfo, PVBOXVDMACMDBUF_INFO pBufInfo);
+void vboxVdmaCBufDrSubmit (struct _DEVICE_EXTENSION* pDevExt, PVBOXVDMAINFO pInfo, PVBOXVDMACBUF_DR pDr);
+struct VBOXVDMACBUF_DR* vboxVdmaCBufDrCreate (PVBOXVDMAINFO pInfo, uint32_t cbTrailingData);
+void vboxVdmaCBufDrFree (PVBOXVDMAINFO pInfo, struct VBOXVDMACBUF_DR* pDr);
 
 #endif /* #ifndef ___VBoxVideoVdma_h___ */
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.cpp
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.cpp	(revision 27382)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.cpp	(revision 27383)
@@ -1698,5 +1698,4 @@
 
     PDEVICE_EXTENSION pDevExt = (PDEVICE_EXTENSION)hAdapter;
-    VBOXVDMACMDBUF_INFO BufInfo = {0};
 
     Assert(!pSubmitCommand->DmaBufferSegmentId);
@@ -1704,23 +1703,35 @@
     /* the DMA command buffer is located in system RAM, the host will need to pick it from there */
     //BufInfo.fFlags = 0; /* see VBOXVDMACBUF_FLAG_xx */
-    BufInfo.cbBuf =  pSubmitCommand->DmaBufferPrivateDataSubmissionEndOffset - pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset;
-    BufInfo.Location.phBuf = pSubmitCommand->DmaBufferPhysicalAddress.QuadPart + pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset;
-    BufInfo.u32FenceId = pSubmitCommand->SubmissionFenceId;
-
-    int rc = vboxVdmaCBufSubmit (pDevExt, &pDevExt->u.primary.Vdma, &BufInfo);
-    AssertRC(rc);
-    if (!RT_SUCCESS(rc))
-    {
-        switch (rc)
-        {
-            case VERR_OUT_OF_RESOURCES:
-                /* @todo: try flushing.. */
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                break;
-            default:
-                Status = STATUS_UNSUCCESSFUL;
-                break;
-        }
-    }
+    Assert(pSubmitCommand->DmaBufferPrivateDataSubmissionEndOffset - pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset >= sizeof (VBOXWDDM_DMA_PRIVATE_DATA));
+    if (pSubmitCommand->DmaBufferPrivateDataSubmissionEndOffset - pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset < sizeof (VBOXWDDM_DMA_PRIVATE_DATA))
+    {
+        drprintf((__FUNCTION__": DmaBufferPrivateDataSubmissionEndOffset (%d) - DmaBufferPrivateDataSubmissionStartOffset (%d) < sizeof (VBOXWDDM_DMA_PRIVATE_DATA) (%d)\n",
+                pSubmitCommand->DmaBufferPrivateDataSubmissionEndOffset,
+                pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset,
+                sizeof (VBOXWDDM_DMA_PRIVATE_DATA)));
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    PVBOXWDDM_DMA_PRIVATE_DATA pPrivateData = (PVBOXWDDM_DMA_PRIVATE_DATA)((uint8_t*)pSubmitCommand->pDmaBufferPrivateData + pSubmitCommand->DmaBufferPrivateDataSubmissionStartOffset);
+    Assert(pPrivateData);
+    PVBOXVDMACBUF_DR pDr = vboxVdmaCBufDrCreate (&pDevExt->u.primary.Vdma, 0);
+    if (!pDr)
+    {
+        /* @todo: try flushing.. */
+        drprintf((__FUNCTION__": vboxVdmaCBufDrCreate returned NULL\n"));
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+    // vboxVdmaCBufDrCreate zero initializes the pDr
+    //pDr->fFlags = 0;
+    pDr->cbBuf = pSubmitCommand->DmaBufferSubmissionEndOffset - pSubmitCommand->DmaBufferSubmissionStartOffset;
+    pDr->u32FenceId = pSubmitCommand->SubmissionFenceId;
+    pDr->rc = VERR_NOT_IMPLEMENTED;
+    if (pPrivateData)
+        pDr->u64GuestContext = (uint64_t)pPrivateData->pContext;
+//    else    // vboxVdmaCBufDrCreate zero initializes the pDr
+//        pDr->u64GuestContext = NULL;
+    pDr->Location.phBuf = pSubmitCommand->DmaBufferPhysicalAddress.QuadPart + pSubmitCommand->DmaBufferSubmissionStartOffset;
+
+    vboxVdmaCBufDrSubmit (pDevExt, &pDevExt->u.primary.Vdma, pDr);
 
     dfprintf(("<== "__FUNCTION__ ", context(0x%x)\n", hAdapter));
@@ -1768,10 +1779,10 @@
         case DXGK_OPERATION_TRANSFER:
         {
-            pBuildPagingBuffer->pDmaBuffer = (uint8_t*)pBuildPagingBuffer->pDmaBuffer + VBOXVDMACMD_SIZE(VBOXVDMACMD_DMA_BPB_TRANSFER);
+//            pBuildPagingBuffer->pDmaBuffer = (uint8_t*)pBuildPagingBuffer->pDmaBuffer + VBOXVDMACMD_SIZE(VBOXVDMACMD_DMA_BPB_TRANSFER);
             break;
         }
         case DXGK_OPERATION_FILL:
         {
-            pBuildPagingBuffer->pDmaBuffer = (uint8_t*)pBuildPagingBuffer->pDmaBuffer + VBOXVDMACMD_SIZE(VBOXVDMACMD_DMA_BPB_FILL);
+//            pBuildPagingBuffer->pDmaBuffer = (uint8_t*)pBuildPagingBuffer->pDmaBuffer + VBOXVDMACMD_SIZE(VBOXVDMACMD_DMA_BPB_FILL);
             break;
         }
@@ -1918,5 +1929,5 @@
     PAGED_CODE();
 
-    dfprintf(("==> "__FUNCTION__ ", context(0x%x)\n", hAdapter));
+//    dfprintf(("==> "__FUNCTION__ ", context(0x%x)\n", hAdapter));
 
     vboxVDbgBreakFv();
@@ -2009,5 +2020,5 @@
     pIsSupportedVidPnArg->IsVidPnSupported = bSupported;
 
-    dfprintf(("<== "__FUNCTION__ ", status(0x%x), context(0x%x)\n", Status, hAdapter));
+//    dfprintf(("<== "__FUNCTION__ ", status(0x%x), context(0x%x)\n", Status, hAdapter));
 
     return Status;
@@ -2083,5 +2094,5 @@
     PAGED_CODE();
 
-    dfprintf(("==> "__FUNCTION__ ", context(0x%x)\n", hAdapter));
+//    dfprintf(("==> "__FUNCTION__ ", context(0x%x)\n", hAdapter));
 
     vboxVDbgBreakFv();
@@ -2129,5 +2140,5 @@
         drprintf((__FUNCTION__ ": DxgkCbQueryVidPnInterface failed Status(0x%x)\n", Status));
 
-    dfprintf(("<== "__FUNCTION__ ", status(0x%x), context(0x%x)\n", Status, hAdapter));
+//    dfprintf(("<== "__FUNCTION__ ", status(0x%x), context(0x%x)\n", Status, hAdapter));
 
     return Status;
@@ -2660,4 +2671,15 @@
     PVBOXWDDM_DEVICE pDevice = pContext->pDevice;
     PDEVICE_EXTENSION pDevExt = pDevice->pAdapter;
+
+    Assert(pPresent->DmaBufferPrivateDataSize >= sizeof (VBOXWDDM_DMA_PRIVATE_DATA));
+    if (pPresent->DmaBufferPrivateDataSize < sizeof (VBOXWDDM_DMA_PRIVATE_DATA))
+    {
+        drprintf((__FUNCTION__": Present->DmaBufferPrivateDataSize(%d) < sizeof VBOXWDDM_DMA_PRIVATE_DATA (%d)\n", pPresent->DmaBufferPrivateDataSize , sizeof (VBOXWDDM_DMA_PRIVATE_DATA)));
+        /* @todo: can this actually happen? what status to return? */
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    PVBOXWDDM_DMA_PRIVATE_DATA pPrivateData = (PVBOXWDDM_DMA_PRIVATE_DATA)pPresent->pDmaBufferPrivateData;
+    pPrivateData->pContext = (PVBOXWDDM_CONTEXT)hContext;
 
     if (pPresent->Flags.Blt)
@@ -2722,4 +2744,5 @@
                         Assert(i);
                         pTransfer->cDstSubRects = i;
+                        pPresent->pDmaBufferPrivateData = (uint8_t*)pPresent->pDmaBufferPrivateData + sizeof(VBOXWDDM_DMA_PRIVATE_DATA);
                     }
                     else
Index: /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.h
===================================================================
--- /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.h	(revision 27382)
+++ /trunk/src/VBox/Additions/WINNT/Graphics/Miniport/wddm/VBoxVideoWddm.h	(revision 27383)
@@ -17,5 +17,6 @@
 #define ___VBoxVideoWddm_h___
 
-#define VBOXWDDM_C_DMA_BUFFER_SIZE         0x10000
+/* one page size */
+#define VBOXWDDM_C_DMA_BUFFER_SIZE         0x1000
 #define VBOXWDDM_C_ALLOC_LIST_SIZE         0xc00
 #define VBOXWDDM_C_PATH_LOCATION_LIST_SIZE 0xc00
@@ -120,9 +121,11 @@
     UINT  NodeOrdinal;
     UINT  EngineAffinity;
+    UINT uLastCompletedCmdFenceId;
 } VBOXWDDM_CONTEXT, *PVBOXWDDM_CONTEXT;
 
 typedef struct VBOXWDDM_DMA_PRIVATE_DATA
 {
-    uint8_t Reserved[64];
+    PVBOXWDDM_CONTEXT pContext; /* context the DMA buffer was built for (set from hContext at present time); copied into the descriptor's u64GuestContext on submit */
+    uint8_t Reserved[8];
 }VBOXWDDM_DMA_PRIVATE_DATA, *PVBOXWDDM_DMA_PRIVATE_DATA;
 
Index: /trunk/src/VBox/Devices/Graphics/DevVGA_VDMA.cpp
===================================================================
--- /trunk/src/VBox/Devices/Graphics/DevVGA_VDMA.cpp	(revision 27382)
+++ /trunk/src/VBox/Devices/Graphics/DevVGA_VDMA.cpp	(revision 27383)
@@ -86,6 +86,206 @@
     RTTHREAD hWorkerThread;
     PHGSMIINSTANCE pHgsmi;
+    PVGASTATE pVGAState;
     VBOXVDMAPIPE_CMD_POOL CmdPool;
 } VBOXVDMAHOST, *PVBOXVDMAHOST;
+
+/* To simplify things and to avoid extra backend interface modifications, a VBOXVDMA_RECTL is handed to pfnVBVAUpdateProcess cast to a VBVACMDHDR (see vboxVDMANotifyPrimaryUpdate); the asserts below verify that both structures have the same size and that left/top/width/height match x/y/w/h in member size and offset. */
+AssertCompile(sizeof(VBOXVDMA_RECTL) == sizeof(VBVACMDHDR));
+AssertCompile(RT_SIZEOFMEMB(VBOXVDMA_RECTL, left) == RT_SIZEOFMEMB(VBVACMDHDR, x));
+AssertCompile(RT_SIZEOFMEMB(VBOXVDMA_RECTL, top) == RT_SIZEOFMEMB(VBVACMDHDR, y));
+AssertCompile(RT_SIZEOFMEMB(VBOXVDMA_RECTL, width) == RT_SIZEOFMEMB(VBVACMDHDR, w));
+AssertCompile(RT_SIZEOFMEMB(VBOXVDMA_RECTL, height) == RT_SIZEOFMEMB(VBVACMDHDR, h));
+AssertCompile(RT_OFFSETOF(VBOXVDMA_RECTL, left) == RT_OFFSETOF(VBVACMDHDR, x));
+AssertCompile(RT_OFFSETOF(VBOXVDMA_RECTL, top) == RT_OFFSETOF(VBVACMDHDR, y));
+AssertCompile(RT_OFFSETOF(VBOXVDMA_RECTL, width) == RT_OFFSETOF(VBVACMDHDR, w));
+AssertCompile(RT_OFFSETOF(VBOXVDMA_RECTL, height) == RT_OFFSETOF(VBVACMDHDR, h));
+
+/* Reports rectangle pRectl of screen uScreenId as updated through pVGAState->pDrv's VBVA update begin/process/end callbacks; always returns VINF_SUCCESS. */
+static int vboxVDMANotifyPrimaryUpdate (PVGASTATE pVGAState, unsigned uScreenId, const VBOXVDMA_RECTL * pRectl)
+{
+    pVGAState->pDrv->pfnVBVAUpdateBegin (pVGAState->pDrv, uScreenId);
+    /* Updates the rectangle and sends the command to the VRDP server. */
+    pVGAState->pDrv->pfnVBVAUpdateProcess (pVGAState->pDrv, uScreenId,
+            (const PVBVACMDHDR)pRectl /* <- see above AssertCompile's and comments */,
+            sizeof (VBOXVDMA_RECTL));
+
+    pVGAState->pDrv->pfnVBVAUpdateEnd (pVGAState->pDrv, uScreenId, pRectl->left, pRectl->top,
+                                               pRectl->width, pRectl->height);
+
+    return VINF_SUCCESS;
+}
+
+/* Copies rectangle pSrcRectl of the source surface onto pDstRectl of the destination surface (same format and size only).
+ * Returns VINF_SUCCESS, or VERR_INVALID_FUNCTION when the surface formats differ. */
+static int vboxVDMACmdExecBltPerform(PVBOXVDMAHOST pVdma,
+        uint8_t *pvDstSurf, const uint8_t *pvSrcSurf,
+        const PVBOXVDMA_SURF_DESC pDstDesc, const PVBOXVDMA_SURF_DESC pSrcDesc,
+        const VBOXVDMA_RECTL * pDstRectl, const VBOXVDMA_RECTL * pSrcRectl)
+{
+    /* we support neither color conversion nor stretching */
+    Assert(pDstDesc->format == pSrcDesc->format);
+    Assert(pDstRectl->height == pSrcRectl->height);
+    Assert(pDstRectl->width == pSrcRectl->width);
+    if (pDstDesc->format != pSrcDesc->format)
+        return VERR_INVALID_FUNCTION;
+    if (pDstDesc->width == pDstRectl->width
+            && pSrcDesc->width == pSrcRectl->width
+            && pSrcDesc->width == pDstDesc->width)
+    {
+        /* full-width rectangles: the lines are adjacent in memory, so a single copy does it */
+        Assert(!pDstRectl->left);
+        Assert(!pSrcRectl->left);
+        uint32_t cbDstOff = pDstDesc->pitch * pDstRectl->top;
+        uint32_t cbSrcOff = pSrcDesc->pitch * pSrcRectl->top; /* the source rectangle has its own top line */
+        uint32_t cbSize = pDstDesc->pitch * pDstRectl->height;
+        memcpy(pvDstSurf + cbDstOff, pvSrcSurf + cbSrcOff, cbSize);
+    }
+    else
+    {
+        /* left and width are pixel counts: scale by bpp to get bytes, rounding the line length up to whole bytes */
+        uint32_t offDstStart = pDstDesc->pitch * pDstRectl->top + ((pDstDesc->bpp * pDstRectl->left) >> 3);
+        uint32_t cbDstLine = (pDstDesc->bpp * pDstRectl->width + 7) >> 3;
+        Assert(cbDstLine <= pDstDesc->pitch);
+        uint8_t * pvDstLine = pvDstSurf + offDstStart;
+
+        uint32_t offSrcStart = pSrcDesc->pitch * pSrcRectl->top + ((pSrcDesc->bpp * pSrcRectl->left) >> 3);
+        uint32_t cbSrcLine = (pSrcDesc->bpp * pSrcRectl->width + 7) >> 3;
+        Assert(cbSrcLine <= pSrcDesc->pitch);
+        const uint8_t * pvSrcLine = pvSrcSurf + offSrcStart;
+
+        Assert(cbDstLine == cbSrcLine);
+
+        /* copy exactly 'height' lines; consecutive lines start one full pitch apart */
+        for (uint32_t i = 0; i < pDstRectl->height; ++i)
+        {
+            memcpy (pvDstLine, pvSrcLine, cbDstLine);
+            pvDstLine += pDstDesc->pitch;
+            pvSrcLine += pSrcDesc->pitch;
+        }
+    }
+
+    return VINF_SUCCESS;
+}
+
+/* Executes one present-blt command found in the first cbBuffer bytes of the command: copies VRAM at offSrc to VRAM at offDst.
+ * @return on success the number of bytes the command contained, otherwise - VERR_xxx error code */
+static int vboxVDMACmdExecBlt(PVBOXVDMAHOST pVdma, const PVBOXVDMACMD_DMA_PRESENT_BLT pBlt, uint32_t cbBuffer)
+{
+    /* the fixed part must fit into the buffer before cDstSubRects may be read at all */
+    const uint32_t cbFixed = VBOXVDMACMD_BODY_FIELD_OFFSET(uint32_t, VBOXVDMACMD_DMA_PRESENT_BLT, aDstSubRects[0]);
+    Assert(cbFixed <= cbBuffer);
+    if (cbBuffer < cbFixed)
+        return VERR_INVALID_FUNCTION;
+    /* read the count once and bound it by the room left, so the 32-bit command size below cannot wrap around */
+    const uint32_t cSubRects = pBlt->cDstSubRects;
+    if (cSubRects > (cbBuffer - cbFixed) / sizeof (pBlt->aDstSubRects[0]))
+        return VERR_INVALID_FUNCTION;
+    const uint32_t cbBlt = VBOXVDMACMD_BODY_FIELD_OFFSET(uint32_t, VBOXVDMACMD_DMA_PRESENT_BLT, aDstSubRects[cSubRects]);
+    /* we do not support stretching for now */
+    Assert(pBlt->srcRectl.width == pBlt->dstRectl.width);
+    Assert(pBlt->srcRectl.height == pBlt->dstRectl.height);
+    if (pBlt->srcRectl.width != pBlt->dstRectl.width || pBlt->srcRectl.height != pBlt->dstRectl.height)
+        return VERR_INVALID_FUNCTION;
+    Assert(cSubRects);
+    /* NOTE(review): offDst/offSrc and the rectangles are not checked against the VRAM size in this function - confirm a check exists elsewhere */
+    uint8_t * pvRam = pVdma->pVGAState->vram_ptrR3;
+    if (cSubRects)
+    {
+        VBOXVDMA_RECTL dstRectl, srcRectl;
+        for (uint32_t i = 0; i < cSubRects; ++i)
+        {
+            const VBOXVDMA_RECTL *pDstRectl = &pBlt->aDstSubRects[i];
+            if (pBlt->dstRectl.left || pBlt->dstRectl.top)
+            {
+                dstRectl.left = pDstRectl->left + pBlt->dstRectl.left;
+                dstRectl.top = pDstRectl->top + pBlt->dstRectl.top;
+                dstRectl.width = pDstRectl->width;
+                dstRectl.height = pDstRectl->height;
+                pDstRectl = &dstRectl;
+            }
+
+            const VBOXVDMA_RECTL *pSrcRectl = &pBlt->aDstSubRects[i];
+            if (pBlt->srcRectl.left || pBlt->srcRectl.top)
+            {
+                srcRectl.left = pSrcRectl->left + pBlt->srcRectl.left;
+                srcRectl.top = pSrcRectl->top + pBlt->srcRectl.top;
+                srcRectl.width = pSrcRectl->width;
+                srcRectl.height = pSrcRectl->height;
+                pSrcRectl = &srcRectl;
+            }
+
+            int rc = vboxVDMACmdExecBltPerform(pVdma, pvRam + pBlt->offDst, pvRam + pBlt->offSrc,
+                    &pBlt->dstDesc, &pBlt->srcDesc,
+                    pDstRectl,
+                    pSrcRectl);
+            AssertRC(rc);
+            if (!RT_SUCCESS(rc))
+                return rc;
+        }
+    }
+    else
+    {
+        int rc = vboxVDMACmdExecBltPerform(pVdma, pvRam + pBlt->offDst, pvRam + pBlt->offSrc,
+                &pBlt->dstDesc, &pBlt->srcDesc,
+                &pBlt->dstRectl,
+                &pBlt->srcRectl);
+        AssertRC(rc);
+        if (!RT_SUCCESS(rc))
+            return rc;
+    }
+
+    int iView = 0;
+    /* @todo: fixme: check if update is needed and get iView */
+    vboxVDMANotifyPrimaryUpdate (pVdma->pVGAState, iView, &pBlt->dstRectl);
+
+    return cbBlt;
+}
+
+/* Executes the commands packed back to back in pvBuffer until all cbBuffer bytes are consumed.
+ * Returns VINF_SUCCESS once the buffer is used up exactly, otherwise a VERR_xxx status. */
+static int vboxVDMACmdExec(PVBOXVDMAHOST pVdma, const uint8_t *pvBuffer, uint32_t cbBuffer)
+{
+    do
+    {
+        Assert(pvBuffer);
+        Assert(cbBuffer >= VBOXVDMACMD_HEADER_SIZE());
+        if (!pvBuffer)
+            return VERR_INVALID_PARAMETER;
+        if (cbBuffer < VBOXVDMACMD_HEADER_SIZE())
+            return VERR_INVALID_PARAMETER;
+
+        PVBOXVDMACMD pCmd = (PVBOXVDMACMD)pvBuffer;
+        switch (pCmd->enmType)
+        {
+            case VBOXVDMACMD_TYPE_DMA_PRESENT_BLT:
+            {
+                const PVBOXVDMACMD_DMA_PRESENT_BLT pBlt = VBOXVDMACMD_BODY(pCmd, VBOXVDMACMD_DMA_PRESENT_BLT);
+                int cbBlt = vboxVDMACmdExecBlt(pVdma, pBlt, cbBuffer);
+                Assert(cbBlt >= 0);
+                Assert((uint32_t)cbBlt <= cbBuffer);
+                if (cbBlt >= 0)
+                {
+                    if (!cbBlt || (uint32_t)cbBlt > cbBuffer)
+                        return VERR_INVALID_STATE; /* never loop without progress or step past the end of the buffer */
+                    if ((uint32_t)cbBlt == cbBuffer)
+                        return VINF_SUCCESS;
+                    /* more commands follow: step forward over the one just executed */
+                    cbBuffer -= (uint32_t)cbBlt;
+                    pvBuffer += cbBlt;
+                }
+                else
+                    return cbBlt; /* error */
+                break;
+            }
+            default:
+                AssertBreakpoint();
+                return VERR_INVALID_FUNCTION;
+        }
+    } while (1);
+
+    /* we should not be here */
+    AssertBreakpoint();
+    return VERR_INVALID_STATE;
+}
 
 int vboxVDMAPipeConstruct(PVBOXVDMAPIPE pPipe)
@@ -317,13 +517,64 @@
 }
 
-void vboxVDMACommandProcess(PVBOXVDMAHOST pVdma, PVBOXVDMACBUF_DR pCmd)
+/* Locates the command buffer described by pCmd (data following the descriptor, a VRAM offset, or a guest physical page),
+ * executes it and completes pCmd via VBoxSHGSMICommandComplete with the execution status stored in pCmd->rc. */
+static void vboxVDMACommandProcess(PVBOXVDMAHOST pVdma, PVBOXVDMACBUF_DR pCmd)
 {
     PHGSMIINSTANCE pHgsmi = pVdma->pHgsmi;
-    pCmd->rc = VINF_SUCCESS;
-    int rc = VBoxSHGSMICommandComplete (pHgsmi, pCmd);
-    AssertRC(rc);
-}
-
-void vboxVDMAControlProcess(PVBOXVDMAHOST pVdma, PVBOXVDMA_CTL pCmd)
+    const uint8_t * pvBuf;
+    PGMPAGEMAPLOCK Lock;
+    int rc;
+    bool bReleaseLocked = false;
+    /* snapshot the size so the value that is checked is the value that is used (pCmd is presumably guest-writable - verify) */
+    const uint32_t cbBuf = pCmd->cbBuf;
+
+    do
+    {
+        PPDMDEVINS pDevIns = pVdma->pVGAState->pDevInsR3;
+        /* NOTE(review): the first two branches use cbBuf / offVramBuf without a bounds check visible here - confirm they are validated elsewhere */
+        if (pCmd->fFlags & VBOXVDMACBUF_FLAG_BUF_FOLLOWS_DR)
+            pvBuf = VBOXVDMACBUF_DR_TAIL(pCmd, const uint8_t);
+        else if (pCmd->fFlags & VBOXVDMACBUF_FLAG_BUF_VRAM_OFFSET)
+        {
+            uint8_t * pvRam = pVdma->pVGAState->vram_ptrR3;
+            pvBuf = pvRam + pCmd->Location.offVramBuf;
+        }
+        else
+        {
+            RTGCPHYS phPage = pCmd->Location.phBuf & ~0xfffULL;
+            uint32_t offset = pCmd->Location.phBuf & 0xfff;
+            /* compare without adding, so a huge cbBuf cannot wrap the 32-bit sum and slip past the single-page limit */
+            Assert(cbBuf <= 0x1000 - offset);
+            if (cbBuf > 0x1000 - offset)
+            {
+                /* @todo: more advanced mechanism of command buffer proc is actually needed */
+                rc = VERR_INVALID_PARAMETER;
+                break;
+            }
+            const void * pvPageBuf;
+            rc = PDMDevHlpPhysGCPhys2CCPtrReadOnly(pDevIns, phPage, 0, &pvPageBuf, &Lock);
+            AssertRC(rc);
+            if (!RT_SUCCESS(rc))
+            {
+                /* @todo: if (rc == VERR_PGM_PHYS_PAGE_RESERVED) -> fall back on using PGMPhysRead ?? */
+                break;
+            }
+
+            pvBuf = (const uint8_t *)pvPageBuf + offset;
+            bReleaseLocked = true;
+        }
+
+        rc = vboxVDMACmdExec(pVdma, pvBuf, cbBuf);
+        AssertRC(rc);
+        if (bReleaseLocked)
+            PDMDevHlpPhysReleasePageMappingLock(pDevIns, &Lock);
+    } while (0);
+
+    pCmd->rc = rc;
+    rc = VBoxSHGSMICommandComplete (pHgsmi, pCmd);
+    AssertRC(rc);
+}
+
+static void vboxVDMAControlProcess(PVBOXVDMAHOST pVdma, PVBOXVDMA_CTL pCmd)
 {
     PHGSMIINSTANCE pHgsmi = pVdma->pHgsmi;
@@ -340,5 +591,5 @@
 } VBOXVDMACMD_PROCESS_CONTEXT, *PVBOXVDMACMD_PROCESS_CONTEXT;
 
-DECLCALLBACK(bool) vboxVDMACommandProcessCb(PVBOXVDMAPIPE pPipe, void *pvCallback)
+static DECLCALLBACK(bool) vboxVDMACommandProcessCb(PVBOXVDMAPIPE pPipe, void *pvCallback)
 {
     PVBOXVDMACMD_PROCESS_CONTEXT pContext = (PVBOXVDMACMD_PROCESS_CONTEXT)pvCallback;
@@ -359,5 +610,5 @@
 }
 
-DECLCALLBACK(int) vboxVDMAWorkerThread(RTTHREAD ThreadSelf, void *pvUser)
+static DECLCALLBACK(int) vboxVDMAWorkerThread(RTTHREAD ThreadSelf, void *pvUser)
 {
     PVBOXVDMAHOST pVdma = (PVBOXVDMAHOST)pvUser;
@@ -421,5 +672,5 @@
         hgsmiListInit(&pVdma->PendingList);
         pVdma->pHgsmi = pVGAState->pHGSMI;
-
+        pVdma->pVGAState = pVGAState;
         rc = vboxVDMAPipeConstruct(&pVdma->Pipe);
         AssertRC(rc);
