Index: /trunk/src/VBox/Storage/VD.cpp
===================================================================
--- /trunk/src/VBox/Storage/VD.cpp	(revision 49943)
+++ /trunk/src/VBox/Storage/VD.cpp	(revision 49944)
@@ -2491,4 +2491,39 @@
 }
 
+static int vdWriteHelperStandardReadImageAsync(PVDIOCTX pIoCtx)
+{
+    int rc = VINF_SUCCESS;
+    /* I/O context step: calls vdReadHelperAsync while bytes remain, then queues vdWriteHelperCommitAsync. */
+    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
+
+    pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
+    /* Only start a read while bytes remain and no data transfer is already pending for this context. */
+    if (   pIoCtx->Req.Io.cbTransferLeft
+        && !pIoCtx->cDataTransfersPending)
+        rc = vdReadHelperAsync(pIoCtx);
+    /* Bytes left or metadata transfers pending: report "in progress", pfnIoCtxTransferNext stays untouched. */
+    if (   RT_SUCCESS(rc)
+        && (   pIoCtx->Req.Io.cbTransferLeft
+            || pIoCtx->cMetaTransfersPending))
+        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
+    else
+    {
+        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
+        /* NOTE(review): also reached when vdReadHelperAsync failed; that status is returned as is - confirm intended. */
+        /* Zero out the remainder of this block. Will never be visible, as this
+         * is beyond the limit of the image. */
+        if (cbFill)
+            vdIoCtxSet(pIoCtx, '\0', cbFill);
+
+        /* Write the full block to the virtual disk. */
+        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
+
+        vdIoCtxChildReset(pIoCtx);
+        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
+    }
+
+    return rc;
+}
+
 static int vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
 {
@@ -2520,20 +2555,24 @@
         }
 
-        /* Zero out the remainder of this block. Will never be visible, as this
-         * is beyond the limit of the image. */
-        if (cbFill)
-        {
-            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
-            vdIoCtxSet(pIoCtx, '\0', cbFill);
-        }
-
         if (cbReadImage)
         {
             /* Read remaining data. */
+            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardReadImageAsync;
+
+            /* Read the data that follows the written (copied) bytes from the image to fill the block. */
+            pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbReadImage; Assert(cbReadImage == (uint32_t)cbReadImage);
+            pIoCtx->Req.Io.cbTransfer     = pIoCtx->Req.Io.cbTransferLeft;
+            pIoCtx->Req.Io.uOffset       += cbWriteCopy;
         }
         else
         {
+            /* Zero out the remainder of this block. Will never be visible, as this
+             * is beyond the limit of the image. */
+            if (cbFill)
+                vdIoCtxSet(pIoCtx, '\0', cbFill);
+
             /* Write the full block to the virtual disk. */
             RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
+            vdIoCtxChildReset(pIoCtx);
             pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
         }
@@ -2543,4 +2582,5 @@
         /* Write the full block to the virtual disk. */
         RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
+        vdIoCtxChildReset(pIoCtx);
         pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
     }
@@ -2557,5 +2597,6 @@
     pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
 
-    if (pIoCtx->Req.Io.cbTransferLeft)
+    if (   pIoCtx->Req.Io.cbTransferLeft
+        && !pIoCtx->cDataTransfersPending)
         rc = vdReadHelperAsync(pIoCtx);
 
@@ -2592,7 +2633,8 @@
         /* If we have data to be written, use that instead of reading
          * data from the image. */
-        cbWriteCopy;
         if (cbWrite > cbThisWrite)
             cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
+        else
+            cbWriteCopy = 0;
 
         /* Figure out how much we cannot read from the image, because
Index: /trunk/src/VBox/Storage/VHD.cpp
===================================================================
--- /trunk/src/VBox/Storage/VHD.cpp	(revision 49943)
+++ /trunk/src/VBox/Storage/VHD.cpp	(revision 49944)
@@ -1636,5 +1636,6 @@
         {
             /* Check if the block allocation should be suppressed. */
-            if (fWrite & VD_WRITE_NO_ALLOC)
+            if (   (fWrite & VD_WRITE_NO_ALLOC)
+                || (cbWrite != pImage->cbDataBlock))
             {
                 *pcbPreRead = cBATEntryIndex * VHD_SECTOR_SIZE;
