VirtualBox

source: vbox/trunk/src/VBox/Storage/VD.cpp@ 67954

Last change on this file since 67954 was 67464, checked in by vboxsync, 7 years ago

VD.cpp: VDGetFormat: Here be dragons. Killed the one named VERR_EOF, but can imagine a there are a lot more lurking around...

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 375.4 KB
Line 
1/* $Id: VD.cpp 67464 2017-06-19 10:08:36Z vboxsync $ */
2/** @file
3 * VD - Virtual disk container implementation.
4 */
5
6/*
7 * Copyright (C) 2006-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_VD
23#include <VBox/vd.h>
24#include <VBox/err.h>
25#include <VBox/sup.h>
26#include <VBox/log.h>
27
28#include <iprt/alloc.h>
29#include <iprt/assert.h>
30#include <iprt/uuid.h>
31#include <iprt/file.h>
32#include <iprt/string.h>
33#include <iprt/asm.h>
34#include <iprt/param.h>
35#include <iprt/path.h>
36#include <iprt/sg.h>
37#include <iprt/semaphore.h>
38
39#include "VDInternal.h"
40
41/** Buffer size used for merging images. */
42#define VD_MERGE_BUFFER_SIZE (16 * _1M)
43
44/** Maximum number of segments in one I/O task. */
45#define VD_IO_TASK_SEGMENTS_MAX 64
46
47/** Threshold after not recently used blocks are removed from the list. */
48#define VD_DISCARD_REMOVE_THRESHOLD (10 * _1M) /** @todo experiment */
49
50/**
51 * VD async I/O interface storage descriptor.
52 */
53typedef struct VDIIOFALLBACKSTORAGE
54{
55 /** File handle. */
56 RTFILE File;
57 /** Completion callback. */
58 PFNVDCOMPLETED pfnCompleted;
59 /** Thread for async access. */
60 RTTHREAD ThreadAsync;
61} VDIIOFALLBACKSTORAGE, *PVDIIOFALLBACKSTORAGE;
62
63/**
64 * uModified bit flags.
65 */
66#define VD_IMAGE_MODIFIED_FLAG RT_BIT(0)
67#define VD_IMAGE_MODIFIED_FIRST RT_BIT(1)
68#define VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE RT_BIT(2)
69
70
71# define VD_IS_LOCKED(a_pDisk) \
72 do \
73 { \
74 NOREF(a_pDisk); \
75 AssertMsg((a_pDisk)->fLocked, \
76 ("Lock not held\n"));\
77 } while(0)
78
79/**
80 * VBox parent read descriptor, used internally for compaction.
81 */
82typedef struct VDPARENTSTATEDESC
83{
84 /** Pointer to disk descriptor. */
85 PVDISK pDisk;
86 /** Pointer to image descriptor. */
87 PVDIMAGE pImage;
88} VDPARENTSTATEDESC, *PVDPARENTSTATEDESC;
89
90/**
91 * Transfer direction.
92 */
93typedef enum VDIOCTXTXDIR
94{
95 /** Read */
96 VDIOCTXTXDIR_READ = 0,
97 /** Write */
98 VDIOCTXTXDIR_WRITE,
99 /** Flush */
100 VDIOCTXTXDIR_FLUSH,
101 /** Discard */
102 VDIOCTXTXDIR_DISCARD,
103 /** 32bit hack */
104 VDIOCTXTXDIR_32BIT_HACK = 0x7fffffff
105} VDIOCTXTXDIR, *PVDIOCTXTXDIR;
106
107/** Transfer function */
108typedef DECLCALLBACK(int) FNVDIOCTXTRANSFER (PVDIOCTX pIoCtx);
109/** Pointer to a transfer function. */
110typedef FNVDIOCTXTRANSFER *PFNVDIOCTXTRANSFER;
111
112/**
113 * I/O context
114 */
115typedef struct VDIOCTX
116{
117 /** Pointer to the next I/O context. */
118 struct VDIOCTX * volatile pIoCtxNext;
119 /** Disk this is request is for. */
120 PVDISK pDisk;
121 /** Return code. */
122 int rcReq;
123 /** Various flags for the I/O context. */
124 uint32_t fFlags;
125 /** Number of data transfers currently pending. */
126 volatile uint32_t cDataTransfersPending;
127 /** How many meta data transfers are pending. */
128 volatile uint32_t cMetaTransfersPending;
129 /** Flag whether the request finished */
130 volatile bool fComplete;
131 /** Temporary allocated memory which is freed
132 * when the context completes. */
133 void *pvAllocation;
134 /** Transfer function. */
135 PFNVDIOCTXTRANSFER pfnIoCtxTransfer;
136 /** Next transfer part after the current one completed. */
137 PFNVDIOCTXTRANSFER pfnIoCtxTransferNext;
138 /** Transfer direction */
139 VDIOCTXTXDIR enmTxDir;
140 /** Request type dependent data. */
141 union
142 {
143 /** I/O request (read/write). */
144 struct
145 {
146 /** Number of bytes left until this context completes. */
147 volatile uint32_t cbTransferLeft;
148 /** Current offset */
149 volatile uint64_t uOffset;
150 /** Number of bytes to transfer */
151 volatile size_t cbTransfer;
152 /** Current image in the chain. */
153 PVDIMAGE pImageCur;
154 /** Start image to read from. pImageCur is reset to this
155 * value after it reached the first image in the chain. */
156 PVDIMAGE pImageStart;
157 /** S/G buffer */
158 RTSGBUF SgBuf;
159 /** Number of bytes to clear in the buffer before the current read. */
160 size_t cbBufClear;
161 /** Number of images to read. */
162 unsigned cImagesRead;
163 /** Override for the parent image to start reading from. */
164 PVDIMAGE pImageParentOverride;
165 /** Original offset of the transfer - required for filtering read requests. */
166 uint64_t uOffsetXferOrig;
167 /** Original size of the transfer - required for fitlering read requests. */
168 size_t cbXferOrig;
169 } Io;
170 /** Discard requests. */
171 struct
172 {
173 /** Pointer to the range descriptor array. */
174 PCRTRANGE paRanges;
175 /** Number of ranges in the array. */
176 unsigned cRanges;
177 /** Range descriptor index which is processed. */
178 unsigned idxRange;
179 /** Start offset to discard currently. */
180 uint64_t offCur;
181 /** How many bytes left to discard in the current range. */
182 size_t cbDiscardLeft;
183 /** How many bytes to discard in the current block (<= cbDiscardLeft). */
184 size_t cbThisDiscard;
185 /** Discard block handled currently. */
186 PVDDISCARDBLOCK pBlock;
187 } Discard;
188 } Req;
189 /** Parent I/O context if any. Sets the type of the context (root/child) */
190 PVDIOCTX pIoCtxParent;
191 /** Type dependent data (root/child) */
192 union
193 {
194 /** Root data */
195 struct
196 {
197 /** Completion callback */
198 PFNVDASYNCTRANSFERCOMPLETE pfnComplete;
199 /** User argument 1 passed on completion. */
200 void *pvUser1;
201 /** User argument 2 passed on completion. */
202 void *pvUser2;
203 } Root;
204 /** Child data */
205 struct
206 {
207 /** Saved start offset */
208 uint64_t uOffsetSaved;
209 /** Saved transfer size */
210 size_t cbTransferLeftSaved;
211 /** Number of bytes transferred from the parent if this context completes. */
212 size_t cbTransferParent;
213 /** Number of bytes to pre read */
214 size_t cbPreRead;
215 /** Number of bytes to post read. */
216 size_t cbPostRead;
217 /** Number of bytes to write left in the parent. */
218 size_t cbWriteParent;
219 /** Write type dependent data. */
220 union
221 {
222 /** Optimized */
223 struct
224 {
225 /** Bytes to fill to satisfy the block size. Not part of the virtual disk. */
226 size_t cbFill;
227 /** Bytes to copy instead of reading from the parent */
228 size_t cbWriteCopy;
229 /** Bytes to read from the image. */
230 size_t cbReadImage;
231 } Optimized;
232 } Write;
233 } Child;
234 } Type;
235} VDIOCTX;
236
237/** Default flags for an I/O context, i.e. unblocked and async. */
238#define VDIOCTX_FLAGS_DEFAULT (0)
239/** Flag whether the context is blocked. */
240#define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
241/** Flag whether the I/O context is using synchronous I/O. */
242#define VDIOCTX_FLAGS_SYNC RT_BIT_32(1)
243/** Flag whether the read should update the cache. */
244#define VDIOCTX_FLAGS_READ_UPDATE_CACHE RT_BIT_32(2)
245/** Flag whether free blocks should be zeroed.
246 * If false and no image has data for sepcified
247 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
248 * Note that unallocated blocks are still zeroed
249 * if at least one image has valid data for a part
250 * of the range.
251 */
252#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
253/** Don't free the I/O context when complete because
254 * it was alloacted elsewhere (stack, ...). */
255#define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4)
256/** Don't set the modified flag for this I/O context when writing. */
257#define VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG RT_BIT_32(5)
258/** The write filter was applied already and shouldn't be applied a second time.
259 * Used at the beginning of vdWriteHelperAsync() because it might be called
260 * multiple times.
261 */
262#define VDIOCTX_FLAGS_WRITE_FILTER_APPLIED RT_BIT_32(6)
263
264/** NIL I/O context pointer value. */
265#define NIL_VDIOCTX ((PVDIOCTX)0)
266
267/**
268 * List node for deferred I/O contexts.
269 */
270typedef struct VDIOCTXDEFERRED
271{
272 /** Node in the list of deferred requests.
273 * A request can be deferred if the image is growing
274 * and the request accesses the same range or if
275 * the backend needs to read or write metadata from the disk
276 * before it can continue. */
277 RTLISTNODE NodeDeferred;
278 /** I/O context this entry points to. */
279 PVDIOCTX pIoCtx;
280} VDIOCTXDEFERRED, *PVDIOCTXDEFERRED;
281
282/**
283 * I/O task.
284 */
285typedef struct VDIOTASK
286{
287 /** Next I/O task waiting in the list. */
288 struct VDIOTASK * volatile pNext;
289 /** Storage this task belongs to. */
290 PVDIOSTORAGE pIoStorage;
291 /** Optional completion callback. */
292 PFNVDXFERCOMPLETED pfnComplete;
293 /** Opaque user data. */
294 void *pvUser;
295 /** Completion status code for the task. */
296 int rcReq;
297 /** Flag whether this is a meta data transfer. */
298 bool fMeta;
299 /** Type dependent data. */
300 union
301 {
302 /** User data transfer. */
303 struct
304 {
305 /** Number of bytes this task transferred. */
306 uint32_t cbTransfer;
307 /** Pointer to the I/O context the task belongs. */
308 PVDIOCTX pIoCtx;
309 } User;
310 /** Meta data transfer. */
311 struct
312 {
313 /** Meta transfer this task is for. */
314 PVDMETAXFER pMetaXfer;
315 } Meta;
316 } Type;
317} VDIOTASK;
318
319/**
320 * Storage handle.
321 */
322typedef struct VDIOSTORAGE
323{
324 /** Image I/O state this storage handle belongs to. */
325 PVDIO pVDIo;
326 /** AVL tree for pending async metadata transfers. */
327 PAVLRFOFFTREE pTreeMetaXfers;
328 /** Storage handle */
329 void *pStorage;
330} VDIOSTORAGE;
331
332/**
333 * Metadata transfer.
334 *
335 * @note This entry can't be freed if either the list is not empty or
336 * the reference counter is not 0.
337 * The assumption is that the backends don't need to read huge amounts of
338 * metadata to complete a transfer so the additional memory overhead should
339 * be relatively small.
340 */
341typedef struct VDMETAXFER
342{
343 /** AVL core for fast search (the file offset is the key) */
344 AVLRFOFFNODECORE Core;
345 /** I/O storage for this transfer. */
346 PVDIOSTORAGE pIoStorage;
347 /** Flags. */
348 uint32_t fFlags;
349 /** List of I/O contexts waiting for this metadata transfer to complete. */
350 RTLISTNODE ListIoCtxWaiting;
351 /** Number of references to this entry. */
352 unsigned cRefs;
353 /** Size of the data stored with this entry. */
354 size_t cbMeta;
355 /** Shadow buffer which is used in case a write is still active and other
356 * writes update the shadow buffer. */
357 uint8_t *pbDataShw;
358 /** List of I/O contexts updating the shadow buffer while there is a write
359 * in progress. */
360 RTLISTNODE ListIoCtxShwWrites;
361 /** Data stored - variable size. */
362 uint8_t abData[1];
363} VDMETAXFER;
364
365/**
366 * The transfer direction for the metadata.
367 */
368#define VDMETAXFER_TXDIR_MASK 0x3
369#define VDMETAXFER_TXDIR_NONE 0x0
370#define VDMETAXFER_TXDIR_WRITE 0x1
371#define VDMETAXFER_TXDIR_READ 0x2
372#define VDMETAXFER_TXDIR_FLUSH 0x3
373#define VDMETAXFER_TXDIR_GET(flags) ((flags) & VDMETAXFER_TXDIR_MASK)
374#define VDMETAXFER_TXDIR_SET(flags, dir) ((flags) = (flags & ~VDMETAXFER_TXDIR_MASK) | (dir))
375
376/** Forward declaration of the async discard helper. */
377static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx);
378static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx);
379static void vdDiskProcessBlockedIoCtx(PVDISK pDisk);
380static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc);
381static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
382
383/**
384 * internal: issue error message.
385 */
386static int vdError(PVDISK pDisk, int rc, RT_SRC_POS_DECL,
387 const char *pszFormat, ...)
388{
389 va_list va;
390 va_start(va, pszFormat);
391 if (pDisk->pInterfaceError)
392 pDisk->pInterfaceError->pfnError(pDisk->pInterfaceError->Core.pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va);
393 va_end(va);
394 return rc;
395}
396
397/**
398 * internal: thread synchronization, start read.
399 */
400DECLINLINE(int) vdThreadStartRead(PVDISK pDisk)
401{
402 int rc = VINF_SUCCESS;
403 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
404 rc = pDisk->pInterfaceThreadSync->pfnStartRead(pDisk->pInterfaceThreadSync->Core.pvUser);
405 return rc;
406}
407
408/**
409 * internal: thread synchronization, finish read.
410 */
411DECLINLINE(int) vdThreadFinishRead(PVDISK pDisk)
412{
413 int rc = VINF_SUCCESS;
414 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
415 rc = pDisk->pInterfaceThreadSync->pfnFinishRead(pDisk->pInterfaceThreadSync->Core.pvUser);
416 return rc;
417}
418
419/**
420 * internal: thread synchronization, start write.
421 */
422DECLINLINE(int) vdThreadStartWrite(PVDISK pDisk)
423{
424 int rc = VINF_SUCCESS;
425 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
426 rc = pDisk->pInterfaceThreadSync->pfnStartWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
427 return rc;
428}
429
430/**
431 * internal: thread synchronization, finish write.
432 */
433DECLINLINE(int) vdThreadFinishWrite(PVDISK pDisk)
434{
435 int rc = VINF_SUCCESS;
436 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
437 rc = pDisk->pInterfaceThreadSync->pfnFinishWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
438 return rc;
439}
440
441/**
442 * internal: add image structure to the end of images list.
443 */
444static void vdAddImageToList(PVDISK pDisk, PVDIMAGE pImage)
445{
446 pImage->pPrev = NULL;
447 pImage->pNext = NULL;
448
449 if (pDisk->pBase)
450 {
451 Assert(pDisk->cImages > 0);
452 pImage->pPrev = pDisk->pLast;
453 pDisk->pLast->pNext = pImage;
454 pDisk->pLast = pImage;
455 }
456 else
457 {
458 Assert(pDisk->cImages == 0);
459 pDisk->pBase = pImage;
460 pDisk->pLast = pImage;
461 }
462
463 pDisk->cImages++;
464}
465
466/**
467 * internal: remove image structure from the images list.
468 */
469static void vdRemoveImageFromList(PVDISK pDisk, PVDIMAGE pImage)
470{
471 Assert(pDisk->cImages > 0);
472
473 if (pImage->pPrev)
474 pImage->pPrev->pNext = pImage->pNext;
475 else
476 pDisk->pBase = pImage->pNext;
477
478 if (pImage->pNext)
479 pImage->pNext->pPrev = pImage->pPrev;
480 else
481 pDisk->pLast = pImage->pPrev;
482
483 pImage->pPrev = NULL;
484 pImage->pNext = NULL;
485
486 pDisk->cImages--;
487}
488
489/**
490 * Release a referene to the filter decrementing the counter and destroying the filter
491 * when the counter reaches zero.
492 *
493 * @returns The new reference count.
494 * @param pFilter The filter to release.
495 */
496static uint32_t vdFilterRelease(PVDFILTER pFilter)
497{
498 uint32_t cRefs = ASMAtomicDecU32(&pFilter->cRefs);
499 if (!cRefs)
500 {
501 pFilter->pBackend->pfnDestroy(pFilter->pvBackendData);
502 RTMemFree(pFilter);
503 }
504
505 return cRefs;
506}
507
508/**
509 * Increments the reference counter of the given filter.
510 *
511 * @return The new reference count.
512 * @param pFilter The filter.
513 */
514static uint32_t vdFilterRetain(PVDFILTER pFilter)
515{
516 return ASMAtomicIncU32(&pFilter->cRefs);
517}
518
519/**
520 * internal: find image by index into the images list.
521 */
522static PVDIMAGE vdGetImageByNumber(PVDISK pDisk, unsigned nImage)
523{
524 PVDIMAGE pImage = pDisk->pBase;
525 if (nImage == VD_LAST_IMAGE)
526 return pDisk->pLast;
527 while (pImage && nImage)
528 {
529 pImage = pImage->pNext;
530 nImage--;
531 }
532 return pImage;
533}
534
535/**
536 * Creates a new region list from the given one converting to match the flags if necessary.
537 *
538 * @returns VBox status code.
539 * @param pRegionList The region list to convert from.
540 * @param fFlags The flags for the new region list.
541 * @param ppRegionList Where to store the new region list on success.
542 */
543static int vdRegionListConv(PCVDREGIONLIST pRegionList, uint32_t fFlags, PPVDREGIONLIST ppRegionList)
544{
545 int rc = VINF_SUCCESS;
546 PVDREGIONLIST pRegionListNew = (PVDREGIONLIST)RTMemDup(pRegionList, RT_UOFFSETOF(VDREGIONLIST, aRegions[pRegionList->cRegions]));
547 if (RT_LIKELY(pRegionListNew))
548 {
549 /* Do we have to convert anything? */
550 if (pRegionList->fFlags != fFlags)
551 {
552 uint64_t offRegionNext = 0;
553
554 pRegionListNew->fFlags = fFlags;
555 for (unsigned i = 0; i < pRegionListNew->cRegions; i++)
556 {
557 PVDREGIONDESC pRegion = &pRegionListNew->aRegions[i];
558
559 if ( (fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
560 && !(pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS))
561 {
562 Assert(!(pRegion->cRegionBlocksOrBytes % pRegion->cbBlock));
563
564 /* Convert from bytes to logical blocks. */
565 pRegion->offRegion = offRegionNext;
566 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes / pRegion->cbBlock;
567 offRegionNext += pRegion->cRegionBlocksOrBytes;
568 }
569 else
570 {
571 /* Convert from logical blocks to bytes. */
572 pRegion->offRegion = offRegionNext;
573 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes * pRegion->cbBlock;
574 offRegionNext += pRegion->cRegionBlocksOrBytes;
575 }
576 }
577 }
578
579 *ppRegionList = pRegionListNew;
580 }
581 else
582 rc = VERR_NO_MEMORY;
583
584 return rc;
585}
586
587/**
588 * Returns the virtual size of the image in bytes.
589 *
590 * @returns Size of the given image in bytes.
591 * @param pImage The image to get the size from.
592 */
593static uint64_t vdImageGetSize(PVDIMAGE pImage)
594{
595 uint64_t cbImage = 0;
596 PCVDREGIONLIST pRegionList = NULL;
597 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
598 if (RT_SUCCESS(rc))
599 {
600 if (pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
601 {
602 PVDREGIONLIST pRegionListConv = NULL;
603 rc = vdRegionListConv(pRegionList, 0, &pRegionListConv);
604 if (RT_SUCCESS(rc))
605 {
606 for (uint32_t i = 0; i < pRegionListConv->cRegions; i++)
607 cbImage += pRegionListConv->aRegions[i].cRegionBlocksOrBytes;
608
609 VDRegionListFree(pRegionListConv);
610 }
611 }
612 else
613 for (uint32_t i = 0; i < pRegionList->cRegions; i++)
614 cbImage += pRegionList->aRegions[i].cRegionBlocksOrBytes;
615
616 AssertPtr(pImage->Backend->pfnRegionListRelease);
617 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
618 }
619
620 return cbImage;
621}
622
623/**
624 * Applies the filter chain to the given write request.
625 *
626 * @returns VBox status code.
627 * @param pDisk The HDD container.
628 * @param uOffset The start offset of the write.
629 * @param cbWrite Number of bytes to write.
630 * @param pIoCtx The I/O context associated with the request.
631 */
632static int vdFilterChainApplyWrite(PVDISK pDisk, uint64_t uOffset, size_t cbWrite,
633 PVDIOCTX pIoCtx)
634{
635 int rc = VINF_SUCCESS;
636
637 VD_IS_LOCKED(pDisk);
638
639 PVDFILTER pFilter;
640 RTListForEach(&pDisk->ListFilterChainWrite, pFilter, VDFILTER, ListNodeChainWrite)
641 {
642 rc = pFilter->pBackend->pfnFilterWrite(pFilter->pvBackendData, uOffset, cbWrite, pIoCtx);
643 if (RT_FAILURE(rc))
644 break;
645 /* Reset S/G buffer for the next filter. */
646 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
647 }
648
649 return rc;
650}
651
652/**
653 * Applies the filter chain to the given read request.
654 *
655 * @returns VBox status code.
656 * @param pDisk The HDD container.
657 * @param uOffset The start offset of the read.
658 * @param cbRead Number of bytes read.
659 * @param pIoCtx The I/O context associated with the request.
660 */
661static int vdFilterChainApplyRead(PVDISK pDisk, uint64_t uOffset, size_t cbRead,
662 PVDIOCTX pIoCtx)
663{
664 int rc = VINF_SUCCESS;
665
666 VD_IS_LOCKED(pDisk);
667
668 /* Reset buffer before starting. */
669 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
670
671 PVDFILTER pFilter;
672 RTListForEach(&pDisk->ListFilterChainRead, pFilter, VDFILTER, ListNodeChainRead)
673 {
674 rc = pFilter->pBackend->pfnFilterRead(pFilter->pvBackendData, uOffset, cbRead, pIoCtx);
675 if (RT_FAILURE(rc))
676 break;
677 /* Reset S/G buffer for the next filter. */
678 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
679 }
680
681 return rc;
682}
683
684DECLINLINE(void) vdIoCtxRootComplete(PVDISK pDisk, PVDIOCTX pIoCtx)
685{
686 if ( RT_SUCCESS(pIoCtx->rcReq)
687 && pIoCtx->enmTxDir == VDIOCTXTXDIR_READ)
688 pIoCtx->rcReq = vdFilterChainApplyRead(pDisk, pIoCtx->Req.Io.uOffsetXferOrig,
689 pIoCtx->Req.Io.cbXferOrig, pIoCtx);
690
691 pIoCtx->Type.Root.pfnComplete(pIoCtx->Type.Root.pvUser1,
692 pIoCtx->Type.Root.pvUser2,
693 pIoCtx->rcReq);
694}
695
696/**
697 * Initialize the structure members of a given I/O context.
698 */
699DECLINLINE(void) vdIoCtxInit(PVDIOCTX pIoCtx, PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
700 uint64_t uOffset, size_t cbTransfer, PVDIMAGE pImageStart,
701 PCRTSGBUF pcSgBuf, void *pvAllocation,
702 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
703{
704 pIoCtx->pDisk = pDisk;
705 pIoCtx->enmTxDir = enmTxDir;
706 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTransfer; Assert((uint32_t)cbTransfer == cbTransfer);
707 pIoCtx->Req.Io.uOffset = uOffset;
708 pIoCtx->Req.Io.cbTransfer = cbTransfer;
709 pIoCtx->Req.Io.pImageStart = pImageStart;
710 pIoCtx->Req.Io.pImageCur = pImageStart;
711 pIoCtx->Req.Io.cbBufClear = 0;
712 pIoCtx->Req.Io.pImageParentOverride = NULL;
713 pIoCtx->Req.Io.uOffsetXferOrig = uOffset;
714 pIoCtx->Req.Io.cbXferOrig = cbTransfer;
715 pIoCtx->cDataTransfersPending = 0;
716 pIoCtx->cMetaTransfersPending = 0;
717 pIoCtx->fComplete = false;
718 pIoCtx->fFlags = fFlags;
719 pIoCtx->pvAllocation = pvAllocation;
720 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
721 pIoCtx->pfnIoCtxTransferNext = NULL;
722 pIoCtx->rcReq = VINF_SUCCESS;
723 pIoCtx->pIoCtxParent = NULL;
724
725 /* There is no S/G list for a flush request. */
726 if ( enmTxDir != VDIOCTXTXDIR_FLUSH
727 && enmTxDir != VDIOCTXTXDIR_DISCARD)
728 RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pcSgBuf);
729 else
730 memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF));
731}
732
733/**
734 * Internal: Tries to read the desired range from the given cache.
735 *
736 * @returns VBox status code.
737 * @retval VERR_VD_BLOCK_FREE if the block is not in the cache.
738 * pcbRead will be set to the number of bytes not in the cache.
739 * Everything thereafter might be in the cache.
740 * @param pCache The cache to read from.
741 * @param uOffset Offset of the virtual disk to read.
742 * @param cbRead How much to read.
743 * @param pIoCtx The I/O context to read into.
744 * @param pcbRead Where to store the number of bytes actually read.
745 * On success this indicates the number of bytes read from the cache.
746 * If VERR_VD_BLOCK_FREE is returned this gives the number of bytes
747 * which are not in the cache.
748 * In both cases everything beyond this value
749 * might or might not be in the cache.
750 */
751static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
752 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
753{
754 int rc = VINF_SUCCESS;
755
756 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbRead=%zu pcbRead=%#p\n",
757 pCache, uOffset, pIoCtx, cbRead, pcbRead));
758
759 AssertPtr(pCache);
760 AssertPtr(pcbRead);
761
762 rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, cbRead,
763 pIoCtx, pcbRead);
764
765 LogFlowFunc(("returns rc=%Rrc pcbRead=%zu\n", rc, *pcbRead));
766 return rc;
767}
768
769/**
770 * Internal: Writes data for the given block into the cache.
771 *
772 * @returns VBox status code.
773 * @param pCache The cache to write to.
774 * @param uOffset Offset of the virtual disk to write to the cache.
775 * @param cbWrite How much to write.
776 * @param pIoCtx The I/O context to write from.
777 * @param pcbWritten How much data could be written, optional.
778 */
779static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, size_t cbWrite,
780 PVDIOCTX pIoCtx, size_t *pcbWritten)
781{
782 int rc = VINF_SUCCESS;
783
784 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbWrite=%zu pcbWritten=%#p\n",
785 pCache, uOffset, pIoCtx, cbWrite, pcbWritten));
786
787 AssertPtr(pCache);
788 AssertPtr(pIoCtx);
789 Assert(cbWrite > 0);
790
791 if (pcbWritten)
792 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
793 pIoCtx, pcbWritten);
794 else
795 {
796 size_t cbWritten = 0;
797
798 do
799 {
800 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
801 pIoCtx, &cbWritten);
802 uOffset += cbWritten;
803 cbWrite -= cbWritten;
804 } while ( cbWrite
805 && ( RT_SUCCESS(rc)
806 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
807 }
808
809 LogFlowFunc(("returns rc=%Rrc pcbWritten=%zu\n",
810 rc, pcbWritten ? *pcbWritten : cbWrite));
811 return rc;
812}
813
814/**
815 * Creates a new empty discard state.
816 *
817 * @returns Pointer to the new discard state or NULL if out of memory.
818 */
819static PVDDISCARDSTATE vdDiscardStateCreate(void)
820{
821 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
822
823 if (pDiscard)
824 {
825 RTListInit(&pDiscard->ListLru);
826 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
827 if (!pDiscard->pTreeBlocks)
828 {
829 RTMemFree(pDiscard);
830 pDiscard = NULL;
831 }
832 }
833
834 return pDiscard;
835}
836
837/**
838 * Removes the least recently used blocks from the waiting list until
839 * the new value is reached.
840 *
841 * @returns VBox status code.
842 * @param pDisk VD disk container.
843 * @param pDiscard The discard state.
844 * @param cbDiscardingNew How many bytes should be waiting on success.
845 * The number of bytes waiting can be less.
846 */
847static int vdDiscardRemoveBlocks(PVDISK pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
848{
849 int rc = VINF_SUCCESS;
850
851 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
852 pDisk, pDiscard, cbDiscardingNew));
853
854 while (pDiscard->cbDiscarding > cbDiscardingNew)
855 {
856 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
857
858 Assert(!RTListIsEmpty(&pDiscard->ListLru));
859
860 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
861 uint64_t offStart = pBlock->Core.Key;
862 uint32_t idxStart = 0;
863 size_t cbLeft = pBlock->cbDiscard;
864 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
865 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
866
867 while (cbLeft > 0)
868 {
869 int32_t idxEnd;
870 size_t cbThis = cbLeft;
871
872 if (fAllocated)
873 {
874 /* Check for the first unallocated bit. */
875 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
876 if (idxEnd != -1)
877 {
878 cbThis = (idxEnd - idxStart) * 512;
879 fAllocated = false;
880 }
881 }
882 else
883 {
884 /* Mark as unused and check for the first set bit. */
885 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
886 if (idxEnd != -1)
887 cbThis = (idxEnd - idxStart) * 512;
888
889
890 VDIOCTX IoCtx;
891 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
892 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
893 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
894 &IoCtx, offStart, cbThis, NULL,
895 NULL, &cbThis, NULL,
896 VD_DISCARD_MARK_UNUSED);
897 if (RT_FAILURE(rc))
898 break;
899
900 fAllocated = true;
901 }
902
903 idxStart = idxEnd;
904 offStart += cbThis;
905 cbLeft -= cbThis;
906 }
907
908 if (RT_FAILURE(rc))
909 break;
910
911 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
912 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
913 RTListNodeRemove(&pBlock->NodeLru);
914
915 pDiscard->cbDiscarding -= pBlock->cbDiscard;
916 RTMemFree(pBlock->pbmAllocated);
917 RTMemFree(pBlock);
918 }
919
920 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
921
922 LogFlowFunc(("returns rc=%Rrc\n", rc));
923 return rc;
924}
925
926/**
927 * Destroys the current discard state, writing any waiting blocks to the image.
928 *
929 * @returns VBox status code.
930 * @param pDisk VD disk container.
931 */
932static int vdDiscardStateDestroy(PVDISK pDisk)
933{
934 int rc = VINF_SUCCESS;
935
936 if (pDisk->pDiscard)
937 {
938 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
939 AssertRC(rc);
940 RTMemFree(pDisk->pDiscard->pTreeBlocks);
941 RTMemFree(pDisk->pDiscard);
942 pDisk->pDiscard = NULL;
943 }
944
945 return rc;
946}
947
948/**
949 * Marks the given range as allocated in the image.
950 * Required if there are discards in progress and a write to a block which can get discarded
951 * is written to.
952 *
953 * @returns VBox status code.
954 * @param pDisk VD container data.
955 * @param uOffset First byte to mark as allocated.
956 * @param cbRange Number of bytes to mark as allocated.
957 */
958static int vdDiscardSetRangeAllocated(PVDISK pDisk, uint64_t uOffset, size_t cbRange)
959{
960 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
961 int rc = VINF_SUCCESS;
962
963 if (pDiscard)
964 {
965 do
966 {
967 size_t cbThisRange = cbRange;
968 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
969
970 if (pBlock)
971 {
972 int32_t idxStart, idxEnd;
973
974 Assert(!(cbThisRange % 512));
975 Assert(!((uOffset - pBlock->Core.Key) % 512));
976
977 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
978
979 idxStart = (uOffset - pBlock->Core.Key) / 512;
980 idxEnd = idxStart + (int32_t)(cbThisRange / 512);
981 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
982 }
983 else
984 {
985 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
986 if (pBlock)
987 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
988 }
989
990 Assert(cbRange >= cbThisRange);
991
992 uOffset += cbThisRange;
993 cbRange -= cbThisRange;
994 } while (cbRange != 0);
995 }
996
997 return rc;
998}
999
1000DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1001 uint64_t uOffset, size_t cbTransfer,
1002 PVDIMAGE pImageStart,PCRTSGBUF pcSgBuf,
1003 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1004 uint32_t fFlags)
1005{
1006 PVDIOCTX pIoCtx = NULL;
1007
1008 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1009 if (RT_LIKELY(pIoCtx))
1010 {
1011 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1012 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1013 }
1014
1015 return pIoCtx;
1016}
1017
1018DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1019 uint64_t uOffset, size_t cbTransfer,
1020 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
1021 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1022 void *pvUser1, void *pvUser2,
1023 void *pvAllocation,
1024 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1025 uint32_t fFlags)
1026{
1027 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1028 pcSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1029
1030 if (RT_LIKELY(pIoCtx))
1031 {
1032 pIoCtx->pIoCtxParent = NULL;
1033 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1034 pIoCtx->Type.Root.pvUser1 = pvUser1;
1035 pIoCtx->Type.Root.pvUser2 = pvUser2;
1036 }
1037
1038 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
1039 return pIoCtx;
1040}
1041
1042DECLINLINE(void) vdIoCtxDiscardInit(PVDIOCTX pIoCtx, PVDISK pDisk, PCRTRANGE paRanges,
1043 unsigned cRanges, PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1044 void *pvUser1, void *pvUser2, void *pvAllocation,
1045 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
1046{
1047 pIoCtx->pIoCtxNext = NULL;
1048 pIoCtx->pDisk = pDisk;
1049 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;
1050 pIoCtx->cDataTransfersPending = 0;
1051 pIoCtx->cMetaTransfersPending = 0;
1052 pIoCtx->fComplete = false;
1053 pIoCtx->fFlags = fFlags;
1054 pIoCtx->pvAllocation = pvAllocation;
1055 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
1056 pIoCtx->pfnIoCtxTransferNext = NULL;
1057 pIoCtx->rcReq = VINF_SUCCESS;
1058 pIoCtx->Req.Discard.paRanges = paRanges;
1059 pIoCtx->Req.Discard.cRanges = cRanges;
1060 pIoCtx->Req.Discard.idxRange = 0;
1061 pIoCtx->Req.Discard.cbDiscardLeft = 0;
1062 pIoCtx->Req.Discard.offCur = 0;
1063 pIoCtx->Req.Discard.cbThisDiscard = 0;
1064
1065 pIoCtx->pIoCtxParent = NULL;
1066 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1067 pIoCtx->Type.Root.pvUser1 = pvUser1;
1068 pIoCtx->Type.Root.pvUser2 = pvUser2;
1069}
1070
1071DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVDISK pDisk, PCRTRANGE paRanges,
1072 unsigned cRanges,
1073 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1074 void *pvUser1, void *pvUser2,
1075 void *pvAllocation,
1076 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1077 uint32_t fFlags)
1078{
1079 PVDIOCTX pIoCtx = NULL;
1080
1081 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1082 if (RT_LIKELY(pIoCtx))
1083 {
1084 vdIoCtxDiscardInit(pIoCtx, pDisk, paRanges, cRanges, pfnComplete, pvUser1,
1085 pvUser2, pvAllocation, pfnIoCtxTransfer, fFlags);
1086 }
1087
1088 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
1089 return pIoCtx;
1090}
1091
1092DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1093 uint64_t uOffset, size_t cbTransfer,
1094 PVDIMAGE pImageStart, PCRTSGBUF pcSgBuf,
1095 PVDIOCTX pIoCtxParent, size_t cbTransferParent,
1096 size_t cbWriteParent, void *pvAllocation,
1097 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
1098{
1099 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1100 pcSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE);
1101
1102 AssertPtr(pIoCtxParent);
1103 Assert(!pIoCtxParent->pIoCtxParent);
1104
1105 if (RT_LIKELY(pIoCtx))
1106 {
1107 pIoCtx->pIoCtxParent = pIoCtxParent;
1108 pIoCtx->Type.Child.uOffsetSaved = uOffset;
1109 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
1110 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;
1111 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;
1112 }
1113
1114 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
1115 return pIoCtx;
1116}
1117
1118DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
1119{
1120 PVDIOTASK pIoTask = NULL;
1121
1122 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1123 if (pIoTask)
1124 {
1125 pIoTask->pIoStorage = pIoStorage;
1126 pIoTask->pfnComplete = pfnComplete;
1127 pIoTask->pvUser = pvUser;
1128 pIoTask->fMeta = false;
1129 pIoTask->Type.User.cbTransfer = cbTransfer;
1130 pIoTask->Type.User.pIoCtx = pIoCtx;
1131 }
1132
1133 return pIoTask;
1134}
1135
1136DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
1137{
1138 PVDIOTASK pIoTask = NULL;
1139
1140 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1141 if (pIoTask)
1142 {
1143 pIoTask->pIoStorage = pIoStorage;
1144 pIoTask->pfnComplete = pfnComplete;
1145 pIoTask->pvUser = pvUser;
1146 pIoTask->fMeta = true;
1147 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
1148 }
1149
1150 return pIoTask;
1151}
1152
1153DECLINLINE(void) vdIoCtxFree(PVDISK pDisk, PVDIOCTX pIoCtx)
1154{
1155 Log(("Freeing I/O context %#p\n", pIoCtx));
1156
1157 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
1158 {
1159 if (pIoCtx->pvAllocation)
1160 RTMemFree(pIoCtx->pvAllocation);
1161#ifdef DEBUG
1162 memset(&pIoCtx->pDisk, 0xff, sizeof(void *));
1163#endif
1164 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
1165 }
1166}
1167
1168DECLINLINE(void) vdIoTaskFree(PVDISK pDisk, PVDIOTASK pIoTask)
1169{
1170#ifdef DEBUG
1171 memset(pIoTask, 0xff, sizeof(VDIOTASK));
1172#endif
1173 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
1174}
1175
1176DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
1177{
1178 AssertPtr(pIoCtx->pIoCtxParent);
1179
1180 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
1181 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;
1182 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved;
1183 Assert((uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved == pIoCtx->Type.Child.cbTransferLeftSaved);
1184}
1185
1186DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
1187{
1188 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_OFFSETOF(VDMETAXFER, abData[cb]));
1189
1190 if (RT_LIKELY(pMetaXfer))
1191 {
1192 pMetaXfer->Core.Key = uOffset;
1193 pMetaXfer->Core.KeyLast = uOffset + cb - 1;
1194 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;
1195 pMetaXfer->cbMeta = cb;
1196 pMetaXfer->pIoStorage = pIoStorage;
1197 pMetaXfer->cRefs = 0;
1198 pMetaXfer->pbDataShw = NULL;
1199 RTListInit(&pMetaXfer->ListIoCtxWaiting);
1200 RTListInit(&pMetaXfer->ListIoCtxShwWrites);
1201 }
1202 return pMetaXfer;
1203}
1204
1205DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
1206{
1207 /* Put it on the waiting list. */
1208 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
1209 PVDIOCTX pHeadOld;
1210 pIoCtx->pIoCtxNext = pNext;
1211 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
1212 {
1213 pNext = pHeadOld;
1214 Assert(pNext != pIoCtx);
1215 pIoCtx->pIoCtxNext = pNext;
1216 ASMNopPause();
1217 }
1218}
1219
1220DECLINLINE(void) vdIoCtxDefer(PVDISK pDisk, PVDIOCTX pIoCtx)
1221{
1222 LogFlowFunc(("Deferring I/O context pIoCtx=%#p\n", pIoCtx));
1223
1224 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
1225 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
1226 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
1227}
1228
1229static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
1230{
1231 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
1232}
1233
1234#if 0 /* unused */
1235static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
1236{
1237 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
1238}
1239#endif
1240
1241static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
1242{
1243 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
1244}
1245
1246static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
1247{
1248 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
1249}
1250
1251static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
1252{
1253 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
1254}
1255
1256/**
1257 * Returns whether the given I/O context has completed.
1258 *
1259 * @returns Flag whether the I/O context is complete.
1260 * @param pIoCtx The I/O context to check.
1261 */
1262DECLINLINE(bool) vdIoCtxIsComplete(PVDIOCTX pIoCtx)
1263{
1264 if ( !pIoCtx->cMetaTransfersPending
1265 && !pIoCtx->cDataTransfersPending
1266 && !pIoCtx->pfnIoCtxTransfer)
1267 return true;
1268
1269 /*
1270 * We complete the I/O context in case of an error
1271 * if there is no I/O task pending.
1272 */
1273 if ( RT_FAILURE(pIoCtx->rcReq)
1274 && !pIoCtx->cMetaTransfersPending
1275 && !pIoCtx->cDataTransfersPending)
1276 return true;
1277
1278 return false;
1279}
1280
1281/**
1282 * Returns whether the given I/O context is blocked due to a metadata transfer
1283 * or because the backend blocked it.
1284 *
1285 * @returns Flag whether the I/O context is blocked.
1286 * @param pIoCtx The I/O context to check.
1287 */
1288DECLINLINE(bool) vdIoCtxIsBlocked(PVDIOCTX pIoCtx)
1289{
1290 /* Don't change anything if there is a metadata transfer pending or we are blocked. */
1291 if ( pIoCtx->cMetaTransfersPending
1292 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1293 return true;
1294
1295 return false;
1296}
1297
1298/**
1299 * Process the I/O context, core method which assumes that the I/O context
1300 * acquired the lock.
1301 *
1302 * @returns VBox status code.
1303 * @param pIoCtx I/O context to process.
1304 */
1305static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
1306{
1307 int rc = VINF_SUCCESS;
1308
1309 VD_IS_LOCKED(pIoCtx->pDisk);
1310
1311 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
1312
1313 if (!vdIoCtxIsComplete(pIoCtx))
1314 {
1315 if (!vdIoCtxIsBlocked(pIoCtx))
1316 {
1317 if (pIoCtx->pfnIoCtxTransfer)
1318 {
1319 /* Call the transfer function advancing to the next while there is no error. */
1320 while ( pIoCtx->pfnIoCtxTransfer
1321 && !pIoCtx->cMetaTransfersPending
1322 && RT_SUCCESS(rc))
1323 {
1324 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
1325 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
1326
1327 /* Advance to the next part of the transfer if the current one succeeded. */
1328 if (RT_SUCCESS(rc))
1329 {
1330 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
1331 pIoCtx->pfnIoCtxTransferNext = NULL;
1332 }
1333 }
1334 }
1335
1336 if ( RT_SUCCESS(rc)
1337 && !pIoCtx->cMetaTransfersPending
1338 && !pIoCtx->cDataTransfersPending
1339 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1340 rc = VINF_VD_ASYNC_IO_FINISHED;
1341 else if ( RT_SUCCESS(rc)
1342 || rc == VERR_VD_NOT_ENOUGH_METADATA
1343 || rc == VERR_VD_IOCTX_HALT)
1344 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1345 else if ( RT_FAILURE(rc)
1346 && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
1347 {
1348 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
1349
1350 /*
1351 * The I/O context completed if we have an error and there is no data
1352 * or meta data transfer pending.
1353 */
1354 if ( !pIoCtx->cMetaTransfersPending
1355 && !pIoCtx->cDataTransfersPending)
1356 rc = VINF_VD_ASYNC_IO_FINISHED;
1357 else
1358 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1359 }
1360 }
1361 else
1362 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1363 }
1364 else
1365 rc = VINF_VD_ASYNC_IO_FINISHED;
1366
1367 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
1368 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
1369 pIoCtx->fComplete));
1370
1371 return rc;
1372}
1373
1374/**
1375 * Processes the list of waiting I/O contexts.
1376 *
1377 * @returns VBox status code, only valid if pIoCtxRc is not NULL, treat as void
1378 * function otherwise.
1379 * @param pDisk The disk structure.
1380 * @param pIoCtxRc An I/O context handle which waits on the list. When processed
1381 * The status code is returned. NULL if there is no I/O context
1382 * to return the status code for.
1383 */
1384static int vdDiskProcessWaitingIoCtx(PVDISK pDisk, PVDIOCTX pIoCtxRc)
1385{
1386 int rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1387
1388 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
1389
1390 VD_IS_LOCKED(pDisk);
1391
1392 /* Get the waiting list and process it in FIFO order. */
1393 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
1394
1395 /* Reverse it. */
1396 PVDIOCTX pCur = pIoCtxHead;
1397 pIoCtxHead = NULL;
1398 while (pCur)
1399 {
1400 PVDIOCTX pInsert = pCur;
1401 pCur = pCur->pIoCtxNext;
1402 pInsert->pIoCtxNext = pIoCtxHead;
1403 pIoCtxHead = pInsert;
1404 }
1405
1406 /* Process now. */
1407 pCur = pIoCtxHead;
1408 while (pCur)
1409 {
1410 int rcTmp;
1411 PVDIOCTX pTmp = pCur;
1412
1413 pCur = pCur->pIoCtxNext;
1414 pTmp->pIoCtxNext = NULL;
1415
1416 /*
1417 * Need to clear the sync flag here if there is a new I/O context
1418 * with it set and the context is not given in pIoCtxRc.
1419 * This happens most likely on a different thread and that one shouldn't
1420 * process the context synchronously.
1421 *
1422 * The thread who issued the context will wait on the event semaphore
1423 * anyway which is signalled when the completion handler is called.
1424 */
1425 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC
1426 && pTmp != pIoCtxRc)
1427 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
1428
1429 rcTmp = vdIoCtxProcessLocked(pTmp);
1430 if (pTmp == pIoCtxRc)
1431 {
1432 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1433 && RT_SUCCESS(pTmp->rcReq)
1434 && pTmp->enmTxDir == VDIOCTXTXDIR_READ)
1435 {
1436 int rc2 = vdFilterChainApplyRead(pDisk, pTmp->Req.Io.uOffsetXferOrig,
1437 pTmp->Req.Io.cbXferOrig, pTmp);
1438 if (RT_FAILURE(rc2))
1439 rcTmp = rc2;
1440 }
1441
1442 /* The given I/O context was processed, pass the return code to the caller. */
1443 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1444 && (pTmp->fFlags & VDIOCTX_FLAGS_SYNC))
1445 rc = pTmp->rcReq;
1446 else
1447 rc = rcTmp;
1448 }
1449 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1450 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
1451 {
1452 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1453 vdThreadFinishWrite(pDisk);
1454
1455 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
1456 vdIoCtxRootComplete(pDisk, pTmp);
1457
1458 if (fFreeCtx)
1459 vdIoCtxFree(pDisk, pTmp);
1460 }
1461 }
1462
1463 LogFlowFunc(("returns rc=%Rrc\n", rc));
1464 return rc;
1465}
1466
1467/**
1468 * Processes the list of blocked I/O contexts.
1469 *
1470 * @returns nothing.
1471 * @param pDisk The disk structure.
1472 */
1473static void vdDiskProcessBlockedIoCtx(PVDISK pDisk)
1474{
1475 LogFlowFunc(("pDisk=%#p\n", pDisk));
1476
1477 VD_IS_LOCKED(pDisk);
1478
1479 /* Get the waiting list and process it in FIFO order. */
1480 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
1481
1482 /* Reverse it. */
1483 PVDIOCTX pCur = pIoCtxHead;
1484 pIoCtxHead = NULL;
1485 while (pCur)
1486 {
1487 PVDIOCTX pInsert = pCur;
1488 pCur = pCur->pIoCtxNext;
1489 pInsert->pIoCtxNext = pIoCtxHead;
1490 pIoCtxHead = pInsert;
1491 }
1492
1493 /* Process now. */
1494 pCur = pIoCtxHead;
1495 while (pCur)
1496 {
1497 int rc;
1498 PVDIOCTX pTmp = pCur;
1499
1500 pCur = pCur->pIoCtxNext;
1501 pTmp->pIoCtxNext = NULL;
1502
1503 Assert(!pTmp->pIoCtxParent);
1504 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
1505 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
1506
1507 rc = vdIoCtxProcessLocked(pTmp);
1508 if ( rc == VINF_VD_ASYNC_IO_FINISHED
1509 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
1510 {
1511 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1512 vdThreadFinishWrite(pDisk);
1513
1514 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
1515 vdIoCtxRootComplete(pDisk, pTmp);
1516 if (fFreeCtx)
1517 vdIoCtxFree(pDisk, pTmp);
1518 }
1519 }
1520
1521 LogFlowFunc(("returns\n"));
1522}
1523
1524/**
1525 * Processes the I/O context trying to lock the criticial section.
1526 * The context is deferred if the critical section is busy.
1527 *
1528 * @returns VBox status code.
1529 * @param pIoCtx The I/O context to process.
1530 */
1531static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
1532{
1533 int rc = VINF_SUCCESS;
1534 PVDISK pDisk = pIoCtx->pDisk;
1535
1536 Log(("Defer pIoCtx=%#p\n", pIoCtx));
1537
1538 /* Put it on the waiting list first. */
1539 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
1540
1541 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
1542 {
1543 /* Leave it again, the context will be processed just before leaving the lock. */
1544 LogFlowFunc(("Successfully acquired the lock\n"));
1545 rc = vdDiskUnlock(pDisk, pIoCtx);
1546 }
1547 else
1548 {
1549 LogFlowFunc(("Lock is held\n"));
1550 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1551 }
1552
1553 return rc;
1554}
1555
1556/**
1557 * Process the I/O context in a synchronous manner, waiting
1558 * for it to complete.
1559 *
1560 * @returns VBox status code of the completed request.
1561 * @param pIoCtx The sync I/O context.
1562 * @param hEventComplete Event sempahore to wait on for completion.
1563 */
1564static int vdIoCtxProcessSync(PVDIOCTX pIoCtx, RTSEMEVENT hEventComplete)
1565{
1566 int rc = VINF_SUCCESS;
1567 PVDISK pDisk = pIoCtx->pDisk;
1568
1569 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
1570
1571 AssertMsg(pIoCtx->fFlags & (VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE),
1572 ("I/O context is not marked as synchronous\n"));
1573
1574 rc = vdIoCtxProcessTryLockDefer(pIoCtx);
1575 if (rc == VINF_VD_ASYNC_IO_FINISHED)
1576 rc = VINF_SUCCESS;
1577
1578 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1579 {
1580 rc = RTSemEventWait(hEventComplete, RT_INDEFINITE_WAIT);
1581 AssertRC(rc);
1582 }
1583
1584 rc = pIoCtx->rcReq;
1585 vdIoCtxFree(pDisk, pIoCtx);
1586
1587 return rc;
1588}
1589
1590DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVDISK pDisk, PVDIOCTX pIoCtx)
1591{
1592 return pDisk->pIoCtxLockOwner == pIoCtx;
1593}
1594
1595static int vdIoCtxLockDisk(PVDISK pDisk, PVDIOCTX pIoCtx)
1596{
1597 int rc = VINF_SUCCESS;
1598
1599 VD_IS_LOCKED(pDisk);
1600
1601 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
1602
1603 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
1604 {
1605 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
1606 vdIoCtxDefer(pDisk, pIoCtx);
1607 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1608 }
1609
1610 LogFlowFunc(("returns -> %Rrc\n", rc));
1611 return rc;
1612}
1613
1614static void vdIoCtxUnlockDisk(PVDISK pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
1615{
1616 RT_NOREF1(pIoCtx);
1617 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
1618 pDisk, pIoCtx, fProcessBlockedReqs));
1619
1620 VD_IS_LOCKED(pDisk);
1621
1622 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
1623 Assert(pDisk->pIoCtxLockOwner == pIoCtx);
1624 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
1625
1626 if (fProcessBlockedReqs)
1627 {
1628 /* Process any blocked writes if the current request didn't caused another growing. */
1629 vdDiskProcessBlockedIoCtx(pDisk);
1630 }
1631
1632 LogFlowFunc(("returns\n"));
1633}
1634
1635/**
1636 * Internal: Reads a given amount of data from the image chain of the disk.
1637 **/
1638static int vdDiskReadHelper(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1639 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
1640{
1641 RT_NOREF1(pDisk);
1642 int rc = VINF_SUCCESS;
1643 size_t cbThisRead = cbRead;
1644
1645 AssertPtr(pcbThisRead);
1646
1647 *pcbThisRead = 0;
1648
1649 /*
1650 * Try to read from the given image.
1651 * If the block is not allocated read from override chain if present.
1652 */
1653 rc = pImage->Backend->pfnRead(pImage->pBackendData,
1654 uOffset, cbThisRead, pIoCtx,
1655 &cbThisRead);
1656
1657 if (rc == VERR_VD_BLOCK_FREE)
1658 {
1659 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
1660 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
1661 pCurrImage = pCurrImage->pPrev)
1662 {
1663 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1664 uOffset, cbThisRead, pIoCtx,
1665 &cbThisRead);
1666 }
1667 }
1668
1669 if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE)
1670 *pcbThisRead = cbThisRead;
1671
1672 return rc;
1673}
1674
1675/**
1676 * internal: read the specified amount of data in whatever blocks the backend
1677 * will give us - async version.
1678 */
1679static DECLCALLBACK(int) vdReadHelperAsync(PVDIOCTX pIoCtx)
1680{
1681 int rc;
1682 PVDISK pDisk = pIoCtx->pDisk;
1683 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
1684 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
1685 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;
1686 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
1687 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;
1688 size_t cbThisRead;
1689
1690 /*
1691 * Check whether there is a full block write in progress which was not allocated.
1692 * Defer I/O if the range interferes but only if it does not belong to the
1693 * write doing the allocation.
1694 */
1695 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
1696 && uOffset >= pDisk->uOffsetStartLocked
1697 && uOffset < pDisk->uOffsetEndLocked
1698 && ( !pIoCtx->pIoCtxParent
1699 || pIoCtx->pIoCtxParent != pDisk->pIoCtxLockOwner))
1700 {
1701 Log(("Interferring read while allocating a new block => deferring read\n"));
1702 vdIoCtxDefer(pDisk, pIoCtx);
1703 return VERR_VD_ASYNC_IO_IN_PROGRESS;
1704 }
1705
1706 /* Loop until all reads started or we have a backend which needs to read metadata. */
1707 do
1708 {
1709 /* Search for image with allocated block. Do not attempt to read more
1710 * than the previous reads marked as valid. Otherwise this would return
1711 * stale data when different block sizes are used for the images. */
1712 cbThisRead = cbToRead;
1713
1714 if ( pDisk->pCache
1715 && !pImageParentOverride)
1716 {
1717 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
1718 pIoCtx, &cbThisRead);
1719 if (rc == VERR_VD_BLOCK_FREE)
1720 {
1721 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
1722 pIoCtx, &cbThisRead);
1723
1724 /* If the read was successful, write the data back into the cache. */
1725 if ( RT_SUCCESS(rc)
1726 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UPDATE_CACHE)
1727 {
1728 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
1729 pIoCtx, NULL);
1730 }
1731 }
1732 }
1733 else
1734 {
1735 /*
1736 * Try to read from the given image.
1737 * If the block is not allocated read from override chain if present.
1738 */
1739 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1740 uOffset, cbThisRead, pIoCtx,
1741 &cbThisRead);
1742
1743 if ( rc == VERR_VD_BLOCK_FREE
1744 && cImagesRead != 1)
1745 {
1746 unsigned cImagesToProcess = cImagesRead;
1747
1748 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
1749 pIoCtx->Req.Io.pImageParentOverride = NULL;
1750
1751 while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
1752 {
1753 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1754 uOffset, cbThisRead,
1755 pIoCtx, &cbThisRead);
1756 if (cImagesToProcess == 1)
1757 break;
1758 else if (cImagesToProcess > 0)
1759 cImagesToProcess--;
1760
1761 if (rc == VERR_VD_BLOCK_FREE)
1762 pCurrImage = pCurrImage->pPrev;
1763 }
1764 }
1765 }
1766
1767 /* The task state will be updated on success already, don't do it here!. */
1768 if (rc == VERR_VD_BLOCK_FREE)
1769 {
1770 /* No image in the chain contains the data for the block. */
1771 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisRead); Assert(cbThisRead == (uint32_t)cbThisRead);
1772
1773 /* Fill the free space with 0 if we are told to do so
1774 * or a previous read returned valid data. */
1775 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
1776 vdIoCtxSet(pIoCtx, '\0', cbThisRead);
1777 else
1778 pIoCtx->Req.Io.cbBufClear += cbThisRead;
1779
1780 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
1781 rc = VINF_VD_NEW_ZEROED_BLOCK;
1782 else
1783 rc = VINF_SUCCESS;
1784 }
1785 else if (rc == VERR_VD_IOCTX_HALT)
1786 {
1787 uOffset += cbThisRead;
1788 cbToRead -= cbThisRead;
1789 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
1790 }
1791 else if ( RT_SUCCESS(rc)
1792 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1793 {
1794 /* First not free block, fill the space before with 0. */
1795 if ( pIoCtx->Req.Io.cbBufClear
1796 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1797 {
1798 RTSGBUF SgBuf;
1799 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
1800 RTSgBufReset(&SgBuf);
1801 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
1802 pIoCtx->Req.Io.cbBufClear = 0;
1803 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1804 }
1805 rc = VINF_SUCCESS;
1806 }
1807
1808 if (RT_FAILURE(rc))
1809 break;
1810
1811 cbToRead -= cbThisRead;
1812 uOffset += cbThisRead;
1813 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
1814 } while (cbToRead != 0 && RT_SUCCESS(rc));
1815
1816 if ( rc == VERR_VD_NOT_ENOUGH_METADATA
1817 || rc == VERR_VD_IOCTX_HALT)
1818 {
1819 /* Save the current state. */
1820 pIoCtx->Req.Io.uOffset = uOffset;
1821 pIoCtx->Req.Io.cbTransfer = cbToRead;
1822 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
1823 }
1824
1825 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1826 ? VERR_VD_BLOCK_FREE
1827 : rc;
1828}
1829
1830/**
1831 * internal: parent image read wrapper for compacting.
1832 */
1833static DECLCALLBACK(int) vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
1834 size_t cbRead)
1835{
1836 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
1837
1838 /** @todo
1839 * Only used for compaction so far which is not possible to mix with async I/O.
1840 * Needs to be changed if we want to support online compaction of images.
1841 */
1842 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
1843 AssertMsgReturn(!fLocked,
1844 ("Calling synchronous parent read while another thread holds the disk lock\n"),
1845 VERR_VD_INVALID_STATE);
1846
1847 /* Fake an I/O context. */
1848 RTSGSEG Segment;
1849 RTSGBUF SgBuf;
1850 VDIOCTX IoCtx;
1851
1852 Segment.pvSeg = pvBuf;
1853 Segment.cbSeg = cbRead;
1854 RTSgBufInit(&SgBuf, &Segment, 1);
1855 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
1856 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
1857 int rc = vdReadHelperAsync(&IoCtx);
1858 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
1859 return rc;
1860}
1861
1862/**
1863 * Extended version of vdReadHelper(), implementing certain optimizations
1864 * for image cloning.
1865 *
1866 * @returns VBox status code.
1867 * @param pDisk The disk to read from.
1868 * @param pImage The image to start reading from.
1869 * @param pImageParentOverride The parent image to read from
1870 * if the starting image returns a free block.
1871 * If NULL is passed the real parent of the image
1872 * in the chain is used.
1873 * @param uOffset Offset in the disk to start reading from.
1874 * @param pvBuf Where to store the read data.
1875 * @param cbRead How much to read.
1876 * @param fZeroFreeBlocks Flag whether free blocks should be zeroed.
1877 * If false and no image has data for sepcified
1878 * range VERR_VD_BLOCK_FREE is returned.
1879 * Note that unallocated blocks are still zeroed
1880 * if at least one image has valid data for a part
1881 * of the range.
1882 * @param fUpdateCache Flag whether to update the attached cache if
1883 * available.
1884 * @param cImagesRead Number of images in the chain to read until
1885 * the read is cut off. A value of 0 disables the cut off.
1886 */
1887static int vdReadHelperEx(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1888 uint64_t uOffset, void *pvBuf, size_t cbRead,
1889 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
1890{
1891 int rc = VINF_SUCCESS;
1892 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
1893 RTSGSEG Segment;
1894 RTSGBUF SgBuf;
1895 VDIOCTX IoCtx;
1896 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
1897
1898 rc = RTSemEventCreate(&hEventComplete);
1899 if (RT_FAILURE(rc))
1900 return rc;
1901
1902 if (fZeroFreeBlocks)
1903 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1904 if (fUpdateCache)
1905 fFlags |= VDIOCTX_FLAGS_READ_UPDATE_CACHE;
1906
1907 Segment.pvSeg = pvBuf;
1908 Segment.cbSeg = cbRead;
1909 RTSgBufInit(&SgBuf, &Segment, 1);
1910 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
1911 NULL, vdReadHelperAsync, fFlags);
1912
1913 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
1914 IoCtx.Req.Io.cImagesRead = cImagesRead;
1915 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
1916 IoCtx.Type.Root.pvUser1 = pDisk;
1917 IoCtx.Type.Root.pvUser2 = hEventComplete;
1918 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
1919 RTSemEventDestroy(hEventComplete);
1920 return rc;
1921}
1922
1923/**
1924 * internal: read the specified amount of data in whatever blocks the backend
1925 * will give us.
1926 */
1927static int vdReadHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
1928 void *pvBuf, size_t cbRead, bool fUpdateCache)
1929{
1930 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
1931 true /* fZeroFreeBlocks */, fUpdateCache, 0);
1932}
1933
1934/**
1935 * internal: mark the disk as not modified.
1936 */
1937static void vdResetModifiedFlag(PVDISK pDisk)
1938{
1939 if (pDisk->uModified & VD_IMAGE_MODIFIED_FLAG)
1940 {
1941 /* generate new last-modified uuid */
1942 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
1943 {
1944 RTUUID Uuid;
1945
1946 RTUuidCreate(&Uuid);
1947 pDisk->pLast->Backend->pfnSetModificationUuid(pDisk->pLast->pBackendData,
1948 &Uuid);
1949
1950 if (pDisk->pCache)
1951 pDisk->pCache->Backend->pfnSetModificationUuid(pDisk->pCache->pBackendData,
1952 &Uuid);
1953 }
1954
1955 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FLAG;
1956 }
1957}
1958
1959/**
1960 * internal: mark the disk as modified.
1961 */
1962static void vdSetModifiedFlag(PVDISK pDisk)
1963{
1964 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
1965 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
1966 {
1967 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
1968
1969 /* First modify, so create a UUID and ensure it's written to disk. */
1970 vdResetModifiedFlag(pDisk);
1971
1972 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
1973 {
1974 VDIOCTX IoCtx;
1975 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, NULL,
1976 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
1977 pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData, &IoCtx);
1978 }
1979 }
1980}
1981
1982/**
1983 * internal: write buffer to the image, taking care of block boundaries and
1984 * write optimizations.
1985 */
1986static int vdWriteHelperEx(PVDISK pDisk, PVDIMAGE pImage,
1987 PVDIMAGE pImageParentOverride, uint64_t uOffset,
1988 const void *pvBuf, size_t cbWrite,
1989 uint32_t fFlags, unsigned cImagesRead)
1990{
1991 int rc = VINF_SUCCESS;
1992 RTSGSEG Segment;
1993 RTSGBUF SgBuf;
1994 VDIOCTX IoCtx;
1995 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
1996
1997 rc = RTSemEventCreate(&hEventComplete);
1998 if (RT_FAILURE(rc))
1999 return rc;
2000
2001 fFlags |= VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
2002
2003 Segment.pvSeg = (void *)pvBuf;
2004 Segment.cbSeg = cbWrite;
2005 RTSgBufInit(&SgBuf, &Segment, 1);
2006 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
2007 NULL, vdWriteHelperAsync, fFlags);
2008
2009 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
2010 IoCtx.Req.Io.cImagesRead = cImagesRead;
2011 IoCtx.pIoCtxParent = NULL;
2012 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
2013 IoCtx.Type.Root.pvUser1 = pDisk;
2014 IoCtx.Type.Root.pvUser2 = hEventComplete;
2015 if (RT_SUCCESS(rc))
2016 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
2017
2018 RTSemEventDestroy(hEventComplete);
2019 return rc;
2020}
2021
2022/**
2023 * internal: write buffer to the image, taking care of block boundaries and
2024 * write optimizations.
2025 */
2026static int vdWriteHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
2027 const void *pvBuf, size_t cbWrite, uint32_t fFlags)
2028{
2029 return vdWriteHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbWrite,
2030 fFlags, 0);
2031}
2032
2033/**
2034 * Internal: Copies the content of one disk to another one applying optimizations
2035 * to speed up the copy process if possible.
2036 */
2037static int vdCopyHelper(PVDISK pDiskFrom, PVDIMAGE pImageFrom, PVDISK pDiskTo,
2038 uint64_t cbSize, unsigned cImagesFromRead, unsigned cImagesToRead,
2039 bool fSuppressRedundantIo, PVDINTERFACEPROGRESS pIfProgress,
2040 PVDINTERFACEPROGRESS pDstIfProgress)
2041{
2042 int rc = VINF_SUCCESS;
2043 int rc2;
2044 uint64_t uOffset = 0;
2045 uint64_t cbRemaining = cbSize;
2046 void *pvBuf = NULL;
2047 bool fLockReadFrom = false;
2048 bool fLockWriteTo = false;
2049 bool fBlockwiseCopy = false;
2050 unsigned uProgressOld = 0;
2051
2052 LogFlowFunc(("pDiskFrom=%#p pImageFrom=%#p pDiskTo=%#p cbSize=%llu cImagesFromRead=%u cImagesToRead=%u fSuppressRedundantIo=%RTbool pIfProgress=%#p pDstIfProgress=%#p\n",
2053 pDiskFrom, pImageFrom, pDiskTo, cbSize, cImagesFromRead, cImagesToRead, fSuppressRedundantIo, pDstIfProgress, pDstIfProgress));
2054
2055 if ( (fSuppressRedundantIo || (cImagesFromRead > 0))
2056 && RTListIsEmpty(&pDiskFrom->ListFilterChainRead))
2057 fBlockwiseCopy = true;
2058
2059 /* Allocate tmp buffer. */
2060 pvBuf = RTMemTmpAlloc(VD_MERGE_BUFFER_SIZE);
2061 if (!pvBuf)
2062 return rc;
2063
2064 do
2065 {
2066 size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining);
2067
2068 /* Note that we don't attempt to synchronize cross-disk accesses.
2069 * It wouldn't be very difficult to do, just the lock order would
2070 * need to be defined somehow to prevent deadlocks. Postpone such
2071 * magic as there is no use case for this. */
2072
2073 rc2 = vdThreadStartRead(pDiskFrom);
2074 AssertRC(rc2);
2075 fLockReadFrom = true;
2076
2077 if (fBlockwiseCopy)
2078 {
2079 RTSGSEG SegmentBuf;
2080 RTSGBUF SgBuf;
2081 VDIOCTX IoCtx;
2082
2083 SegmentBuf.pvSeg = pvBuf;
2084 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
2085 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2086 vdIoCtxInit(&IoCtx, pDiskFrom, VDIOCTXTXDIR_READ, 0, 0, NULL,
2087 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2088
2089 /* Read the source data. */
2090 rc = pImageFrom->Backend->pfnRead(pImageFrom->pBackendData,
2091 uOffset, cbThisRead, &IoCtx,
2092 &cbThisRead);
2093
2094 if ( rc == VERR_VD_BLOCK_FREE
2095 && cImagesFromRead != 1)
2096 {
2097 unsigned cImagesToProcess = cImagesFromRead;
2098
2099 for (PVDIMAGE pCurrImage = pImageFrom->pPrev;
2100 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
2101 pCurrImage = pCurrImage->pPrev)
2102 {
2103 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
2104 uOffset, cbThisRead,
2105 &IoCtx, &cbThisRead);
2106 if (cImagesToProcess == 1)
2107 break;
2108 else if (cImagesToProcess > 0)
2109 cImagesToProcess--;
2110 }
2111 }
2112 }
2113 else
2114 rc = vdReadHelper(pDiskFrom, pImageFrom, uOffset, pvBuf, cbThisRead,
2115 false /* fUpdateCache */);
2116
2117 if (RT_FAILURE(rc) && rc != VERR_VD_BLOCK_FREE)
2118 break;
2119
2120 rc2 = vdThreadFinishRead(pDiskFrom);
2121 AssertRC(rc2);
2122 fLockReadFrom = false;
2123
2124 if (rc != VERR_VD_BLOCK_FREE)
2125 {
2126 rc2 = vdThreadStartWrite(pDiskTo);
2127 AssertRC(rc2);
2128 fLockWriteTo = true;
2129
2130 /* Only do collapsed I/O if we are copying the data blockwise. */
2131 rc = vdWriteHelperEx(pDiskTo, pDiskTo->pLast, NULL, uOffset, pvBuf,
2132 cbThisRead, VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG /* fFlags */,
2133 fBlockwiseCopy ? cImagesToRead : 0);
2134 if (RT_FAILURE(rc))
2135 break;
2136
2137 rc2 = vdThreadFinishWrite(pDiskTo);
2138 AssertRC(rc2);
2139 fLockWriteTo = false;
2140 }
2141 else /* Don't propagate the error to the outside */
2142 rc = VINF_SUCCESS;
2143
2144 uOffset += cbThisRead;
2145 cbRemaining -= cbThisRead;
2146
2147 unsigned uProgressNew = uOffset * 99 / cbSize;
2148 if (uProgressNew != uProgressOld)
2149 {
2150 uProgressOld = uProgressNew;
2151
2152 if (pIfProgress && pIfProgress->pfnProgress)
2153 {
2154 rc = pIfProgress->pfnProgress(pIfProgress->Core.pvUser,
2155 uProgressOld);
2156 if (RT_FAILURE(rc))
2157 break;
2158 }
2159 if (pDstIfProgress && pDstIfProgress->pfnProgress)
2160 {
2161 rc = pDstIfProgress->pfnProgress(pDstIfProgress->Core.pvUser,
2162 uProgressOld);
2163 if (RT_FAILURE(rc))
2164 break;
2165 }
2166 }
2167 } while (uOffset < cbSize);
2168
2169 RTMemFree(pvBuf);
2170
2171 if (fLockReadFrom)
2172 {
2173 rc2 = vdThreadFinishRead(pDiskFrom);
2174 AssertRC(rc2);
2175 }
2176
2177 if (fLockWriteTo)
2178 {
2179 rc2 = vdThreadFinishWrite(pDiskTo);
2180 AssertRC(rc2);
2181 }
2182
2183 LogFlowFunc(("returns rc=%Rrc\n", rc));
2184 return rc;
2185}
2186
2187/**
2188 * Flush helper async version.
2189 */
2190static DECLCALLBACK(int) vdSetModifiedHelperAsync(PVDIOCTX pIoCtx)
2191{
2192 int rc = VINF_SUCCESS;
2193 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2194
2195 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2196 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2197 rc = VINF_SUCCESS;
2198
2199 return rc;
2200}
2201
2202/**
2203 * internal: mark the disk as modified - async version.
2204 */
2205static int vdSetModifiedFlagAsync(PVDISK pDisk, PVDIOCTX pIoCtx)
2206{
2207 int rc = VINF_SUCCESS;
2208
2209 VD_IS_LOCKED(pDisk);
2210
2211 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
2212 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
2213 {
2214 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2215 if (RT_SUCCESS(rc))
2216 {
2217 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
2218
2219 /* First modify, so create a UUID and ensure it's written to disk. */
2220 vdResetModifiedFlag(pDisk);
2221
2222 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
2223 {
2224 PVDIOCTX pIoCtxFlush = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_FLUSH,
2225 0, 0, pDisk->pLast,
2226 NULL, pIoCtx, 0, 0, NULL,
2227 vdSetModifiedHelperAsync);
2228
2229 if (pIoCtxFlush)
2230 {
2231 rc = vdIoCtxProcessLocked(pIoCtxFlush);
2232 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2233 {
2234 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs */);
2235 vdIoCtxFree(pDisk, pIoCtxFlush);
2236 }
2237 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2238 {
2239 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2240 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2241 }
2242 else /* Another error */
2243 vdIoCtxFree(pDisk, pIoCtxFlush);
2244 }
2245 else
2246 rc = VERR_NO_MEMORY;
2247 }
2248 }
2249 }
2250
2251 return rc;
2252}
2253
2254static DECLCALLBACK(int) vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
2255{
2256 int rc = VINF_SUCCESS;
2257 PVDIMAGE pImage = pIoCtx->Req.Io.pImageStart;
2258 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2259 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2260 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2261
2262 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2263 rc = pImage->Backend->pfnWrite(pImage->pBackendData,
2264 pIoCtx->Req.Io.uOffset - cbPreRead,
2265 cbPreRead + cbThisWrite + cbPostRead,
2266 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
2267 Assert(rc != VERR_VD_BLOCK_FREE);
2268 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
2269 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPostRead == 0);
2270 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2271 rc = VINF_SUCCESS;
2272 else if (rc == VERR_VD_IOCTX_HALT)
2273 {
2274 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2275 rc = VINF_SUCCESS;
2276 }
2277
2278 LogFlowFunc(("returns rc=%Rrc\n", rc));
2279 return rc;
2280}
2281
2282static DECLCALLBACK(int) vdWriteHelperOptimizedCmpAndWriteAsync(PVDIOCTX pIoCtx)
2283{
2284 int rc = VINF_SUCCESS;
2285 size_t cbThisWrite = 0;
2286 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2287 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2288 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
2289 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2290 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
2291 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
2292
2293 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2294
2295 AssertPtr(pIoCtxParent);
2296 Assert(!pIoCtxParent->pIoCtxParent);
2297 Assert(!pIoCtx->Req.Io.cbTransferLeft && !pIoCtx->cMetaTransfersPending);
2298
2299 vdIoCtxChildReset(pIoCtx);
2300 cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2301 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
2302
2303 /* Check if the write would modify anything in this block. */
2304 if (!RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &pIoCtxParent->Req.Io.SgBuf, cbThisWrite))
2305 {
2306 RTSGBUF SgBufSrcTmp;
2307
2308 RTSgBufClone(&SgBufSrcTmp, &pIoCtxParent->Req.Io.SgBuf);
2309 RTSgBufAdvance(&SgBufSrcTmp, cbThisWrite);
2310 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbThisWrite);
2311
2312 if (!cbWriteCopy || !RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &SgBufSrcTmp, cbWriteCopy))
2313 {
2314 /* Block is completely unchanged, so no need to write anything. */
2315 LogFlowFunc(("Block didn't changed\n"));
2316 ASMAtomicWriteU32(&pIoCtx->Req.Io.cbTransferLeft, 0);
2317 RTSgBufAdvance(&pIoCtxParent->Req.Io.SgBuf, cbThisWrite);
2318 return VINF_VD_ASYNC_IO_FINISHED;
2319 }
2320 }
2321
2322 /* Copy the data to the right place in the buffer. */
2323 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2324 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
2325 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
2326
2327 /* Handle the data that goes after the write to fill the block. */
2328 if (cbPostRead)
2329 {
2330 /* Now assemble the remaining data. */
2331 if (cbWriteCopy)
2332 {
2333 /*
2334 * The S/G buffer of the parent needs to be cloned because
2335 * it is not allowed to modify the state.
2336 */
2337 RTSGBUF SgBufParentTmp;
2338
2339 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
2340 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
2341 }
2342
2343 /* Zero out the remainder of this block. Will never be visible, as this
2344 * is beyond the limit of the image. */
2345 if (cbFill)
2346 {
2347 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
2348 vdIoCtxSet(pIoCtx, '\0', cbFill);
2349 }
2350 }
2351
2352 /* Write the full block to the virtual disk. */
2353 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2354 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2355
2356 return rc;
2357}
2358
2359static DECLCALLBACK(int) vdWriteHelperOptimizedPreReadAsync(PVDIOCTX pIoCtx)
2360{
2361 int rc = VINF_SUCCESS;
2362
2363 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2364
2365 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2366
2367 if ( pIoCtx->Req.Io.cbTransferLeft
2368 && !pIoCtx->cDataTransfersPending)
2369 rc = vdReadHelperAsync(pIoCtx);
2370
2371 if ( ( RT_SUCCESS(rc)
2372 || (rc == VERR_VD_ASYNC_IO_IN_PROGRESS))
2373 && ( pIoCtx->Req.Io.cbTransferLeft
2374 || pIoCtx->cMetaTransfersPending))
2375 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2376 else
2377 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCmpAndWriteAsync;
2378
2379 return rc;
2380}
2381
2382/**
2383 * internal: write a complete block (only used for diff images), taking the
2384 * remaining data from parent images. This implementation optimizes out writes
2385 * that do not change the data relative to the state as of the parent images.
2386 * All backends which support differential/growing images support this - async version.
2387 */
2388static DECLCALLBACK(int) vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx)
2389{
2390 PVDISK pDisk = pIoCtx->pDisk;
2391 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2392 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2393 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2394 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2395 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2396 size_t cbFill = 0;
2397 size_t cbWriteCopy = 0;
2398 size_t cbReadImage = 0;
2399
2400 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2401
2402 AssertPtr(pIoCtx->pIoCtxParent);
2403 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2404
2405 if (cbPostRead)
2406 {
2407 /* Figure out how much we cannot read from the image, because
2408 * the last block to write might exceed the nominal size of the
2409 * image for technical reasons. */
2410 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2411 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2412
2413 /* If we have data to be written, use that instead of reading
2414 * data from the image. */
2415 if (cbWrite > cbThisWrite)
2416 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2417
2418 /* The rest must be read from the image. */
2419 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2420 }
2421
2422 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2423 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2424 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2425
2426 /* Read the entire data of the block so that we can compare whether it will
2427 * be modified by the write or not. */
2428 size_t cbTmp = cbPreRead + cbThisWrite + cbPostRead - cbFill; Assert(cbTmp == (uint32_t)cbTmp);
2429 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTmp;
2430 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2431 pIoCtx->Req.Io.uOffset -= cbPreRead;
2432
2433 /* Next step */
2434 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
2435 return VINF_SUCCESS;
2436}
2437
2438static DECLCALLBACK(int) vdWriteHelperStandardReadImageAsync(PVDIOCTX pIoCtx)
2439{
2440 int rc = VINF_SUCCESS;
2441
2442 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2443
2444 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2445
2446 if ( pIoCtx->Req.Io.cbTransferLeft
2447 && !pIoCtx->cDataTransfersPending)
2448 rc = vdReadHelperAsync(pIoCtx);
2449
2450 if ( RT_SUCCESS(rc)
2451 && ( pIoCtx->Req.Io.cbTransferLeft
2452 || pIoCtx->cMetaTransfersPending))
2453 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2454 else
2455 {
2456 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2457
2458 /* Zero out the remainder of this block. Will never be visible, as this
2459 * is beyond the limit of the image. */
2460 if (cbFill)
2461 vdIoCtxSet(pIoCtx, '\0', cbFill);
2462
2463 /* Write the full block to the virtual disk. */
2464 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2465
2466 vdIoCtxChildReset(pIoCtx);
2467 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2468 }
2469
2470 return rc;
2471}
2472
2473static DECLCALLBACK(int) vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
2474{
2475 int rc = VINF_SUCCESS;
2476 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2477 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2478 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
2479
2480 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2481
2482 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
2483 if (cbPostRead)
2484 {
2485 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2486 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
2487 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
2488
2489 /* Now assemble the remaining data. */
2490 if (cbWriteCopy)
2491 {
2492 /*
2493 * The S/G buffer of the parent needs to be cloned because
2494 * it is not allowed to modify the state.
2495 */
2496 RTSGBUF SgBufParentTmp;
2497
2498 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
2499 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
2500 }
2501
2502 if (cbReadImage)
2503 {
2504 /* Read remaining data. */
2505 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardReadImageAsync;
2506
2507 /* Read the data that goes before the write to fill the block. */
2508 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbReadImage; Assert(cbReadImage == (uint32_t)cbReadImage);
2509 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2510 pIoCtx->Req.Io.uOffset += cbWriteCopy;
2511 }
2512 else
2513 {
2514 /* Zero out the remainder of this block. Will never be visible, as this
2515 * is beyond the limit of the image. */
2516 if (cbFill)
2517 vdIoCtxSet(pIoCtx, '\0', cbFill);
2518
2519 /* Write the full block to the virtual disk. */
2520 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2521 vdIoCtxChildReset(pIoCtx);
2522 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2523 }
2524 }
2525 else
2526 {
2527 /* Write the full block to the virtual disk. */
2528 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2529 vdIoCtxChildReset(pIoCtx);
2530 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2531 }
2532
2533 return rc;
2534}
2535
2536static DECLCALLBACK(int) vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
2537{
2538 int rc = VINF_SUCCESS;
2539
2540 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2541
2542 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2543
2544 if ( pIoCtx->Req.Io.cbTransferLeft
2545 && !pIoCtx->cDataTransfersPending)
2546 rc = vdReadHelperAsync(pIoCtx);
2547
2548 if ( RT_SUCCESS(rc)
2549 && ( pIoCtx->Req.Io.cbTransferLeft
2550 || pIoCtx->cMetaTransfersPending))
2551 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2552 else
2553 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2554
2555 return rc;
2556}
2557
2558static DECLCALLBACK(int) vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
2559{
2560 PVDISK pDisk = pIoCtx->pDisk;
2561 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2562 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2563 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2564 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2565 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2566 size_t cbFill = 0;
2567 size_t cbWriteCopy = 0;
2568 size_t cbReadImage = 0;
2569
2570 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2571
2572 AssertPtr(pIoCtx->pIoCtxParent);
2573 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2574
2575 /* Calculate the amount of data to read that goes after the write to fill the block. */
2576 if (cbPostRead)
2577 {
2578 /* If we have data to be written, use that instead of reading
2579 * data from the image. */
2580 if (cbWrite > cbThisWrite)
2581 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2582 else
2583 cbWriteCopy = 0;
2584
2585 /* Figure out how much we cannot read from the image, because
2586 * the last block to write might exceed the nominal size of the
2587 * image for technical reasons. */
2588 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2589 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2590
2591 /* The rest must be read from the image. */
2592 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2593 }
2594
2595 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2596 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2597 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2598
2599 /* Next step */
2600 if (cbPreRead)
2601 {
2602 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
2603
2604 /* Read the data that goes before the write to fill the block. */
2605 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbPreRead; Assert(cbPreRead == (uint32_t)cbPreRead);
2606 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2607 pIoCtx->Req.Io.uOffset -= cbPreRead;
2608 }
2609 else
2610 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2611
2612 return VINF_SUCCESS;
2613}
2614
2615/**
2616 * internal: write buffer to the image, taking care of block boundaries and
2617 * write optimizations - async version.
2618 */
2619static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx)
2620{
2621 int rc;
2622 size_t cbWrite = pIoCtx->Req.Io.cbTransfer;
2623 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
2624 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2625 PVDISK pDisk = pIoCtx->pDisk;
2626 unsigned fWrite;
2627 size_t cbThisWrite;
2628 size_t cbPreRead, cbPostRead;
2629
2630 /* Apply write filter chain here if it was not done already. */
2631 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_WRITE_FILTER_APPLIED))
2632 {
2633 rc = vdFilterChainApplyWrite(pDisk, uOffset, cbWrite, pIoCtx);
2634 if (RT_FAILURE(rc))
2635 return rc;
2636 pIoCtx->fFlags |= VDIOCTX_FLAGS_WRITE_FILTER_APPLIED;
2637 }
2638
2639 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG))
2640 {
2641 rc = vdSetModifiedFlagAsync(pDisk, pIoCtx);
2642 if (RT_FAILURE(rc)) /* Includes I/O in progress. */
2643 return rc;
2644 }
2645
2646 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
2647 if (RT_FAILURE(rc))
2648 return rc;
2649
2650 /* Loop until all written. */
2651 do
2652 {
2653 /* Try to write the possibly partial block to the last opened image.
2654 * This works when the block is already allocated in this image or
2655 * if it is a full-block write (and allocation isn't suppressed below).
2656 * For image formats which don't support zero blocks, it's beneficial
2657 * to avoid unnecessarily allocating unchanged blocks. This prevents
2658 * unwanted expanding of images. VMDK is an example. */
2659 cbThisWrite = cbWrite;
2660
2661 /*
2662 * Check whether there is a full block write in progress which was not allocated.
2663 * Defer I/O if the range interferes.
2664 */
2665 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
2666 && uOffset >= pDisk->uOffsetStartLocked
2667 && uOffset < pDisk->uOffsetEndLocked)
2668 {
2669 Log(("Interferring write while allocating a new block => deferring write\n"));
2670 vdIoCtxDefer(pDisk, pIoCtx);
2671 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2672 break;
2673 }
2674
2675 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2676 ? 0 : VD_WRITE_NO_ALLOC;
2677 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset, cbThisWrite,
2678 pIoCtx, &cbThisWrite, &cbPreRead, &cbPostRead,
2679 fWrite);
2680 if (rc == VERR_VD_BLOCK_FREE)
2681 {
2682 /* Lock the disk .*/
2683 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2684 if (RT_SUCCESS(rc))
2685 {
2686 /*
2687 * Allocate segment and buffer in one go.
2688 * A bit hackish but avoids the need to allocate memory twice.
2689 */
2690 PRTSGBUF pTmp = (PRTSGBUF)RTMemAlloc(cbPreRead + cbThisWrite + cbPostRead + sizeof(RTSGSEG) + sizeof(RTSGBUF));
2691 AssertBreakStmt(pTmp, rc = VERR_NO_MEMORY);
2692 PRTSGSEG pSeg = (PRTSGSEG)(pTmp + 1);
2693
2694 pSeg->pvSeg = pSeg + 1;
2695 pSeg->cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2696 RTSgBufInit(pTmp, pSeg, 1);
2697
2698 PVDIOCTX pIoCtxWrite = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_WRITE,
2699 uOffset, pSeg->cbSeg, pImage,
2700 pTmp,
2701 pIoCtx, cbThisWrite,
2702 cbWrite,
2703 pTmp,
2704 (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2705 ? vdWriteHelperStandardAsync
2706 : vdWriteHelperOptimizedAsync);
2707 if (!VALID_PTR(pIoCtxWrite))
2708 {
2709 RTMemTmpFree(pTmp);
2710 rc = VERR_NO_MEMORY;
2711 break;
2712 }
2713
2714 LogFlowFunc(("Disk is growing because of pIoCtx=%#p pIoCtxWrite=%#p\n",
2715 pIoCtx, pIoCtxWrite));
2716
2717 /* Save the current range for the growing operation to check for intersecting requests later. */
2718 pDisk->uOffsetStartLocked = uOffset - cbPreRead;
2719 pDisk->uOffsetEndLocked = uOffset + cbThisWrite + cbPostRead;
2720
2721 pIoCtxWrite->Type.Child.cbPreRead = cbPreRead;
2722 pIoCtxWrite->Type.Child.cbPostRead = cbPostRead;
2723 pIoCtxWrite->Req.Io.pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
2724
2725 /* Process the write request */
2726 rc = vdIoCtxProcessLocked(pIoCtxWrite);
2727
2728 if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
2729 {
2730 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2731 vdIoCtxFree(pDisk, pIoCtxWrite);
2732 break;
2733 }
2734 else if ( rc == VINF_VD_ASYNC_IO_FINISHED
2735 && ASMAtomicCmpXchgBool(&pIoCtxWrite->fComplete, true, false))
2736 {
2737 LogFlow(("Child write request completed\n"));
2738 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbThisWrite);
2739 Assert(cbThisWrite == (uint32_t)cbThisWrite);
2740 rc = pIoCtxWrite->rcReq;
2741 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisWrite);
2742 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2743 vdIoCtxFree(pDisk, pIoCtxWrite);
2744 }
2745 else
2746 {
2747 LogFlow(("Child write pending\n"));
2748 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2749 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2750 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2751 cbWrite -= cbThisWrite;
2752 uOffset += cbThisWrite;
2753 break;
2754 }
2755 }
2756 else
2757 {
2758 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2759 break;
2760 }
2761 }
2762
2763 if (rc == VERR_VD_IOCTX_HALT)
2764 {
2765 cbWrite -= cbThisWrite;
2766 uOffset += cbThisWrite;
2767 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2768 break;
2769 }
2770 else if (rc == VERR_VD_NOT_ENOUGH_METADATA)
2771 break;
2772
2773 cbWrite -= cbThisWrite;
2774 uOffset += cbThisWrite;
2775 } while (cbWrite != 0 && (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
2776
2777 if ( rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2778 || rc == VERR_VD_NOT_ENOUGH_METADATA
2779 || rc == VERR_VD_IOCTX_HALT)
2780 {
2781 /*
2782 * Tell the caller that we don't need to go back here because all
2783 * writes are initiated.
2784 */
2785 if ( !cbWrite
2786 && rc != VERR_VD_IOCTX_HALT)
2787 rc = VINF_SUCCESS;
2788
2789 pIoCtx->Req.Io.uOffset = uOffset;
2790 pIoCtx->Req.Io.cbTransfer = cbWrite;
2791 }
2792
2793 return rc;
2794}
2795
2796/**
2797 * Flush helper async version.
2798 */
2799static DECLCALLBACK(int) vdFlushHelperAsync(PVDIOCTX pIoCtx)
2800{
2801 int rc = VINF_SUCCESS;
2802 PVDISK pDisk = pIoCtx->pDisk;
2803 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2804
2805 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2806 if (RT_SUCCESS(rc))
2807 {
2808 /* Mark the whole disk as locked. */
2809 pDisk->uOffsetStartLocked = 0;
2810 pDisk->uOffsetEndLocked = UINT64_C(0xffffffffffffffff);
2811
2812 vdResetModifiedFlag(pDisk);
2813 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2814 if ( ( RT_SUCCESS(rc)
2815 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2816 || rc == VERR_VD_IOCTX_HALT)
2817 && pDisk->pCache)
2818 {
2819 rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData, pIoCtx);
2820 if ( RT_SUCCESS(rc)
2821 || ( rc != VERR_VD_ASYNC_IO_IN_PROGRESS
2822 && rc != VERR_VD_IOCTX_HALT))
2823 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2824 else if (rc != VERR_VD_IOCTX_HALT)
2825 rc = VINF_SUCCESS;
2826 }
2827 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2828 rc = VINF_SUCCESS;
2829 else if (rc != VERR_VD_IOCTX_HALT)/* Some other error. */
2830 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2831 }
2832
2833 return rc;
2834}
2835
2836/**
2837 * Async discard helper - discards a whole block which is recorded in the block
2838 * tree.
2839 *
2840 * @returns VBox status code.
2841 * @param pIoCtx The I/O context to operate on.
2842 */
2843static DECLCALLBACK(int) vdDiscardWholeBlockAsync(PVDIOCTX pIoCtx)
2844{
2845 int rc = VINF_SUCCESS;
2846 PVDISK pDisk = pIoCtx->pDisk;
2847 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2848 PVDDISCARDBLOCK pBlock = pIoCtx->Req.Discard.pBlock;
2849 size_t cbPreAllocated, cbPostAllocated, cbActuallyDiscarded;
2850
2851 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2852
2853 AssertPtr(pBlock);
2854
2855 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2856 pBlock->Core.Key, pBlock->cbDiscard,
2857 &cbPreAllocated, &cbPostAllocated,
2858 &cbActuallyDiscarded, NULL, 0);
2859 Assert(rc != VERR_VD_DISCARD_ALIGNMENT_NOT_MET);
2860 Assert(!cbPreAllocated);
2861 Assert(!cbPostAllocated);
2862 Assert(cbActuallyDiscarded == pBlock->cbDiscard || RT_FAILURE(rc));
2863
2864 /* Remove the block on success. */
2865 if ( RT_SUCCESS(rc)
2866 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2867 {
2868 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2869 Assert(pBlockRemove == pBlock); RT_NOREF1(pBlockRemove);
2870
2871 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2872 RTListNodeRemove(&pBlock->NodeLru);
2873 RTMemFree(pBlock->pbmAllocated);
2874 RTMemFree(pBlock);
2875 pIoCtx->Req.Discard.pBlock = NULL;/* Safety precaution. */
2876 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
2877 rc = VINF_SUCCESS;
2878 }
2879
2880 LogFlowFunc(("returns rc=%Rrc\n", rc));
2881 return rc;
2882}
2883
2884/**
2885 * Removes the least recently used blocks from the waiting list until
2886 * the new value is reached - version for async I/O.
2887 *
2888 * @returns VBox status code.
2889 * @param pDisk VD disk container.
2890 * @param pIoCtx The I/O context associated with this discard operation.
2891 * @param cbDiscardingNew How many bytes should be waiting on success.
2892 * The number of bytes waiting can be less.
2893 */
2894static int vdDiscardRemoveBlocksAsync(PVDISK pDisk, PVDIOCTX pIoCtx, size_t cbDiscardingNew)
2895{
2896 int rc = VINF_SUCCESS;
2897 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2898
2899 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
2900 pDisk, pDiscard, cbDiscardingNew));
2901
2902 while (pDiscard->cbDiscarding > cbDiscardingNew)
2903 {
2904 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
2905
2906 Assert(!RTListIsEmpty(&pDiscard->ListLru));
2907
2908 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
2909 uint64_t offStart = pBlock->Core.Key;
2910 uint32_t idxStart = 0;
2911 size_t cbLeft = pBlock->cbDiscard;
2912 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
2913 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
2914
2915 while (cbLeft > 0)
2916 {
2917 int32_t idxEnd;
2918 size_t cbThis = cbLeft;
2919
2920 if (fAllocated)
2921 {
2922 /* Check for the first unallocated bit. */
2923 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
2924 if (idxEnd != -1)
2925 {
2926 cbThis = (idxEnd - idxStart) * 512;
2927 fAllocated = false;
2928 }
2929 }
2930 else
2931 {
2932 /* Mark as unused and check for the first set bit. */
2933 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
2934 if (idxEnd != -1)
2935 cbThis = (idxEnd - idxStart) * 512;
2936
2937 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2938 offStart, cbThis, NULL, NULL, &cbThis,
2939 NULL, VD_DISCARD_MARK_UNUSED);
2940 if ( RT_FAILURE(rc)
2941 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2942 break;
2943
2944 fAllocated = true;
2945 }
2946
2947 idxStart = idxEnd;
2948 offStart += cbThis;
2949 cbLeft -= cbThis;
2950 }
2951
2952 if ( RT_FAILURE(rc)
2953 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2954 break;
2955
2956 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2957 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
2958 RTListNodeRemove(&pBlock->NodeLru);
2959
2960 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2961 RTMemFree(pBlock->pbmAllocated);
2962 RTMemFree(pBlock);
2963 }
2964
2965 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2966 rc = VINF_SUCCESS;
2967
2968 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
2969
2970 LogFlowFunc(("returns rc=%Rrc\n", rc));
2971 return rc;
2972}
2973
2974/**
2975 * Async discard helper - discards the current range if there is no matching
2976 * block in the tree.
2977 *
2978 * @returns VBox status code.
2979 * @param pIoCtx The I/O context to operate on.
2980 */
2981static DECLCALLBACK(int) vdDiscardCurrentRangeAsync(PVDIOCTX pIoCtx)
2982{
2983 PVDISK pDisk = pIoCtx->pDisk;
2984 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2985 uint64_t offStart = pIoCtx->Req.Discard.offCur;
2986 size_t cbThisDiscard = pIoCtx->Req.Discard.cbThisDiscard;
2987 void *pbmAllocated = NULL;
2988 size_t cbPreAllocated, cbPostAllocated;
2989 int rc = VINF_SUCCESS;
2990
2991 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2992
2993 /* No block found, try to discard using the backend first. */
2994 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2995 offStart, cbThisDiscard, &cbPreAllocated,
2996 &cbPostAllocated, &cbThisDiscard,
2997 &pbmAllocated, 0);
2998 if (rc == VERR_VD_DISCARD_ALIGNMENT_NOT_MET)
2999 {
3000 /* Create new discard block. */
3001 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTMemAllocZ(sizeof(VDDISCARDBLOCK));
3002 if (pBlock)
3003 {
3004 pBlock->Core.Key = offStart - cbPreAllocated;
3005 pBlock->Core.KeyLast = offStart + cbThisDiscard + cbPostAllocated - 1;
3006 pBlock->cbDiscard = cbPreAllocated + cbThisDiscard + cbPostAllocated;
3007 pBlock->pbmAllocated = pbmAllocated;
3008 bool fInserted = RTAvlrU64Insert(pDiscard->pTreeBlocks, &pBlock->Core);
3009 Assert(fInserted); NOREF(fInserted);
3010
3011 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3012 pDiscard->cbDiscarding += pBlock->cbDiscard;
3013
3014 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3015 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3016 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3017 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3018
3019 if (pDiscard->cbDiscarding > VD_DISCARD_REMOVE_THRESHOLD)
3020 rc = vdDiscardRemoveBlocksAsync(pDisk, pIoCtx, VD_DISCARD_REMOVE_THRESHOLD);
3021 else
3022 rc = VINF_SUCCESS;
3023
3024 if (RT_SUCCESS(rc))
3025 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
3026 }
3027 else
3028 {
3029 RTMemFree(pbmAllocated);
3030 rc = VERR_NO_MEMORY;
3031 }
3032 }
3033 else if ( RT_SUCCESS(rc)
3034 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) /* Save state and andvance to next range. */
3035 {
3036 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3037 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3038 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3039 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3040 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3041 rc = VINF_SUCCESS;
3042 }
3043
3044 LogFlowFunc(("returns rc=%Rrc\n", rc));
3045 return rc;
3046}
3047
3048/**
3049 * Async discard helper - entry point.
3050 *
3051 * @returns VBox status code.
3052 * @param pIoCtx The I/O context to operate on.
3053 */
3054static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx)
3055{
3056 int rc = VINF_SUCCESS;
3057 PVDISK pDisk = pIoCtx->pDisk;
3058 PCRTRANGE paRanges = pIoCtx->Req.Discard.paRanges;
3059 unsigned cRanges = pIoCtx->Req.Discard.cRanges;
3060 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
3061
3062 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
3063
3064 /* Check if the I/O context processed all ranges. */
3065 if ( pIoCtx->Req.Discard.idxRange == cRanges
3066 && !pIoCtx->Req.Discard.cbDiscardLeft)
3067 {
3068 LogFlowFunc(("All ranges discarded, completing\n"));
3069 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs*/);
3070 return VINF_SUCCESS;
3071 }
3072
3073 if (pDisk->pIoCtxLockOwner != pIoCtx)
3074 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
3075
3076 if (RT_SUCCESS(rc))
3077 {
3078 uint64_t offStart = pIoCtx->Req.Discard.offCur;
3079 size_t cbDiscardLeft = pIoCtx->Req.Discard.cbDiscardLeft;
3080 size_t cbThisDiscard;
3081
3082 pDisk->uOffsetStartLocked = offStart;
3083 pDisk->uOffsetEndLocked = offStart + cbDiscardLeft;
3084
3085 if (RT_UNLIKELY(!pDiscard))
3086 {
3087 pDiscard = vdDiscardStateCreate();
3088 if (!pDiscard)
3089 return VERR_NO_MEMORY;
3090
3091 pDisk->pDiscard = pDiscard;
3092 }
3093
3094 if (!pIoCtx->Req.Discard.cbDiscardLeft)
3095 {
3096 offStart = paRanges[pIoCtx->Req.Discard.idxRange].offStart;
3097 cbDiscardLeft = paRanges[pIoCtx->Req.Discard.idxRange].cbRange;
3098 LogFlowFunc(("New range descriptor loaded (%u) offStart=%llu cbDiscard=%zu\n",
3099 pIoCtx->Req.Discard.idxRange, offStart, cbDiscardLeft));
3100 pIoCtx->Req.Discard.idxRange++;
3101 }
3102
3103 /* Look for a matching block in the AVL tree first. */
3104 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, false);
3105 if (!pBlock || pBlock->Core.KeyLast < offStart)
3106 {
3107 PVDDISCARDBLOCK pBlockAbove = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, true);
3108
3109 /* Clip range to remain in the current block. */
3110 if (pBlockAbove)
3111 cbThisDiscard = RT_MIN(cbDiscardLeft, pBlockAbove->Core.KeyLast - offStart + 1);
3112 else
3113 cbThisDiscard = cbDiscardLeft;
3114
3115 Assert(!(cbThisDiscard % 512));
3116 pIoCtx->Req.Discard.pBlock = NULL;
3117 pIoCtx->pfnIoCtxTransferNext = vdDiscardCurrentRangeAsync;
3118 }
3119 else
3120 {
3121 /* Range lies partly in the block, update allocation bitmap. */
3122 int32_t idxStart, idxEnd;
3123
3124 cbThisDiscard = RT_MIN(cbDiscardLeft, pBlock->Core.KeyLast - offStart + 1);
3125
3126 AssertPtr(pBlock);
3127
3128 Assert(!(cbThisDiscard % 512));
3129 Assert(!((offStart - pBlock->Core.Key) % 512));
3130
3131 idxStart = (offStart - pBlock->Core.Key) / 512;
3132 idxEnd = idxStart + (int32_t)(cbThisDiscard / 512);
3133
3134 ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd);
3135
3136 cbDiscardLeft -= cbThisDiscard;
3137 offStart += cbThisDiscard;
3138
3139 /* Call the backend to discard the block if it is completely unallocated now. */
3140 if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, (uint32_t)(pBlock->cbDiscard / 512)) == -1)
3141 {
3142 pIoCtx->Req.Discard.pBlock = pBlock;
3143 pIoCtx->pfnIoCtxTransferNext = vdDiscardWholeBlockAsync;
3144 rc = VINF_SUCCESS;
3145 }
3146 else
3147 {
3148 RTListNodeRemove(&pBlock->NodeLru);
3149 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3150
3151 /* Start with next range. */
3152 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3153 rc = VINF_SUCCESS;
3154 }
3155 }
3156
3157 /* Save state in the context. */
3158 pIoCtx->Req.Discard.offCur = offStart;
3159 pIoCtx->Req.Discard.cbDiscardLeft = cbDiscardLeft;
3160 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3161 }
3162
3163 LogFlowFunc(("returns rc=%Rrc\n", rc));
3164 return rc;
3165}
3166
3167/**
3168 * VD async I/O interface open callback.
3169 */
3170static DECLCALLBACK(int) vdIOOpenFallback(void *pvUser, const char *pszLocation,
3171 uint32_t fOpen, PFNVDCOMPLETED pfnCompleted,
3172 void **ppStorage)
3173{
3174 RT_NOREF1(pvUser);
3175 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)RTMemAllocZ(sizeof(VDIIOFALLBACKSTORAGE));
3176
3177 if (!pStorage)
3178 return VERR_NO_MEMORY;
3179
3180 pStorage->pfnCompleted = pfnCompleted;
3181
3182 /* Open the file. */
3183 int rc = RTFileOpen(&pStorage->File, pszLocation, fOpen);
3184 if (RT_SUCCESS(rc))
3185 {
3186 *ppStorage = pStorage;
3187 return VINF_SUCCESS;
3188 }
3189
3190 RTMemFree(pStorage);
3191 return rc;
3192}
3193
3194/**
3195 * VD async I/O interface close callback.
3196 */
3197static DECLCALLBACK(int) vdIOCloseFallback(void *pvUser, void *pvStorage)
3198{
3199 RT_NOREF1(pvUser);
3200 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3201
3202 RTFileClose(pStorage->File);
3203 RTMemFree(pStorage);
3204 return VINF_SUCCESS;
3205}
3206
3207static DECLCALLBACK(int) vdIODeleteFallback(void *pvUser, const char *pcszFilename)
3208{
3209 RT_NOREF1(pvUser);
3210 return RTFileDelete(pcszFilename);
3211}
3212
3213static DECLCALLBACK(int) vdIOMoveFallback(void *pvUser, const char *pcszSrc, const char *pcszDst, unsigned fMove)
3214{
3215 RT_NOREF1(pvUser);
3216 return RTFileMove(pcszSrc, pcszDst, fMove);
3217}
3218
3219static DECLCALLBACK(int) vdIOGetFreeSpaceFallback(void *pvUser, const char *pcszFilename, int64_t *pcbFreeSpace)
3220{
3221 RT_NOREF1(pvUser);
3222 return RTFsQuerySizes(pcszFilename, NULL, pcbFreeSpace, NULL, NULL);
3223}
3224
3225static DECLCALLBACK(int) vdIOGetModificationTimeFallback(void *pvUser, const char *pcszFilename, PRTTIMESPEC pModificationTime)
3226{
3227 RT_NOREF1(pvUser);
3228 RTFSOBJINFO info;
3229 int rc = RTPathQueryInfo(pcszFilename, &info, RTFSOBJATTRADD_NOTHING);
3230 if (RT_SUCCESS(rc))
3231 *pModificationTime = info.ModificationTime;
3232 return rc;
3233}
3234
3235/**
3236 * VD async I/O interface callback for retrieving the file size.
3237 */
3238static DECLCALLBACK(int) vdIOGetSizeFallback(void *pvUser, void *pvStorage, uint64_t *pcbSize)
3239{
3240 RT_NOREF1(pvUser);
3241 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3242
3243 return RTFileGetSize(pStorage->File, pcbSize);
3244}
3245
3246/**
3247 * VD async I/O interface callback for setting the file size.
3248 */
3249static DECLCALLBACK(int) vdIOSetSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize)
3250{
3251 RT_NOREF1(pvUser);
3252 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3253
3254 return RTFileSetSize(pStorage->File, cbSize);
3255}
3256
3257/**
3258 * VD async I/O interface callback for setting the file allocation size.
3259 */
3260static DECLCALLBACK(int) vdIOSetAllocationSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize,
3261 uint32_t fFlags)
3262{
3263 RT_NOREF2(pvUser, fFlags);
3264 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3265
3266 return RTFileSetAllocationSize(pStorage->File, cbSize, RTFILE_ALLOC_SIZE_F_DEFAULT);
3267}
3268
3269/**
3270 * VD async I/O interface callback for a synchronous write to the file.
3271 */
3272static DECLCALLBACK(int) vdIOWriteSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3273 const void *pvBuf, size_t cbWrite, size_t *pcbWritten)
3274{
3275 RT_NOREF1(pvUser);
3276 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3277
3278 return RTFileWriteAt(pStorage->File, uOffset, pvBuf, cbWrite, pcbWritten);
3279}
3280
3281/**
3282 * VD async I/O interface callback for a synchronous read from the file.
3283 */
3284static DECLCALLBACK(int) vdIOReadSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3285 void *pvBuf, size_t cbRead, size_t *pcbRead)
3286{
3287 RT_NOREF1(pvUser);
3288 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3289
3290 return RTFileReadAt(pStorage->File, uOffset, pvBuf, cbRead, pcbRead);
3291}
3292
3293/**
3294 * VD async I/O interface callback for a synchronous flush of the file data.
3295 */
3296static DECLCALLBACK(int) vdIOFlushSyncFallback(void *pvUser, void *pvStorage)
3297{
3298 RT_NOREF1(pvUser);
3299 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3300
3301 return RTFileFlush(pStorage->File);
3302}
3303
3304/**
3305 * VD async I/O interface callback for a asynchronous read from the file.
3306 */
3307static DECLCALLBACK(int) vdIOReadAsyncFallback(void *pvUser, void *pStorage, uint64_t uOffset,
3308 PCRTSGSEG paSegments, size_t cSegments,
3309 size_t cbRead, void *pvCompletion,
3310 void **ppTask)
3311{
3312 RT_NOREF8(pvUser, pStorage, uOffset, paSegments, cSegments, cbRead, pvCompletion, ppTask);
3313 return VERR_NOT_IMPLEMENTED;
3314}
3315
3316/**
3317 * VD async I/O interface callback for a asynchronous write to the file.
3318 */
3319static DECLCALLBACK(int) vdIOWriteAsyncFallback(void *pvUser, void *pStorage, uint64_t uOffset,
3320 PCRTSGSEG paSegments, size_t cSegments,
3321 size_t cbWrite, void *pvCompletion,
3322 void **ppTask)
3323{
3324 RT_NOREF8(pvUser, pStorage, uOffset, paSegments, cSegments, cbWrite, pvCompletion, ppTask);
3325 return VERR_NOT_IMPLEMENTED;
3326}
3327
3328/**
3329 * VD async I/O interface callback for a asynchronous flush of the file data.
3330 */
3331static DECLCALLBACK(int) vdIOFlushAsyncFallback(void *pvUser, void *pStorage,
3332 void *pvCompletion, void **ppTask)
3333{
3334 RT_NOREF4(pvUser, pStorage, pvCompletion, ppTask);
3335 return VERR_NOT_IMPLEMENTED;
3336}
3337
3338/**
3339 * Internal - Continues an I/O context after
3340 * it was halted because of an active transfer.
3341 */
3342static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
3343{
3344 PVDISK pDisk = pIoCtx->pDisk;
3345 int rc = VINF_SUCCESS;
3346
3347 VD_IS_LOCKED(pDisk);
3348
3349 if (RT_FAILURE(rcReq))
3350 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);
3351
3352 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
3353 {
3354 /* Continue the transfer */
3355 rc = vdIoCtxProcessLocked(pIoCtx);
3356
3357 if ( rc == VINF_VD_ASYNC_IO_FINISHED
3358 && ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false))
3359 {
3360 LogFlowFunc(("I/O context completed pIoCtx=%#p\n", pIoCtx));
3361 bool fFreeCtx = RT_BOOL(!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE));
3362 if (pIoCtx->pIoCtxParent)
3363 {
3364 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
3365
3366 Assert(!pIoCtxParent->pIoCtxParent);
3367 if (RT_FAILURE(pIoCtx->rcReq))
3368 ASMAtomicCmpXchgS32(&pIoCtxParent->rcReq, pIoCtx->rcReq, VINF_SUCCESS);
3369
3370 ASMAtomicDecU32(&pIoCtxParent->cDataTransfersPending);
3371
3372 if (pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE)
3373 {
3374 LogFlowFunc(("I/O context transferred %u bytes for the parent pIoCtxParent=%p\n",
3375 pIoCtx->Type.Child.cbTransferParent, pIoCtxParent));
3376
3377 /* Update the parent state. */
3378 Assert(pIoCtxParent->Req.Io.cbTransferLeft >= pIoCtx->Type.Child.cbTransferParent);
3379 ASMAtomicSubU32(&pIoCtxParent->Req.Io.cbTransferLeft, (uint32_t)pIoCtx->Type.Child.cbTransferParent);
3380 }
3381 else
3382 Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH);
3383
3384 /*
3385 * A completed child write means that we finished growing the image.
3386 * We have to process any pending writes now.
3387 */
3388 vdIoCtxUnlockDisk(pDisk, pIoCtxParent, false /* fProcessDeferredReqs */);
3389
3390 /* Unblock the parent */
3391 pIoCtxParent->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
3392
3393 rc = vdIoCtxProcessLocked(pIoCtxParent);
3394
3395 if ( rc == VINF_VD_ASYNC_IO_FINISHED
3396 && ASMAtomicCmpXchgBool(&pIoCtxParent->fComplete, true, false))
3397 {
3398 LogFlowFunc(("Parent I/O context completed pIoCtxParent=%#p rcReq=%Rrc\n", pIoCtxParent, pIoCtxParent->rcReq));
3399 bool fFreeParentCtx = RT_BOOL(!(pIoCtxParent->fFlags & VDIOCTX_FLAGS_DONT_FREE));
3400 vdIoCtxRootComplete(pDisk, pIoCtxParent);
3401 vdThreadFinishWrite(pDisk);
3402
3403 if (fFreeParentCtx)
3404 vdIoCtxFree(pDisk, pIoCtxParent);
3405 vdDiskProcessBlockedIoCtx(pDisk);
3406 }
3407 else if (!vdIoCtxIsDiskLockOwner(pDisk, pIoCtx))
3408 {
3409 /* Process any pending writes if the current request didn't caused another growing. */
3410 vdDiskProcessBlockedIoCtx(pDisk);
3411 }
3412 }
3413 else
3414 {
3415 if (pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH)
3416 {
3417 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDerredReqs */);
3418 vdThreadFinishWrite(pDisk);
3419 }
3420 else if ( pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE
3421 || pIoCtx->enmTxDir == VDIOCTXTXDIR_DISCARD)
3422 vdThreadFinishWrite(pDisk);
3423 else
3424 {
3425 Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_READ);
3426 vdThreadFinishRead(pDisk);
3427 }
3428
3429 LogFlowFunc(("I/O context completed pIoCtx=%#p rcReq=%Rrc\n", pIoCtx, pIoCtx->rcReq));
3430 vdIoCtxRootComplete(pDisk, pIoCtx);
3431 }
3432
3433 if (fFreeCtx)
3434 vdIoCtxFree(pDisk, pIoCtx);
3435 }
3436 }
3437
3438 return VINF_SUCCESS;
3439}
3440
3441/**
3442 * Internal - Called when user transfer completed.
3443 */
3444static int vdUserXferCompleted(PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
3445 PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
3446 size_t cbTransfer, int rcReq)
3447{
3448 int rc = VINF_SUCCESS;
3449 PVDISK pDisk = pIoCtx->pDisk;
3450
3451 LogFlowFunc(("pIoStorage=%#p pIoCtx=%#p pfnComplete=%#p pvUser=%#p cbTransfer=%zu rcReq=%Rrc\n",
3452 pIoStorage, pIoCtx, pfnComplete, pvUser, cbTransfer, rcReq));
3453
3454 VD_IS_LOCKED(pDisk);
3455
3456 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbTransfer);
3457 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTransfer); Assert(cbTransfer == (uint32_t)cbTransfer);
3458 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
3459
3460 if (pfnComplete)
3461 rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);
3462
3463 if (RT_SUCCESS(rc))
3464 rc = vdIoCtxContinue(pIoCtx, rcReq);
3465 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
3466 rc = VINF_SUCCESS;
3467
3468 return rc;
3469}
3470
3471static void vdIoCtxContinueDeferredList(PVDIOSTORAGE pIoStorage, PRTLISTANCHOR pListWaiting,
3472 PFNVDXFERCOMPLETED pfnComplete, void *pvUser, int rcReq)
3473{
3474 LogFlowFunc(("pIoStorage=%#p pListWaiting=%#p pfnComplete=%#p pvUser=%#p rcReq=%Rrc\n",
3475 pIoStorage, pListWaiting, pfnComplete, pvUser, rcReq));
3476
3477 /* Go through the waiting list and continue the I/O contexts. */
3478 while (!RTListIsEmpty(pListWaiting))
3479 {
3480 int rc = VINF_SUCCESS;
3481 PVDIOCTXDEFERRED pDeferred = RTListGetFirst(pListWaiting, VDIOCTXDEFERRED, NodeDeferred);
3482 PVDIOCTX pIoCtx = pDeferred->pIoCtx;
3483 RTListNodeRemove(&pDeferred->NodeDeferred);
3484
3485 RTMemFree(pDeferred);
3486 ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
3487
3488 if (pfnComplete)
3489 rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);
3490
3491 LogFlow(("Completion callback for I/O context %#p returned %Rrc\n", pIoCtx, rc));
3492
3493 if (RT_SUCCESS(rc))
3494 {
3495 rc = vdIoCtxContinue(pIoCtx, rcReq);
3496 AssertRC(rc);
3497 }
3498 else
3499 Assert(rc == VERR_VD_ASYNC_IO_IN_PROGRESS);
3500 }
3501}
3502
3503/**
3504 * Internal - Called when a meta transfer completed.
3505 */
3506static int vdMetaXferCompleted(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
3507 PVDMETAXFER pMetaXfer, int rcReq)
3508{
3509 PVDISK pDisk = pIoStorage->pVDIo->pDisk;
3510 RTLISTNODE ListIoCtxWaiting;
3511 bool fFlush;
3512
3513 LogFlowFunc(("pIoStorage=%#p pfnComplete=%#p pvUser=%#p pMetaXfer=%#p rcReq=%Rrc\n",
3514 pIoStorage, pfnComplete, pvUser, pMetaXfer, rcReq));
3515
3516 VD_IS_LOCKED(pDisk);
3517
3518 fFlush = VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_FLUSH;
3519
3520 if (!fFlush)
3521 {
3522 RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);
3523
3524 if (RT_FAILURE(rcReq))
3525 {
3526 /* Remove from the AVL tree. */
3527 LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
3528 bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
3529 Assert(fRemoved); NOREF(fRemoved);
3530 /* If this was a write check if there is a shadow buffer with updated data. */
3531 if (pMetaXfer->pbDataShw)
3532 {
3533 Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
3534 Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));
3535 RTListConcatenate(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
3536 RTMemFree(pMetaXfer->pbDataShw);
3537 pMetaXfer->pbDataShw = NULL;
3538 }
3539 RTMemFree(pMetaXfer);
3540 }
3541 else
3542 {
3543 /* Increase the reference counter to make sure it doesn't go away before the last context is processed. */
3544 pMetaXfer->cRefs++;
3545 }
3546 }
3547 else
3548 RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);
3549
3550 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
3551 vdIoCtxContinueDeferredList(pIoStorage, &ListIoCtxWaiting, pfnComplete, pvUser, rcReq);
3552
3553 /*
3554 * If there is a shadow buffer and the previous write was successful update with the
3555 * new data and trigger a new write.
3556 */
3557 if ( pMetaXfer->pbDataShw
3558 && RT_SUCCESS(rcReq)
3559 && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
3560 {
3561 LogFlowFunc(("pMetaXfer=%#p Updating from shadow buffer and triggering new write\n", pMetaXfer));
3562 memcpy(pMetaXfer->abData, pMetaXfer->pbDataShw, pMetaXfer->cbMeta);
3563 RTMemFree(pMetaXfer->pbDataShw);
3564 pMetaXfer->pbDataShw = NULL;
3565 Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));
3566
3567 /* Setup a new I/O write. */
3568 PVDIOTASK pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
3569 if (RT_LIKELY(pIoTask))
3570 {
3571 void *pvTask = NULL;
3572 RTSGSEG Seg;
3573
3574 Seg.cbSeg = pMetaXfer->cbMeta;
3575 Seg.pvSeg = pMetaXfer->abData;
3576
3577 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
3578 rcReq = pIoStorage->pVDIo->pInterfaceIo->pfnWriteAsync(pIoStorage->pVDIo->pInterfaceIo->Core.pvUser,
3579 pIoStorage->pStorage,
3580 pMetaXfer->Core.Key, &Seg, 1,
3581 pMetaXfer->cbMeta, pIoTask,
3582 &pvTask);
3583 if ( RT_SUCCESS(rcReq)
3584 || rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
3585 {
3586 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
3587 vdIoTaskFree(pDisk, pIoTask);
3588 }
3589 else
3590 RTListMove(&pMetaXfer->ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
3591 }
3592 else
3593 rcReq = VERR_NO_MEMORY;
3594
3595 /* Cleanup if there was an error or the request completed already. */
3596 if (rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
3597 vdIoCtxContinueDeferredList(pIoStorage, &pMetaXfer->ListIoCtxShwWrites, pfnComplete, pvUser, rcReq);
3598 }
3599
3600 /* Remove if not used anymore. */
3601 if (!fFlush)
3602 {
3603 pMetaXfer->cRefs--;
3604 if (!pMetaXfer->cRefs && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting))
3605 {
3606 /* Remove from the AVL tree. */
3607 LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
3608 bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
3609 Assert(fRemoved); NOREF(fRemoved);
3610 RTMemFree(pMetaXfer);
3611 }
3612 }
3613 else if (fFlush)
3614 RTMemFree(pMetaXfer);
3615
3616 return VINF_SUCCESS;
3617}
3618
3619/**
3620 * Processes a list of waiting I/O tasks. The disk lock must be held by caller.
3621 *
3622 * @returns nothing.
3623 * @param pDisk The disk to process the list for.
3624 */
3625static void vdIoTaskProcessWaitingList(PVDISK pDisk)
3626{
3627 LogFlowFunc(("pDisk=%#p\n", pDisk));
3628
3629 VD_IS_LOCKED(pDisk);
3630
3631 PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK);
3632
3633 Log(("I/O task list cleared\n"));
3634
3635 /* Reverse order. */
3636 PVDIOTASK pCur = pHead;
3637 pHead = NULL;
3638 while (pCur)
3639 {
3640 PVDIOTASK pInsert = pCur;
3641 pCur = pCur->pNext;
3642 pInsert->pNext = pHead;
3643 pHead = pInsert;
3644 }
3645
3646 while (pHead)
3647 {
3648 PVDIOSTORAGE pIoStorage = pHead->pIoStorage;
3649
3650 if (!pHead->fMeta)
3651 vdUserXferCompleted(pIoStorage, pHead->Type.User.pIoCtx,
3652 pHead->pfnComplete, pHead->pvUser,
3653 pHead->Type.User.cbTransfer, pHead->rcReq);
3654 else
3655 vdMetaXferCompleted(pIoStorage, pHead->pfnComplete, pHead->pvUser,
3656 pHead->Type.Meta.pMetaXfer, pHead->rcReq);
3657
3658 pCur = pHead;
3659 pHead = pHead->pNext;
3660 vdIoTaskFree(pDisk, pCur);
3661 }
3662}
3663
3664/**
3665 * Process any I/O context on the halted list.
3666 *
3667 * @returns nothing.
3668 * @param pDisk The disk.
3669 */
3670static void vdIoCtxProcessHaltedList(PVDISK pDisk)
3671{
3672 LogFlowFunc(("pDisk=%#p\n", pDisk));
3673
3674 VD_IS_LOCKED(pDisk);
3675
3676 /* Get the waiting list and process it in FIFO order. */
3677 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHaltedHead, NULL, PVDIOCTX);
3678
3679 /* Reverse it. */
3680 PVDIOCTX pCur = pIoCtxHead;
3681 pIoCtxHead = NULL;
3682 while (pCur)
3683 {
3684 PVDIOCTX pInsert = pCur;
3685 pCur = pCur->pIoCtxNext;
3686 pInsert->pIoCtxNext = pIoCtxHead;
3687 pIoCtxHead = pInsert;
3688 }
3689
3690 /* Process now. */
3691 pCur = pIoCtxHead;
3692 while (pCur)
3693 {
3694 PVDIOCTX pTmp = pCur;
3695
3696 pCur = pCur->pIoCtxNext;
3697 pTmp->pIoCtxNext = NULL;
3698
3699 /* Continue */
3700 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
3701 vdIoCtxContinue(pTmp, pTmp->rcReq);
3702 }
3703}
3704
3705/**
3706 * Unlock the disk and process pending tasks.
3707 *
3708 * @returns VBox status code.
3709 * @param pDisk The disk to unlock.
3710 * @param pIoCtxRc The I/O context to get the status code from, optional.
3711 */
3712static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc)
3713{
3714 int rc = VINF_SUCCESS;
3715
3716 VD_IS_LOCKED(pDisk);
3717
3718 /*
3719 * Process the list of waiting I/O tasks first
3720 * because they might complete I/O contexts.
3721 * Same for the list of halted I/O contexts.
3722 * Afterwards comes the list of new I/O contexts.
3723 */
3724 vdIoTaskProcessWaitingList(pDisk);
3725 vdIoCtxProcessHaltedList(pDisk);
3726 rc = vdDiskProcessWaitingIoCtx(pDisk, pIoCtxRc);
3727 ASMAtomicXchgBool(&pDisk->fLocked, false);
3728
3729 /*
3730 * Need to check for new I/O tasks and waiting I/O contexts now
3731 * again as other threads might added them while we processed
3732 * previous lists.
3733 */
3734 while ( ASMAtomicUoReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX) != NULL
3735 || ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK) != NULL
3736 || ASMAtomicUoReadPtrT(&pDisk->pIoCtxHaltedHead, PVDIOCTX) != NULL)
3737 {
3738 /* Try lock disk again. */
3739 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
3740 {
3741 vdIoTaskProcessWaitingList(pDisk);
3742 vdIoCtxProcessHaltedList(pDisk);
3743 vdDiskProcessWaitingIoCtx(pDisk, NULL);
3744 ASMAtomicXchgBool(&pDisk->fLocked, false);
3745 }
3746 else /* Let the other thread everything when he unlocks the disk. */
3747 break;
3748 }
3749
3750 return rc;
3751}
3752
3753/**
3754 * Try to lock the disk to complete pressing of the I/O task.
3755 * The completion is deferred if the disk is locked already.
3756 *
3757 * @returns nothing.
3758 * @param pIoTask The I/O task to complete.
3759 */
3760static void vdXferTryLockDiskDeferIoTask(PVDIOTASK pIoTask)
3761{
3762 PVDIOSTORAGE pIoStorage = pIoTask->pIoStorage;
3763 PVDISK pDisk = pIoStorage->pVDIo->pDisk;
3764
3765 Log(("Deferring I/O task pIoTask=%p\n", pIoTask));
3766
3767 /* Put it on the waiting list. */
3768 PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK);
3769 PVDIOTASK pHeadOld;
3770 pIoTask->pNext = pNext;
3771 while (!ASMAtomicCmpXchgExPtr(&pDisk->pIoTasksPendingHead, pIoTask, pNext, &pHeadOld))
3772 {
3773 pNext = pHeadOld;
3774 Assert(pNext != pIoTask);
3775 pIoTask->pNext = pNext;
3776 ASMNopPause();
3777 }
3778
3779 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
3780 {
3781 /* Release disk lock, it will take care of processing all lists. */
3782 vdDiskUnlock(pDisk, NULL);
3783 }
3784}
3785
3786static DECLCALLBACK(int) vdIOIntReqCompleted(void *pvUser, int rcReq)
3787{
3788 PVDIOTASK pIoTask = (PVDIOTASK)pvUser;
3789
3790 LogFlowFunc(("Task completed pIoTask=%#p\n", pIoTask));
3791
3792 pIoTask->rcReq = rcReq;
3793 vdXferTryLockDiskDeferIoTask(pIoTask);
3794 return VINF_SUCCESS;
3795}
3796
3797/**
3798 * VD I/O interface callback for opening a file.
3799 */
3800static DECLCALLBACK(int) vdIOIntOpen(void *pvUser, const char *pszLocation,
3801 unsigned uOpenFlags, PPVDIOSTORAGE ppIoStorage)
3802{
3803 int rc = VINF_SUCCESS;
3804 PVDIO pVDIo = (PVDIO)pvUser;
3805 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
3806
3807 if (!pIoStorage)
3808 return VERR_NO_MEMORY;
3809
3810 /* Create the AVl tree. */
3811 pIoStorage->pTreeMetaXfers = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
3812 if (pIoStorage->pTreeMetaXfers)
3813 {
3814 rc = pVDIo->pInterfaceIo->pfnOpen(pVDIo->pInterfaceIo->Core.pvUser,
3815 pszLocation, uOpenFlags,
3816 vdIOIntReqCompleted,
3817 &pIoStorage->pStorage);
3818 if (RT_SUCCESS(rc))
3819 {
3820 pIoStorage->pVDIo = pVDIo;
3821 *ppIoStorage = pIoStorage;
3822 return VINF_SUCCESS;
3823 }
3824
3825 RTMemFree(pIoStorage->pTreeMetaXfers);
3826 }
3827 else
3828 rc = VERR_NO_MEMORY;
3829
3830 RTMemFree(pIoStorage);
3831 return rc;
3832}
3833
3834static DECLCALLBACK(int) vdIOIntTreeMetaXferDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
3835{
3836 RT_NOREF2(pNode, pvUser);
3837 AssertMsgFailed(("Tree should be empty at this point!\n"));
3838 return VINF_SUCCESS;
3839}
3840
3841static DECLCALLBACK(int) vdIOIntClose(void *pvUser, PVDIOSTORAGE pIoStorage)
3842{
3843 int rc = VINF_SUCCESS;
3844 PVDIO pVDIo = (PVDIO)pvUser;
3845
3846 /* We free everything here, even if closing the file failed for some reason. */
3847 rc = pVDIo->pInterfaceIo->pfnClose(pVDIo->pInterfaceIo->Core.pvUser, pIoStorage->pStorage);
3848 RTAvlrFileOffsetDestroy(pIoStorage->pTreeMetaXfers, vdIOIntTreeMetaXferDestroy, NULL);
3849 RTMemFree(pIoStorage->pTreeMetaXfers);
3850 RTMemFree(pIoStorage);
3851 return rc;
3852}
3853
3854static DECLCALLBACK(int) vdIOIntDelete(void *pvUser, const char *pcszFilename)
3855{
3856 PVDIO pVDIo = (PVDIO)pvUser;
3857 return pVDIo->pInterfaceIo->pfnDelete(pVDIo->pInterfaceIo->Core.pvUser,
3858 pcszFilename);
3859}
3860
3861static DECLCALLBACK(int) vdIOIntMove(void *pvUser, const char *pcszSrc, const char *pcszDst,
3862 unsigned fMove)
3863{
3864 PVDIO pVDIo = (PVDIO)pvUser;
3865 return pVDIo->pInterfaceIo->pfnMove(pVDIo->pInterfaceIo->Core.pvUser,
3866 pcszSrc, pcszDst, fMove);
3867}
3868
3869static DECLCALLBACK(int) vdIOIntGetFreeSpace(void *pvUser, const char *pcszFilename,
3870 int64_t *pcbFreeSpace)
3871{
3872 PVDIO pVDIo = (PVDIO)pvUser;
3873 return pVDIo->pInterfaceIo->pfnGetFreeSpace(pVDIo->pInterfaceIo->Core.pvUser,
3874 pcszFilename, pcbFreeSpace);
3875}
3876
3877static DECLCALLBACK(int) vdIOIntGetModificationTime(void *pvUser, const char *pcszFilename,
3878 PRTTIMESPEC pModificationTime)
3879{
3880 PVDIO pVDIo = (PVDIO)pvUser;
3881 return pVDIo->pInterfaceIo->pfnGetModificationTime(pVDIo->pInterfaceIo->Core.pvUser,
3882 pcszFilename, pModificationTime);
3883}
3884
3885static DECLCALLBACK(int) vdIOIntGetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3886 uint64_t *pcbSize)
3887{
3888 PVDIO pVDIo = (PVDIO)pvUser;
3889 return pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
3890 pIoStorage->pStorage, pcbSize);
3891}
3892
3893static DECLCALLBACK(int) vdIOIntSetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3894 uint64_t cbSize)
3895{
3896 PVDIO pVDIo = (PVDIO)pvUser;
3897 return pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
3898 pIoStorage->pStorage, cbSize);
3899}
3900
3901static DECLCALLBACK(int) vdIOIntSetAllocationSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3902 uint64_t cbSize, uint32_t fFlags,
3903 PVDINTERFACEPROGRESS pIfProgress,
3904 unsigned uPercentStart, unsigned uPercentSpan)
3905{
3906 PVDIO pVDIo = (PVDIO)pvUser;
3907 int rc = pVDIo->pInterfaceIo->pfnSetAllocationSize(pVDIo->pInterfaceIo->Core.pvUser,
3908 pIoStorage->pStorage, cbSize, fFlags);
3909 if (rc == VERR_NOT_SUPPORTED)
3910 {
3911 /* Fallback if the underlying medium does not support optimized storage allocation. */
3912 uint64_t cbSizeCur = 0;
3913 rc = pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
3914 pIoStorage->pStorage, &cbSizeCur);
3915 if (RT_SUCCESS(rc))
3916 {
3917 if (cbSizeCur < cbSize)
3918 {
3919 const size_t cbBuf = 128 * _1K;
3920 void *pvBuf = RTMemTmpAllocZ(cbBuf);
3921 if (RT_LIKELY(pvBuf))
3922 {
3923 uint64_t cbFill = cbSize - cbSizeCur;
3924 uint64_t uOff = 0;
3925
3926 /* Write data to all blocks. */
3927 while ( uOff < cbFill
3928 && RT_SUCCESS(rc))
3929 {
3930 size_t cbChunk = (size_t)RT_MIN(cbFill - uOff, cbBuf);
3931
3932 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
3933 pIoStorage->pStorage, cbSizeCur + uOff,
3934 pvBuf, cbChunk, NULL);
3935 if (RT_SUCCESS(rc))
3936 {
3937 uOff += cbChunk;
3938
3939 rc = vdIfProgress(pIfProgress, uPercentStart + uOff * uPercentSpan / cbFill);
3940 }
3941 }
3942
3943 RTMemTmpFree(pvBuf);
3944 }
3945 else
3946 rc = VERR_NO_MEMORY;
3947 }
3948 else if (cbSizeCur > cbSize)
3949 rc = pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
3950 pIoStorage->pStorage, cbSize);
3951 }
3952 }
3953
3954 if (RT_SUCCESS(rc))
3955 rc = vdIfProgress(pIfProgress, uPercentStart + uPercentSpan);
3956
3957 return rc;
3958}
3959
3960static DECLCALLBACK(int) vdIOIntReadUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
3961 PVDIOCTX pIoCtx, size_t cbRead)
3962{
3963 int rc = VINF_SUCCESS;
3964 PVDIO pVDIo = (PVDIO)pvUser;
3965 PVDISK pDisk = pVDIo->pDisk;
3966
3967 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbRead=%u\n",
3968 pvUser, pIoStorage, uOffset, pIoCtx, cbRead));
3969
3970 /** @todo Enable check for sync I/O later. */
3971 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
3972 VD_IS_LOCKED(pDisk);
3973
3974 Assert(cbRead > 0);
3975
3976 if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
3977 {
3978 RTSGSEG Seg;
3979 unsigned cSegments = 1;
3980 size_t cbTaskRead = 0;
3981
3982 /* Synchronous I/O contexts only have one buffer segment. */
3983 AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
3984 ("Invalid number of buffer segments for synchronous I/O context"),
3985 VERR_INVALID_PARAMETER);
3986
3987 cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbRead);
3988 Assert(cbRead == cbTaskRead);
3989 Assert(cSegments == 1);
3990 rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
3991 pIoStorage->pStorage, uOffset,
3992 Seg.pvSeg, cbRead, NULL);
3993 if (RT_SUCCESS(rc))
3994 {
3995 Assert(cbRead == (uint32_t)cbRead);
3996 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbRead);
3997 }
3998 }
3999 else
4000 {
4001 /* Build the S/G array and spawn a new I/O task */
4002 while (cbRead)
4003 {
4004 RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX];
4005 unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX;
4006 size_t cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbRead);
4007
4008 Assert(cSegments > 0);
4009 Assert(cbTaskRead > 0);
4010 AssertMsg(cbTaskRead <= cbRead, ("Invalid number of bytes to read\n"));
4011
4012 LogFlow(("Reading %u bytes into %u segments\n", cbTaskRead, cSegments));
4013
4014#ifdef RT_STRICT
4015 for (unsigned i = 0; i < cSegments; i++)
4016 AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
4017 ("Segment %u is invalid\n", i));
4018#endif
4019
4020 Assert(cbTaskRead == (uint32_t)cbTaskRead);
4021 PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, NULL, NULL, pIoCtx, (uint32_t)cbTaskRead);
4022
4023 if (!pIoTask)
4024 return VERR_NO_MEMORY;
4025
4026 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
4027
4028 void *pvTask;
4029 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4030 rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
4031 pIoStorage->pStorage, uOffset,
4032 aSeg, cSegments, cbTaskRead, pIoTask,
4033 &pvTask);
4034 if (RT_SUCCESS(rc))
4035 {
4036 AssertMsg(cbTaskRead <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
4037 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskRead);
4038 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4039 vdIoTaskFree(pDisk, pIoTask);
4040 }
4041 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4042 {
4043 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4044 vdIoTaskFree(pDisk, pIoTask);
4045 break;
4046 }
4047
4048 uOffset += cbTaskRead;
4049 cbRead -= cbTaskRead;
4050 }
4051 }
4052
4053 LogFlowFunc(("returns rc=%Rrc\n", rc));
4054 return rc;
4055}
4056
4057static DECLCALLBACK(int) vdIOIntWriteUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
4058 PVDIOCTX pIoCtx, size_t cbWrite, PFNVDXFERCOMPLETED pfnComplete,
4059 void *pvCompleteUser)
4060{
4061 int rc = VINF_SUCCESS;
4062 PVDIO pVDIo = (PVDIO)pvUser;
4063 PVDISK pDisk = pVDIo->pDisk;
4064
4065 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbWrite=%u\n",
4066 pvUser, pIoStorage, uOffset, pIoCtx, cbWrite));
4067
4068 /** @todo Enable check for sync I/O later. */
4069 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4070 VD_IS_LOCKED(pDisk);
4071
4072 Assert(cbWrite > 0);
4073
4074 if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4075 {
4076 RTSGSEG Seg;
4077 unsigned cSegments = 1;
4078 size_t cbTaskWrite = 0;
4079
4080 /* Synchronous I/O contexts only have one buffer segment. */
4081 AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
4082 ("Invalid number of buffer segments for synchronous I/O context"),
4083 VERR_INVALID_PARAMETER);
4084
4085 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbWrite);
4086 Assert(cbWrite == cbTaskWrite);
4087 Assert(cSegments == 1);
4088 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
4089 pIoStorage->pStorage, uOffset,
4090 Seg.pvSeg, cbWrite, NULL);
4091 if (RT_SUCCESS(rc))
4092 {
4093 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbWrite);
4094 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbWrite);
4095 }
4096 }
4097 else
4098 {
4099 /* Build the S/G array and spawn a new I/O task */
4100 while (cbWrite)
4101 {
4102 RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX];
4103 unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX;
4104 size_t cbTaskWrite = 0;
4105
4106 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbWrite);
4107
4108 Assert(cSegments > 0);
4109 Assert(cbTaskWrite > 0);
4110 AssertMsg(cbTaskWrite <= cbWrite, ("Invalid number of bytes to write\n"));
4111
4112 LogFlow(("Writing %u bytes from %u segments\n", cbTaskWrite, cSegments));
4113
4114#ifdef DEBUG
4115 for (unsigned i = 0; i < cSegments; i++)
4116 AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
4117 ("Segment %u is invalid\n", i));
4118#endif
4119
4120 Assert(cbTaskWrite == (uint32_t)cbTaskWrite);
4121 PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, pfnComplete, pvCompleteUser, pIoCtx, (uint32_t)cbTaskWrite);
4122
4123 if (!pIoTask)
4124 return VERR_NO_MEMORY;
4125
4126 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
4127
4128 void *pvTask;
4129 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4130 rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
4131 pIoStorage->pStorage,
4132 uOffset, aSeg, cSegments,
4133 cbTaskWrite, pIoTask, &pvTask);
4134 if (RT_SUCCESS(rc))
4135 {
4136 AssertMsg(cbTaskWrite <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
4137 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskWrite);
4138 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4139 vdIoTaskFree(pDisk, pIoTask);
4140 }
4141 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4142 {
4143 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4144 vdIoTaskFree(pDisk, pIoTask);
4145 break;
4146 }
4147
4148 uOffset += cbTaskWrite;
4149 cbWrite -= cbTaskWrite;
4150 }
4151 }
4152
4153 LogFlowFunc(("returns rc=%Rrc\n", rc));
4154 return rc;
4155}
4156
4157static DECLCALLBACK(int) vdIOIntReadMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
4158 void *pvBuf, size_t cbRead, PVDIOCTX pIoCtx,
4159 PPVDMETAXFER ppMetaXfer, PFNVDXFERCOMPLETED pfnComplete,
4160 void *pvCompleteUser)
4161{
4162 PVDIO pVDIo = (PVDIO)pvUser;
4163 PVDISK pDisk = pVDIo->pDisk;
4164 int rc = VINF_SUCCESS;
4165 RTSGSEG Seg;
4166 PVDIOTASK pIoTask;
4167 PVDMETAXFER pMetaXfer = NULL;
4168 void *pvTask = NULL;
4169
4170 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbRead=%u\n",
4171 pvUser, pIoStorage, uOffset, pvBuf, cbRead));
4172
4173 AssertMsgReturn( pIoCtx
4174 || (!ppMetaXfer && !pfnComplete && !pvCompleteUser),
4175 ("A synchronous metadata read is requested but the parameters are wrong\n"),
4176 VERR_INVALID_POINTER);
4177
4178 /** @todo Enable check for sync I/O later. */
4179 if ( pIoCtx
4180 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4181 VD_IS_LOCKED(pDisk);
4182
4183 if ( !pIoCtx
4184 || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4185 {
4186 /* Handle synchronous metadata I/O. */
4187 /** @todo Integrate with metadata transfers below. */
4188 rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
4189 pIoStorage->pStorage, uOffset,
4190 pvBuf, cbRead, NULL);
4191 if (ppMetaXfer)
4192 *ppMetaXfer = NULL;
4193 }
4194 else
4195 {
4196 pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
4197 if (!pMetaXfer)
4198 {
4199#ifdef RT_STRICT
4200 pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGetBestFit(pIoStorage->pTreeMetaXfers, uOffset, false /* fAbove */);
4201 AssertMsg(!pMetaXfer || (pMetaXfer->Core.Key + (RTFOFF)pMetaXfer->cbMeta <= (RTFOFF)uOffset),
4202 ("Overlapping meta transfers!\n"));
4203#endif
4204
4205 /* Allocate a new meta transfer. */
4206 pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbRead);
4207 if (!pMetaXfer)
4208 return VERR_NO_MEMORY;
4209
4210 pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
4211 if (!pIoTask)
4212 {
4213 RTMemFree(pMetaXfer);
4214 return VERR_NO_MEMORY;
4215 }
4216
4217 Seg.cbSeg = cbRead;
4218 Seg.pvSeg = pMetaXfer->abData;
4219
4220 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_READ);
4221 rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
4222 pIoStorage->pStorage,
4223 uOffset, &Seg, 1,
4224 cbRead, pIoTask, &pvTask);
4225
4226 if (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
4227 {
4228 bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
4229 Assert(fInserted); NOREF(fInserted);
4230 }
4231 else
4232 RTMemFree(pMetaXfer);
4233
4234 if (RT_SUCCESS(rc))
4235 {
4236 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
4237 vdIoTaskFree(pDisk, pIoTask);
4238 }
4239 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS && !pfnComplete)
4240 rc = VERR_VD_NOT_ENOUGH_METADATA;
4241 }
4242
4243 Assert(VALID_PTR(pMetaXfer) || RT_FAILURE(rc));
4244
4245 if (RT_SUCCESS(rc) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
4246 {
4247 /* If it is pending add the request to the list. */
4248 if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_READ)
4249 {
4250 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4251 AssertPtr(pDeferred);
4252
4253 RTListInit(&pDeferred->NodeDeferred);
4254 pDeferred->pIoCtx = pIoCtx;
4255
4256 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4257 RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
4258 rc = VERR_VD_NOT_ENOUGH_METADATA;
4259 }
4260 else
4261 {
4262 /* Transfer the data. */
4263 pMetaXfer->cRefs++;
4264 Assert(pMetaXfer->cbMeta >= cbRead);
4265 Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
4266 if (pMetaXfer->pbDataShw)
4267 memcpy(pvBuf, pMetaXfer->pbDataShw, cbRead);
4268 else
4269 memcpy(pvBuf, pMetaXfer->abData, cbRead);
4270 *ppMetaXfer = pMetaXfer;
4271 }
4272 }
4273 }
4274
4275 LogFlowFunc(("returns rc=%Rrc\n", rc));
4276 return rc;
4277}
4278
4279static DECLCALLBACK(int) vdIOIntWriteMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
4280 const void *pvBuf, size_t cbWrite, PVDIOCTX pIoCtx,
4281 PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
4282{
4283 PVDIO pVDIo = (PVDIO)pvUser;
4284 PVDISK pDisk = pVDIo->pDisk;
4285 int rc = VINF_SUCCESS;
4286 RTSGSEG Seg;
4287 PVDIOTASK pIoTask;
4288 PVDMETAXFER pMetaXfer = NULL;
4289 bool fInTree = false;
4290 void *pvTask = NULL;
4291
4292 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbWrite=%u\n",
4293 pvUser, pIoStorage, uOffset, pvBuf, cbWrite));
4294
4295 AssertMsgReturn( pIoCtx
4296 || (!pfnComplete && !pvCompleteUser),
4297 ("A synchronous metadata write is requested but the parameters are wrong\n"),
4298 VERR_INVALID_POINTER);
4299
4300 /** @todo Enable check for sync I/O later. */
4301 if ( pIoCtx
4302 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4303 VD_IS_LOCKED(pDisk);
4304
4305 if ( !pIoCtx
4306 || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4307 {
4308 /* Handle synchronous metadata I/O. */
4309 /** @todo Integrate with metadata transfers below. */
4310 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
4311 pIoStorage->pStorage, uOffset,
4312 pvBuf, cbWrite, NULL);
4313 }
4314 else
4315 {
4316 pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
4317 if (!pMetaXfer)
4318 {
4319 /* Allocate a new meta transfer. */
4320 pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbWrite);
4321 if (!pMetaXfer)
4322 return VERR_NO_MEMORY;
4323 }
4324 else
4325 {
4326 Assert(pMetaXfer->cbMeta >= cbWrite);
4327 Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
4328 fInTree = true;
4329 }
4330
4331 if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
4332 {
4333 pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
4334 if (!pIoTask)
4335 {
4336 RTMemFree(pMetaXfer);
4337 return VERR_NO_MEMORY;
4338 }
4339
4340 memcpy(pMetaXfer->abData, pvBuf, cbWrite);
4341 Seg.cbSeg = cbWrite;
4342 Seg.pvSeg = pMetaXfer->abData;
4343
4344 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4345
4346 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
4347 rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
4348 pIoStorage->pStorage,
4349 uOffset, &Seg, 1, cbWrite, pIoTask,
4350 &pvTask);
4351 if (RT_SUCCESS(rc))
4352 {
4353 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
4354 ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
4355 vdIoTaskFree(pDisk, pIoTask);
4356 if (fInTree && !pMetaXfer->cRefs)
4357 {
4358 LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
4359 bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
4360 AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);
4361 RTMemFree(pMetaXfer);
4362 pMetaXfer = NULL;
4363 }
4364 }
4365 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
4366 {
4367 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4368 AssertPtr(pDeferred);
4369
4370 RTListInit(&pDeferred->NodeDeferred);
4371 pDeferred->pIoCtx = pIoCtx;
4372
4373 if (!fInTree)
4374 {
4375 bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
4376 Assert(fInserted); NOREF(fInserted);
4377 }
4378
4379 RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
4380 }
4381 else
4382 {
4383 RTMemFree(pMetaXfer);
4384 pMetaXfer = NULL;
4385 }
4386 }
4387 else
4388 {
4389 /* I/O is in progress, update shadow buffer and add to waiting list. */
4390 Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
4391 if (!pMetaXfer->pbDataShw)
4392 {
4393 /* Allocate shadow buffer and set initial state. */
4394 LogFlowFunc(("pMetaXfer=%#p Creating shadow buffer\n", pMetaXfer));
4395 pMetaXfer->pbDataShw = (uint8_t *)RTMemAlloc(pMetaXfer->cbMeta);
4396 if (RT_LIKELY(pMetaXfer->pbDataShw))
4397 memcpy(pMetaXfer->pbDataShw, pMetaXfer->abData, pMetaXfer->cbMeta);
4398 else
4399 rc = VERR_NO_MEMORY;
4400 }
4401
4402 if (RT_SUCCESS(rc))
4403 {
4404 /* Update with written data and append to waiting list. */
4405 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4406 if (pDeferred)
4407 {
4408 LogFlowFunc(("pMetaXfer=%#p Updating shadow buffer\n", pMetaXfer));
4409
4410 RTListInit(&pDeferred->NodeDeferred);
4411 pDeferred->pIoCtx = pIoCtx;
4412 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4413 memcpy(pMetaXfer->pbDataShw, pvBuf, cbWrite);
4414 RTListAppend(&pMetaXfer->ListIoCtxShwWrites, &pDeferred->NodeDeferred);
4415 }
4416 else
4417 {
4418 /*
4419 * Free shadow buffer if there is no one depending on it, i.e.
4420 * we just allocated it.
4421 */
4422 if (RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites))
4423 {
4424 RTMemFree(pMetaXfer->pbDataShw);
4425 pMetaXfer->pbDataShw = NULL;
4426 }
4427 rc = VERR_NO_MEMORY;
4428 }
4429 }
4430 }
4431 }
4432
4433 LogFlowFunc(("returns rc=%Rrc\n", rc));
4434 return rc;
4435}
4436
4437static DECLCALLBACK(void) vdIOIntMetaXferRelease(void *pvUser, PVDMETAXFER pMetaXfer)
4438{
4439 PVDIO pVDIo = (PVDIO)pvUser;
4440 PVDISK pDisk = pVDIo->pDisk;
4441 PVDIOSTORAGE pIoStorage;
4442
4443 /*
4444 * It is possible that we get called with a NULL metadata xfer handle
4445 * for synchronous I/O. Just exit.
4446 */
4447 if (!pMetaXfer)
4448 return;
4449
4450 pIoStorage = pMetaXfer->pIoStorage;
4451
4452 VD_IS_LOCKED(pDisk);
4453
4454 Assert( VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE
4455 || VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
4456 Assert(pMetaXfer->cRefs > 0);
4457
4458 pMetaXfer->cRefs--;
4459 if ( !pMetaXfer->cRefs
4460 && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting)
4461 && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
4462 {
4463 /* Free the meta data entry. */
4464 LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
4465 bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
4466 AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);
4467
4468 RTMemFree(pMetaXfer);
4469 }
4470}
4471
4472static DECLCALLBACK(int) vdIOIntFlush(void *pvUser, PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
4473 PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
4474{
4475 PVDIO pVDIo = (PVDIO)pvUser;
4476 PVDISK pDisk = pVDIo->pDisk;
4477 int rc = VINF_SUCCESS;
4478 PVDIOTASK pIoTask;
4479 PVDMETAXFER pMetaXfer = NULL;
4480 void *pvTask = NULL;
4481
4482 LogFlowFunc(("pvUser=%#p pIoStorage=%#p pIoCtx=%#p\n",
4483 pvUser, pIoStorage, pIoCtx));
4484
4485 AssertMsgReturn( pIoCtx
4486 || (!pfnComplete && !pvCompleteUser),
4487 ("A synchronous metadata write is requested but the parameters are wrong\n"),
4488 VERR_INVALID_POINTER);
4489
4490 /** @todo Enable check for sync I/O later. */
4491 if ( pIoCtx
4492 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4493 VD_IS_LOCKED(pDisk);
4494
4495 if (pVDIo->fIgnoreFlush)
4496 return VINF_SUCCESS;
4497
4498 if ( !pIoCtx
4499 || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4500 {
4501 /* Handle synchronous flushes. */
4502 /** @todo Integrate with metadata transfers below. */
4503 rc = pVDIo->pInterfaceIo->pfnFlushSync(pVDIo->pInterfaceIo->Core.pvUser,
4504 pIoStorage->pStorage);
4505 }
4506 else
4507 {
4508 /* Allocate a new meta transfer. */
4509 pMetaXfer = vdMetaXferAlloc(pIoStorage, 0, 0);
4510 if (!pMetaXfer)
4511 return VERR_NO_MEMORY;
4512
4513 pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
4514 if (!pIoTask)
4515 {
4516 RTMemFree(pMetaXfer);
4517 return VERR_NO_MEMORY;
4518 }
4519
4520 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4521
4522 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4523 AssertPtr(pDeferred);
4524
4525 RTListInit(&pDeferred->NodeDeferred);
4526 pDeferred->pIoCtx = pIoCtx;
4527
4528 RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
4529 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_FLUSH);
4530 rc = pVDIo->pInterfaceIo->pfnFlushAsync(pVDIo->pInterfaceIo->Core.pvUser,
4531 pIoStorage->pStorage,
4532 pIoTask, &pvTask);
4533 if (RT_SUCCESS(rc))
4534 {
4535 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
4536 ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
4537 vdIoTaskFree(pDisk, pIoTask);
4538 RTMemFree(pDeferred);
4539 RTMemFree(pMetaXfer);
4540 }
4541 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4542 RTMemFree(pMetaXfer);
4543 }
4544
4545 LogFlowFunc(("returns rc=%Rrc\n", rc));
4546 return rc;
4547}
4548
4549static DECLCALLBACK(size_t) vdIOIntIoCtxCopyTo(void *pvUser, PVDIOCTX pIoCtx,
4550 const void *pvBuf, size_t cbBuf)
4551{
4552 PVDIO pVDIo = (PVDIO)pvUser;
4553 PVDISK pDisk = pVDIo->pDisk;
4554 size_t cbCopied = 0;
4555
4556 /** @todo Enable check for sync I/O later. */
4557 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4558 VD_IS_LOCKED(pDisk);
4559
4560 cbCopied = vdIoCtxCopyTo(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4561 Assert(cbCopied == cbBuf);
4562
4563 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCopied); - triggers with vdCopyHelper/dmgRead.
4564 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4565
4566 return cbCopied;
4567}
4568
4569static DECLCALLBACK(size_t) vdIOIntIoCtxCopyFrom(void *pvUser, PVDIOCTX pIoCtx,
4570 void *pvBuf, size_t cbBuf)
4571{
4572 PVDIO pVDIo = (PVDIO)pvUser;
4573 PVDISK pDisk = pVDIo->pDisk;
4574 size_t cbCopied = 0;
4575
4576 /** @todo Enable check for sync I/O later. */
4577 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4578 VD_IS_LOCKED(pDisk);
4579
4580 cbCopied = vdIoCtxCopyFrom(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4581 Assert(cbCopied == cbBuf);
4582
4583 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft > cbCopied); - triggers with vdCopyHelper/dmgRead.
4584 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4585
4586 return cbCopied;
4587}
4588
4589static DECLCALLBACK(size_t) vdIOIntIoCtxSet(void *pvUser, PVDIOCTX pIoCtx, int ch, size_t cb)
4590{
4591 PVDIO pVDIo = (PVDIO)pvUser;
4592 PVDISK pDisk = pVDIo->pDisk;
4593 size_t cbSet = 0;
4594
4595 /** @todo Enable check for sync I/O later. */
4596 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4597 VD_IS_LOCKED(pDisk);
4598
4599 cbSet = vdIoCtxSet(pIoCtx, ch, cb);
4600 Assert(cbSet == cb);
4601
4602 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbSet); - triggers with vdCopyHelper/dmgRead.
4603 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbSet);
4604
4605 return cbSet;
4606}
4607
4608static DECLCALLBACK(size_t) vdIOIntIoCtxSegArrayCreate(void *pvUser, PVDIOCTX pIoCtx,
4609 PRTSGSEG paSeg, unsigned *pcSeg,
4610 size_t cbData)
4611{
4612 PVDIO pVDIo = (PVDIO)pvUser;
4613 PVDISK pDisk = pVDIo->pDisk;
4614 size_t cbCreated = 0;
4615
4616 /** @todo It is possible that this gets called from a filter plugin
4617 * outside of the disk lock. Refine assertion or remove completely. */
4618#if 0
4619 /** @todo Enable check for sync I/O later. */
4620 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4621 VD_IS_LOCKED(pDisk);
4622#else
4623 NOREF(pDisk);
4624#endif
4625
4626 cbCreated = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, paSeg, pcSeg, cbData);
4627 Assert(!paSeg || cbData == cbCreated);
4628
4629 return cbCreated;
4630}
4631
4632static DECLCALLBACK(void) vdIOIntIoCtxCompleted(void *pvUser, PVDIOCTX pIoCtx, int rcReq,
4633 size_t cbCompleted)
4634{
4635 PVDIO pVDIo = (PVDIO)pvUser;
4636 PVDISK pDisk = pVDIo->pDisk;
4637
4638 LogFlowFunc(("pvUser=%#p pIoCtx=%#p rcReq=%Rrc cbCompleted=%zu\n",
4639 pvUser, pIoCtx, rcReq, cbCompleted));
4640
4641 /*
4642 * Grab the disk critical section to avoid races with other threads which
4643 * might still modify the I/O context.
4644 * Example is that iSCSI is doing an asynchronous write but calls us already
4645 * while the other thread is still hanging in vdWriteHelperAsync and couldn't update
4646 * the blocked state yet.
4647 * It can overwrite the state to true before we call vdIoCtxContinue and the
4648 * the request would hang indefinite.
4649 */
4650 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);
4651 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCompleted);
4652 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCompleted);
4653
4654 /* Set next transfer function if the current one finished.
4655 * @todo: Find a better way to prevent vdIoCtxContinue from calling the current helper again. */
4656 if (!pIoCtx->Req.Io.cbTransferLeft)
4657 {
4658 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
4659 pIoCtx->pfnIoCtxTransferNext = NULL;
4660 }
4661
4662 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHaltedHead, pIoCtx);
4663 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
4664 {
4665 /* Immediately drop the lock again, it will take care of processing the list. */
4666 vdDiskUnlock(pDisk, NULL);
4667 }
4668}
4669
4670static DECLCALLBACK(bool) vdIOIntIoCtxIsSynchronous(void *pvUser, PVDIOCTX pIoCtx)
4671{
4672 NOREF(pvUser);
4673 return !!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC);
4674}
4675
4676static DECLCALLBACK(bool) vdIOIntIoCtxIsZero(void *pvUser, PVDIOCTX pIoCtx, size_t cbCheck,
4677 bool fAdvance)
4678{
4679 NOREF(pvUser);
4680
4681 bool fIsZero = RTSgBufIsZero(&pIoCtx->Req.Io.SgBuf, cbCheck);
4682 if (fIsZero && fAdvance)
4683 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbCheck);
4684
4685 return fIsZero;
4686}
4687
4688static DECLCALLBACK(size_t) vdIOIntIoCtxGetDataUnitSize(void *pvUser, PVDIOCTX pIoCtx)
4689{
4690 RT_NOREF1(pIoCtx);
4691 PVDIO pVDIo = (PVDIO)pvUser;
4692 PVDISK pDisk = pVDIo->pDisk;
4693 size_t cbSector = 0;
4694
4695 PVDIMAGE pImage = vdGetImageByNumber(pDisk, VD_LAST_IMAGE);
4696 AssertPtrReturn(pImage, 0);
4697
4698 PCVDREGIONLIST pRegionList = NULL;
4699 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
4700 if (RT_SUCCESS(rc))
4701 {
4702 cbSector = pRegionList->aRegions[0].cbBlock;
4703
4704 AssertPtr(pImage->Backend->pfnRegionListRelease);
4705 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
4706 }
4707
4708 return cbSector;
4709}
4710
4711/**
4712 * VD I/O interface callback for opening a file (limited version for VDGetFormat).
4713 */
4714static DECLCALLBACK(int) vdIOIntOpenLimited(void *pvUser, const char *pszLocation,
4715 uint32_t fOpen, PPVDIOSTORAGE ppIoStorage)
4716{
4717 int rc = VINF_SUCCESS;
4718 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4719 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
4720
4721 if (!pIoStorage)
4722 return VERR_NO_MEMORY;
4723
4724 rc = pInterfaceIo->pfnOpen(NULL, pszLocation, fOpen, NULL, &pIoStorage->pStorage);
4725 if (RT_SUCCESS(rc))
4726 *ppIoStorage = pIoStorage;
4727 else
4728 RTMemFree(pIoStorage);
4729
4730 return rc;
4731}
4732
4733static DECLCALLBACK(int) vdIOIntCloseLimited(void *pvUser, PVDIOSTORAGE pIoStorage)
4734{
4735 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4736 int rc = pInterfaceIo->pfnClose(NULL, pIoStorage->pStorage);
4737
4738 RTMemFree(pIoStorage);
4739 return rc;
4740}
4741
4742static DECLCALLBACK(int) vdIOIntDeleteLimited(void *pvUser, const char *pcszFilename)
4743{
4744 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4745 return pInterfaceIo->pfnDelete(NULL, pcszFilename);
4746}
4747
4748static DECLCALLBACK(int) vdIOIntMoveLimited(void *pvUser, const char *pcszSrc,
4749 const char *pcszDst, unsigned fMove)
4750{
4751 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4752 return pInterfaceIo->pfnMove(NULL, pcszSrc, pcszDst, fMove);
4753}
4754
4755static DECLCALLBACK(int) vdIOIntGetFreeSpaceLimited(void *pvUser, const char *pcszFilename,
4756 int64_t *pcbFreeSpace)
4757{
4758 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4759 return pInterfaceIo->pfnGetFreeSpace(NULL, pcszFilename, pcbFreeSpace);
4760}
4761
4762static DECLCALLBACK(int) vdIOIntGetModificationTimeLimited(void *pvUser,
4763 const char *pcszFilename,
4764 PRTTIMESPEC pModificationTime)
4765{
4766 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4767 return pInterfaceIo->pfnGetModificationTime(NULL, pcszFilename, pModificationTime);
4768}
4769
4770static DECLCALLBACK(int) vdIOIntGetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4771 uint64_t *pcbSize)
4772{
4773 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4774 return pInterfaceIo->pfnGetSize(NULL, pIoStorage->pStorage, pcbSize);
4775}
4776
4777static DECLCALLBACK(int) vdIOIntSetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4778 uint64_t cbSize)
4779{
4780 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4781 return pInterfaceIo->pfnSetSize(NULL, pIoStorage->pStorage, cbSize);
4782}
4783
4784static DECLCALLBACK(int) vdIOIntWriteUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4785 uint64_t uOffset, PVDIOCTX pIoCtx,
4786 size_t cbWrite,
4787 PFNVDXFERCOMPLETED pfnComplete,
4788 void *pvCompleteUser)
4789{
4790 NOREF(pvUser);
4791 NOREF(pStorage);
4792 NOREF(uOffset);
4793 NOREF(pIoCtx);
4794 NOREF(cbWrite);
4795 NOREF(pfnComplete);
4796 NOREF(pvCompleteUser);
4797 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4798}
4799
4800static DECLCALLBACK(int) vdIOIntReadUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4801 uint64_t uOffset, PVDIOCTX pIoCtx,
4802 size_t cbRead)
4803{
4804 NOREF(pvUser);
4805 NOREF(pStorage);
4806 NOREF(uOffset);
4807 NOREF(pIoCtx);
4808 NOREF(cbRead);
4809 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4810}
4811
4812static DECLCALLBACK(int) vdIOIntWriteMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4813 uint64_t uOffset, const void *pvBuffer,
4814 size_t cbBuffer, PVDIOCTX pIoCtx,
4815 PFNVDXFERCOMPLETED pfnComplete,
4816 void *pvCompleteUser)
4817{
4818 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4819
4820 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4821 ("Async I/O not implemented for the limited interface"),
4822 VERR_NOT_SUPPORTED);
4823
4824 return pInterfaceIo->pfnWriteSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4825}
4826
4827static DECLCALLBACK(int) vdIOIntReadMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4828 uint64_t uOffset, void *pvBuffer,
4829 size_t cbBuffer, PVDIOCTX pIoCtx,
4830 PPVDMETAXFER ppMetaXfer,
4831 PFNVDXFERCOMPLETED pfnComplete,
4832 void *pvCompleteUser)
4833{
4834 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4835
4836 AssertMsgReturn(!pIoCtx && !ppMetaXfer && !pfnComplete && !pvCompleteUser,
4837 ("Async I/O not implemented for the limited interface"),
4838 VERR_NOT_SUPPORTED);
4839
4840 return pInterfaceIo->pfnReadSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4841}
4842
4843#if 0 /* unsed */
4844static int vdIOIntMetaXferReleaseLimited(void *pvUser, PVDMETAXFER pMetaXfer)
4845{
4846 /* This is a NOP in this case. */
4847 NOREF(pvUser);
4848 NOREF(pMetaXfer);
4849 return VINF_SUCCESS;
4850}
4851#endif
4852
4853static DECLCALLBACK(int) vdIOIntFlushLimited(void *pvUser, PVDIOSTORAGE pStorage,
4854 PVDIOCTX pIoCtx,
4855 PFNVDXFERCOMPLETED pfnComplete,
4856 void *pvCompleteUser)
4857{
4858 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4859
4860 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4861 ("Async I/O not implemented for the limited interface"),
4862 VERR_NOT_SUPPORTED);
4863
4864 return pInterfaceIo->pfnFlushSync(NULL, pStorage->pStorage);
4865}
4866
4867/**
4868 * internal: send output to the log (unconditionally).
4869 */
4870static DECLCALLBACK(int) vdLogMessage(void *pvUser, const char *pszFormat, va_list args)
4871{
4872 NOREF(pvUser);
4873 RTLogPrintfV(pszFormat, args);
4874 return VINF_SUCCESS;
4875}
4876
4877DECLINLINE(int) vdMessageWrapper(PVDISK pDisk, const char *pszFormat, ...)
4878{
4879 va_list va;
4880 va_start(va, pszFormat);
4881 int rc = pDisk->pInterfaceError->pfnMessage(pDisk->pInterfaceError->Core.pvUser,
4882 pszFormat, va);
4883 va_end(va);
4884 return rc;
4885}
4886
4887
4888/**
4889 * internal: adjust PCHS geometry
4890 */
4891static void vdFixupPCHSGeometry(PVDGEOMETRY pPCHS, uint64_t cbSize)
4892{
4893 /* Fix broken PCHS geometry. Can happen for two reasons: either the backend
4894 * mixes up PCHS and LCHS, or the application used to create the source
4895 * image has put garbage in it. Additionally, if the PCHS geometry covers
4896 * more than the image size, set it back to the default. */
4897 if ( pPCHS->cHeads > 16
4898 || pPCHS->cSectors > 63
4899 || pPCHS->cCylinders == 0
4900 || (uint64_t)pPCHS->cHeads * pPCHS->cSectors * pPCHS->cCylinders * 512 > cbSize)
4901 {
4902 Assert(!(RT_MIN(cbSize / 512 / 16 / 63, 16383) - (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383)));
4903 pPCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383);
4904 pPCHS->cHeads = 16;
4905 pPCHS->cSectors = 63;
4906 }
4907}
4908
4909/**
4910 * internal: adjust PCHS geometry
4911 */
4912static void vdFixupLCHSGeometry(PVDGEOMETRY pLCHS, uint64_t cbSize)
4913{
4914 /* Fix broken LCHS geometry. Can happen for two reasons: either the backend
4915 * mixes up PCHS and LCHS, or the application used to create the source
4916 * image has put garbage in it. The fix in this case is to clear the LCHS
4917 * geometry to trigger autodetection when it is used next. If the geometry
4918 * already says "please autodetect" (cylinders=0) keep it. */
4919 if ( ( pLCHS->cHeads > 255
4920 || pLCHS->cHeads == 0
4921 || pLCHS->cSectors > 63
4922 || pLCHS->cSectors == 0)
4923 && pLCHS->cCylinders != 0)
4924 {
4925 pLCHS->cCylinders = 0;
4926 pLCHS->cHeads = 0;
4927 pLCHS->cSectors = 0;
4928 }
4929 /* Always recompute the number of cylinders stored in the LCHS
4930 * geometry if it isn't set to "autotedetect" at the moment.
4931 * This is very useful if the destination image size is
4932 * larger or smaller than the source image size. Do not modify
4933 * the number of heads and sectors. Windows guests hate it. */
4934 if ( pLCHS->cCylinders != 0
4935 && pLCHS->cHeads != 0 /* paranoia */
4936 && pLCHS->cSectors != 0 /* paranoia */)
4937 {
4938 Assert(!(RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024) - (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024)));
4939 pLCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024);
4940 }
4941}
4942
4943/**
4944 * Sets the I/O callbacks of the given interface to the fallback methods
4945 *
4946 * @returns nothing.
4947 * @param pIfIo The I/O interface to setup.
4948 */
4949static void vdIfIoFallbackCallbacksSetup(PVDINTERFACEIO pIfIo)
4950{
4951 pIfIo->pfnOpen = vdIOOpenFallback;
4952 pIfIo->pfnClose = vdIOCloseFallback;
4953 pIfIo->pfnDelete = vdIODeleteFallback;
4954 pIfIo->pfnMove = vdIOMoveFallback;
4955 pIfIo->pfnGetFreeSpace = vdIOGetFreeSpaceFallback;
4956 pIfIo->pfnGetModificationTime = vdIOGetModificationTimeFallback;
4957 pIfIo->pfnGetSize = vdIOGetSizeFallback;
4958 pIfIo->pfnSetSize = vdIOSetSizeFallback;
4959 pIfIo->pfnSetAllocationSize = vdIOSetAllocationSizeFallback;
4960 pIfIo->pfnReadSync = vdIOReadSyncFallback;
4961 pIfIo->pfnWriteSync = vdIOWriteSyncFallback;
4962 pIfIo->pfnFlushSync = vdIOFlushSyncFallback;
4963 pIfIo->pfnReadAsync = vdIOReadAsyncFallback;
4964 pIfIo->pfnWriteAsync = vdIOWriteAsyncFallback;
4965 pIfIo->pfnFlushAsync = vdIOFlushAsyncFallback;
4966}
4967
4968/**
4969 * Sets the internal I/O callbacks of the given interface.
4970 *
4971 * @returns nothing.
4972 * @param pIfIoInt The internal I/O interface to setup.
4973 */
4974static void vdIfIoIntCallbacksSetup(PVDINTERFACEIOINT pIfIoInt)
4975{
4976 pIfIoInt->pfnOpen = vdIOIntOpen;
4977 pIfIoInt->pfnClose = vdIOIntClose;
4978 pIfIoInt->pfnDelete = vdIOIntDelete;
4979 pIfIoInt->pfnMove = vdIOIntMove;
4980 pIfIoInt->pfnGetFreeSpace = vdIOIntGetFreeSpace;
4981 pIfIoInt->pfnGetModificationTime = vdIOIntGetModificationTime;
4982 pIfIoInt->pfnGetSize = vdIOIntGetSize;
4983 pIfIoInt->pfnSetSize = vdIOIntSetSize;
4984 pIfIoInt->pfnSetAllocationSize = vdIOIntSetAllocationSize;
4985 pIfIoInt->pfnReadUser = vdIOIntReadUser;
4986 pIfIoInt->pfnWriteUser = vdIOIntWriteUser;
4987 pIfIoInt->pfnReadMeta = vdIOIntReadMeta;
4988 pIfIoInt->pfnWriteMeta = vdIOIntWriteMeta;
4989 pIfIoInt->pfnMetaXferRelease = vdIOIntMetaXferRelease;
4990 pIfIoInt->pfnFlush = vdIOIntFlush;
4991 pIfIoInt->pfnIoCtxCopyFrom = vdIOIntIoCtxCopyFrom;
4992 pIfIoInt->pfnIoCtxCopyTo = vdIOIntIoCtxCopyTo;
4993 pIfIoInt->pfnIoCtxSet = vdIOIntIoCtxSet;
4994 pIfIoInt->pfnIoCtxSegArrayCreate = vdIOIntIoCtxSegArrayCreate;
4995 pIfIoInt->pfnIoCtxCompleted = vdIOIntIoCtxCompleted;
4996 pIfIoInt->pfnIoCtxIsSynchronous = vdIOIntIoCtxIsSynchronous;
4997 pIfIoInt->pfnIoCtxIsZero = vdIOIntIoCtxIsZero;
4998 pIfIoInt->pfnIoCtxGetDataUnitSize = vdIOIntIoCtxGetDataUnitSize;
4999}
5000
5001/**
5002 * Internally used completion handler for synchronous I/O contexts.
5003 */
5004static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq)
5005{
5006 RT_NOREF2(pvUser1, rcReq);
5007 RTSEMEVENT hEvent = (RTSEMEVENT)pvUser2;
5008
5009 RTSemEventSignal(hEvent);
5010}
5011
5012/**
5013 * Initializes HDD backends.
5014 *
5015 * @returns VBox status code.
5016 */
5017VBOXDDU_DECL(int) VDInit(void)
5018{
5019 int rc = vdPluginInit();
5020 LogRel(("VD: VDInit finished with %Rrc\n", rc));
5021 return rc;
5022}
5023
5024/**
5025 * Destroys loaded HDD backends.
5026 *
5027 * @returns VBox status code.
5028 */
5029VBOXDDU_DECL(int) VDShutdown(void)
5030{
5031 return vdPluginTerm();
5032}
5033
5034/**
5035 * Loads a single plugin given by filename.
5036 *
5037 * @returns VBox status code.
5038 * @param pszFilename The plugin filename to load.
5039 */
5040VBOXDDU_DECL(int) VDPluginLoadFromFilename(const char *pszFilename)
5041{
5042 if (!vdPluginIsInitialized())
5043 {
5044 int rc = VDInit();
5045 if (RT_FAILURE(rc))
5046 return rc;
5047 }
5048
5049 return vdPluginLoadFromFilename(pszFilename);
5050}
5051
5052/**
5053 * Load all plugins from a given path.
5054 *
5055 * @returns VBox statuse code.
5056 * @param pszPath The path to load plugins from.
5057 */
5058VBOXDDU_DECL(int) VDPluginLoadFromPath(const char *pszPath)
5059{
5060 if (!vdPluginIsInitialized())
5061 {
5062 int rc = VDInit();
5063 if (RT_FAILURE(rc))
5064 return rc;
5065 }
5066
5067 return vdPluginLoadFromPath(pszPath);
5068}
5069
5070/**
5071 * Unloads a single plugin given by filename.
5072 *
5073 * @returns VBox status code.
5074 * @param pszFilename The plugin filename to unload.
5075 */
5076VBOXDDU_DECL(int) VDPluginUnloadFromFilename(const char *pszFilename)
5077{
5078 if (!vdPluginIsInitialized())
5079 {
5080 int rc = VDInit();
5081 if (RT_FAILURE(rc))
5082 return rc;
5083 }
5084
5085 return vdPluginUnloadFromFilename(pszFilename);
5086}
5087
5088/**
5089 * Unload all plugins from a given path.
5090 *
5091 * @returns VBox statuse code.
5092 * @param pszPath The path to unload plugins from.
5093 */
5094VBOXDDU_DECL(int) VDPluginUnloadFromPath(const char *pszPath)
5095{
5096 if (!vdPluginIsInitialized())
5097 {
5098 int rc = VDInit();
5099 if (RT_FAILURE(rc))
5100 return rc;
5101 }
5102
5103 return vdPluginUnloadFromPath(pszPath);
5104}
5105
5106/**
5107 * Lists all HDD backends and their capabilities in a caller-provided buffer.
5108 *
5109 * @returns VBox status code.
5110 * VERR_BUFFER_OVERFLOW if not enough space is passed.
5111 * @param cEntriesAlloc Number of list entries available.
5112 * @param pEntries Pointer to array for the entries.
5113 * @param pcEntriesUsed Number of entries returned.
5114 */
5115VBOXDDU_DECL(int) VDBackendInfo(unsigned cEntriesAlloc, PVDBACKENDINFO pEntries,
5116 unsigned *pcEntriesUsed)
5117{
5118 int rc = VINF_SUCCESS;
5119
5120 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5121 /* Check arguments. */
5122 AssertMsgReturn(cEntriesAlloc,
5123 ("cEntriesAlloc=%u\n", cEntriesAlloc),
5124 VERR_INVALID_PARAMETER);
5125 AssertMsgReturn(VALID_PTR(pEntries),
5126 ("pEntries=%#p\n", pEntries),
5127 VERR_INVALID_PARAMETER);
5128 AssertMsgReturn(VALID_PTR(pcEntriesUsed),
5129 ("pcEntriesUsed=%#p\n", pcEntriesUsed),
5130 VERR_INVALID_PARAMETER);
5131 if (!vdPluginIsInitialized())
5132 VDInit();
5133
5134 uint32_t cBackends = vdGetImageBackendCount();
5135 if (cEntriesAlloc < cBackends)
5136 {
5137 *pcEntriesUsed = cBackends;
5138 return VERR_BUFFER_OVERFLOW;
5139 }
5140
5141 for (unsigned i = 0; i < cBackends; i++)
5142 {
5143 PCVDIMAGEBACKEND pBackend;
5144 rc = vdQueryImageBackend(i, &pBackend);
5145 AssertRC(rc);
5146
5147 pEntries[i].pszBackend = pBackend->pszBackendName;
5148 pEntries[i].uBackendCaps = pBackend->uBackendCaps;
5149 pEntries[i].paFileExtensions = pBackend->paFileExtensions;
5150 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5151 pEntries[i].pfnComposeLocation = pBackend->pfnComposeLocation;
5152 pEntries[i].pfnComposeName = pBackend->pfnComposeName;
5153 }
5154
5155 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5156 *pcEntriesUsed = cBackends;
5157 return rc;
5158}
5159
5160/**
5161 * Lists the capabilities of a backend identified by its name.
5162 *
5163 * @returns VBox status code.
5164 * @param pszBackend The backend name.
5165 * @param pEntry Pointer to an entry.
5166 */
5167VBOXDDU_DECL(int) VDBackendInfoOne(const char *pszBackend, PVDBACKENDINFO pEntry)
5168{
5169 LogFlowFunc(("pszBackend=%#p pEntry=%#p\n", pszBackend, pEntry));
5170 /* Check arguments. */
5171 AssertMsgReturn(VALID_PTR(pszBackend),
5172 ("pszBackend=%#p\n", pszBackend),
5173 VERR_INVALID_PARAMETER);
5174 AssertMsgReturn(VALID_PTR(pEntry),
5175 ("pEntry=%#p\n", pEntry),
5176 VERR_INVALID_PARAMETER);
5177 if (!vdPluginIsInitialized())
5178 VDInit();
5179
5180 PCVDIMAGEBACKEND pBackend;
5181 int rc = vdFindImageBackend(pszBackend, &pBackend);
5182 if (RT_SUCCESS(rc))
5183 {
5184 pEntry->pszBackend = pBackend->pszBackendName;
5185 pEntry->uBackendCaps = pBackend->uBackendCaps;
5186 pEntry->paFileExtensions = pBackend->paFileExtensions;
5187 pEntry->paConfigInfo = pBackend->paConfigInfo;
5188 }
5189
5190 return rc;
5191}
5192
5193/**
5194 * Lists all filters and their capabilities in a caller-provided buffer.
5195 *
5196 * @return VBox status code.
5197 * VERR_BUFFER_OVERFLOW if not enough space is passed.
5198 * @param cEntriesAlloc Number of list entries available.
5199 * @param pEntries Pointer to array for the entries.
5200 * @param pcEntriesUsed Number of entries returned.
5201 */
5202VBOXDDU_DECL(int) VDFilterInfo(unsigned cEntriesAlloc, PVDFILTERINFO pEntries,
5203 unsigned *pcEntriesUsed)
5204{
5205 int rc = VINF_SUCCESS;
5206
5207 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5208 /* Check arguments. */
5209 AssertMsgReturn(cEntriesAlloc,
5210 ("cEntriesAlloc=%u\n", cEntriesAlloc),
5211 VERR_INVALID_PARAMETER);
5212 AssertMsgReturn(VALID_PTR(pEntries),
5213 ("pEntries=%#p\n", pEntries),
5214 VERR_INVALID_PARAMETER);
5215 AssertMsgReturn(VALID_PTR(pcEntriesUsed),
5216 ("pcEntriesUsed=%#p\n", pcEntriesUsed),
5217 VERR_INVALID_PARAMETER);
5218 if (!vdPluginIsInitialized())
5219 VDInit();
5220
5221 uint32_t cBackends = vdGetFilterBackendCount();
5222 if (cEntriesAlloc < cBackends)
5223 {
5224 *pcEntriesUsed = cBackends;
5225 return VERR_BUFFER_OVERFLOW;
5226 }
5227
5228 for (unsigned i = 0; i < cBackends; i++)
5229 {
5230 PCVDFILTERBACKEND pBackend;
5231 rc = vdQueryFilterBackend(i, &pBackend);
5232 pEntries[i].pszFilter = pBackend->pszBackendName;
5233 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5234 }
5235
5236 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5237 *pcEntriesUsed = cBackends;
5238 return rc;
5239}
5240
5241/**
5242 * Lists the capabilities of a filter identified by its name.
5243 *
5244 * @return VBox status code.
5245 * @param pszFilter The filter name (case insensitive).
5246 * @param pEntry Pointer to an entry.
5247 */
5248VBOXDDU_DECL(int) VDFilterInfoOne(const char *pszFilter, PVDFILTERINFO pEntry)
5249{
5250 LogFlowFunc(("pszFilter=%#p pEntry=%#p\n", pszFilter, pEntry));
5251 /* Check arguments. */
5252 AssertMsgReturn(VALID_PTR(pszFilter),
5253 ("pszFilter=%#p\n", pszFilter),
5254 VERR_INVALID_PARAMETER);
5255 AssertMsgReturn(VALID_PTR(pEntry),
5256 ("pEntry=%#p\n", pEntry),
5257 VERR_INVALID_PARAMETER);
5258 if (!vdPluginIsInitialized())
5259 VDInit();
5260
5261 PCVDFILTERBACKEND pBackend;
5262 int rc = vdFindFilterBackend(pszFilter, &pBackend);
5263 if (RT_SUCCESS(rc))
5264 {
5265 pEntry->pszFilter = pBackend->pszBackendName;
5266 pEntry->paConfigInfo = pBackend->paConfigInfo;
5267 }
5268
5269 return rc;
5270}
5271
5272/**
5273 * Allocates and initializes an empty HDD container.
5274 * No image files are opened.
5275 *
5276 * @returns VBox status code.
5277 * @param pVDIfsDisk Pointer to the per-disk VD interface list.
5278 * @param enmType Type of the image container.
5279 * @param ppDisk Where to store the reference to HDD container.
5280 */
5281VBOXDDU_DECL(int) VDCreate(PVDINTERFACE pVDIfsDisk, VDTYPE enmType, PVDISK *ppDisk)
5282{
5283 int rc = VINF_SUCCESS;
5284 PVDISK pDisk = NULL;
5285
5286 LogFlowFunc(("pVDIfsDisk=%#p\n", pVDIfsDisk));
5287 do
5288 {
5289 /* Check arguments. */
5290 AssertMsgBreakStmt(VALID_PTR(ppDisk),
5291 ("ppDisk=%#p\n", ppDisk),
5292 rc = VERR_INVALID_PARAMETER);
5293
5294 pDisk = (PVDISK)RTMemAllocZ(sizeof(VDISK));
5295 if (pDisk)
5296 {
5297 pDisk->u32Signature = VDISK_SIGNATURE;
5298 pDisk->enmType = enmType;
5299 pDisk->