VirtualBox

source: vbox/trunk/src/VBox/Storage/VD.cpp@ 97078

Last change on this file since 97078 was 96407, checked in by vboxsync, 2 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 340.5 KB
Line 
1/* $Id: VD.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * VD - Virtual disk container implementation.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_VD
33#include <VBox/vd.h>
34#include <VBox/err.h>
35#include <VBox/sup.h>
36#include <VBox/log.h>
37
38#include <iprt/alloc.h>
39#include <iprt/assert.h>
40#include <iprt/uuid.h>
41#include <iprt/file.h>
42#include <iprt/string.h>
43#include <iprt/asm.h>
44#include <iprt/param.h>
45#include <iprt/path.h>
46#include <iprt/sg.h>
47#include <iprt/semaphore.h>
48#include <iprt/vector.h>
49
50#include "VDInternal.h"
51
52/** Buffer size used for merging images. */
53#define VD_MERGE_BUFFER_SIZE (16 * _1M)
54
55/** Maximum number of segments in one I/O task. */
56#define VD_IO_TASK_SEGMENTS_MAX 64
57
58/** Threshold after not recently used blocks are removed from the list. */
59#define VD_DISCARD_REMOVE_THRESHOLD (10 * _1M) /** @todo experiment */
60
/**
 * VD async I/O interface storage descriptor.
 *
 * Fallback state used when the caller supplies no async I/O interface:
 * plain file handle plus the thread emulating asynchronous access.
 */
typedef struct VDIIOFALLBACKSTORAGE
{
    /** File handle. */
    RTFILE File;
    /** Completion callback. */
    PFNVDCOMPLETED pfnCompleted;
    /** Thread for async access. */
    RTTHREAD ThreadAsync;
} VDIIOFALLBACKSTORAGE, *PVDIIOFALLBACKSTORAGE;
73
/**
 * uModified bit flags.
 */
#define VD_IMAGE_MODIFIED_FLAG                  RT_BIT(0)
#define VD_IMAGE_MODIFIED_FIRST                 RT_BIT(1)
#define VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE   RT_BIT(2)


/** Asserts that the given disk is currently locked (fLocked set).
 * Pure debug aid: expands to the assertion only, the NOREF silences
 * unused-parameter warnings in builds where AssertMsg is compiled out. */
# define VD_IS_LOCKED(a_pDisk) \
    do \
    { \
        NOREF(a_pDisk); \
        AssertMsg((a_pDisk)->fLocked, \
                  ("Lock not held\n"));\
    } while(0)
89
/**
 * VBox parent read descriptor, used internally for compaction.
 */
typedef struct VDPARENTSTATEDESC
{
    /** Pointer to disk descriptor. */
    PVDISK pDisk;
    /** Pointer to image descriptor. */
    PVDIMAGE pImage;
} VDPARENTSTATEDESC, *PVDPARENTSTATEDESC;
100
/**
 * Transfer direction of an I/O context.
 */
typedef enum VDIOCTXTXDIR
{
    /** Read */
    VDIOCTXTXDIR_READ = 0,
    /** Write */
    VDIOCTXTXDIR_WRITE,
    /** Flush */
    VDIOCTXTXDIR_FLUSH,
    /** Discard */
    VDIOCTXTXDIR_DISCARD,
    /** 32bit hack — forces the compiler to use a 32-bit sized type. */
    VDIOCTXTXDIR_32BIT_HACK = 0x7fffffff
} VDIOCTXTXDIR, *PVDIOCTXTXDIR;
117
118/** Transfer function */
119typedef DECLCALLBACKTYPE(int, FNVDIOCTXTRANSFER ,(PVDIOCTX pIoCtx));
120/** Pointer to a transfer function. */
121typedef FNVDIOCTXTRANSFER *PFNVDIOCTXTRANSFER;
122
/**
 * I/O context.
 *
 * Tracks a single request (read/write/flush/discard) as it is processed.
 * Contexts form a parent/child relation (see pIoCtxParent): a root context
 * carries the completion callback, a child context carries state saved from
 * the parent it will update on completion.
 */
typedef struct VDIOCTX
{
    /** Pointer to the next I/O context. */
    struct VDIOCTX * volatile pIoCtxNext;
    /** Disk this request is for. */
    PVDISK pDisk;
    /** Return code. */
    int rcReq;
    /** Various flags for the I/O context (VDIOCTX_FLAGS_XXX). */
    uint32_t fFlags;
    /** Number of data transfers currently pending. */
    volatile uint32_t cDataTransfersPending;
    /** How many meta data transfers are pending. */
    volatile uint32_t cMetaTransfersPending;
    /** Flag whether the request finished */
    volatile bool fComplete;
    /** Temporary allocated memory which is freed
     * when the context completes. */
    void *pvAllocation;
    /** Transfer function. */
    PFNVDIOCTXTRANSFER pfnIoCtxTransfer;
    /** Next transfer part after the current one completed. */
    PFNVDIOCTXTRANSFER pfnIoCtxTransferNext;
    /** Transfer direction */
    VDIOCTXTXDIR enmTxDir;
    /** Request type dependent data. */
    union
    {
        /** I/O request (read/write). */
        struct
        {
            /** Number of bytes left until this context completes. */
            volatile uint32_t cbTransferLeft;
            /** Current offset */
            volatile uint64_t uOffset;
            /** Number of bytes to transfer */
            volatile size_t cbTransfer;
            /** Current image in the chain. */
            PVDIMAGE pImageCur;
            /** Start image to read from. pImageCur is reset to this
             * value after it reached the first image in the chain. */
            PVDIMAGE pImageStart;
            /** S/G buffer */
            RTSGBUF SgBuf;
            /** Number of bytes to clear in the buffer before the current read. */
            size_t cbBufClear;
            /** Number of images to read. */
            unsigned cImagesRead;
            /** Override for the parent image to start reading from. */
            PVDIMAGE pImageParentOverride;
            /** Original offset of the transfer - required for filtering read requests. */
            uint64_t uOffsetXferOrig;
            /** Original size of the transfer - required for filtering read requests. */
            size_t cbXferOrig;
        } Io;
        /** Discard requests. */
        struct
        {
            /** Pointer to the range descriptor array. */
            PCRTRANGE paRanges;
            /** Number of ranges in the array. */
            unsigned cRanges;
            /** Range descriptor index which is processed. */
            unsigned idxRange;
            /** Start offset to discard currently. */
            uint64_t offCur;
            /** How many bytes left to discard in the current range. */
            size_t cbDiscardLeft;
            /** How many bytes to discard in the current block (<= cbDiscardLeft). */
            size_t cbThisDiscard;
            /** Discard block handled currently. */
            PVDDISCARDBLOCK pBlock;
        } Discard;
    } Req;
    /** Parent I/O context if any. Sets the type of the context (root/child) */
    PVDIOCTX pIoCtxParent;
    /** Type dependent data (root/child) */
    union
    {
        /** Root data */
        struct
        {
            /** Completion callback */
            PFNVDASYNCTRANSFERCOMPLETE pfnComplete;
            /** User argument 1 passed on completion. */
            void *pvUser1;
            /** User argument 2 passed on completion. */
            void *pvUser2;
        } Root;
        /** Child data */
        struct
        {
            /** Saved start offset */
            uint64_t uOffsetSaved;
            /** Saved transfer size */
            size_t cbTransferLeftSaved;
            /** Number of bytes transferred from the parent if this context completes. */
            size_t cbTransferParent;
            /** Number of bytes to pre read */
            size_t cbPreRead;
            /** Number of bytes to post read. */
            size_t cbPostRead;
            /** Number of bytes to write left in the parent. */
            size_t cbWriteParent;
            /** Write type dependent data. */
            union
            {
                /** Optimized */
                struct
                {
                    /** Bytes to fill to satisfy the block size. Not part of the virtual disk. */
                    size_t cbFill;
                    /** Bytes to copy instead of reading from the parent */
                    size_t cbWriteCopy;
                    /** Bytes to read from the image. */
                    size_t cbReadImage;
                } Optimized;
            } Write;
        } Child;
    } Type;
} VDIOCTX;
247
248/** Default flags for an I/O context, i.e. unblocked and async. */
249#define VDIOCTX_FLAGS_DEFAULT (0)
250/** Flag whether the context is blocked. */
251#define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
252/** Flag whether the I/O context is using synchronous I/O. */
253#define VDIOCTX_FLAGS_SYNC RT_BIT_32(1)
254/** Flag whether the read should update the cache. */
255#define VDIOCTX_FLAGS_READ_UPDATE_CACHE RT_BIT_32(2)
256/** Flag whether free blocks should be zeroed.
257 * If false and no image has data for specified
258 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
259 * Note that unallocated blocks are still zeroed
260 * if at least one image has valid data for a part
261 * of the range.
262 */
263#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
264/** Don't free the I/O context when complete because
265 * it was allocated elsewhere (stack, ...). */
266#define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4)
267/** Don't set the modified flag for this I/O context when writing. */
268#define VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG RT_BIT_32(5)
269/** The write filter was applied already and shouldn't be applied a second time.
270 * Used at the beginning of vdWriteHelperAsync() because it might be called
271 * multiple times.
272 */
273#define VDIOCTX_FLAGS_WRITE_FILTER_APPLIED RT_BIT_32(6)
274
275/** NIL I/O context pointer value. */
276#define NIL_VDIOCTX ((PVDIOCTX)0)
277
/**
 * List node for deferred I/O contexts.
 */
typedef struct VDIOCTXDEFERRED
{
    /** Node in the list of deferred requests.
     * A request can be deferred if the image is growing
     * and the request accesses the same range or if
     * the backend needs to read or write metadata from the disk
     * before it can continue. */
    RTLISTNODE NodeDeferred;
    /** I/O context this entry points to. */
    PVDIOCTX pIoCtx;
} VDIOCTXDEFERRED, *PVDIOCTXDEFERRED;
292
/**
 * I/O task.
 *
 * One storage transfer unit; either a user-data transfer attached to an
 * I/O context or a metadata transfer attached to a VDMETAXFER.
 */
typedef struct VDIOTASK
{
    /** Next I/O task waiting in the list. */
    struct VDIOTASK * volatile pNext;
    /** Storage this task belongs to. */
    PVDIOSTORAGE pIoStorage;
    /** Optional completion callback. */
    PFNVDXFERCOMPLETED pfnComplete;
    /** Opaque user data. */
    void *pvUser;
    /** Completion status code for the task. */
    int rcReq;
    /** Flag whether this is a meta data transfer. */
    bool fMeta;
    /** Type dependent data (selected by fMeta). */
    union
    {
        /** User data transfer. */
        struct
        {
            /** Number of bytes this task transferred. */
            uint32_t cbTransfer;
            /** Pointer to the I/O context the task belongs. */
            PVDIOCTX pIoCtx;
        } User;
        /** Meta data transfer. */
        struct
        {
            /** Meta transfer this task is for. */
            PVDMETAXFER pMetaXfer;
        } Meta;
    } Type;
} VDIOTASK;
329
/**
 * Storage handle.
 */
typedef struct VDIOSTORAGE
{
    /** Image I/O state this storage handle belongs to. */
    PVDIO pVDIo;
    /** AVL tree for pending async metadata transfers (keyed by file offset). */
    PAVLRFOFFTREE pTreeMetaXfers;
    /** Opaque storage handle of the underlying I/O provider. */
    void *pStorage;
} VDIOSTORAGE;
342
/**
 * Metadata transfer.
 *
 * @note This entry can't be freed if either the list is not empty or
 * the reference counter is not 0.
 * The assumption is that the backends don't need to read huge amounts of
 * metadata to complete a transfer so the additional memory overhead should
 * be relatively small.
 */
typedef struct VDMETAXFER
{
    /** AVL core for fast search (the file offset is the key) */
    AVLRFOFFNODECORE Core;
    /** I/O storage for this transfer. */
    PVDIOSTORAGE pIoStorage;
    /** Flags (VDMETAXFER_TXDIR_XXX in the low bits). */
    uint32_t fFlags;
    /** List of I/O contexts waiting for this metadata transfer to complete. */
    RTLISTNODE ListIoCtxWaiting;
    /** Number of references to this entry. */
    unsigned cRefs;
    /** Size of the data stored with this entry. */
    size_t cbMeta;
    /** Shadow buffer which is used in case a write is still active and other
     * writes update the shadow buffer. */
    uint8_t *pbDataShw;
    /** List of I/O contexts updating the shadow buffer while there is a write
     * in progress. */
    RTLISTNODE ListIoCtxShwWrites;
    /** Data stored - variable size. */
    uint8_t abData[1];
} VDMETAXFER;
375
376/**
377 * The transfer direction for the metadata.
378 */
379#define VDMETAXFER_TXDIR_MASK 0x3
380#define VDMETAXFER_TXDIR_NONE 0x0
381#define VDMETAXFER_TXDIR_WRITE 0x1
382#define VDMETAXFER_TXDIR_READ 0x2
383#define VDMETAXFER_TXDIR_FLUSH 0x3
384#define VDMETAXFER_TXDIR_GET(flags) ((flags) & VDMETAXFER_TXDIR_MASK)
385#define VDMETAXFER_TXDIR_SET(flags, dir) ((flags) = (flags & ~VDMETAXFER_TXDIR_MASK) | (dir))
386
387/** Forward declaration of the async discard helper. */
388static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx);
389static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx);
390static void vdDiskProcessBlockedIoCtx(PVDISK pDisk);
391static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc);
392static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
393
394/**
395 * internal: issue error message.
396 */
397static int vdError(PVDISK pDisk, int rc, RT_SRC_POS_DECL,
398 const char *pszFormat, ...)
399{
400 va_list va;
401 va_start(va, pszFormat);
402 if (pDisk->pInterfaceError)
403 pDisk->pInterfaceError->pfnError(pDisk->pInterfaceError->Core.pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va);
404 va_end(va);
405 return rc;
406}
407
408/**
409 * internal: thread synchronization, start read.
410 */
411DECLINLINE(int) vdThreadStartRead(PVDISK pDisk)
412{
413 int rc = VINF_SUCCESS;
414 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
415 rc = pDisk->pInterfaceThreadSync->pfnStartRead(pDisk->pInterfaceThreadSync->Core.pvUser);
416 return rc;
417}
418
419/**
420 * internal: thread synchronization, finish read.
421 */
422DECLINLINE(int) vdThreadFinishRead(PVDISK pDisk)
423{
424 int rc = VINF_SUCCESS;
425 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
426 rc = pDisk->pInterfaceThreadSync->pfnFinishRead(pDisk->pInterfaceThreadSync->Core.pvUser);
427 return rc;
428}
429
430/**
431 * internal: thread synchronization, start write.
432 */
433DECLINLINE(int) vdThreadStartWrite(PVDISK pDisk)
434{
435 int rc = VINF_SUCCESS;
436 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
437 rc = pDisk->pInterfaceThreadSync->pfnStartWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
438 return rc;
439}
440
441/**
442 * internal: thread synchronization, finish write.
443 */
444DECLINLINE(int) vdThreadFinishWrite(PVDISK pDisk)
445{
446 int rc = VINF_SUCCESS;
447 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
448 rc = pDisk->pInterfaceThreadSync->pfnFinishWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
449 return rc;
450}
451
452/**
453 * internal: add image structure to the end of images list.
454 */
455static void vdAddImageToList(PVDISK pDisk, PVDIMAGE pImage)
456{
457 pImage->pPrev = NULL;
458 pImage->pNext = NULL;
459
460 if (pDisk->pBase)
461 {
462 Assert(pDisk->cImages > 0);
463 pImage->pPrev = pDisk->pLast;
464 pDisk->pLast->pNext = pImage;
465 pDisk->pLast = pImage;
466 }
467 else
468 {
469 Assert(pDisk->cImages == 0);
470 pDisk->pBase = pImage;
471 pDisk->pLast = pImage;
472 }
473
474 pDisk->cImages++;
475}
476
477/**
478 * internal: remove image structure from the images list.
479 */
480static void vdRemoveImageFromList(PVDISK pDisk, PVDIMAGE pImage)
481{
482 Assert(pDisk->cImages > 0);
483
484 if (pImage->pPrev)
485 pImage->pPrev->pNext = pImage->pNext;
486 else
487 pDisk->pBase = pImage->pNext;
488
489 if (pImage->pNext)
490 pImage->pNext->pPrev = pImage->pPrev;
491 else
492 pDisk->pLast = pImage->pPrev;
493
494 pImage->pPrev = NULL;
495 pImage->pNext = NULL;
496
497 pDisk->cImages--;
498}
499
500/**
501 * Release a referene to the filter decrementing the counter and destroying the filter
502 * when the counter reaches zero.
503 *
504 * @returns The new reference count.
505 * @param pFilter The filter to release.
506 */
507static uint32_t vdFilterRelease(PVDFILTER pFilter)
508{
509 uint32_t cRefs = ASMAtomicDecU32(&pFilter->cRefs);
510 if (!cRefs)
511 {
512 pFilter->pBackend->pfnDestroy(pFilter->pvBackendData);
513 RTMemFree(pFilter);
514 }
515
516 return cRefs;
517}
518
519/**
520 * Increments the reference counter of the given filter.
521 *
522 * @return The new reference count.
523 * @param pFilter The filter.
524 */
525static uint32_t vdFilterRetain(PVDFILTER pFilter)
526{
527 return ASMAtomicIncU32(&pFilter->cRefs);
528}
529
530/**
531 * internal: find image by index into the images list.
532 */
533static PVDIMAGE vdGetImageByNumber(PVDISK pDisk, unsigned nImage)
534{
535 PVDIMAGE pImage = pDisk->pBase;
536 if (nImage == VD_LAST_IMAGE)
537 return pDisk->pLast;
538 while (pImage && nImage)
539 {
540 pImage = pImage->pNext;
541 nImage--;
542 }
543 return pImage;
544}
545
546/**
547 * Creates a new region list from the given one converting to match the flags if necessary.
548 *
549 * @returns VBox status code.
550 * @param pRegionList The region list to convert from.
551 * @param fFlags The flags for the new region list.
552 * @param ppRegionList Where to store the new region list on success.
553 */
554static int vdRegionListConv(PCVDREGIONLIST pRegionList, uint32_t fFlags, PPVDREGIONLIST ppRegionList)
555{
556 int rc = VINF_SUCCESS;
557 PVDREGIONLIST pRegionListNew = (PVDREGIONLIST)RTMemDup(pRegionList,
558 RT_UOFFSETOF_DYN(VDREGIONLIST, aRegions[pRegionList->cRegions]));
559 if (RT_LIKELY(pRegionListNew))
560 {
561 /* Do we have to convert anything? */
562 if (pRegionList->fFlags != fFlags)
563 {
564 uint64_t offRegionNext = 0;
565
566 pRegionListNew->fFlags = fFlags;
567 for (unsigned i = 0; i < pRegionListNew->cRegions; i++)
568 {
569 PVDREGIONDESC pRegion = &pRegionListNew->aRegions[i];
570
571 if ( (fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
572 && !(pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS))
573 {
574 Assert(!(pRegion->cRegionBlocksOrBytes % pRegion->cbBlock));
575
576 /* Convert from bytes to logical blocks. */
577 pRegion->offRegion = offRegionNext;
578 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes / pRegion->cbBlock;
579 offRegionNext += pRegion->cRegionBlocksOrBytes;
580 }
581 else
582 {
583 /* Convert from logical blocks to bytes. */
584 pRegion->offRegion = offRegionNext;
585 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes * pRegion->cbBlock;
586 offRegionNext += pRegion->cRegionBlocksOrBytes;
587 }
588 }
589 }
590
591 *ppRegionList = pRegionListNew;
592 }
593 else
594 rc = VERR_NO_MEMORY;
595
596 return rc;
597}
598
599/**
600 * Returns the virtual size of the image in bytes.
601 *
602 * @returns Size of the given image in bytes.
603 * @param pImage The image to get the size from.
604 */
605static uint64_t vdImageGetSize(PVDIMAGE pImage)
606{
607 uint64_t cbImage = 0;
608
609 if (pImage->cbImage == VD_IMAGE_SIZE_UNINITIALIZED)
610 {
611 PCVDREGIONLIST pRegionList = NULL;
612 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
613 if (RT_SUCCESS(rc))
614 {
615 if (pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
616 {
617 PVDREGIONLIST pRegionListConv = NULL;
618 rc = vdRegionListConv(pRegionList, 0, &pRegionListConv);
619 if (RT_SUCCESS(rc))
620 {
621 for (uint32_t i = 0; i < pRegionListConv->cRegions; i++)
622 cbImage += pRegionListConv->aRegions[i].cRegionBlocksOrBytes;
623
624 VDRegionListFree(pRegionListConv);
625 }
626 }
627 else
628 for (uint32_t i = 0; i < pRegionList->cRegions; i++)
629 cbImage += pRegionList->aRegions[i].cRegionBlocksOrBytes;
630
631 AssertPtr(pImage->Backend->pfnRegionListRelease);
632 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
633 pImage->cbImage = cbImage; /* Cache the value. */
634 }
635 }
636 else
637 cbImage = pImage->cbImage;
638
639 return cbImage;
640}
641
642/**
643 * Applies the filter chain to the given write request.
644 *
645 * @returns VBox status code.
646 * @param pDisk The HDD container.
647 * @param uOffset The start offset of the write.
648 * @param cbWrite Number of bytes to write.
649 * @param pIoCtx The I/O context associated with the request.
650 */
651static int vdFilterChainApplyWrite(PVDISK pDisk, uint64_t uOffset, size_t cbWrite,
652 PVDIOCTX pIoCtx)
653{
654 int rc = VINF_SUCCESS;
655
656 VD_IS_LOCKED(pDisk);
657
658 PVDFILTER pFilter;
659 RTListForEach(&pDisk->ListFilterChainWrite, pFilter, VDFILTER, ListNodeChainWrite)
660 {
661 rc = pFilter->pBackend->pfnFilterWrite(pFilter->pvBackendData, uOffset, cbWrite, pIoCtx);
662 if (RT_FAILURE(rc))
663 break;
664 /* Reset S/G buffer for the next filter. */
665 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
666 }
667
668 return rc;
669}
670
671/**
672 * Applies the filter chain to the given read request.
673 *
674 * @returns VBox status code.
675 * @param pDisk The HDD container.
676 * @param uOffset The start offset of the read.
677 * @param cbRead Number of bytes read.
678 * @param pIoCtx The I/O context associated with the request.
679 */
680static int vdFilterChainApplyRead(PVDISK pDisk, uint64_t uOffset, size_t cbRead,
681 PVDIOCTX pIoCtx)
682{
683 int rc = VINF_SUCCESS;
684
685 VD_IS_LOCKED(pDisk);
686
687 /* Reset buffer before starting. */
688 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
689
690 PVDFILTER pFilter;
691 RTListForEach(&pDisk->ListFilterChainRead, pFilter, VDFILTER, ListNodeChainRead)
692 {
693 rc = pFilter->pBackend->pfnFilterRead(pFilter->pvBackendData, uOffset, cbRead, pIoCtx);
694 if (RT_FAILURE(rc))
695 break;
696 /* Reset S/G buffer for the next filter. */
697 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
698 }
699
700 return rc;
701}
702
703DECLINLINE(void) vdIoCtxRootComplete(PVDISK pDisk, PVDIOCTX pIoCtx)
704{
705 if ( RT_SUCCESS(pIoCtx->rcReq)
706 && pIoCtx->enmTxDir == VDIOCTXTXDIR_READ)
707 pIoCtx->rcReq = vdFilterChainApplyRead(pDisk, pIoCtx->Req.Io.uOffsetXferOrig,
708 pIoCtx->Req.Io.cbXferOrig, pIoCtx);
709
710 pIoCtx->Type.Root.pfnComplete(pIoCtx->Type.Root.pvUser1,
711 pIoCtx->Type.Root.pvUser2,
712 pIoCtx->rcReq);
713}
714
715/**
716 * Initialize the structure members of a given I/O context.
717 */
718DECLINLINE(void) vdIoCtxInit(PVDIOCTX pIoCtx, PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
719 uint64_t uOffset, size_t cbTransfer, PVDIMAGE pImageStart,
720 PCRTSGBUF pSgBuf, void *pvAllocation,
721 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
722{
723 pIoCtx->pDisk = pDisk;
724 pIoCtx->enmTxDir = enmTxDir;
725 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTransfer; Assert((uint32_t)cbTransfer == cbTransfer);
726 pIoCtx->Req.Io.uOffset = uOffset;
727 pIoCtx->Req.Io.cbTransfer = cbTransfer;
728 pIoCtx->Req.Io.pImageStart = pImageStart;
729 pIoCtx->Req.Io.pImageCur = pImageStart;
730 pIoCtx->Req.Io.cbBufClear = 0;
731 pIoCtx->Req.Io.pImageParentOverride = NULL;
732 pIoCtx->Req.Io.uOffsetXferOrig = uOffset;
733 pIoCtx->Req.Io.cbXferOrig = cbTransfer;
734 pIoCtx->cDataTransfersPending = 0;
735 pIoCtx->cMetaTransfersPending = 0;
736 pIoCtx->fComplete = false;
737 pIoCtx->fFlags = fFlags;
738 pIoCtx->pvAllocation = pvAllocation;
739 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
740 pIoCtx->pfnIoCtxTransferNext = NULL;
741 pIoCtx->rcReq = VINF_SUCCESS;
742 pIoCtx->pIoCtxParent = NULL;
743
744 /* There is no S/G list for a flush request. */
745 if ( enmTxDir != VDIOCTXTXDIR_FLUSH
746 && enmTxDir != VDIOCTXTXDIR_DISCARD)
747 RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pSgBuf);
748 else
749 memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF));
750}
751
752/**
753 * Internal: Tries to read the desired range from the given cache.
754 *
755 * @returns VBox status code.
756 * @retval VERR_VD_BLOCK_FREE if the block is not in the cache.
757 * pcbRead will be set to the number of bytes not in the cache.
758 * Everything thereafter might be in the cache.
759 * @param pCache The cache to read from.
760 * @param uOffset Offset of the virtual disk to read.
761 * @param cbRead How much to read.
762 * @param pIoCtx The I/O context to read into.
763 * @param pcbRead Where to store the number of bytes actually read.
764 * On success this indicates the number of bytes read from the cache.
765 * If VERR_VD_BLOCK_FREE is returned this gives the number of bytes
766 * which are not in the cache.
767 * In both cases everything beyond this value
768 * might or might not be in the cache.
769 */
770static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
771 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
772{
773 int rc = VINF_SUCCESS;
774
775 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbRead=%zu pcbRead=%#p\n",
776 pCache, uOffset, pIoCtx, cbRead, pcbRead));
777
778 AssertPtr(pCache);
779 AssertPtr(pcbRead);
780
781 rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, cbRead,
782 pIoCtx, pcbRead);
783
784 LogFlowFunc(("returns rc=%Rrc pcbRead=%zu\n", rc, *pcbRead));
785 return rc;
786}
787
788/**
789 * Internal: Writes data for the given block into the cache.
790 *
791 * @returns VBox status code.
792 * @param pCache The cache to write to.
793 * @param uOffset Offset of the virtual disk to write to the cache.
794 * @param cbWrite How much to write.
795 * @param pIoCtx The I/O context to write from.
796 * @param pcbWritten How much data could be written, optional.
797 */
798static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, size_t cbWrite,
799 PVDIOCTX pIoCtx, size_t *pcbWritten)
800{
801 int rc = VINF_SUCCESS;
802
803 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbWrite=%zu pcbWritten=%#p\n",
804 pCache, uOffset, pIoCtx, cbWrite, pcbWritten));
805
806 AssertPtr(pCache);
807 AssertPtr(pIoCtx);
808 Assert(cbWrite > 0);
809
810 if (pcbWritten)
811 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
812 pIoCtx, pcbWritten);
813 else
814 {
815 size_t cbWritten = 0;
816
817 do
818 {
819 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
820 pIoCtx, &cbWritten);
821 uOffset += cbWritten;
822 cbWrite -= cbWritten;
823 } while ( cbWrite
824 && ( RT_SUCCESS(rc)
825 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
826 }
827
828 LogFlowFunc(("returns rc=%Rrc pcbWritten=%zu\n",
829 rc, pcbWritten ? *pcbWritten : cbWrite));
830 return rc;
831}
832
833/**
834 * Creates a new empty discard state.
835 *
836 * @returns Pointer to the new discard state or NULL if out of memory.
837 */
838static PVDDISCARDSTATE vdDiscardStateCreate(void)
839{
840 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
841
842 if (pDiscard)
843 {
844 RTListInit(&pDiscard->ListLru);
845 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
846 if (!pDiscard->pTreeBlocks)
847 {
848 RTMemFree(pDiscard);
849 pDiscard = NULL;
850 }
851 }
852
853 return pDiscard;
854}
855
856/**
857 * Removes the least recently used blocks from the waiting list until
858 * the new value is reached.
859 *
860 * @returns VBox status code.
861 * @param pDisk VD disk container.
862 * @param pDiscard The discard state.
863 * @param cbDiscardingNew How many bytes should be waiting on success.
864 * The number of bytes waiting can be less.
865 */
866static int vdDiscardRemoveBlocks(PVDISK pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
867{
868 int rc = VINF_SUCCESS;
869
870 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
871 pDisk, pDiscard, cbDiscardingNew));
872
873 while (pDiscard->cbDiscarding > cbDiscardingNew)
874 {
875 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
876
877 Assert(!RTListIsEmpty(&pDiscard->ListLru));
878
879 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
880 uint64_t offStart = pBlock->Core.Key;
881 uint32_t idxStart = 0;
882 size_t cbLeft = pBlock->cbDiscard;
883 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
884 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
885
886 while (cbLeft > 0)
887 {
888 int32_t idxEnd;
889 size_t cbThis = cbLeft;
890
891 if (fAllocated)
892 {
893 /* Check for the first unallocated bit. */
894 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
895 if (idxEnd != -1)
896 {
897 cbThis = (idxEnd - idxStart) * 512;
898 fAllocated = false;
899 }
900 }
901 else
902 {
903 /* Mark as unused and check for the first set bit. */
904 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
905 if (idxEnd != -1)
906 cbThis = (idxEnd - idxStart) * 512;
907
908
909 VDIOCTX IoCtx;
910 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
911 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
912 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
913 &IoCtx, offStart, cbThis, NULL,
914 NULL, &cbThis, NULL,
915 VD_DISCARD_MARK_UNUSED);
916 if (RT_FAILURE(rc))
917 break;
918
919 fAllocated = true;
920 }
921
922 idxStart = idxEnd;
923 offStart += cbThis;
924 cbLeft -= cbThis;
925 }
926
927 if (RT_FAILURE(rc))
928 break;
929
930 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
931 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
932 RTListNodeRemove(&pBlock->NodeLru);
933
934 pDiscard->cbDiscarding -= pBlock->cbDiscard;
935 RTMemFree(pBlock->pbmAllocated);
936 RTMemFree(pBlock);
937 }
938
939 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
940
941 LogFlowFunc(("returns rc=%Rrc\n", rc));
942 return rc;
943}
944
945/**
946 * Destroys the current discard state, writing any waiting blocks to the image.
947 *
948 * @returns VBox status code.
949 * @param pDisk VD disk container.
950 */
951static int vdDiscardStateDestroy(PVDISK pDisk)
952{
953 int rc = VINF_SUCCESS;
954
955 if (pDisk->pDiscard)
956 {
957 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
958 AssertRC(rc);
959 RTMemFree(pDisk->pDiscard->pTreeBlocks);
960 RTMemFree(pDisk->pDiscard);
961 pDisk->pDiscard = NULL;
962 }
963
964 return rc;
965}
966
967/**
968 * Marks the given range as allocated in the image.
969 * Required if there are discards in progress and a write to a block which can get discarded
970 * is written to.
971 *
972 * @returns VBox status code.
973 * @param pDisk VD container data.
974 * @param uOffset First byte to mark as allocated.
975 * @param cbRange Number of bytes to mark as allocated.
976 */
977static int vdDiscardSetRangeAllocated(PVDISK pDisk, uint64_t uOffset, size_t cbRange)
978{
979 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
980 int rc = VINF_SUCCESS;
981
982 if (pDiscard)
983 {
984 do
985 {
986 size_t cbThisRange = cbRange;
987 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
988
989 if (pBlock)
990 {
991 int32_t idxStart, idxEnd;
992
993 Assert(!(cbThisRange % 512));
994 Assert(!((uOffset - pBlock->Core.Key) % 512));
995
996 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
997
998 idxStart = (uOffset - pBlock->Core.Key) / 512;
999 idxEnd = idxStart + (int32_t)(cbThisRange / 512);
1000 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
1001 }
1002 else
1003 {
1004 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
1005 if (pBlock)
1006 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
1007 }
1008
1009 Assert(cbRange >= cbThisRange);
1010
1011 uOffset += cbThisRange;
1012 cbRange -= cbThisRange;
1013 } while (cbRange != 0);
1014 }
1015
1016 return rc;
1017}
1018
1019DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1020 uint64_t uOffset, size_t cbTransfer,
1021 PVDIMAGE pImageStart,PCRTSGBUF pSgBuf,
1022 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1023 uint32_t fFlags)
1024{
1025 PVDIOCTX pIoCtx = NULL;
1026
1027 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1028 if (RT_LIKELY(pIoCtx))
1029 {
1030 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1031 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1032 }
1033
1034 return pIoCtx;
1035}
1036
1037DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1038 uint64_t uOffset, size_t cbTransfer,
1039 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
1040 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1041 void *pvUser1, void *pvUser2,
1042 void *pvAllocation,
1043 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1044 uint32_t fFlags)
1045{
1046 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1047 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1048
1049 if (RT_LIKELY(pIoCtx))
1050 {
1051 pIoCtx->pIoCtxParent = NULL;
1052 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1053 pIoCtx->Type.Root.pvUser1 = pvUser1;
1054 pIoCtx->Type.Root.pvUser2 = pvUser2;
1055 }
1056
1057 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
1058 return pIoCtx;
1059}
1060
1061DECLINLINE(void) vdIoCtxDiscardInit(PVDIOCTX pIoCtx, PVDISK pDisk, PCRTRANGE paRanges,
1062 unsigned cRanges, PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1063 void *pvUser1, void *pvUser2, void *pvAllocation,
1064 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
1065{
1066 pIoCtx->pIoCtxNext = NULL;
1067 pIoCtx->pDisk = pDisk;
1068 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;
1069 pIoCtx->cDataTransfersPending = 0;
1070 pIoCtx->cMetaTransfersPending = 0;
1071 pIoCtx->fComplete = false;
1072 pIoCtx->fFlags = fFlags;
1073 pIoCtx->pvAllocation = pvAllocation;
1074 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
1075 pIoCtx->pfnIoCtxTransferNext = NULL;
1076 pIoCtx->rcReq = VINF_SUCCESS;
1077 pIoCtx->Req.Discard.paRanges = paRanges;
1078 pIoCtx->Req.Discard.cRanges = cRanges;
1079 pIoCtx->Req.Discard.idxRange = 0;
1080 pIoCtx->Req.Discard.cbDiscardLeft = 0;
1081 pIoCtx->Req.Discard.offCur = 0;
1082 pIoCtx->Req.Discard.cbThisDiscard = 0;
1083
1084 pIoCtx->pIoCtxParent = NULL;
1085 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1086 pIoCtx->Type.Root.pvUser1 = pvUser1;
1087 pIoCtx->Type.Root.pvUser2 = pvUser2;
1088}
1089
1090DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVDISK pDisk, PCRTRANGE paRanges,
1091 unsigned cRanges,
1092 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1093 void *pvUser1, void *pvUser2,
1094 void *pvAllocation,
1095 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1096 uint32_t fFlags)
1097{
1098 PVDIOCTX pIoCtx = NULL;
1099
1100 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1101 if (RT_LIKELY(pIoCtx))
1102 {
1103 vdIoCtxDiscardInit(pIoCtx, pDisk, paRanges, cRanges, pfnComplete, pvUser1,
1104 pvUser2, pvAllocation, pfnIoCtxTransfer, fFlags);
1105 }
1106
1107 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
1108 return pIoCtx;
1109}
1110
1111DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1112 uint64_t uOffset, size_t cbTransfer,
1113 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
1114 PVDIOCTX pIoCtxParent, size_t cbTransferParent,
1115 size_t cbWriteParent, void *pvAllocation,
1116 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
1117{
1118 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1119 pSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE);
1120
1121 AssertPtr(pIoCtxParent);
1122 Assert(!pIoCtxParent->pIoCtxParent);
1123
1124 if (RT_LIKELY(pIoCtx))
1125 {
1126 pIoCtx->pIoCtxParent = pIoCtxParent;
1127 pIoCtx->Type.Child.uOffsetSaved = uOffset;
1128 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
1129 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;
1130 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;
1131 }
1132
1133 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
1134 return pIoCtx;
1135}
1136
1137DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
1138{
1139 PVDIOTASK pIoTask = NULL;
1140
1141 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1142 if (pIoTask)
1143 {
1144 pIoTask->pIoStorage = pIoStorage;
1145 pIoTask->pfnComplete = pfnComplete;
1146 pIoTask->pvUser = pvUser;
1147 pIoTask->fMeta = false;
1148 pIoTask->Type.User.cbTransfer = cbTransfer;
1149 pIoTask->Type.User.pIoCtx = pIoCtx;
1150 }
1151
1152 return pIoTask;
1153}
1154
1155DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
1156{
1157 PVDIOTASK pIoTask = NULL;
1158
1159 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1160 if (pIoTask)
1161 {
1162 pIoTask->pIoStorage = pIoStorage;
1163 pIoTask->pfnComplete = pfnComplete;
1164 pIoTask->pvUser = pvUser;
1165 pIoTask->fMeta = true;
1166 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
1167 }
1168
1169 return pIoTask;
1170}
1171
1172DECLINLINE(void) vdIoCtxFree(PVDISK pDisk, PVDIOCTX pIoCtx)
1173{
1174 Log(("Freeing I/O context %#p\n", pIoCtx));
1175
1176 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
1177 {
1178 if (pIoCtx->pvAllocation)
1179 RTMemFree(pIoCtx->pvAllocation);
1180#ifdef DEBUG
1181 memset(&pIoCtx->pDisk, 0xff, sizeof(void *));
1182#endif
1183 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
1184 }
1185}
1186
/**
 * Returns the given I/O task to the disk's task memory cache.
 * In debug builds the task is poisoned first to catch use after free.
 */
DECLINLINE(void) vdIoTaskFree(PVDISK pDisk, PVDIOTASK pIoTask)
{
#ifdef DEBUG
    memset(pIoTask, 0xff, sizeof(VDIOTASK));
#endif
    RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
}
1194
/**
 * Resets a child I/O context back to the state saved when it was allocated,
 * so the transfer can be restarted from the beginning.
 */
DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
{
    AssertPtr(pIoCtx->pIoCtxParent);

    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;
    /* cbTransferLeft is only 32 bits wide; assert the saved value fits. */
    pIoCtx->Req.Io.cbTransferLeft = (uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved;
    Assert((uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved == pIoCtx->Type.Child.cbTransferLeftSaved);
}
1204
1205DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
1206{
1207 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_UOFFSETOF_DYN(VDMETAXFER, abData[cb]));
1208
1209 if (RT_LIKELY(pMetaXfer))
1210 {
1211 pMetaXfer->Core.Key = uOffset;
1212 pMetaXfer->Core.KeyLast = uOffset + cb - 1;
1213 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;
1214 pMetaXfer->cbMeta = cb;
1215 pMetaXfer->pIoStorage = pIoStorage;
1216 pMetaXfer->cRefs = 0;
1217 pMetaXfer->pbDataShw = NULL;
1218 RTListInit(&pMetaXfer->ListIoCtxWaiting);
1219 RTListInit(&pMetaXfer->ListIoCtxShwWrites);
1220 }
1221 return pMetaXfer;
1222}
1223
/**
 * Pushes the given I/O context onto a lock-free singly linked list head.
 *
 * Concurrent producers may push at the same time; the head is updated with a
 * compare-and-swap retry loop. Consumers grab the whole list atomically by
 * exchanging the head with NULL (see vdDiskProcessWaitingIoCtx).
 */
DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
{
    /* Put it on the waiting list. */
    PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
    PVDIOCTX pHeadOld;
    pIoCtx->pIoCtxNext = pNext;
    while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
    {
        /* Head changed under us; retry with the head we actually observed. */
        pNext = pHeadOld;
        Assert(pNext != pIoCtx);
        pIoCtx->pIoCtxNext = pNext;
        ASMNopPause();
    }
}
1238
/**
 * Marks the given root I/O context as blocked and queues it on the disk's
 * blocked list; it is resumed later by vdDiskProcessBlockedIoCtx().
 */
DECLINLINE(void) vdIoCtxDefer(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    LogFlowFunc(("Deferring I/O context pIoCtx=%#p\n", pIoCtx));

    /* Only root contexts which are not already blocked may be deferred. */
    Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
    pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
}
1247
/**
 * Copies up to @a cbData bytes between the S/G buffers of two I/O contexts.
 * @returns Number of bytes actually copied.
 */
static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
{
    return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
}
1252
#if 0 /* unused */
/**
 * Compares up to @a cbData bytes of the S/G buffers of two I/O contexts.
 */
static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
{
    return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
}
#endif
1259
/**
 * Copies up to @a cbData bytes from @a pbData into the context's S/G buffer.
 * @returns Number of bytes actually copied.
 */
static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
{
    return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
}
1264
/**
 * Copies up to @a cbData bytes out of the context's S/G buffer into @a pbData.
 * @returns Number of bytes actually copied.
 */
static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
{
    return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
}
1269
/**
 * Fills up to @a cbData bytes of the context's S/G buffer with the byte @a ch.
 * @returns Number of bytes actually set.
 */
static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
{
    return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
}
1274
1275/**
1276 * Returns whether the given I/O context has completed.
1277 *
1278 * @returns Flag whether the I/O context is complete.
1279 * @param pIoCtx The I/O context to check.
1280 */
1281DECLINLINE(bool) vdIoCtxIsComplete(PVDIOCTX pIoCtx)
1282{
1283 if ( !pIoCtx->cMetaTransfersPending
1284 && !pIoCtx->cDataTransfersPending
1285 && !pIoCtx->pfnIoCtxTransfer)
1286 return true;
1287
1288 /*
1289 * We complete the I/O context in case of an error
1290 * if there is no I/O task pending.
1291 */
1292 if ( RT_FAILURE(pIoCtx->rcReq)
1293 && !pIoCtx->cMetaTransfersPending
1294 && !pIoCtx->cDataTransfersPending)
1295 return true;
1296
1297 return false;
1298}
1299
1300/**
1301 * Returns whether the given I/O context is blocked due to a metadata transfer
1302 * or because the backend blocked it.
1303 *
1304 * @returns Flag whether the I/O context is blocked.
1305 * @param pIoCtx The I/O context to check.
1306 */
1307DECLINLINE(bool) vdIoCtxIsBlocked(PVDIOCTX pIoCtx)
1308{
1309 /* Don't change anything if there is a metadata transfer pending or we are blocked. */
1310 if ( pIoCtx->cMetaTransfersPending
1311 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1312 return true;
1313
1314 return false;
1315}
1316
/**
 * Process the I/O context, core method which assumes that the I/O context
 * acquired the lock.
 *
 * Drives the context's transfer function chain as far as possible and
 * translates the outcome into one of the VD async status codes.
 *
 * @returns VINF_VD_ASYNC_IO_FINISHED if the context is done (successfully or
 *          with an error recorded in rcReq), VERR_VD_ASYNC_IO_IN_PROGRESS if
 *          work is still outstanding.
 * @param   pIoCtx    I/O context to process.
 */
static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pIoCtx->pDisk);

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    if (!vdIoCtxIsComplete(pIoCtx))
    {
        if (!vdIoCtxIsBlocked(pIoCtx))
        {
            if (pIoCtx->pfnIoCtxTransfer)
            {
                /* Call the transfer function advancing to the next while there is no error. */
                while (   pIoCtx->pfnIoCtxTransfer
                       && !pIoCtx->cMetaTransfersPending
                       && RT_SUCCESS(rc))
                {
                    LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
                    rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);

                    /* Advance to the next part of the transfer if the current one succeeded. */
                    if (RT_SUCCESS(rc))
                    {
                        pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
                        pIoCtx->pfnIoCtxTransferNext = NULL;
                    }
                }
            }

            /* Finished: nothing pending, nothing blocked, no error. */
            if (   RT_SUCCESS(rc)
                && !pIoCtx->cMetaTransfersPending
                && !pIoCtx->cDataTransfersPending
                && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
                rc = VINF_VD_ASYNC_IO_FINISHED;
            /* Still in flight: success with pending work, or one of the
             * "try again later" statuses from the transfer function. */
            else if (   RT_SUCCESS(rc)
                     || rc == VERR_VD_NOT_ENOUGH_METADATA
                     || rc == VERR_VD_IOCTX_HALT)
                rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
            else if (   RT_FAILURE(rc)
                     && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
            {
                /* Record the error in the context, but only if it doesn't hold
                 * an error already (compare-exchange against VINF_SUCCESS). */
                ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);

                /*
                 * The I/O context completed if we have an error and there is no data
                 * or meta data transfer pending.
                 */
                if (   !pIoCtx->cMetaTransfersPending
                    && !pIoCtx->cDataTransfersPending)
                    rc = VINF_VD_ASYNC_IO_FINISHED;
                else
                    rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
            }
        }
        else
            rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    }
    else
        rc = VINF_VD_ASYNC_IO_FINISHED;

    LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
                 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
                 pIoCtx->fComplete));

    return rc;
}
1392
/**
 * Processes the list of waiting I/O contexts.
 *
 * @returns VBox status code, only valid if pIoCtxRc is not NULL, treat as void
 *          function otherwise.
 * @param   pDisk    The disk structure.
 * @param   pIoCtxRc An I/O context handle which waits on the list. When processed
 *                   The status code is returned. NULL if there is no I/O context
 *                   to return the status code for.
 */
static int vdDiskProcessWaitingIoCtx(PVDISK pDisk, PVDIOCTX pIoCtxRc)
{
    int rc = VERR_VD_ASYNC_IO_IN_PROGRESS;

    LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));

    VD_IS_LOCKED(pDisk);

    /* Get the waiting list and process it in FIFO order. */
    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);

    /* Reverse it (contexts were pushed LIFO, see vdIoCtxAddToWaitingList). */
    PVDIOCTX pCur = pIoCtxHead;
    pIoCtxHead = NULL;
    while (pCur)
    {
        PVDIOCTX pInsert = pCur;
        pCur = pCur->pIoCtxNext;
        pInsert->pIoCtxNext = pIoCtxHead;
        pIoCtxHead = pInsert;
    }

    /* Process now. */
    pCur = pIoCtxHead;
    while (pCur)
    {
        int rcTmp;
        PVDIOCTX pTmp = pCur;

        pCur = pCur->pIoCtxNext;
        pTmp->pIoCtxNext = NULL;

        /*
         * Need to clear the sync flag here if there is a new I/O context
         * with it set and the context is not given in pIoCtxRc.
         * This happens most likely on a different thread and that one shouldn't
         * process the context synchronously.
         *
         * The thread who issued the context will wait on the event semaphore
         * anyway which is signalled when the completion handler is called.
         */
        if (   pTmp->fFlags & VDIOCTX_FLAGS_SYNC
            && pTmp != pIoCtxRc)
            pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;

        rcTmp = vdIoCtxProcessLocked(pTmp);
        if (pTmp == pIoCtxRc)
        {
            /* Completed reads run through the read filter chain before the
             * status is reported back. */
            if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
                && RT_SUCCESS(pTmp->rcReq)
                && pTmp->enmTxDir == VDIOCTXTXDIR_READ)
            {
                int rc2 = vdFilterChainApplyRead(pDisk, pTmp->Req.Io.uOffsetXferOrig,
                                                 pTmp->Req.Io.cbXferOrig, pTmp);
                if (RT_FAILURE(rc2))
                    rcTmp = rc2;
            }

            /* The given I/O context was processed, pass the return code to the caller. */
            if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
                && (pTmp->fFlags & VDIOCTX_FLAGS_SYNC))
                rc = pTmp->rcReq;
            else
                rc = rcTmp;
        }
        else if (   rcTmp == VINF_VD_ASYNC_IO_FINISHED
                 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
        {
            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
            vdThreadFinishWrite(pDisk);

            /* Capture the flag before invoking the completion callback. */
            bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
            vdIoCtxRootComplete(pDisk, pTmp);

            if (fFreeCtx)
                vdIoCtxFree(pDisk, pTmp);
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
1485
/**
 * Processes the list of blocked I/O contexts.
 *
 * Clears the blocked flag on each parked root context and re-runs it; contexts
 * which finish have their completion callback invoked.
 *
 * @returns nothing.
 * @param   pDisk    The disk structure.
 */
static void vdDiskProcessBlockedIoCtx(PVDISK pDisk)
{
    LogFlowFunc(("pDisk=%#p\n", pDisk));

    VD_IS_LOCKED(pDisk);

    /* Get the waiting list and process it in FIFO order. */
    PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);

    /* Reverse it (contexts were pushed LIFO, see vdIoCtxAddToWaitingList). */
    PVDIOCTX pCur = pIoCtxHead;
    pIoCtxHead = NULL;
    while (pCur)
    {
        PVDIOCTX pInsert = pCur;
        pCur = pCur->pIoCtxNext;
        pInsert->pIoCtxNext = pIoCtxHead;
        pIoCtxHead = pInsert;
    }

    /* Process now. */
    pCur = pIoCtxHead;
    while (pCur)
    {
        int rc;
        PVDIOCTX pTmp = pCur;

        pCur = pCur->pIoCtxNext;
        pTmp->pIoCtxNext = NULL;

        /* Only blocked root contexts may be on this list. */
        Assert(!pTmp->pIoCtxParent);
        Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
        pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;

        rc = vdIoCtxProcessLocked(pTmp);
        if (   rc == VINF_VD_ASYNC_IO_FINISHED
            && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
        {
            LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
            vdThreadFinishWrite(pDisk);

            /* Capture the flag before invoking the completion callback. */
            bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
            vdIoCtxRootComplete(pDisk, pTmp);
            if (fFreeCtx)
                vdIoCtxFree(pDisk, pTmp);
        }
    }

    LogFlowFunc(("returns\n"));
}
1542
/**
 * Processes the I/O context trying to lock the critical section.
 * The context is deferred if the critical section is busy.
 *
 * @returns VBox status code.
 * @param   pIoCtx    The I/O context to process.
 */
static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;

    Log(("Defer pIoCtx=%#p\n", pIoCtx));

    /* Put it on the waiting list first. Even if we get the lock below, the
     * context is picked up from this list when the lock is released. */
    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);

    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    {
        /* Leave it again, the context will be processed just before leaving the lock. */
        LogFlowFunc(("Successfully acquired the lock\n"));
        rc = vdDiskUnlock(pDisk, pIoCtx);
    }
    else
    {
        /* Someone else holds the lock; the current owner will process the list. */
        LogFlowFunc(("Lock is held\n"));
        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    }

    return rc;
}
1574
/**
 * Process the I/O context in a synchronous manner, waiting
 * for it to complete.
 *
 * @returns VBox status code of the completed request.
 * @param   pIoCtx          The sync I/O context.
 * @param   hEventComplete  Event semaphore to wait on for completion.
 */
static int vdIoCtxProcessSync(PVDIOCTX pIoCtx, RTSEMEVENT hEventComplete)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;

    LogFlowFunc(("pIoCtx=%p\n", pIoCtx));

    AssertMsg(pIoCtx->fFlags & (VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE),
              ("I/O context is not marked as synchronous\n"));

    rc = vdIoCtxProcessTryLockDefer(pIoCtx);
    if (rc == VINF_VD_ASYNC_IO_FINISHED)
        rc = VINF_SUCCESS;

    /* Not finished yet: block until the completion callback signals the semaphore. */
    if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
    {
        rc = RTSemEventWait(hEventComplete, RT_INDEFINITE_WAIT);
        AssertRC(rc);
    }

    /* Report the overall request status. The free is a no-op for contexts
     * carrying VDIOCTX_FLAGS_DONT_FREE (e.g. stack allocated ones). */
    rc = pIoCtx->rcReq;
    vdIoCtxFree(pDisk, pIoCtx);

    return rc;
}
1608
/** Returns whether the given I/O context is the current disk lock owner. */
DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    return pDisk->pIoCtxLockOwner == pIoCtx;
}
1613
/**
 * Tries to make the given I/O context the disk's lock owner.
 *
 * @returns VINF_SUCCESS if ownership was acquired,
 *          VERR_VD_ASYNC_IO_IN_PROGRESS if another context owns the lock and
 *          @a pIoCtx was deferred onto the blocked list.
 */
static int vdIoCtxLockDisk(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));

    /* Take ownership only if there is no current owner (CAS from NIL). */
    if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
    {
        Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
        vdIoCtxDefer(pDisk, pIoCtx);
        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    }

    LogFlowFunc(("returns -> %Rrc\n", rc));
    return rc;
}
1632
/**
 * Releases disk lock ownership held by @a pIoCtx and optionally resumes
 * contexts parked on the blocked list.
 */
static void vdIoCtxUnlockDisk(PVDISK pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
{
    RT_NOREF1(pIoCtx); /* Only used by the assertion below in strict builds. */
    LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
                 pDisk, pIoCtx, fProcessBlockedReqs));

    VD_IS_LOCKED(pDisk);

    LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
    Assert(pDisk->pIoCtxLockOwner == pIoCtx);
    ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);

    if (fProcessBlockedReqs)
    {
        /* Process any blocked writes if the current request didn't cause another growing allocation. */
        vdDiskProcessBlockedIoCtx(pDisk);
    }

    LogFlowFunc(("returns\n"));
}
1653
1654/**
1655 * Internal: Reads a given amount of data from the image chain of the disk.
1656 **/
1657static int vdDiskReadHelper(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1658 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
1659{
1660 RT_NOREF1(pDisk);
1661 int rc = VINF_SUCCESS;
1662 size_t cbThisRead = cbRead;
1663
1664 AssertPtr(pcbThisRead);
1665
1666 *pcbThisRead = 0;
1667
1668 /*
1669 * Try to read from the given image.
1670 * If the block is not allocated read from override chain if present.
1671 */
1672 rc = pImage->Backend->pfnRead(pImage->pBackendData,
1673 uOffset, cbThisRead, pIoCtx,
1674 &cbThisRead);
1675
1676 if (rc == VERR_VD_BLOCK_FREE)
1677 {
1678 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
1679 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
1680 pCurrImage = pCurrImage->pPrev)
1681 {
1682 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1683 uOffset, cbThisRead, pIoCtx,
1684 &cbThisRead);
1685 }
1686 }
1687
1688 if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE)
1689 *pcbThisRead = cbThisRead;
1690
1691 return rc;
1692}
1693
/**
 * internal: read the specified amount of data in whatever blocks the backend
 * will give us - async version.
 *
 * Transfer function for read contexts; walks the image chain (optionally via
 * the attached cache), zero-fills free blocks when requested and saves its
 * position when it has to halt.
 *
 * @returns VBox status code; VERR_VD_BLOCK_FREE when no zero-filling was
 *          requested and no image had data for the range.
 */
static DECLCALLBACK(int) vdReadHelperAsync(PVDIOCTX pIoCtx)
{
    int rc;
    PVDISK pDisk = pIoCtx->pDisk;
    size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
    uint64_t uOffset = pIoCtx->Req.Io.uOffset;
    PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;
    PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
    unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;
    size_t cbThisRead;

    /*
     * Check whether there is a full block write in progress which was not allocated.
     * Defer I/O if the range interferes but only if it does not belong to the
     * write doing the allocation.
     */
    if (   pDisk->pIoCtxLockOwner != NIL_VDIOCTX
        && uOffset >= pDisk->uOffsetStartLocked
        && uOffset < pDisk->uOffsetEndLocked
        && (   !pIoCtx->pIoCtxParent
            || pIoCtx->pIoCtxParent != pDisk->pIoCtxLockOwner))
    {
        Log(("Interferring read while allocating a new block => deferring read\n"));
        vdIoCtxDefer(pDisk, pIoCtx);
        return VERR_VD_ASYNC_IO_IN_PROGRESS;
    }

    /* Loop until all reads started or we have a backend which needs to read metadata. */
    do
    {
        /* Search for image with allocated block. Do not attempt to read more
         * than the previous reads marked as valid. Otherwise this would return
         * stale data when different block sizes are used for the images. */
        cbThisRead = cbToRead;

        if (   pDisk->pCache
            && !pImageParentOverride)
        {
            /* Cache first; on a cache miss read from the chain and optionally
             * populate the cache with the result. */
            rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
                                   pIoCtx, &cbThisRead);
            if (rc == VERR_VD_BLOCK_FREE)
            {
                rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
                                      pIoCtx, &cbThisRead);

                /* If the read was successful, write the data back into the cache. */
                if (   RT_SUCCESS(rc)
                    && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UPDATE_CACHE)
                {
                    rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
                                            pIoCtx, NULL);
                }
            }
        }
        else
        {
            /*
             * Try to read from the given image.
             * If the block is not allocated read from override chain if present.
             */
            rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
                                              uOffset, cbThisRead, pIoCtx,
                                              &cbThisRead);

            if (   rc == VERR_VD_BLOCK_FREE
                && cImagesRead != 1)
            {
                /* cImagesRead limits how many images are consulted; 0 means
                 * no limit (see vdReadHelperEx). */
                unsigned cImagesToProcess = cImagesRead;

                pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
                pIoCtx->Req.Io.pImageParentOverride = NULL;

                while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
                {
                    rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
                                                      uOffset, cbThisRead,
                                                      pIoCtx, &cbThisRead);
                    if (cImagesToProcess == 1)
                        break;
                    else if (cImagesToProcess > 0)
                        cImagesToProcess--;

                    if (rc == VERR_VD_BLOCK_FREE)
                        pCurrImage = pCurrImage->pPrev;
                }
            }
        }

        /* The task state will be updated on success already, don't do it here!. */
        if (rc == VERR_VD_BLOCK_FREE)
        {
            /* No image in the chain contains the data for the block. */
            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisRead); Assert(cbThisRead == (uint32_t)cbThisRead);

            /* Fill the free space with 0 if we are told to do so
             * or a previous read returned valid data. */
            if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
                vdIoCtxSet(pIoCtx, '\0', cbThisRead);
            else
                pIoCtx->Req.Io.cbBufClear += cbThisRead;

            if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
                rc = VINF_VD_NEW_ZEROED_BLOCK;
            else
                rc = VINF_SUCCESS;
        }
        else if (rc == VERR_VD_IOCTX_HALT)
        {
            /* Backend asked us to stop; remember progress and mark blocked. */
            uOffset += cbThisRead;
            cbToRead -= cbThisRead;
            pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
        }
        else if (   RT_SUCCESS(rc)
                 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        {
            /* First not free block, fill the space before with 0. */
            if (   pIoCtx->Req.Io.cbBufClear
                && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
            {
                RTSGBUF SgBuf;
                RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
                RTSgBufReset(&SgBuf);
                RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
                pIoCtx->Req.Io.cbBufClear = 0;
                pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
            }
            rc = VINF_SUCCESS;
        }

        if (RT_FAILURE(rc))
            break;

        cbToRead -= cbThisRead;
        uOffset += cbThisRead;
        pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
    } while (cbToRead != 0 && RT_SUCCESS(rc));

    if (   rc == VERR_VD_NOT_ENOUGH_METADATA
        || rc == VERR_VD_IOCTX_HALT)
    {
        /* Save the current state. */
        pIoCtx->Req.Io.uOffset = uOffset;
        pIoCtx->Req.Io.cbTransfer = cbToRead;
        pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
    }

    return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
           ? VERR_VD_BLOCK_FREE
           : rc;
}
1848
/**
 * internal: parent image read wrapper for compacting.
 *
 * @a pvUser is a PVDPARENTSTATEDESC (see the cast below). Performs a
 * synchronous read through the async read helper with a stack context.
 */
static DECLCALLBACK(int) vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
                                      size_t cbRead)
{
    PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;

    /** @todo
     * Only used for compaction so far which is not possible to mix with async I/O.
     * Needs to be changed if we want to support online compaction of images.
     */
    bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
    AssertMsgReturn(!fLocked,
                    ("Calling synchronous parent read while another thread holds the disk lock\n"),
                    VERR_VD_INVALID_STATE);

    /* Fake an I/O context. */
    RTSGSEG Segment;
    RTSGBUF SgBuf;
    VDIOCTX IoCtx;

    Segment.pvSeg = pvBuf;
    Segment.cbSeg = cbRead;
    RTSgBufInit(&SgBuf, &Segment, 1);
    vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
                &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
    int rc = vdReadHelperAsync(&IoCtx);
    /* Drop the disk lock taken above. */
    ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
    return rc;
}
1880
1881/**
1882 * Extended version of vdReadHelper(), implementing certain optimizations
1883 * for image cloning.
1884 *
1885 * @returns VBox status code.
1886 * @param pDisk The disk to read from.
1887 * @param pImage The image to start reading from.
1888 * @param pImageParentOverride The parent image to read from
1889 * if the starting image returns a free block.
1890 * If NULL is passed the real parent of the image
1891 * in the chain is used.
1892 * @param uOffset Offset in the disk to start reading from.
1893 * @param pvBuf Where to store the read data.
1894 * @param cbRead How much to read.
1895 * @param fZeroFreeBlocks Flag whether free blocks should be zeroed.
1896 * If false and no image has data for sepcified
1897 * range VERR_VD_BLOCK_FREE is returned.
1898 * Note that unallocated blocks are still zeroed
1899 * if at least one image has valid data for a part
1900 * of the range.
1901 * @param fUpdateCache Flag whether to update the attached cache if
1902 * available.
1903 * @param cImagesRead Number of images in the chain to read until
1904 * the read is cut off. A value of 0 disables the cut off.
1905 */
1906static int vdReadHelperEx(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1907 uint64_t uOffset, void *pvBuf, size_t cbRead,
1908 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
1909{
1910 int rc = VINF_SUCCESS;
1911 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
1912 RTSGSEG Segment;
1913 RTSGBUF SgBuf;
1914 VDIOCTX IoCtx;
1915 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
1916
1917 rc = RTSemEventCreate(&hEventComplete);
1918 if (RT_FAILURE(rc))
1919 return rc;
1920
1921 if (fZeroFreeBlocks)
1922 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1923 if (fUpdateCache)
1924 fFlags |= VDIOCTX_FLAGS_READ_UPDATE_CACHE;
1925
1926 Segment.pvSeg = pvBuf;
1927 Segment.cbSeg = cbRead;
1928 RTSgBufInit(&SgBuf, &Segment, 1);
1929 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
1930 NULL, vdReadHelperAsync, fFlags);
1931
1932 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
1933 IoCtx.Req.Io.cImagesRead = cImagesRead;
1934 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
1935 IoCtx.Type.Root.pvUser1 = pDisk;
1936 IoCtx.Type.Root.pvUser2 = hEventComplete;
1937 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
1938 RTSemEventDestroy(hEventComplete);
1939 return rc;
1940}
1941
/**
 * internal: read the specified amount of data in whatever blocks the backend
 * will give us.
 *
 * Convenience wrapper around vdReadHelperEx() using the regular parent chain,
 * zeroing free blocks and no image cut off.
 */
static int vdReadHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
                        void *pvBuf, size_t cbRead, bool fUpdateCache)
{
    return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
                          true /* fZeroFreeBlocks */, fUpdateCache, 0);
}
1952
1953/**
1954 * internal: mark the disk as not modified.
1955 */
1956static void vdResetModifiedFlag(PVDISK pDisk)
1957{
1958 if (pDisk->uModified & VD_IMAGE_MODIFIED_FLAG)
1959 {
1960 /* generate new last-modified uuid */
1961 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
1962 {
1963 RTUUID Uuid;
1964
1965 RTUuidCreate(&Uuid);
1966 pDisk->pLast->Backend->pfnSetModificationUuid(pDisk->pLast->pBackendData,
1967 &Uuid);
1968
1969 if (pDisk->pCache)
1970 pDisk->pCache->Backend->pfnSetModificationUuid(pDisk->pCache->pBackendData,
1971 &Uuid);
1972 }
1973
1974 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FLAG;
1975 }
1976}
1977
/**
 * internal: mark the disk as modified.
 *
 * On the very first modification a new modification UUID is generated (via
 * vdResetModifiedFlag) and the topmost image is flushed so the UUID reaches
 * the medium.
 */
static void vdSetModifiedFlag(PVDISK pDisk)
{
    pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
    if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
    {
        pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;

        /* First modify, so create a UUID and ensure it's written to disk. */
        vdResetModifiedFlag(pDisk);

        if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
        {
            VDIOCTX IoCtx;
            /* Synchronous flush context so the new UUID hits the medium now. */
            vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, NULL,
                        NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
            pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData, &IoCtx);
        }
    }
}
2000
2001/**
2002 * internal: write buffer to the image, taking care of block boundaries and
2003 * write optimizations.
2004 */
2005static int vdWriteHelperEx(PVDISK pDisk, PVDIMAGE pImage,
2006 PVDIMAGE pImageParentOverride, uint64_t uOffset,
2007 const void *pvBuf, size_t cbWrite,
2008 uint32_t fFlags, unsigned cImagesRead)
2009{
2010 int rc = VINF_SUCCESS;
2011 RTSGSEG Segment;
2012 RTSGBUF SgBuf;
2013 VDIOCTX IoCtx;
2014 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
2015
2016 rc = RTSemEventCreate(&hEventComplete);
2017 if (RT_FAILURE(rc))
2018 return rc;
2019
2020 fFlags |= VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
2021
2022 Segment.pvSeg = (void *)pvBuf;
2023 Segment.cbSeg = cbWrite;
2024 RTSgBufInit(&SgBuf, &Segment, 1);
2025 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
2026 NULL, vdWriteHelperAsync, fFlags);
2027
2028 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
2029 IoCtx.Req.Io.cImagesRead = cImagesRead;
2030 IoCtx.pIoCtxParent = NULL;
2031 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
2032 IoCtx.Type.Root.pvUser1 = pDisk;
2033 IoCtx.Type.Root.pvUser2 = hEventComplete;
2034 if (RT_SUCCESS(rc))
2035 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
2036
2037 RTSemEventDestroy(hEventComplete);
2038 return rc;
2039}
2040
/**
 * internal: write buffer to the image, taking care of block boundaries and
 * write optimizations.
 *
 * Convenience wrapper around vdWriteHelperEx() without parent override and
 * without an image count limit.
 */
static int vdWriteHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
                         const void *pvBuf, size_t cbWrite, uint32_t fFlags)
{
    return vdWriteHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbWrite,
                           fFlags, 0);
}
2051
2052/**
2053 * Internal: Copies the content of one disk to another one applying optimizations
2054 * to speed up the copy process if possible.
2055 */
2056static int vdCopyHelper(PVDISK pDiskFrom, PVDIMAGE pImageFrom, PVDISK pDiskTo,
2057 uint64_t cbSize, unsigned cImagesFromRead, unsigned cImagesToRead,
2058 bool fSuppressRedundantIo, PVDINTERFACEPROGRESS pIfProgress,
2059 PVDINTERFACEPROGRESS pDstIfProgress)
2060{
2061 int rc = VINF_SUCCESS;
2062 int rc2;
2063 uint64_t uOffset = 0;
2064 uint64_t cbRemaining = cbSize;
2065 void *pvBuf = NULL;
2066 bool fLockReadFrom = false;
2067 bool fLockWriteTo = false;
2068 bool fBlockwiseCopy = false;
2069 unsigned uProgressOld = 0;
2070
2071 LogFlowFunc(("pDiskFrom=%#p pImageFrom=%#p pDiskTo=%#p cbSize=%llu cImagesFromRead=%u cImagesToRead=%u fSuppressRedundantIo=%RTbool pIfProgress=%#p pDstIfProgress=%#p\n",
2072 pDiskFrom, pImageFrom, pDiskTo, cbSize, cImagesFromRead, cImagesToRead, fSuppressRedundantIo, pDstIfProgress, pDstIfProgress));
2073
2074 if ( (fSuppressRedundantIo || (cImagesFromRead > 0))
2075 && RTListIsEmpty(&pDiskFrom->ListFilterChainRead))
2076 fBlockwiseCopy = true;
2077
2078 /* Allocate tmp buffer. */
2079 pvBuf = RTMemTmpAlloc(VD_MERGE_BUFFER_SIZE);
2080 if (!pvBuf)
2081 return rc;
2082
2083 do
2084 {
2085 size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining);
2086
2087 /* Note that we don't attempt to synchronize cross-disk accesses.
2088 * It wouldn't be very difficult to do, just the lock order would
2089 * need to be defined somehow to prevent deadlocks. Postpone such
2090 * magic as there is no use case for this. */
2091
2092 rc2 = vdThreadStartRead(pDiskFrom);
2093 AssertRC(rc2);
2094 fLockReadFrom = true;
2095
2096 if (fBlockwiseCopy)
2097 {
2098 RTSGSEG SegmentBuf;
2099 RTSGBUF SgBuf;
2100 VDIOCTX IoCtx;
2101
2102 SegmentBuf.pvSeg = pvBuf;
2103 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
2104 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2105 vdIoCtxInit(&IoCtx, pDiskFrom, VDIOCTXTXDIR_READ, 0, 0, NULL,
2106 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2107
2108 /* Read the source data. */
2109 rc = pImageFrom->Backend->pfnRead(pImageFrom->pBackendData,
2110 uOffset, cbThisRead, &IoCtx,
2111 &cbThisRead);
2112
2113 if ( rc == VERR_VD_BLOCK_FREE
2114 && cImagesFromRead != 1)
2115 {
2116 unsigned cImagesToProcess = cImagesFromRead;
2117
2118 for (PVDIMAGE pCurrImage = pImageFrom->pPrev;
2119 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
2120 pCurrImage = pCurrImage->pPrev)
2121 {
2122 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
2123 uOffset, cbThisRead,
2124 &IoCtx, &cbThisRead);
2125 if (cImagesToProcess == 1)
2126 break;
2127 else if (cImagesToProcess > 0)
2128 cImagesToProcess--;
2129 }
2130 }
2131 }
2132 else
2133 rc = vdReadHelper(pDiskFrom, pImageFrom, uOffset, pvBuf, cbThisRead,
2134 false /* fUpdateCache */);
2135
2136 if (RT_FAILURE(rc) && rc != VERR_VD_BLOCK_FREE)
2137 break;
2138
2139 rc2 = vdThreadFinishRead(pDiskFrom);
2140 AssertRC(rc2);
2141 fLockReadFrom = false;
2142
2143 if (rc != VERR_VD_BLOCK_FREE)
2144 {
2145 rc2 = vdThreadStartWrite(pDiskTo);
2146 AssertRC(rc2);
2147 fLockWriteTo = true;
2148
2149 /* Only do collapsed I/O if we are copying the data blockwise. */
2150 rc = vdWriteHelperEx(pDiskTo, pDiskTo->pLast, NULL, uOffset, pvBuf,
2151 cbThisRead, VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG /* fFlags */,
2152 fBlockwiseCopy ? cImagesToRead : 0);
2153 if (RT_FAILURE(rc))
2154 break;
2155
2156 rc2 = vdThreadFinishWrite(pDiskTo);
2157 AssertRC(rc2);
2158 fLockWriteTo = false;
2159 }
2160 else /* Don't propagate the error to the outside */
2161 rc = VINF_SUCCESS;
2162
2163 uOffset += cbThisRead;
2164 cbRemaining -= cbThisRead;
2165
2166 unsigned uProgressNew = uOffset * 99 / cbSize;
2167 if (uProgressNew != uProgressOld)
2168 {
2169 uProgressOld = uProgressNew;
2170
2171 if (pIfProgress && pIfProgress->pfnProgress)
2172 {
2173 rc = pIfProgress->pfnProgress(pIfProgress->Core.pvUser,
2174 uProgressOld);
2175 if (RT_FAILURE(rc))
2176 break;
2177 }
2178 if (pDstIfProgress && pDstIfProgress->pfnProgress)
2179 {
2180 rc = pDstIfProgress->pfnProgress(pDstIfProgress->Core.pvUser,
2181 uProgressOld);
2182 if (RT_FAILURE(rc))
2183 break;
2184 }
2185 }
2186 } while (uOffset < cbSize);
2187
2188 RTMemFree(pvBuf);
2189
2190 if (fLockReadFrom)
2191 {
2192 rc2 = vdThreadFinishRead(pDiskFrom);
2193 AssertRC(rc2);
2194 }
2195
2196 if (fLockWriteTo)
2197 {
2198 rc2 = vdThreadFinishWrite(pDiskTo);
2199 AssertRC(rc2);
2200 }
2201
2202 LogFlowFunc(("returns rc=%Rrc\n", rc));
2203 return rc;
2204}
2205
2206/**
2207 * Flush helper async version.
2208 */
2209static DECLCALLBACK(int) vdSetModifiedHelperAsync(PVDIOCTX pIoCtx)
2210{
2211 int rc = VINF_SUCCESS;
2212 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2213
2214 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2215 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2216 rc = VINF_SUCCESS;
2217
2218 return rc;
2219}
2220
2221/**
2222 * internal: mark the disk as modified - async version.
2223 */
2224static int vdSetModifiedFlagAsync(PVDISK pDisk, PVDIOCTX pIoCtx)
2225{
2226 int rc = VINF_SUCCESS;
2227
2228 VD_IS_LOCKED(pDisk);
2229
2230 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
2231 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
2232 {
2233 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2234 if (RT_SUCCESS(rc))
2235 {
2236 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
2237
2238 /* First modify, so create a UUID and ensure it's written to disk. */
2239 vdResetModifiedFlag(pDisk);
2240
2241 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
2242 {
2243 PVDIOCTX pIoCtxFlush = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_FLUSH,
2244 0, 0, pDisk->pLast,
2245 NULL, pIoCtx, 0, 0, NULL,
2246 vdSetModifiedHelperAsync);
2247
2248 if (pIoCtxFlush)
2249 {
2250 rc = vdIoCtxProcessLocked(pIoCtxFlush);
2251 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2252 {
2253 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs */);
2254 vdIoCtxFree(pDisk, pIoCtxFlush);
2255 }
2256 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2257 {
2258 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2259 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2260 }
2261 else /* Another error */
2262 vdIoCtxFree(pDisk, pIoCtxFlush);
2263 }
2264 else
2265 rc = VERR_NO_MEMORY;
2266 }
2267 }
2268 }
2269
2270 return rc;
2271}
2272
/**
 * Final stage of an allocating write: commits the fully assembled block
 * (pre-read data + written data + post-read data) to the image - async version.
 *
 * @returns VBox status code.
 * @param   pIoCtx  The child write I/O context to operate on.
 */
static DECLCALLBACK(int) vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    PVDIMAGE pImage = pIoCtx->Req.Io.pImageStart;
    size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
    /* Write the complete block starting cbPreRead bytes before the original offset. */
    rc = pImage->Backend->pfnWrite(pImage->pBackendData,
                                   pIoCtx->Req.Io.uOffset - cbPreRead,
                                   cbPreRead + cbThisWrite + cbPostRead,
                                   pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
    /* A full-block write must never report a free block or require further
     * pre-/post-reads (unless metadata ran out). */
    Assert(rc != VERR_VD_BLOCK_FREE);
    Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
    Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPostRead == 0);
    if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        rc = VINF_SUCCESS;
    else if (rc == VERR_VD_IOCTX_HALT)
    {
        /* Halted by the backend; mark blocked and report success for now. */
        pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
        rc = VINF_SUCCESS;
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
2300
/**
 * Optimized write, compare stage: after the whole block was read, checks
 * whether the parent's write would actually change it; if not the write is
 * skipped entirely, otherwise the block is assembled and committed - async
 * version.
 *
 * @returns VBox status code (VINF_VD_ASYNC_IO_FINISHED when the block was
 *          unchanged and nothing needs writing).
 * @param   pIoCtx  The child write I/O context to operate on.
 */
static DECLCALLBACK(int) vdWriteHelperOptimizedCmpAndWriteAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    size_t cbThisWrite = 0;
    size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
    size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
    size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    /* Must be a direct child of a root request with the read fully done. */
    AssertPtr(pIoCtxParent);
    Assert(!pIoCtxParent->pIoCtxParent);
    Assert(!pIoCtx->Req.Io.cbTransferLeft && !pIoCtx->cMetaTransfersPending);

    vdIoCtxChildReset(pIoCtx);
    cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
    /* Position our S/G buffer at the part covered by the parent's write. */
    RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);

    /* Check if the write would modify anything in this block. */
    if (!RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &pIoCtxParent->Req.Io.SgBuf, cbThisWrite))
    {
        RTSGBUF SgBufSrcTmp;

        /* Also compare the write-copy portion following the main write. */
        RTSgBufClone(&SgBufSrcTmp, &pIoCtxParent->Req.Io.SgBuf);
        RTSgBufAdvance(&SgBufSrcTmp, cbThisWrite);
        RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbThisWrite);

        if (!cbWriteCopy || !RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &SgBufSrcTmp, cbWriteCopy))
        {
            /* Block is completely unchanged, so no need to write anything. */
            LogFlowFunc(("Block didn't changed\n"));
            ASMAtomicWriteU32(&pIoCtx->Req.Io.cbTransferLeft, 0);
            /* Consume the parent's data so its buffer position stays consistent. */
            RTSgBufAdvance(&pIoCtxParent->Req.Io.SgBuf, cbThisWrite);
            return VINF_VD_ASYNC_IO_FINISHED;
        }
    }

    /* Copy the data to the right place in the buffer. */
    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);

    /* Handle the data that goes after the write to fill the block. */
    if (cbPostRead)
    {
        /* Now assemble the remaining data. */
        if (cbWriteCopy)
        {
            /*
             * The S/G buffer of the parent needs to be cloned because
             * it is not allowed to modify the state.
             */
            RTSGBUF SgBufParentTmp;

            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
        }

        /* Zero out the remainder of this block. Will never be visible, as this
         * is beyond the limit of the image. */
        if (cbFill)
        {
            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
            vdIoCtxSet(pIoCtx, '\0', cbFill);
        }
    }

    /* Write the full block to the virtual disk. */
    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;

    return rc;
}
2377
2378static DECLCALLBACK(int) vdWriteHelperOptimizedPreReadAsync(PVDIOCTX pIoCtx)
2379{
2380 int rc = VINF_SUCCESS;
2381
2382 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2383
2384 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2385
2386 if ( pIoCtx->Req.Io.cbTransferLeft
2387 && !pIoCtx->cDataTransfersPending)
2388 rc = vdReadHelperAsync(pIoCtx);
2389
2390 if ( ( RT_SUCCESS(rc)
2391 || (rc == VERR_VD_ASYNC_IO_IN_PROGRESS))
2392 && ( pIoCtx->Req.Io.cbTransferLeft
2393 || pIoCtx->cMetaTransfersPending))
2394 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2395 else
2396 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCmpAndWriteAsync;
2397
2398 return rc;
2399}
2400
2401/**
2402 * internal: write a complete block (only used for diff images), taking the
2403 * remaining data from parent images. This implementation optimizes out writes
2404 * that do not change the data relative to the state as of the parent images.
2405 * All backends which support differential/growing images support this - async version.
2406 */
2407static DECLCALLBACK(int) vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx)
2408{
2409 PVDISK pDisk = pIoCtx->pDisk;
2410 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2411 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2412 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2413 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2414 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2415 size_t cbFill = 0;
2416 size_t cbWriteCopy = 0;
2417 size_t cbReadImage = 0;
2418
2419 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2420
2421 AssertPtr(pIoCtx->pIoCtxParent);
2422 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2423
2424 if (cbPostRead)
2425 {
2426 /* Figure out how much we cannot read from the image, because
2427 * the last block to write might exceed the nominal size of the
2428 * image for technical reasons. */
2429 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2430 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2431
2432 /* If we have data to be written, use that instead of reading
2433 * data from the image. */
2434 if (cbWrite > cbThisWrite)
2435 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2436
2437 /* The rest must be read from the image. */
2438 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2439 }
2440
2441 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2442 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2443 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2444
2445 /* Read the entire data of the block so that we can compare whether it will
2446 * be modified by the write or not. */
2447 size_t cbTmp = cbPreRead + cbThisWrite + cbPostRead - cbFill; Assert(cbTmp == (uint32_t)cbTmp);
2448 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTmp;
2449 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2450 pIoCtx->Req.Io.uOffset -= cbPreRead;
2451
2452 /* Next step */
2453 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
2454 return VINF_SUCCESS;
2455}
2456
2457static DECLCALLBACK(int) vdWriteHelperStandardReadImageAsync(PVDIOCTX pIoCtx)
2458{
2459 int rc = VINF_SUCCESS;
2460
2461 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2462
2463 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2464
2465 if ( pIoCtx->Req.Io.cbTransferLeft
2466 && !pIoCtx->cDataTransfersPending)
2467 rc = vdReadHelperAsync(pIoCtx);
2468
2469 if ( RT_SUCCESS(rc)
2470 && ( pIoCtx->Req.Io.cbTransferLeft
2471 || pIoCtx->cMetaTransfersPending))
2472 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2473 else
2474 {
2475 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2476
2477 /* Zero out the remainder of this block. Will never be visible, as this
2478 * is beyond the limit of the image. */
2479 if (cbFill)
2480 vdIoCtxSet(pIoCtx, '\0', cbFill);
2481
2482 /* Write the full block to the virtual disk. */
2483 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2484
2485 vdIoCtxChildReset(pIoCtx);
2486 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2487 }
2488
2489 return rc;
2490}
2491
/**
 * Standard write, assemble stage: copies the parent's write data into the
 * block buffer and either schedules the read of the trailing image data or
 * pads with zeroes and proceeds directly to the commit stage - async version.
 *
 * @returns VBox status code.
 * @param   pIoCtx  The child write I/O context to operate on.
 */
static DECLCALLBACK(int) vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    /* Place the parent's write data after the already read pre-read part. */
    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
    if (cbPostRead)
    {
        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
        size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
        size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;

        /* Now assemble the remaining data. */
        if (cbWriteCopy)
        {
            /*
             * The S/G buffer of the parent needs to be cloned because
             * it is not allowed to modify the state.
             */
            RTSGBUF SgBufParentTmp;

            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
        }

        if (cbReadImage)
        {
            /* Read remaining data. */
            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardReadImageAsync;

            /* Read the data that goes before the write to fill the block. */
            pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbReadImage; Assert(cbReadImage == (uint32_t)cbReadImage);
            pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
            pIoCtx->Req.Io.uOffset += cbWriteCopy;
        }
        else
        {
            /* Zero out the remainder of this block. Will never be visible, as this
             * is beyond the limit of the image. */
            if (cbFill)
                vdIoCtxSet(pIoCtx, '\0', cbFill);

            /* Write the full block to the virtual disk. */
            RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
            vdIoCtxChildReset(pIoCtx);
            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
        }
    }
    else
    {
        /* Write the full block to the virtual disk. */
        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
        vdIoCtxChildReset(pIoCtx);
        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
    }

    return rc;
}
2554
2555static DECLCALLBACK(int) vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
2556{
2557 int rc = VINF_SUCCESS;
2558
2559 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2560
2561 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2562
2563 if ( pIoCtx->Req.Io.cbTransferLeft
2564 && !pIoCtx->cDataTransfersPending)
2565 rc = vdReadHelperAsync(pIoCtx);
2566
2567 if ( RT_SUCCESS(rc)
2568 && ( pIoCtx->Req.Io.cbTransferLeft
2569 || pIoCtx->cMetaTransfersPending))
2570 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2571 else
2572 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2573
2574 return rc;
2575}
2576
2577static DECLCALLBACK(int) vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
2578{
2579 PVDISK pDisk = pIoCtx->pDisk;
2580 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2581 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2582 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2583 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2584 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2585 size_t cbFill = 0;
2586 size_t cbWriteCopy = 0;
2587 size_t cbReadImage = 0;
2588
2589 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2590
2591 AssertPtr(pIoCtx->pIoCtxParent);
2592 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2593
2594 /* Calculate the amount of data to read that goes after the write to fill the block. */
2595 if (cbPostRead)
2596 {
2597 /* If we have data to be written, use that instead of reading
2598 * data from the image. */
2599 if (cbWrite > cbThisWrite)
2600 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2601 else
2602 cbWriteCopy = 0;
2603
2604 /* Figure out how much we cannot read from the image, because
2605 * the last block to write might exceed the nominal size of the
2606 * image for technical reasons. */
2607 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2608 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2609
2610 /* The rest must be read from the image. */
2611 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2612 }
2613
2614 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2615 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2616 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2617
2618 /* Next step */
2619 if (cbPreRead)
2620 {
2621 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
2622
2623 /* Read the data that goes before the write to fill the block. */
2624 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbPreRead; Assert(cbPreRead == (uint32_t)cbPreRead);
2625 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2626 pIoCtx->Req.Io.uOffset -= cbPreRead;
2627 }
2628 else
2629 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2630
2631 return VINF_SUCCESS;
2632}
2633
2634/**
2635 * internal: write buffer to the image, taking care of block boundaries and
2636 * write optimizations - async version.
2637 */
2638static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx)
2639{
2640 int rc;
2641 size_t cbWrite = pIoCtx->Req.Io.cbTransfer;
2642 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
2643 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2644 PVDISK pDisk = pIoCtx->pDisk;
2645 unsigned fWrite;
2646 size_t cbThisWrite;
2647 size_t cbPreRead, cbPostRead;
2648
2649 /* Apply write filter chain here if it was not done already. */
2650 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_WRITE_FILTER_APPLIED))
2651 {
2652 rc = vdFilterChainApplyWrite(pDisk, uOffset, cbWrite, pIoCtx);
2653 if (RT_FAILURE(rc))
2654 return rc;
2655 pIoCtx->fFlags |= VDIOCTX_FLAGS_WRITE_FILTER_APPLIED;
2656 }
2657
2658 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG))
2659 {
2660 rc = vdSetModifiedFlagAsync(pDisk, pIoCtx);
2661 if (RT_FAILURE(rc)) /* Includes I/O in progress. */
2662 return rc;
2663 }
2664
2665 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
2666 if (RT_FAILURE(rc))
2667 return rc;
2668
2669 /* Loop until all written. */
2670 do
2671 {
2672 /* Try to write the possibly partial block to the last opened image.
2673 * This works when the block is already allocated in this image or
2674 * if it is a full-block write (and allocation isn't suppressed below).
2675 * For image formats which don't support zero blocks, it's beneficial
2676 * to avoid unnecessarily allocating unchanged blocks. This prevents
2677 * unwanted expanding of images. VMDK is an example. */
2678 cbThisWrite = cbWrite;
2679
2680 /*
2681 * Check whether there is a full block write in progress which was not allocated.
2682 * Defer I/O if the range interferes.
2683 */
2684 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
2685 && uOffset >= pDisk->uOffsetStartLocked
2686 && uOffset < pDisk->uOffsetEndLocked)
2687 {
2688 Log(("Interferring write while allocating a new block => deferring write\n"));
2689 vdIoCtxDefer(pDisk, pIoCtx);
2690 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2691 break;
2692 }
2693
2694 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2695 ? 0 : VD_WRITE_NO_ALLOC;
2696 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset, cbThisWrite,
2697 pIoCtx, &cbThisWrite, &cbPreRead, &cbPostRead,
2698 fWrite);
2699 if (rc == VERR_VD_BLOCK_FREE)
2700 {
2701 /* Lock the disk .*/
2702 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2703 if (RT_SUCCESS(rc))
2704 {
2705 /*
2706 * Allocate segment and buffer in one go.
2707 * A bit hackish but avoids the need to allocate memory twice.
2708 */
2709 PRTSGBUF pTmp = (PRTSGBUF)RTMemAlloc(cbPreRead + cbThisWrite + cbPostRead + sizeof(RTSGSEG) + sizeof(RTSGBUF));
2710 AssertBreakStmt(pTmp, rc = VERR_NO_MEMORY);
2711 PRTSGSEG pSeg = (PRTSGSEG)(pTmp + 1);
2712
2713 pSeg->pvSeg = pSeg + 1;
2714 pSeg->cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2715 RTSgBufInit(pTmp, pSeg, 1);
2716
2717 PVDIOCTX pIoCtxWrite = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_WRITE,
2718 uOffset, pSeg->cbSeg, pImage,
2719 pTmp,
2720 pIoCtx, cbThisWrite,
2721 cbWrite,
2722 pTmp,
2723 (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2724 ? vdWriteHelperStandardAsync
2725 : vdWriteHelperOptimizedAsync);
2726 if (!pIoCtxWrite)
2727 {
2728 RTMemTmpFree(pTmp);
2729 rc = VERR_NO_MEMORY;
2730 break;
2731 }
2732
2733 LogFlowFunc(("Disk is growing because of pIoCtx=%#p pIoCtxWrite=%#p\n",
2734 pIoCtx, pIoCtxWrite));
2735
2736 /* Save the current range for the growing operation to check for intersecting requests later. */
2737 pDisk->uOffsetStartLocked = uOffset - cbPreRead;
2738 pDisk->uOffsetEndLocked = uOffset + cbThisWrite + cbPostRead;
2739
2740 pIoCtxWrite->Type.Child.cbPreRead = cbPreRead;
2741 pIoCtxWrite->Type.Child.cbPostRead = cbPostRead;
2742 pIoCtxWrite->Req.Io.pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
2743
2744 /* Process the write request */
2745 rc = vdIoCtxProcessLocked(pIoCtxWrite);
2746
2747 if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
2748 {
2749 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2750 vdIoCtxFree(pDisk, pIoCtxWrite);
2751 break;
2752 }
2753 else if ( rc == VINF_VD_ASYNC_IO_FINISHED
2754 && ASMAtomicCmpXchgBool(&pIoCtxWrite->fComplete, true, false))
2755 {
2756 LogFlow(("Child write request completed\n"));
2757 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbThisWrite);
2758 Assert(cbThisWrite == (uint32_t)cbThisWrite);
2759 rc = pIoCtxWrite->rcReq;
2760 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisWrite);
2761 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2762 vdIoCtxFree(pDisk, pIoCtxWrite);
2763 }
2764 else
2765 {
2766 LogFlow(("Child write pending\n"));
2767 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2768 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2769 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2770 cbWrite -= cbThisWrite;
2771 uOffset += cbThisWrite;
2772 break;
2773 }
2774 }
2775 else
2776 {
2777 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2778 break;
2779 }
2780 }
2781
2782 if (rc == VERR_VD_IOCTX_HALT)
2783 {
2784 cbWrite -= cbThisWrite;
2785 uOffset += cbThisWrite;
2786 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2787 break;
2788 }
2789 else if (rc == VERR_VD_NOT_ENOUGH_METADATA)
2790 break;
2791
2792 cbWrite -= cbThisWrite;
2793 uOffset += cbThisWrite;
2794 } while (cbWrite != 0 && (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
2795
2796 if ( rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2797 || rc == VERR_VD_NOT_ENOUGH_METADATA
2798 || rc == VERR_VD_IOCTX_HALT)
2799 {
2800 /*
2801 * Tell the caller that we don't need to go back here because all
2802 * writes are initiated.
2803 */
2804 if ( !cbWrite
2805 && rc != VERR_VD_IOCTX_HALT)
2806 rc = VINF_SUCCESS;
2807
2808 pIoCtx->Req.Io.uOffset = uOffset;
2809 pIoCtx->Req.Io.cbTransfer = cbWrite;
2810 }
2811
2812 return rc;
2813}
2814
2815/**
2816 * Flush helper async version.
2817 */
2818static DECLCALLBACK(int) vdFlushHelperAsync(PVDIOCTX pIoCtx)
2819{
2820 int rc = VINF_SUCCESS;
2821 PVDISK pDisk = pIoCtx->pDisk;
2822 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2823
2824 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2825 if (RT_SUCCESS(rc))
2826 {
2827 /* Mark the whole disk as locked. */
2828 pDisk->uOffsetStartLocked = 0;
2829 pDisk->uOffsetEndLocked = UINT64_C(0xffffffffffffffff);
2830
2831 vdResetModifiedFlag(pDisk);
2832 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2833 if ( ( RT_SUCCESS(rc)
2834 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2835 || rc == VERR_VD_IOCTX_HALT)
2836 && pDisk->pCache)
2837 {
2838 rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData, pIoCtx);
2839 if ( RT_SUCCESS(rc)
2840 || ( rc != VERR_VD_ASYNC_IO_IN_PROGRESS
2841 && rc != VERR_VD_IOCTX_HALT))
2842 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2843 else if (rc != VERR_VD_IOCTX_HALT)
2844 rc = VINF_SUCCESS;
2845 }
2846 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2847 rc = VINF_SUCCESS;
2848 else if (rc != VERR_VD_IOCTX_HALT)/* Some other error. */
2849 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2850 }
2851
2852 return rc;
2853}
2854
2855/**
2856 * Async discard helper - discards a whole block which is recorded in the block
2857 * tree.
2858 *
2859 * @returns VBox status code.
2860 * @param pIoCtx The I/O context to operate on.
2861 */
2862static DECLCALLBACK(int) vdDiscardWholeBlockAsync(PVDIOCTX pIoCtx)
2863{
2864 int rc = VINF_SUCCESS;
2865 PVDISK pDisk = pIoCtx->pDisk;
2866 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2867 PVDDISCARDBLOCK pBlock = pIoCtx->Req.Discard.pBlock;
2868 size_t cbPreAllocated, cbPostAllocated, cbActuallyDiscarded;
2869
2870 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2871
2872 AssertPtr(pBlock);
2873
2874 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2875 pBlock->Core.Key, pBlock->cbDiscard,
2876 &cbPreAllocated, &cbPostAllocated,
2877 &cbActuallyDiscarded, NULL, 0);
2878 Assert(rc != VERR_VD_DISCARD_ALIGNMENT_NOT_MET);
2879 Assert(!cbPreAllocated);
2880 Assert(!cbPostAllocated);
2881 Assert(cbActuallyDiscarded == pBlock->cbDiscard || RT_FAILURE(rc));
2882
2883 /* Remove the block on success. */
2884 if ( RT_SUCCESS(rc)
2885 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2886 {
2887 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2888 Assert(pBlockRemove == pBlock); RT_NOREF1(pBlockRemove);
2889
2890 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2891 RTListNodeRemove(&pBlock->NodeLru);
2892 RTMemFree(pBlock->pbmAllocated);
2893 RTMemFree(pBlock);
2894 pIoCtx->Req.Discard.pBlock = NULL;/* Safety precaution. */
2895 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
2896 rc = VINF_SUCCESS;
2897 }
2898
2899 LogFlowFunc(("returns rc=%Rrc\n", rc));
2900 return rc;
2901}
2902
2903/**
2904 * Removes the least recently used blocks from the waiting list until
2905 * the new value is reached - version for async I/O.
2906 *
2907 * @returns VBox status code.
2908 * @param pDisk VD disk container.
2909 * @param pIoCtx The I/O context associated with this discard operation.
2910 * @param cbDiscardingNew How many bytes should be waiting on success.
2911 * The number of bytes waiting can be less.
2912 */
2913static int vdDiscardRemoveBlocksAsync(PVDISK pDisk, PVDIOCTX pIoCtx, size_t cbDiscardingNew)
2914{
2915 int rc = VINF_SUCCESS;
2916 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2917
2918 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
2919 pDisk, pDiscard, cbDiscardingNew));
2920
2921 while (pDiscard->cbDiscarding > cbDiscardingNew)
2922 {
2923 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
2924
2925 Assert(!RTListIsEmpty(&pDiscard->ListLru));
2926
2927 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
2928 uint64_t offStart = pBlock->Core.Key;
2929 uint32_t idxStart = 0;
2930 size_t cbLeft = pBlock->cbDiscard;
2931 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
2932 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
2933
2934 while (cbLeft > 0)
2935 {
2936 int32_t idxEnd;
2937 size_t cbThis = cbLeft;
2938
2939 if (fAllocated)
2940 {
2941 /* Check for the first unallocated bit. */
2942 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
2943 if (idxEnd != -1)
2944 {
2945 cbThis = (idxEnd - idxStart) * 512;
2946 fAllocated = false;
2947 }
2948 }
2949 else
2950 {
2951 /* Mark as unused and check for the first set bit. */
2952 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
2953 if (idxEnd != -1)
2954 cbThis = (idxEnd - idxStart) * 512;
2955
2956 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2957 offStart, cbThis, NULL, NULL, &cbThis,
2958 NULL, VD_DISCARD_MARK_UNUSED);
2959 if ( RT_FAILURE(rc)
2960 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2961 break;
2962
2963 fAllocated = true;
2964 }
2965
2966 idxStart = idxEnd;
2967 offStart += cbThis;
2968 cbLeft -= cbThis;
2969 }
2970
2971 if ( RT_FAILURE(rc)
2972 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2973 break;
2974
2975 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2976 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
2977 RTListNodeRemove(&pBlock->NodeLru);
2978
2979 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2980 RTMemFree(pBlock->pbmAllocated);
2981 RTMemFree(pBlock);
2982 }
2983
2984 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2985 rc = VINF_SUCCESS;
2986
2987 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
2988
2989 LogFlowFunc(("returns rc=%Rrc\n", rc));
2990 return rc;
2991}
2992
2993/**
2994 * Async discard helper - discards the current range if there is no matching
2995 * block in the tree.
2996 *
2997 * @returns VBox status code.
2998 * @param pIoCtx The I/O context to operate on.
2999 */
3000static DECLCALLBACK(int) vdDiscardCurrentRangeAsync(PVDIOCTX pIoCtx)
3001{
3002 PVDISK pDisk = pIoCtx->pDisk;
3003 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
3004 uint64_t offStart = pIoCtx->Req.Discard.offCur;
3005 size_t cbThisDiscard = pIoCtx->Req.Discard.cbThisDiscard;
3006 void *pbmAllocated = NULL;
3007 size_t cbPreAllocated, cbPostAllocated;
3008 int rc = VINF_SUCCESS;
3009
3010 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
3011
3012 /* No block found, try to discard using the backend first. */
3013 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
3014 offStart, cbThisDiscard, &cbPreAllocated,
3015 &cbPostAllocated, &cbThisDiscard,
3016 &pbmAllocated, 0);
3017 if (rc == VERR_VD_DISCARD_ALIGNMENT_NOT_MET)
3018 {
3019 /* Create new discard block. */
3020 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTMemAllocZ(sizeof(VDDISCARDBLOCK));
3021 if (pBlock)
3022 {
3023 pBlock->Core.Key = offStart - cbPreAllocated;
3024 pBlock->Core.KeyLast = offStart + cbThisDiscard + cbPostAllocated - 1;
3025 pBlock->cbDiscard = cbPreAllocated + cbThisDiscard + cbPostAllocated;
3026 pBlock->pbmAllocated = pbmAllocated;
3027 bool fInserted = RTAvlrU64Insert(pDiscard->pTreeBlocks, &pBlock->Core);
3028 Assert(fInserted); NOREF(fInserted);
3029
3030 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3031 pDiscard->cbDiscarding += pBlock->cbDiscard;
3032
3033 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3034 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3035 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3036 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3037
3038 if (pDiscard->cbDiscarding > VD_DISCARD_REMOVE_THRESHOLD)
3039 rc = vdDiscardRemoveBlocksAsync(pDisk, pIoCtx, VD_DISCARD_REMOVE_THRESHOLD);
3040 else
3041 rc = VINF_SUCCESS;
3042
3043 if (RT_SUCCESS(rc))
3044 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
3045 }
3046 else
3047 {
3048 RTMemFree(pbmAllocated);
3049 rc = VERR_NO_MEMORY;
3050 }
3051 }
3052 else if ( RT_SUCCESS(rc)
3053 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) /* Save state and andvance to next range. */
3054 {
3055 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3056 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3057 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3058 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3059 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3060 rc = VINF_SUCCESS;
3061 }
3062
3063 LogFlowFunc(("returns rc=%Rrc\n", rc));
3064 return rc;
3065}
3066
3067/**
3068 * Async discard helper - entry point.
3069 *
3070 * @returns VBox status code.
3071 * @param pIoCtx The I/O context to operate on.
3072 */
static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;
    PCRTRANGE paRanges = pIoCtx->Req.Discard.paRanges;   /* Array of ranges to discard. */
    unsigned cRanges = pIoCtx->Req.Discard.cRanges;
    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;          /* Lazily created below on first use. */

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    /* Check if the I/O context processed all ranges. */
    if (   pIoCtx->Req.Discard.idxRange == cRanges
        && !pIoCtx->Req.Discard.cbDiscardLeft)
    {
        LogFlowFunc(("All ranges discarded, completing\n"));
        vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs*/);
        return VINF_SUCCESS;
    }

    /* Acquire the disk lock unless this context already owns it. */
    if (pDisk->pIoCtxLockOwner != pIoCtx)
        rc = vdIoCtxLockDisk(pDisk, pIoCtx);

    if (RT_SUCCESS(rc))
    {
        uint64_t offStart = pIoCtx->Req.Discard.offCur;
        size_t cbDiscardLeft = pIoCtx->Req.Discard.cbDiscardLeft;
        size_t cbThisDiscard;

        /* Publish the range covered by this request while the disk is locked. */
        pDisk->uOffsetStartLocked = offStart;
        pDisk->uOffsetEndLocked = offStart + cbDiscardLeft;

        if (RT_UNLIKELY(!pDiscard))
        {
            /* First discard on this disk: create the tracking state. */
            pDiscard = vdDiscardStateCreate();
            if (!pDiscard)
                return VERR_NO_MEMORY; /* NOTE(review): returns with the disk lock taken above still held — verify callers recover. */

            pDisk->pDiscard = pDiscard;
        }

        /* cbDiscardLeft == 0 means the previous range finished; load the next one. */
        if (!pIoCtx->Req.Discard.cbDiscardLeft)
        {
            offStart = paRanges[pIoCtx->Req.Discard.idxRange].offStart;
            cbDiscardLeft = paRanges[pIoCtx->Req.Discard.idxRange].cbRange;
            LogFlowFunc(("New range descriptor loaded (%u) offStart=%llu cbDiscard=%zu\n",
                         pIoCtx->Req.Discard.idxRange, offStart, cbDiscardLeft));
            pIoCtx->Req.Discard.idxRange++;
        }

        /* Look for a matching block in the AVL tree first. */
        PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, false);
        if (!pBlock || pBlock->Core.KeyLast < offStart)
        {
            /* No block covers offStart; find the next block above to clip against. */
            PVDDISCARDBLOCK pBlockAbove = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, true);

            /* Clip range to remain in the current block. */
            if (pBlockAbove)
                cbThisDiscard = RT_MIN(cbDiscardLeft, pBlockAbove->Core.KeyLast - offStart + 1);
            else
                cbThisDiscard = cbDiscardLeft;

            Assert(!(cbThisDiscard % 512));
            pIoCtx->Req.Discard.pBlock = NULL;
            pIoCtx->pfnIoCtxTransferNext = vdDiscardCurrentRangeAsync;
        }
        else
        {
            /* Range lies partly in the block, update allocation bitmap. */
            int32_t idxStart, idxEnd;

            cbThisDiscard = RT_MIN(cbDiscardLeft, pBlock->Core.KeyLast - offStart + 1);

            AssertPtr(pBlock);

            Assert(!(cbThisDiscard % 512));
            Assert(!((offStart - pBlock->Core.Key) % 512));

            /* Bitmap granularity is one bit per 512-byte sector. */
            idxStart = (offStart - pBlock->Core.Key) / 512;
            idxEnd = idxStart + (int32_t)(cbThisDiscard / 512);

            ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd);

            cbDiscardLeft -= cbThisDiscard;
            offStart += cbThisDiscard;

            /* Call the backend to discard the block if it is completely unallocated now. */
            if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, (uint32_t)(pBlock->cbDiscard / 512)) == -1)
            {
                pIoCtx->Req.Discard.pBlock = pBlock;
                pIoCtx->pfnIoCtxTransferNext = vdDiscardWholeBlockAsync;
                rc = VINF_SUCCESS;
            }
            else
            {
                /* Still partially allocated: just refresh its position in the LRU. */
                RTListNodeRemove(&pBlock->NodeLru);
                RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);

                /* Start with next range. */
                pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
                rc = VINF_SUCCESS;
            }
        }

        /* Save state in the context. */
        pIoCtx->Req.Discard.offCur = offStart;
        pIoCtx->Req.Discard.cbDiscardLeft = cbDiscardLeft;
        pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
3185
3186/**
3187 * VD async I/O interface open callback.
3188 */
3189static DECLCALLBACK(int) vdIOOpenFallback(void *pvUser, const char *pszLocation,
3190 uint32_t fOpen, PFNVDCOMPLETED pfnCompleted,
3191 void **ppStorage)
3192{
3193 RT_NOREF1(pvUser);
3194 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)RTMemAllocZ(sizeof(VDIIOFALLBACKSTORAGE));
3195
3196 if (!pStorage)
3197 return VERR_NO_MEMORY;
3198
3199 pStorage->pfnCompleted = pfnCompleted;
3200
3201 /* Open the file. */
3202 int rc = RTFileOpen(&pStorage->File, pszLocation, fOpen);
3203 if (RT_SUCCESS(rc))
3204 {
3205 *ppStorage = pStorage;
3206 return VINF_SUCCESS;
3207 }
3208
3209 RTMemFree(pStorage);
3210 return rc;
3211}
3212
3213/**
3214 * VD async I/O interface close callback.
3215 */
3216static DECLCALLBACK(int) vdIOCloseFallback(void *pvUser, void *pvStorage)
3217{
3218 RT_NOREF1(pvUser);
3219 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3220
3221 RTFileClose(pStorage->File);
3222 RTMemFree(pStorage);
3223 return VINF_SUCCESS;
3224}
3225
3226static DECLCALLBACK(int) vdIODeleteFallback(void *pvUser, const char *pcszFilename)
3227{
3228 RT_NOREF1(pvUser);
3229 return RTFileDelete(pcszFilename);
3230}
3231
3232static DECLCALLBACK(int) vdIOMoveFallback(void *pvUser, const char *pcszSrc, const char *pcszDst, unsigned fMove)
3233{
3234 RT_NOREF1(pvUser);
3235 return RTFileMove(pcszSrc, pcszDst, fMove);
3236}
3237
3238static DECLCALLBACK(int) vdIOGetFreeSpaceFallback(void *pvUser, const char *pcszFilename, int64_t *pcbFreeSpace)
3239{
3240 RT_NOREF1(pvUser);
3241 return RTFsQuerySizes(pcszFilename, NULL, pcbFreeSpace, NULL, NULL);
3242}
3243
3244static DECLCALLBACK(int) vdIOGetModificationTimeFallback(void *pvUser, const char *pcszFilename, PRTTIMESPEC pModificationTime)
3245{
3246 RT_NOREF1(pvUser);
3247 RTFSOBJINFO info;
3248 int rc = RTPathQueryInfo(pcszFilename, &info, RTFSOBJATTRADD_NOTHING);
3249 if (RT_SUCCESS(rc))
3250 *pModificationTime = info.ModificationTime;
3251 return rc;
3252}
3253
3254/**
3255 * VD async I/O interface callback for retrieving the file size.
3256 */
3257static DECLCALLBACK(int) vdIOGetSizeFallback(void *pvUser, void *pvStorage, uint64_t *pcbSize)
3258{
3259 RT_NOREF1(pvUser);
3260 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3261
3262 return RTFileQuerySize(pStorage->File, pcbSize);
3263}
3264
3265/**
3266 * VD async I/O interface callback for setting the file size.
3267 */
3268static DECLCALLBACK(int) vdIOSetSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize)
3269{
3270 RT_NOREF1(pvUser);
3271 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3272
3273 return RTFileSetSize(pStorage->File, cbSize);
3274}
3275
3276/**
3277 * VD async I/O interface callback for setting the file allocation size.
3278 */
3279static DECLCALLBACK(int) vdIOSetAllocationSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize,
3280 uint32_t fFlags)
3281{
3282 RT_NOREF2(pvUser, fFlags);
3283 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3284
3285 return RTFileSetAllocationSize(pStorage->File, cbSize, RTFILE_ALLOC_SIZE_F_DEFAULT);
3286}
3287
3288/**
3289 * VD async I/O interface callback for a synchronous write to the file.
3290 */
3291static DECLCALLBACK(int) vdIOWriteSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3292 const void *pvBuf, size_t cbWrite, size_t *pcbWritten)
3293{
3294 RT_NOREF1(pvUser);
3295 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3296
3297 return RTFileWriteAt(pStorage->File, uOffset, pvBuf, cbWrite, pcbWritten);
3298}
3299
3300/**
3301 * VD async I/O interface callback for a synchronous read from the file.
3302 */
3303static DECLCALLBACK(int) vdIOReadSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3304 void *pvBuf, size_t cbRead, size_t *pcbRead)
3305{
3306 RT_NOREF1(pvUser);
3307 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3308
3309 return RTFileReadAt(pStorage->File, uOffset, pvBuf, cbRead, pcbRead);
3310}
3311
3312/**
3313 * VD async I/O interface callback for a synchronous flush of the file data.
3314 */
3315static DECLCALLBACK(int) vdIOFlushSyncFallback(void *pvUser, void *pvStorage)
3316{
3317 RT_NOREF1(pvUser);
3318 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3319
3320 return RTFileFlush(pStorage->File);
3321}
3322
3323/**
3324 * VD async I/O interface callback for a asynchronous read from the file.
3325 */
/* The fallback storage backend is purely synchronous; async reads must never be reached. */
static DECLCALLBACK(int) vdIOReadAsyncFallback(void *pvUser, void *pStorage, uint64_t uOffset,
                                               PCRTSGSEG paSegments, size_t cSegments,
                                               size_t cbRead, void *pvCompletion,
                                               void **ppTask)
{
    RT_NOREF8(pvUser, pStorage, uOffset, paSegments, cSegments, cbRead, pvCompletion, ppTask);
    AssertFailed();
    return VERR_NOT_IMPLEMENTED;
}
3335
3336/**
3337 * VD async I/O interface callback for a asynchronous write to the file.
3338 */
/* The fallback storage backend is purely synchronous; async writes must never be reached. */
static DECLCALLBACK(int) vdIOWriteAsyncFallback(void *pvUser, void *pStorage, uint64_t uOffset,
                                                PCRTSGSEG paSegments, size_t cSegments,
                                                size_t cbWrite, void *pvCompletion,
                                                void **ppTask)
{
    RT_NOREF8(pvUser, pStorage, uOffset, paSegments, cSegments, cbWrite, pvCompletion, ppTask);
    AssertFailed();
    return VERR_NOT_IMPLEMENTED;
}
3348
3349/**
3350 * VD async I/O interface callback for a asynchronous flush of the file data.
3351 */
/* The fallback storage backend is purely synchronous; async flushes must never be reached. */
static DECLCALLBACK(int) vdIOFlushAsyncFallback(void *pvUser, void *pStorage,
                                                void *pvCompletion, void **ppTask)
{
    RT_NOREF4(pvUser, pStorage, pvCompletion, ppTask);
    AssertFailed();
    return VERR_NOT_IMPLEMENTED;
}
3359
3360/**
3361 * Internal - Continues an I/O context after
3362 * it was halted because of an active transfer.
3363 */
static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
{
    PVDISK pDisk = pIoCtx->pDisk;
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    /* Record the first failure; a success value is never allowed to overwrite it. */
    if (RT_FAILURE(rcReq))
        ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);

    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
    {
        /* Continue the transfer */
        rc = vdIoCtxProcessLocked(pIoCtx);

        if (   rc == VINF_VD_ASYNC_IO_FINISHED
            && ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false))
        {
            LogFlowFunc(("I/O context completed pIoCtx=%#p\n", pIoCtx));
            bool fFreeCtx = RT_BOOL(!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE));
            if (pIoCtx->pIoCtxParent)
            {
                /* Child context (e.g. of an image-growing write): propagate to the parent. */
                PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

                Assert(!pIoCtxParent->pIoCtxParent);
                if (RT_FAILURE(pIoCtx->rcReq))
                    ASMAtomicCmpXchgS32(&pIoCtxParent->rcReq, pIoCtx->rcReq, VINF_SUCCESS);

                ASMAtomicDecU32(&pIoCtxParent->cDataTransfersPending);

                if (pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE)
                {
                    LogFlowFunc(("I/O context transferred %u bytes for the parent pIoCtxParent=%p\n",
                                 pIoCtx->Type.Child.cbTransferParent, pIoCtxParent));

                    /* Update the parent state. */
                    Assert(pIoCtxParent->Req.Io.cbTransferLeft >= pIoCtx->Type.Child.cbTransferParent);
                    ASMAtomicSubU32(&pIoCtxParent->Req.Io.cbTransferLeft, (uint32_t)pIoCtx->Type.Child.cbTransferParent);
                }
                else
                    Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH);

                /*
                 * A completed child write means that we finished growing the image.
                 * We have to process any pending writes now.
                 */
                vdIoCtxUnlockDisk(pDisk, pIoCtxParent, false /* fProcessDeferredReqs */);

                /* Unblock the parent */
                pIoCtxParent->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;

                rc = vdIoCtxProcessLocked(pIoCtxParent);

                if (   rc == VINF_VD_ASYNC_IO_FINISHED
                    && ASMAtomicCmpXchgBool(&pIoCtxParent->fComplete, true, false))
                {
                    LogFlowFunc(("Parent I/O context completed pIoCtxParent=%#p rcReq=%Rrc\n", pIoCtxParent, pIoCtxParent->rcReq));
                    bool fFreeParentCtx = RT_BOOL(!(pIoCtxParent->fFlags & VDIOCTX_FLAGS_DONT_FREE));
                    vdIoCtxRootComplete(pDisk, pIoCtxParent);
                    vdThreadFinishWrite(pDisk);

                    if (fFreeParentCtx)
                        vdIoCtxFree(pDisk, pIoCtxParent);
                    vdDiskProcessBlockedIoCtx(pDisk);
                }
                else if (!vdIoCtxIsDiskLockOwner(pDisk, pIoCtx))
                {
                    /* Process any pending writes if the current request didn't cause another grow. */
                    vdDiskProcessBlockedIoCtx(pDisk);
                }
            }
            else
            {
                /* Root context: finish the appropriate read/write accounting, then complete. */
                if (pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH)
                {
                    vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs */);
                    vdThreadFinishWrite(pDisk);
                }
                else if (   pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE
                         || pIoCtx->enmTxDir == VDIOCTXTXDIR_DISCARD)
                    vdThreadFinishWrite(pDisk);
                else
                {
                    Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_READ);
                    vdThreadFinishRead(pDisk);
                }

                LogFlowFunc(("I/O context completed pIoCtx=%#p rcReq=%Rrc\n", pIoCtx, pIoCtx->rcReq));
                vdIoCtxRootComplete(pDisk, pIoCtx);
            }

            if (fFreeCtx)
                vdIoCtxFree(pDisk, pIoCtx);
        }
    }

    return VINF_SUCCESS;
}
3462
3463/**
3464 * Internal - Called when user transfer completed.
3465 */
static int vdUserXferCompleted(PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
                               PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
                               size_t cbTransfer, int rcReq)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;

    LogFlowFunc(("pIoStorage=%#p pIoCtx=%#p pfnComplete=%#p pvUser=%#p cbTransfer=%zu rcReq=%Rrc\n",
                 pIoStorage, pIoCtx, pfnComplete, pvUser, cbTransfer, rcReq));

    VD_IS_LOCKED(pDisk);

    /* Account the completed bytes and drop the pending-transfer reference. */
    Assert(pIoCtx->Req.Io.cbTransferLeft >= cbTransfer);
    ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTransfer); Assert(cbTransfer == (uint32_t)cbTransfer);
    ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);

    /* Let the backend post-process the transfer before the context is continued. */
    if (pfnComplete)
        rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);

    if (RT_SUCCESS(rc))
        rc = vdIoCtxContinue(pIoCtx, rcReq);
    else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        rc = VINF_SUCCESS;

    return rc;
}
3492
/**
 * Continues all I/O contexts queued on a deferred-wait list after a meta
 * transfer finished, invoking the backend completion callback for each.
 */
static void vdIoCtxContinueDeferredList(PVDIOSTORAGE pIoStorage, PRTLISTANCHOR pListWaiting,
                                        PFNVDXFERCOMPLETED pfnComplete, void *pvUser, int rcReq)
{
    LogFlowFunc(("pIoStorage=%#p pListWaiting=%#p pfnComplete=%#p pvUser=%#p rcReq=%Rrc\n",
                 pIoStorage, pListWaiting, pfnComplete, pvUser, rcReq));

    /* Go through the waiting list and continue the I/O contexts. */
    while (!RTListIsEmpty(pListWaiting))
    {
        int rc = VINF_SUCCESS;
        PVDIOCTXDEFERRED pDeferred = RTListGetFirst(pListWaiting, VDIOCTXDEFERRED, NodeDeferred);
        PVDIOCTX pIoCtx = pDeferred->pIoCtx;
        RTListNodeRemove(&pDeferred->NodeDeferred);

        /* The deferred node is only a carrier; free it before continuing the context. */
        RTMemFree(pDeferred);
        ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);

        if (pfnComplete)
            rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);

        LogFlow(("Completion callback for I/O context %#p returned %Rrc\n", pIoCtx, rc));

        if (RT_SUCCESS(rc))
        {
            rc = vdIoCtxContinue(pIoCtx, rcReq);
            AssertRC(rc);
        }
        else
            Assert(rc == VERR_VD_ASYNC_IO_IN_PROGRESS);
    }
}
3524
3525/**
3526 * Internal - Called when a meta transfer completed.
3527 */
static int vdMetaXferCompleted(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
                               PVDMETAXFER pMetaXfer, int rcReq)
{
    PVDISK pDisk = pIoStorage->pVDIo->pDisk;
    RTLISTANCHOR ListIoCtxWaiting;
    bool fFlush;

    LogFlowFunc(("pIoStorage=%#p pfnComplete=%#p pvUser=%#p pMetaXfer=%#p rcReq=%Rrc\n",
                 pIoStorage, pfnComplete, pvUser, pMetaXfer, rcReq));

    VD_IS_LOCKED(pDisk);

    fFlush = VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_FLUSH;

    if (!fFlush)
    {
        RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);

        if (RT_FAILURE(rcReq))
        {
            /* Remove from the AVL tree. */
            LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
            bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
            Assert(fRemoved); NOREF(fRemoved);
            /* If this was a write check if there is a shadow buffer with updated data. */
            if (pMetaXfer->pbDataShw)
            {
                Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
                Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));
                RTListConcatenate(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
                RTMemFree(pMetaXfer->pbDataShw);
                pMetaXfer->pbDataShw = NULL;
            }
            /* NOTE(review): pMetaXfer is freed here but dereferenced again below
             * (VDMETAXFER_TXDIR_SET, pbDataShw check, cRefs--) — looks like a
             * use-after-free on this failure path; verify against upstream. */
            RTMemFree(pMetaXfer);
        }
        else
        {
            /* Increase the reference counter to make sure it doesn't go away before the last context is processed. */
            pMetaXfer->cRefs++;
        }
    }
    else
        RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);

    VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
    vdIoCtxContinueDeferredList(pIoStorage, &ListIoCtxWaiting, pfnComplete, pvUser, rcReq);

    /*
     * If there is a shadow buffer and the previous write was successful update with the
     * new data and trigger a new write.
     */
    if (   pMetaXfer->pbDataShw
        && RT_SUCCESS(rcReq)
        && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
    {
        LogFlowFunc(("pMetaXfer=%#p Updating from shadow buffer and triggering new write\n", pMetaXfer));
        memcpy(pMetaXfer->abData, pMetaXfer->pbDataShw, pMetaXfer->cbMeta);
        RTMemFree(pMetaXfer->pbDataShw);
        pMetaXfer->pbDataShw = NULL;
        Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));

        /* Setup a new I/O write. */
        PVDIOTASK pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
        if (RT_LIKELY(pIoTask))
        {
            void *pvTask = NULL;
            RTSGSEG Seg;

            Seg.cbSeg = pMetaXfer->cbMeta;
            Seg.pvSeg = pMetaXfer->abData;

            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
            rcReq = pIoStorage->pVDIo->pInterfaceIo->pfnWriteAsync(pIoStorage->pVDIo->pInterfaceIo->Core.pvUser,
                                                                   pIoStorage->pStorage,
                                                                   pMetaXfer->Core.Key, &Seg, 1,
                                                                   pMetaXfer->cbMeta, pIoTask,
                                                                   &pvTask);
            /* NOTE(review): RT_SUCCESS(rcReq) already implies
             * rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS, so the condition reduces
             * to the inequality alone. */
            if (   RT_SUCCESS(rcReq)
                || rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else
                RTListMove(&pMetaXfer->ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
        }
        else
            rcReq = VERR_NO_MEMORY;

        /* Cleanup if there was an error or the request completed already. */
        if (rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
            vdIoCtxContinueDeferredList(pIoStorage, &pMetaXfer->ListIoCtxShwWrites, pfnComplete, pvUser, rcReq);
    }

    /* Remove if not used anymore. */
    if (!fFlush)
    {
        pMetaXfer->cRefs--;
        if (!pMetaXfer->cRefs && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting))
        {
            /* Remove from the AVL tree. */
            LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
            bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
            Assert(fRemoved); NOREF(fRemoved);
            RTMemFree(pMetaXfer);
        }
    }
    else if (fFlush) /* NOTE(review): the condition is redundant — this branch is only reached when fFlush is true. */
        RTMemFree(pMetaXfer);

    return VINF_SUCCESS;
}
3640
3641/**
3642 * Processes a list of waiting I/O tasks. The disk lock must be held by caller.
3643 *
3644 * @returns nothing.
3645 * @param pDisk The disk to process the list for.
3646 */
3647static void vdIoTaskProcessWaitingList(PVDISK pDisk)
3648{
3649 LogFlowFunc(("pDisk=%#p\n", pDisk));
3650
3651 VD_IS_LOCKED(pDisk);
3652
3653 PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK);
3654
3655 Log(("I/O task list cleared\n"));
3656
3657 /* Reverse order. */
3658 PVDIOTASK pCur = pHead;
3659 pHead = NULL;
3660 while (pCur)
3661 {
3662 PVDIOTASK pInsert = pCur;
3663 pCur = pCur->pNext;
3664 pInsert->pNext = pHead;
3665 pHead = pInsert;
3666 }
3667
3668 while (pHead)
3669 {
3670 PVDIOSTORAGE pIoStorage = pHead->pIoStorage;
3671
3672 if (!pHead->fMeta)
3673 vdUserXferCompleted(pIoStorage, pHead->Type.User.pIoCtx,
3674 pHead->pfnComplete, pHead->pvUser,
3675 pHead->Type.User.cbTransfer, pHead->rcReq);
3676 else
3677 vdMetaXferCompleted(pIoStorage, pHead->pfnComplete, pHead->pvUser,
3678 pHead->Type.Meta.pMetaXfer, pHead->rcReq);
3679
3680 pCur = pHead;
3681 pHead = pHead->pNext;
3682 vdIoTaskFree(pDisk, pCur);
3683 }
3684}
3685
3686/**
3687 * Process any I/O context on the halted list.
3688 *
3689 * @returns nothing.
3690 * @param pDisk The disk.
3691 */
3692static void vdIoCtxProcessHaltedList(PVDISK pDisk)
3693{
3694 LogFlowFunc(("pDisk=%#p\n", pDisk));
3695
3696 VD_IS_LOCKED(pDisk);
3697
3698 /* Get the waiting list and process it in FIFO order. */
3699 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHaltedHead, NULL, PVDIOCTX);
3700
3701 /* Reverse it. */
3702 PVDIOCTX pCur = pIoCtxHead;
3703 pIoCtxHead = NULL;
3704 while (pCur)
3705 {
3706 PVDIOCTX pInsert = pCur;
3707 pCur = pCur->pIoCtxNext;
3708 pInsert->pIoCtxNext = pIoCtxHead;
3709 pIoCtxHead = pInsert;
3710 }
3711
3712 /* Process now. */
3713 pCur = pIoCtxHead;
3714 while (pCur)
3715 {
3716 PVDIOCTX pTmp = pCur;
3717
3718 pCur = pCur->pIoCtxNext;
3719 pTmp->pIoCtxNext = NULL;
3720
3721 /* Continue */
3722 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
3723 vdIoCtxContinue(pTmp, pTmp->rcReq);
3724 }
3725}
3726
3727/**
3728 * Unlock the disk and process pending tasks.
3729 *
3730 * @returns VBox status code.
3731 * @param pDisk The disk to unlock.
3732 * @param pIoCtxRc The I/O context to get the status code from, optional.
3733 */
static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc)
{
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    /*
     * Process the list of waiting I/O tasks first
     * because they might complete I/O contexts.
     * Same for the list of halted I/O contexts.
     * Afterwards comes the list of new I/O contexts.
     */
    vdIoTaskProcessWaitingList(pDisk);
    vdIoCtxProcessHaltedList(pDisk);
    rc = vdDiskProcessWaitingIoCtx(pDisk, pIoCtxRc);
    ASMAtomicXchgBool(&pDisk->fLocked, false);

    /*
     * Need to check for new I/O tasks and waiting I/O contexts now
     * again as other threads might have added them while we processed
     * the previous lists.
     */
    while (   ASMAtomicUoReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX) != NULL
           || ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK) != NULL
           || ASMAtomicUoReadPtrT(&pDisk->pIoCtxHaltedHead, PVDIOCTX) != NULL)
    {
        /* Try lock disk again. */
        if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
        {
            vdIoTaskProcessWaitingList(pDisk);
            vdIoCtxProcessHaltedList(pDisk);
            vdDiskProcessWaitingIoCtx(pDisk, NULL);
            ASMAtomicXchgBool(&pDisk->fLocked, false);
        }
        else /* Let the other thread process everything when it unlocks the disk. */
            break;
    }

    return rc;
}
3774
3775/**
 * Try to lock the disk to complete processing of the I/O task.
3777 * The completion is deferred if the disk is locked already.
3778 *
3779 * @returns nothing.
3780 * @param pIoTask The I/O task to complete.
3781 */
static void vdXferTryLockDiskDeferIoTask(PVDIOTASK pIoTask)
{
    PVDIOSTORAGE pIoStorage = pIoTask->pIoStorage;
    PVDISK pDisk = pIoStorage->pVDIo->pDisk;

    Log(("Deferring I/O task pIoTask=%p\n", pIoTask));

    /* Put it on the waiting list: lock-free LIFO push with a CAS retry loop. */
    PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK);
    PVDIOTASK pHeadOld;
    pIoTask->pNext = pNext;
    while (!ASMAtomicCmpXchgExPtr(&pDisk->pIoTasksPendingHead, pIoTask, pNext, &pHeadOld))
    {
        /* Another thread changed the head; retry against the head it installed. */
        pNext = pHeadOld;
        Assert(pNext != pIoTask);
        pIoTask->pNext = pNext;
        ASMNopPause();
    }

    /* If we win the disk lock, drain all pending lists right away. */
    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    {
        /* Release disk lock, it will take care of processing all lists. */
        vdDiskUnlock(pDisk, NULL);
    }
}
3807
3808static DECLCALLBACK(int) vdIOIntReqCompleted(void *pvUser, int rcReq)
3809{
3810 PVDIOTASK pIoTask = (PVDIOTASK)pvUser;
3811
3812 LogFlowFunc(("Task completed pIoTask=%#p\n", pIoTask));
3813
3814 pIoTask->rcReq = rcReq;
3815 vdXferTryLockDiskDeferIoTask(pIoTask);
3816 return VINF_SUCCESS;
3817}
3818
3819/**
3820 * VD I/O interface callback for opening a file.
3821 */
3822static DECLCALLBACK(int) vdIOIntOpen(void *pvUser, const char *pszLocation,
3823 unsigned uOpenFlags, PPVDIOSTORAGE ppIoStorage)
3824{
3825 int rc = VINF_SUCCESS;
3826 PVDIO pVDIo = (PVDIO)pvUser;
3827 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
3828
3829 if (!pIoStorage)
3830 return VERR_NO_MEMORY;
3831
3832 /* Create the AVl tree. */
3833 pIoStorage->pTreeMetaXfers = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
3834 if (pIoStorage->pTreeMetaXfers)
3835 {
3836 rc = pVDIo->pInterfaceIo->pfnOpen(pVDIo->pInterfaceIo->Core.pvUser,
3837 pszLocation, uOpenFlags,
3838 vdIOIntReqCompleted,
3839 &pIoStorage->pStorage);
3840 if (RT_SUCCESS(rc))
3841 {
3842 pIoStorage->pVDIo = pVDIo;
3843 *ppIoStorage = pIoStorage;
3844 return VINF_SUCCESS;
3845 }
3846
3847 RTMemFree(pIoStorage->pTreeMetaXfers);
3848 }
3849 else
3850 rc = VERR_NO_MEMORY;
3851
3852 RTMemFree(pIoStorage);
3853 return rc;
3854}
3855
/* AVL destroy callback — all meta transfers must have completed before close. */
static DECLCALLBACK(int) vdIOIntTreeMetaXferDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
{
    RT_NOREF2(pNode, pvUser);
    AssertMsgFailed(("Tree should be empty at this point!\n"));
    return VINF_SUCCESS;
}
3862
3863static DECLCALLBACK(int) vdIOIntClose(void *pvUser, PVDIOSTORAGE pIoStorage)
3864{
3865 int rc = VINF_SUCCESS;
3866 PVDIO pVDIo = (PVDIO)pvUser;
3867
3868 /* We free everything here, even if closing the file failed for some reason. */
3869 rc = pVDIo->pInterfaceIo->pfnClose(pVDIo->pInterfaceIo->Core.pvUser, pIoStorage->pStorage);
3870 RTAvlrFileOffsetDestroy(pIoStorage->pTreeMetaXfers, vdIOIntTreeMetaXferDestroy, NULL);
3871 RTMemFree(pIoStorage->pTreeMetaXfers);
3872 RTMemFree(pIoStorage);
3873 return rc;
3874}
3875
3876static DECLCALLBACK(int) vdIOIntDelete(void *pvUser, const char *pcszFilename)
3877{
3878 PVDIO pVDIo = (PVDIO)pvUser;
3879 return pVDIo->pInterfaceIo->pfnDelete(pVDIo->pInterfaceIo->Core.pvUser,
3880 pcszFilename);
3881}
3882
3883static DECLCALLBACK(int) vdIOIntMove(void *pvUser, const char *pcszSrc, const char *pcszDst,
3884 unsigned fMove)
3885{
3886 PVDIO pVDIo = (PVDIO)pvUser;
3887 return pVDIo->pInterfaceIo->pfnMove(pVDIo->pInterfaceIo->Core.pvUser,
3888 pcszSrc, pcszDst, fMove);
3889}
3890
3891static DECLCALLBACK(int) vdIOIntGetFreeSpace(void *pvUser, const char *pcszFilename,
3892 int64_t *pcbFreeSpace)
3893{
3894 PVDIO pVDIo = (PVDIO)pvUser;
3895 return pVDIo->pInterfaceIo->pfnGetFreeSpace(pVDIo->pInterfaceIo->Core.pvUser,
3896 pcszFilename, pcbFreeSpace);
3897}
3898
3899static DECLCALLBACK(int) vdIOIntGetModificationTime(void *pvUser, const char *pcszFilename,
3900 PRTTIMESPEC pModificationTime)
3901{
3902 PVDIO pVDIo = (PVDIO)pvUser;
3903 return pVDIo->pInterfaceIo->pfnGetModificationTime(pVDIo->pInterfaceIo->Core.pvUser,
3904 pcszFilename, pModificationTime);
3905}
3906
3907static DECLCALLBACK(int) vdIOIntGetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3908 uint64_t *pcbSize)
3909{
3910 PVDIO pVDIo = (PVDIO)pvUser;
3911 return pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
3912 pIoStorage->pStorage, pcbSize);
3913}
3914
3915static DECLCALLBACK(int) vdIOIntSetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3916 uint64_t cbSize)
3917{
3918 PVDIO pVDIo = (PVDIO)pvUser;
3919 return pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
3920 pIoStorage->pStorage, cbSize);
3921}
3922
3923static DECLCALLBACK(int) vdIOIntSetAllocationSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3924 uint64_t cbSize, uint32_t fFlags,
3925 PVDINTERFACEPROGRESS pIfProgress,
3926 unsigned uPercentStart, unsigned uPercentSpan)
3927{
3928 PVDIO pVDIo = (PVDIO)pvUser;
3929 int rc = pVDIo->pInterfaceIo->pfnSetAllocationSize(pVDIo->pInterfaceIo->Core.pvUser,
3930 pIoStorage->pStorage, cbSize, fFlags);
3931 if (rc == VERR_NOT_SUPPORTED)
3932 {
3933 /* Fallback if the underlying medium does not support optimized storage allocation. */
3934 uint64_t cbSizeCur = 0;
3935 rc = pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
3936 pIoStorage->pStorage, &cbSizeCur);
3937 if (RT_SUCCESS(rc))
3938 {
3939 if (cbSizeCur < cbSize)
3940 {
3941 const size_t cbBuf = 128 * _1K;
3942 void *pvBuf = RTMemTmpAllocZ(cbBuf);
3943 if (RT_LIKELY(pvBuf))
3944 {
3945 uint64_t cbFill = cbSize - cbSizeCur;
3946 uint64_t uOff = 0;
3947
3948 /* Write data to all blocks. */
3949 while ( uOff < cbFill
3950 && RT_SUCCESS(rc))
3951 {
3952 size_t cbChunk = (size_t)RT_MIN(cbFill - uOff, cbBuf);
3953
3954 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
3955 pIoStorage->pStorage, cbSizeCur + uOff,
3956 pvBuf, cbChunk, NULL);
3957 if (RT_SUCCESS(rc))
3958 {
3959 uOff += cbChunk;
3960
3961 rc = vdIfProgress(pIfProgress, uPercentStart + uOff * uPercentSpan / cbFill);
3962 }
3963 }
3964
3965 RTMemTmpFree(pvBuf);
3966 }
3967 else
3968 rc = VERR_NO_MEMORY;
3969 }
3970 else if (cbSizeCur > cbSize)
3971 rc = pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
3972 pIoStorage->pStorage, cbSize);
3973 }
3974 }
3975
3976 if (RT_SUCCESS(rc))
3977 rc = vdIfProgress(pIfProgress, uPercentStart + uPercentSpan);
3978
3979 return rc;
3980}
3981
/**
 * Reads user data into the I/O context's S/G buffer, either synchronously or
 * by spawning async I/O tasks, depending on the context flags.
 */
static DECLCALLBACK(int) vdIOIntReadUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                         PVDIOCTX pIoCtx, size_t cbRead)
{
    int rc = VINF_SUCCESS;
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbRead=%u\n",
                 pvUser, pIoStorage, uOffset, pIoCtx, cbRead));

    /** @todo Enable check for sync I/O later. */
    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    Assert(cbRead > 0);

    if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
    {
        /* Synchronous path: read directly into the single buffer segment. */
        RTSGSEG Seg;
        unsigned cSegments = 1;
        size_t cbTaskRead = 0;

        /* Synchronous I/O contexts only have one buffer segment. */
        AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
                        ("Invalid number of buffer segments for synchronous I/O context"),
                        VERR_INVALID_PARAMETER);

        cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbRead);
        Assert(cbRead == cbTaskRead);
        Assert(cSegments == 1);
        rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
                                              pIoStorage->pStorage, uOffset,
                                              Seg.pvSeg, cbRead, NULL);
        if (RT_SUCCESS(rc))
        {
            Assert(cbRead == (uint32_t)cbRead);
            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbRead);
        }
    }
    else
    {
        /* Build the S/G array and spawn a new I/O task */
        while (cbRead)
        {
            RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX];
            unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX;
            size_t cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbRead);

            Assert(cSegments > 0);
            Assert(cbTaskRead > 0);
            AssertMsg(cbTaskRead <= cbRead, ("Invalid number of bytes to read\n"));

            LogFlow(("Reading %u bytes into %u segments\n", cbTaskRead, cSegments));

#ifdef RT_STRICT
            for (unsigned i = 0; i < cSegments; i++)
                AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
                          ("Segment %u is invalid\n", i));
#endif

            Assert(cbTaskRead == (uint32_t)cbTaskRead);
            PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, NULL, NULL, pIoCtx, (uint32_t)cbTaskRead);

            if (!pIoTask)
                return VERR_NO_MEMORY;

            /* Count the transfer as pending before submission; undone below if it completes inline. */
            ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);

            void *pvTask;
            Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
            rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                   pIoStorage->pStorage, uOffset,
                                                   aSeg, cSegments, cbTaskRead, pIoTask,
                                                   &pvTask);
            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously: account the bytes and free the task. */
                AssertMsg(cbTaskRead <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
                ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskRead);
                ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                /* Submission failed: roll back the pending count and bail out. */
                ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
                break;
            }

            uOffset += cbTaskRead;
            cbRead -= cbTaskRead;
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4078
4079static DECLCALLBACK(int) vdIOIntWriteUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
4080 PVDIOCTX pIoCtx, size_t cbWrite, PFNVDXFERCOMPLETED pfnComplete,
4081 void *pvCompleteUser)
4082{
4083 int rc = VINF_SUCCESS;
4084 PVDIO pVDIo = (PVDIO)pvUser;
4085 PVDISK pDisk = pVDIo->pDisk;
4086
4087 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbWrite=%u\n",
4088 pvUser, pIoStorage, uOffset, pIoCtx, cbWrite));
4089
4090 /** @todo Enable check for sync I/O later. */
4091 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4092 VD_IS_LOCKED(pDisk);
4093
4094 Assert(cbWrite > 0);
4095
4096 if (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4097 {
4098 RTSGSEG Seg;
4099 unsigned cSegments = 1;
4100 size_t cbTaskWrite = 0;
4101
4102 /* Synchronous I/O contexts only have one buffer segment. */
4103 AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
4104 ("Invalid number of buffer segments for synchronous I/O context"),
4105 VERR_INVALID_PARAMETER);
4106
4107 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbWrite);
4108 Assert(cbWrite == cbTaskWrite);
4109 Assert(cSegments == 1);
4110 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
4111 pIoStorage->pStorage, uOffset,
4112 Seg.pvSeg, cbWrite, NULL);
4113 if (RT_SUCCESS(rc))
4114 {
4115 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbWrite);
4116 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbWrite);
4117 }
4118 }
4119 else
4120 {
4121 /* Build the S/G array and spawn a new I/O task */
4122 while (cbWrite)
4123 {
4124 RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX];
4125 unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX;
4126 size_t cbTaskWrite = 0;
4127
4128 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbWrite);
4129
4130 Assert(cSegments > 0);
4131 Assert(cbTaskWrite > 0);
4132 AssertMsg(cbTaskWrite <= cbWrite, ("Invalid number of bytes to write\n"));
4133
4134 LogFlow(("Writing %u bytes from %u segments\n", cbTaskWrite, cSegments));
4135
4136#ifdef DEBUG
4137 for (unsigned i = 0; i < cSegments; i++)
4138 AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
4139 ("Segment %u is invalid\n", i));
4140#endif
4141
4142 Assert(cbTaskWrite == (uint32_t)cbTaskWrite);
4143 PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, pfnComplete, pvCompleteUser, pIoCtx, (uint32_t)cbTaskWrite);
4144
4145 if (!pIoTask)
4146 return VERR_NO_MEMORY;
4147
4148 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
4149
4150 void *pvTask;
4151 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4152 rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
4153 pIoStorage->pStorage,
4154 uOffset, aSeg, cSegments,
4155 cbTaskWrite, pIoTask, &pvTask);
4156 if (RT_SUCCESS(rc))
4157 {
4158 AssertMsg(cbTaskWrite <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
4159 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskWrite);
4160 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4161 vdIoTaskFree(pDisk, pIoTask);
4162 }
4163 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4164 {
4165 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4166 vdIoTaskFree(pDisk, pIoTask);
4167 break;
4168 }
4169
4170 uOffset += cbTaskWrite;
4171 cbWrite -= cbTaskWrite;
4172 }
4173 }
4174
4175 LogFlowFunc(("returns rc=%Rrc\n", rc));
4176 return rc;
4177}
4178
/**
 * VD I/O interface callback: read metadata, with caching of in-flight
 * transfers in the per-storage AVL tree (pTreeMetaXfers, keyed by offset).
 *
 * A NULL pIoCtx or a VDIOCTX_FLAGS_SYNC context performs a plain synchronous
 * read.  Otherwise an existing meta transfer covering uOffset is reused, or a
 * new asynchronous one is started and inserted into the tree.
 *
 * @returns VBox status code; VERR_VD_NOT_ENOUGH_METADATA when the data is not
 *          available yet and the context was queued on the transfer.
 * @param   pvUser          Opaque user data, the VDIO instance (PVDIO).
 * @param   pIoStorage      Storage handle to read from.
 * @param   uOffset         Byte offset of the metadata in the storage.
 * @param   pvBuf           Destination buffer.
 * @param   cbRead          Number of bytes to read.
 * @param   pIoCtx          I/O context, NULL for synchronous reads.
 * @param   ppMetaXfer      Where to return the meta transfer handle the caller
 *                          must release via the MetaXferRelease callback.
 * @param   pfnComplete     Optional completion callback.
 * @param   pvCompleteUser  Opaque user data for pfnComplete.
 */
static DECLCALLBACK(int) vdIOIntReadMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                         void *pvBuf, size_t cbRead, PVDIOCTX pIoCtx,
                                         PPVDMETAXFER ppMetaXfer, PFNVDXFERCOMPLETED pfnComplete,
                                         void *pvCompleteUser)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    int rc = VINF_SUCCESS;
    RTSGSEG Seg;
    PVDIOTASK pIoTask;
    PVDMETAXFER pMetaXfer = NULL;
    void *pvTask = NULL;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbRead=%u\n",
                 pvUser, pIoStorage, uOffset, pvBuf, cbRead));

    AssertMsgReturn(   pIoCtx
                    || (!ppMetaXfer && !pfnComplete && !pvCompleteUser),
                    ("A synchronous metadata read is requested but the parameters are wrong\n"),
                    VERR_INVALID_POINTER);

    /** @todo Enable check for sync I/O later. */
    if (   pIoCtx
        && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    if (   !pIoCtx
        || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
    {
        /* Handle synchronous metadata I/O. */
        /** @todo Integrate with metadata transfers below. */
        rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
                                              pIoStorage->pStorage, uOffset,
                                              pvBuf, cbRead, NULL);
        if (ppMetaXfer)
            *ppMetaXfer = NULL;
    }
    else
    {
        /* Look for an already running or cached transfer at this offset. */
        pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
        if (!pMetaXfer)
        {
#ifdef RT_STRICT
            pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGetBestFit(pIoStorage->pTreeMetaXfers, uOffset, false /* fAbove */);
            AssertMsg(!pMetaXfer || (pMetaXfer->Core.Key + (RTFOFF)pMetaXfer->cbMeta <= (RTFOFF)uOffset),
                      ("Overlapping meta transfers!\n"));
#endif

            /* Allocate a new meta transfer. */
            pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbRead);
            if (!pMetaXfer)
                return VERR_NO_MEMORY;

            pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
            if (!pIoTask)
            {
                RTMemFree(pMetaXfer);
                return VERR_NO_MEMORY;
            }

            Seg.cbSeg = cbRead;
            Seg.pvSeg = pMetaXfer->abData;

            /* Kick off the asynchronous read into the transfer's own buffer. */
            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_READ);
            rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                   pIoStorage->pStorage,
                                                   uOffset, &Seg, 1,
                                                   cbRead, pIoTask, &pvTask);

            if (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
                Assert(fInserted); NOREF(fInserted);
            }
            else
                RTMemFree(pMetaXfer);

            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously after all: mark idle and drop the task. */
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS && !pfnComplete)
                rc = VERR_VD_NOT_ENOUGH_METADATA;
        }

        Assert(RT_VALID_PTR(pMetaXfer) || RT_FAILURE(rc));

        if (RT_SUCCESS(rc) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        {
            /* If it is pending add the request to the list. */
            if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_READ)
            {
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                /* NOTE(review): allocation failure is only caught in strict builds here;
                 * a release build would dereference NULL below — confirm intended. */
                AssertPtr(pDeferred);

                RTListInit(&pDeferred->NodeDeferred);
                pDeferred->pIoCtx = pIoCtx;

                ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
                RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
                rc = VERR_VD_NOT_ENOUGH_METADATA;
            }
            else
            {
                /* Transfer the data. */
                pMetaXfer->cRefs++;
                Assert(pMetaXfer->cbMeta >= cbRead);
                Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
                /* Prefer the shadow buffer: it holds newer data from a pending write. */
                if (pMetaXfer->pbDataShw)
                    memcpy(pvBuf, pMetaXfer->pbDataShw, cbRead);
                else
                    memcpy(pvBuf, pMetaXfer->abData, cbRead);
                /* Assumes callers on the async path always pass ppMetaXfer — TODO confirm. */
                *ppMetaXfer = pMetaXfer;
            }
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4300
/**
 * VD I/O interface callback: write metadata, coalescing overlapping writes
 * through the per-storage meta transfer tree.
 *
 * A NULL pIoCtx or a VDIOCTX_FLAGS_SYNC context performs a plain synchronous
 * write.  Otherwise: if no transfer is in flight for uOffset a new async
 * write is started; if one is already writing, the new data is staged in a
 * shadow buffer (pbDataShw) and flushed when the current write completes.
 *
 * @returns VBox status code; VERR_VD_ASYNC_IO_IN_PROGRESS when the write was
 *          queued asynchronously.
 * @param   pvUser          Opaque user data, the VDIO instance (PVDIO).
 * @param   pIoStorage      Storage handle to write to.
 * @param   uOffset         Byte offset of the metadata in the storage.
 * @param   pvBuf           Source buffer.
 * @param   cbWrite         Number of bytes to write.
 * @param   pIoCtx          I/O context, NULL for synchronous writes.
 * @param   pfnComplete     Optional completion callback.
 * @param   pvCompleteUser  Opaque user data for pfnComplete.
 */
static DECLCALLBACK(int) vdIOIntWriteMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                          const void *pvBuf, size_t cbWrite, PVDIOCTX pIoCtx,
                                          PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    int rc = VINF_SUCCESS;
    RTSGSEG Seg;
    PVDIOTASK pIoTask;
    PVDMETAXFER pMetaXfer = NULL;
    bool fInTree = false;
    void *pvTask = NULL;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbWrite=%u\n",
                 pvUser, pIoStorage, uOffset, pvBuf, cbWrite));

    AssertMsgReturn(   pIoCtx
                    || (!pfnComplete && !pvCompleteUser),
                    ("A synchronous metadata write is requested but the parameters are wrong\n"),
                    VERR_INVALID_POINTER);

    /** @todo Enable check for sync I/O later. */
    if (   pIoCtx
        && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    if (   !pIoCtx
        || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
    {
        /* Handle synchronous metadata I/O. */
        /** @todo Integrate with metadata transfers below. */
        rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
                                               pIoStorage->pStorage, uOffset,
                                               pvBuf, cbWrite, NULL);
    }
    else
    {
        /* Reuse an existing transfer at this offset if there is one. */
        pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
        if (!pMetaXfer)
        {
            /* Allocate a new meta transfer. */
            pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbWrite);
            if (!pMetaXfer)
                return VERR_NO_MEMORY;
        }
        else
        {
            Assert(pMetaXfer->cbMeta >= cbWrite);
            Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
            fInTree = true;
        }

        if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
        {
            /* No I/O in flight for this transfer: start the write directly. */
            pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
            if (!pIoTask)
            {
                RTMemFree(pMetaXfer);
                return VERR_NO_MEMORY;
            }

            memcpy(pMetaXfer->abData, pvBuf, cbWrite);
            Seg.cbSeg = cbWrite;
            Seg.pvSeg = pMetaXfer->abData;

            ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);

            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
            rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                    pIoStorage->pStorage,
                                                    uOffset, &Seg, 1, cbWrite, pIoTask,
                                                    &pvTask);
            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously: undo pending count; drop an unreferenced
                 * cached transfer from the tree. */
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
                if (fInTree && !pMetaXfer->cRefs)
                {
                    LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
                    bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
                    AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);
                    RTMemFree(pMetaXfer);
                    pMetaXfer = NULL;
                }
            }
            else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                /* Queued: defer the context until the write completes. */
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                /* NOTE(review): allocation failure is only caught in strict builds here —
                 * a release build would dereference NULL below; confirm intended. */
                AssertPtr(pDeferred);

                RTListInit(&pDeferred->NodeDeferred);
                pDeferred->pIoCtx = pIoCtx;

                if (!fInTree)
                {
                    bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
                    Assert(fInserted); NOREF(fInserted);
                }

                RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
            }
            else
            {
                RTMemFree(pMetaXfer);
                pMetaXfer = NULL;
            }
        }
        else
        {
            /* I/O is in progress, update shadow buffer and add to waiting list. */
            Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
            if (!pMetaXfer->pbDataShw)
            {
                /* Allocate shadow buffer and set initial state. */
                LogFlowFunc(("pMetaXfer=%#p Creating shadow buffer\n", pMetaXfer));
                pMetaXfer->pbDataShw = (uint8_t *)RTMemAlloc(pMetaXfer->cbMeta);
                if (RT_LIKELY(pMetaXfer->pbDataShw))
                    memcpy(pMetaXfer->pbDataShw, pMetaXfer->abData, pMetaXfer->cbMeta);
                else
                    rc = VERR_NO_MEMORY;
            }

            if (RT_SUCCESS(rc))
            {
                /* Update with written data and append to waiting list. */
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                if (pDeferred)
                {
                    LogFlowFunc(("pMetaXfer=%#p Updating shadow buffer\n", pMetaXfer));

                    RTListInit(&pDeferred->NodeDeferred);
                    pDeferred->pIoCtx = pIoCtx;
                    ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
                    memcpy(pMetaXfer->pbDataShw, pvBuf, cbWrite);
                    RTListAppend(&pMetaXfer->ListIoCtxShwWrites, &pDeferred->NodeDeferred);
                }
                else
                {
                    /*
                     * Free shadow buffer if there is no one depending on it, i.e.
                     * we just allocated it.
                     */
                    if (RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites))
                    {
                        RTMemFree(pMetaXfer->pbDataShw);
                        pMetaXfer->pbDataShw = NULL;
                    }
                    rc = VERR_NO_MEMORY;
                }
            }
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4458
4459static DECLCALLBACK(void) vdIOIntMetaXferRelease(void *pvUser, PVDMETAXFER pMetaXfer)
4460{
4461 PVDIO pVDIo = (PVDIO)pvUser;
4462 PVDISK pDisk = pVDIo->pDisk;
4463 PVDIOSTORAGE pIoStorage;
4464
4465 /*
4466 * It is possible that we get called with a NULL metadata xfer handle
4467 * for synchronous I/O. Just exit.
4468 */
4469 if (!pMetaXfer)
4470 return;
4471
4472 pIoStorage = pMetaXfer->pIoStorage;
4473
4474 VD_IS_LOCKED(pDisk);
4475
4476 Assert( VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE
4477 || VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
4478 Assert(pMetaXfer->cRefs > 0);
4479
4480 pMetaXfer->cRefs--;
4481 if ( !pMetaXfer->cRefs
4482 && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting)
4483 && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
4484 {
4485 /* Free the meta data entry. */
4486 LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
4487 bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
4488 AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);
4489
4490 RTMemFree(pMetaXfer);
4491 }
4492}
4493
4494static DECLCALLBACK(int) vdIOIntFlush(void *pvUser, PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
4495 PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
4496{
4497 PVDIO pVDIo = (PVDIO)pvUser;
4498 PVDISK pDisk = pVDIo->pDisk;
4499 int rc = VINF_SUCCESS;
4500 PVDIOTASK pIoTask;
4501 PVDMETAXFER pMetaXfer = NULL;
4502 void *pvTask = NULL;
4503
4504 LogFlowFunc(("pvUser=%#p pIoStorage=%#p pIoCtx=%#p\n",
4505 pvUser, pIoStorage, pIoCtx));
4506
4507 AssertMsgReturn( pIoCtx
4508 || (!pfnComplete && !pvCompleteUser),
4509 ("A synchronous metadata write is requested but the parameters are wrong\n"),
4510 VERR_INVALID_POINTER);
4511
4512 /** @todo Enable check for sync I/O later. */
4513 if ( pIoCtx
4514 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4515 VD_IS_LOCKED(pDisk);
4516
4517 if (pVDIo->fIgnoreFlush)
4518 return VINF_SUCCESS;
4519
4520 if ( !pIoCtx
4521 || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4522 {
4523 /* Handle synchronous flushes. */
4524 /** @todo Integrate with metadata transfers below. */
4525 rc = pVDIo->pInterfaceIo->pfnFlushSync(pVDIo->pInterfaceIo->Core.pvUser,
4526 pIoStorage->pStorage);
4527 }
4528 else
4529 {
4530 /* Allocate a new meta transfer. */
4531 pMetaXfer = vdMetaXferAlloc(pIoStorage, 0, 0);
4532 if (!pMetaXfer)
4533 return VERR_NO_MEMORY;
4534
4535 pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
4536 if (!pIoTask)
4537 {
4538 RTMemFree(pMetaXfer);
4539 return VERR_NO_MEMORY;
4540 }
4541
4542 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4543
4544 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4545 AssertPtr(pDeferred);
4546
4547 RTListInit(&pDeferred->NodeDeferred);
4548 pDeferred->pIoCtx = pIoCtx;
4549
4550 RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
4551 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_FLUSH);
4552 rc = pVDIo->pInterfaceIo->pfnFlushAsync(pVDIo->pInterfaceIo->Core.pvUser,
4553 pIoStorage->pStorage,
4554 pIoTask, &pvTask);
4555 if (RT_SUCCESS(rc))
4556 {
4557 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
4558 ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
4559 vdIoTaskFree(pDisk, pIoTask);
4560 RTMemFree(pDeferred);
4561 RTMemFree(pMetaXfer);
4562 }
4563 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4564 RTMemFree(pMetaXfer);
4565 }
4566
4567 LogFlowFunc(("returns rc=%Rrc\n", rc));
4568 return rc;
4569}
4570
4571static DECLCALLBACK(size_t) vdIOIntIoCtxCopyTo(void *pvUser, PVDIOCTX pIoCtx,
4572 const void *pvBuf, size_t cbBuf)
4573{
4574 PVDIO pVDIo = (PVDIO)pvUser;
4575 PVDISK pDisk = pVDIo->pDisk;
4576 size_t cbCopied = 0;
4577
4578 /** @todo Enable check for sync I/O later. */
4579 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4580 VD_IS_LOCKED(pDisk);
4581
4582 cbCopied = vdIoCtxCopyTo(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4583 Assert(cbCopied == cbBuf);
4584
4585 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCopied); - triggers with vdCopyHelper/dmgRead.
4586 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4587
4588 return cbCopied;
4589}
4590
4591static DECLCALLBACK(size_t) vdIOIntIoCtxCopyFrom(void *pvUser, PVDIOCTX pIoCtx,
4592 void *pvBuf, size_t cbBuf)
4593{
4594 PVDIO pVDIo = (PVDIO)pvUser;
4595 PVDISK pDisk = pVDIo->pDisk;
4596 size_t cbCopied = 0;
4597
4598 /** @todo Enable check for sync I/O later. */
4599 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4600 VD_IS_LOCKED(pDisk);
4601
4602 cbCopied = vdIoCtxCopyFrom(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4603 Assert(cbCopied == cbBuf);
4604
4605 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft > cbCopied); - triggers with vdCopyHelper/dmgRead.
4606 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4607
4608 return cbCopied;
4609}
4610
4611static DECLCALLBACK(size_t) vdIOIntIoCtxSet(void *pvUser, PVDIOCTX pIoCtx, int ch, size_t cb)
4612{
4613 PVDIO pVDIo = (PVDIO)pvUser;
4614 PVDISK pDisk = pVDIo->pDisk;
4615 size_t cbSet = 0;
4616
4617 /** @todo Enable check for sync I/O later. */
4618 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4619 VD_IS_LOCKED(pDisk);
4620
4621 cbSet = vdIoCtxSet(pIoCtx, ch, cb);
4622 Assert(cbSet == cb);
4623
4624 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbSet); - triggers with vdCopyHelper/dmgRead.
4625 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbSet);
4626
4627 return cbSet;
4628}
4629
4630static DECLCALLBACK(size_t) vdIOIntIoCtxSegArrayCreate(void *pvUser, PVDIOCTX pIoCtx,
4631 PRTSGSEG paSeg, unsigned *pcSeg,
4632 size_t cbData)
4633{
4634 PVDIO pVDIo = (PVDIO)pvUser;
4635 PVDISK pDisk = pVDIo->pDisk;
4636 size_t cbCreated = 0;
4637
4638 /** @todo It is possible that this gets called from a filter plugin
4639 * outside of the disk lock. Refine assertion or remove completely. */
4640#if 0
4641 /** @todo Enable check for sync I/O later. */
4642 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4643 VD_IS_LOCKED(pDisk);
4644#else
4645 NOREF(pDisk);
4646#endif
4647
4648 cbCreated = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, paSeg, pcSeg, cbData);
4649 Assert(!paSeg || cbData == cbCreated);
4650
4651 return cbCreated;
4652}
4653
/**
 * VD I/O interface callback: a backend completed part of an I/O context.
 *
 * Records the request status, subtracts the completed bytes from the
 * remaining transfer size, advances the transfer function chain when the
 * transfer finished, and parks the context on the halted list for
 * continuation under the disk lock.
 */
static DECLCALLBACK(void) vdIOIntIoCtxCompleted(void *pvUser, PVDIOCTX pIoCtx, int rcReq,
                                                size_t cbCompleted)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;

    LogFlowFunc(("pvUser=%#p pIoCtx=%#p rcReq=%Rrc cbCompleted=%zu\n",
                 pvUser, pIoCtx, rcReq, cbCompleted));

    /*
     * Grab the disk critical section to avoid races with other threads which
     * might still modify the I/O context.
     * Example is that iSCSI is doing an asynchronous write but calls us already
     * while the other thread is still hanging in vdWriteHelperAsync and couldn't update
     * the blocked state yet.
     * It can overwrite the state to true before we call vdIoCtxContinue and the
     * the request would hang indefinite.
     */
    /* Only the first error is kept; later successes don't overwrite it. */
    ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);
    Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCompleted);
    ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCompleted);

    /* Set next transfer function if the current one finished.
     * @todo: Find a better way to prevent vdIoCtxContinue from calling the current helper again. */
    if (!pIoCtx->Req.Io.cbTransferLeft)
    {
        pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
        pIoCtx->pfnIoCtxTransferNext = NULL;
    }

    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHaltedHead, pIoCtx);
    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    {
        /* Immediately drop the lock again, it will take care of processing the list. */
        vdDiskUnlock(pDisk, NULL);
    }
}
4691
4692static DECLCALLBACK(bool) vdIOIntIoCtxIsSynchronous(void *pvUser, PVDIOCTX pIoCtx)
4693{
4694 NOREF(pvUser);
4695 return !!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC);
4696}
4697
4698static DECLCALLBACK(bool) vdIOIntIoCtxIsZero(void *pvUser, PVDIOCTX pIoCtx, size_t cbCheck,
4699 bool fAdvance)
4700{
4701 NOREF(pvUser);
4702
4703 bool fIsZero = RTSgBufIsZero(&pIoCtx->Req.Io.SgBuf, cbCheck);
4704 if (fIsZero && fAdvance)
4705 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbCheck);
4706
4707 return fIsZero;
4708}
4709
4710static DECLCALLBACK(size_t) vdIOIntIoCtxGetDataUnitSize(void *pvUser, PVDIOCTX pIoCtx)
4711{
4712 RT_NOREF1(pIoCtx);
4713 PVDIO pVDIo = (PVDIO)pvUser;
4714 PVDISK pDisk = pVDIo->pDisk;
4715 size_t cbSector = 0;
4716
4717 PVDIMAGE pImage = vdGetImageByNumber(pDisk, VD_LAST_IMAGE);
4718 AssertPtrReturn(pImage, 0);
4719
4720 PCVDREGIONLIST pRegionList = NULL;
4721 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
4722 if (RT_SUCCESS(rc))
4723 {
4724 cbSector = pRegionList->aRegions[0].cbBlock;
4725
4726 AssertPtr(pImage->Backend->pfnRegionListRelease);
4727 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
4728 }
4729
4730 return cbSector;
4731}
4732
4733/**
4734 * VD I/O interface callback for opening a file (limited version for VDGetFormat).
4735 */
4736static DECLCALLBACK(int) vdIOIntOpenLimited(void *pvUser, const char *pszLocation,
4737 uint32_t fOpen, PPVDIOSTORAGE ppIoStorage)
4738{
4739 int rc = VINF_SUCCESS;
4740 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4741 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
4742
4743 if (!pIoStorage)
4744 return VERR_NO_MEMORY;
4745
4746 rc = pInterfaceIo->pfnOpen(NULL, pszLocation, fOpen, NULL, &pIoStorage->pStorage);
4747 if (RT_SUCCESS(rc))
4748 *ppIoStorage = pIoStorage;
4749 else
4750 RTMemFree(pIoStorage);
4751
4752 return rc;
4753}
4754
4755static DECLCALLBACK(int) vdIOIntCloseLimited(void *pvUser, PVDIOSTORAGE pIoStorage)
4756{
4757 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4758 int rc = pInterfaceIo->pfnClose(NULL, pIoStorage->pStorage);
4759
4760 RTMemFree(pIoStorage);
4761 return rc;
4762}
4763
4764static DECLCALLBACK(int) vdIOIntDeleteLimited(void *pvUser, const char *pcszFilename)
4765{
4766 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4767 return pInterfaceIo->pfnDelete(NULL, pcszFilename);
4768}
4769
4770static DECLCALLBACK(int) vdIOIntMoveLimited(void *pvUser, const char *pcszSrc,
4771 const char *pcszDst, unsigned fMove)
4772{
4773 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4774 return pInterfaceIo->pfnMove(NULL, pcszSrc, pcszDst, fMove);
4775}
4776
4777static DECLCALLBACK(int) vdIOIntGetFreeSpaceLimited(void *pvUser, const char *pcszFilename,
4778 int64_t *pcbFreeSpace)
4779{
4780 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4781 return pInterfaceIo->pfnGetFreeSpace(NULL, pcszFilename, pcbFreeSpace);
4782}
4783
4784static DECLCALLBACK(int) vdIOIntGetModificationTimeLimited(void *pvUser,
4785 const char *pcszFilename,
4786 PRTTIMESPEC pModificationTime)
4787{
4788 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4789 return pInterfaceIo->pfnGetModificationTime(NULL, pcszFilename, pModificationTime);
4790}
4791
4792static DECLCALLBACK(int) vdIOIntGetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4793 uint64_t *pcbSize)
4794{
4795 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4796 return pInterfaceIo->pfnGetSize(NULL, pIoStorage->pStorage, pcbSize);
4797}
4798
4799static DECLCALLBACK(int) vdIOIntSetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4800 uint64_t cbSize)
4801{
4802 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4803 return pInterfaceIo->pfnSetSize(NULL, pIoStorage->pStorage, cbSize);
4804}
4805
4806static DECLCALLBACK(int) vdIOIntWriteUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4807 uint64_t uOffset, PVDIOCTX pIoCtx,
4808 size_t cbWrite,
4809 PFNVDXFERCOMPLETED pfnComplete,
4810 void *pvCompleteUser)
4811{
4812 NOREF(pvUser);
4813 NOREF(pStorage);
4814 NOREF(uOffset);
4815 NOREF(pIoCtx);
4816 NOREF(cbWrite);
4817 NOREF(pfnComplete);
4818 NOREF(pvCompleteUser);
4819 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4820}
4821
4822static DECLCALLBACK(int) vdIOIntReadUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4823 uint64_t uOffset, PVDIOCTX pIoCtx,
4824 size_t cbRead)
4825{
4826 NOREF(pvUser);
4827 NOREF(pStorage);
4828 NOREF(uOffset);
4829 NOREF(pIoCtx);
4830 NOREF(cbRead);
4831 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4832}
4833
4834static DECLCALLBACK(int) vdIOIntWriteMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4835 uint64_t uOffset, const void *pvBuffer,
4836 size_t cbBuffer, PVDIOCTX pIoCtx,
4837 PFNVDXFERCOMPLETED pfnComplete,
4838 void *pvCompleteUser)
4839{
4840 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4841
4842 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4843 ("Async I/O not implemented for the limited interface"),
4844 VERR_NOT_SUPPORTED);
4845
4846 return pInterfaceIo->pfnWriteSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4847}
4848
4849static DECLCALLBACK(int) vdIOIntReadMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4850 uint64_t uOffset, void *pvBuffer,
4851 size_t cbBuffer, PVDIOCTX pIoCtx,
4852 PPVDMETAXFER ppMetaXfer,
4853 PFNVDXFERCOMPLETED pfnComplete,
4854 void *pvCompleteUser)
4855{
4856 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4857
4858 AssertMsgReturn(!pIoCtx && !ppMetaXfer && !pfnComplete && !pvCompleteUser,
4859 ("Async I/O not implemented for the limited interface"),
4860 VERR_NOT_SUPPORTED);
4861
4862 return pInterfaceIo->pfnReadSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4863}
4864
#if 0 /* unused */
4866static int vdIOIntMetaXferReleaseLimited(void *pvUser, PVDMETAXFER pMetaXfer)
4867{
4868 /* This is a NOP in this case. */
4869 NOREF(pvUser);
4870 NOREF(pMetaXfer);
4871 return VINF_SUCCESS;
4872}
4873#endif
4874
4875static DECLCALLBACK(int) vdIOIntFlushLimited(void *pvUser, PVDIOSTORAGE pStorage,
4876 PVDIOCTX pIoCtx,
4877 PFNVDXFERCOMPLETED pfnComplete,
4878 void *pvCompleteUser)
4879{
4880 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4881
4882 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4883 ("Async I/O not implemented for the limited interface"),
4884 VERR_NOT_SUPPORTED);
4885
4886 return pInterfaceIo->pfnFlushSync(NULL, pStorage->pStorage);
4887}
4888
4889/**
4890 * internal: send output to the log (unconditionally).
4891 */
4892static DECLCALLBACK(int) vdLogMessage(void *pvUser, const char *pszFormat, va_list args)
4893{
4894 NOREF(pvUser);
4895 RTLogPrintfV(pszFormat, args);
4896 return VINF_SUCCESS;
4897}
4898
/**
 * Internal helper: forward a formatted message to the disk's error interface
 * message callback (pfnMessage).
 *
 * Note: pDisk->pInterfaceError and its pfnMessage are dereferenced without a
 * NULL check; callers must ensure both are set.
 */
DECLINLINE(int) vdMessageWrapper(PVDISK pDisk, const char *pszFormat, ...)
{
    va_list va;
    va_start(va, pszFormat);
    int rc = pDisk->pInterfaceError->pfnMessage(pDisk->pInterfaceError->Core.pvUser,
                                                pszFormat, va);
    va_end(va);
    return rc;
}
4908
4909
4910/**
4911 * internal: adjust PCHS geometry
4912 */
4913static void vdFixupPCHSGeometry(PVDGEOMETRY pPCHS, uint64_t cbSize)
4914{
4915 /* Fix broken PCHS geometry. Can happen for two reasons: either the backend
4916 * mixes up PCHS and LCHS, or the application used to create the source
4917 * image has put garbage in it. Additionally, if the PCHS geometry covers
4918 * more than the image size, set it back to the default. */
4919 if ( pPCHS->cHeads > 16
4920 || pPCHS->cSectors > 63
4921 || pPCHS->cCylinders == 0
4922 || (uint64_t)pPCHS->cHeads * pPCHS->cSectors * pPCHS->cCylinders * 512 > cbSize)
4923 {
4924 Assert(!(RT_MIN(cbSize / 512 / 16 / 63, 16383) - (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383)));
4925 pPCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383);
4926 pPCHS->cHeads = 16;
4927 pPCHS->cSectors = 63;
4928 }
4929}
4930
4931/**
4932 * internal: adjust LCHS geometry
4933 */
4934static void vdFixupLCHSGeometry(PVDGEOMETRY pLCHS, uint64_t cbSize)
4935{
4936 /* Fix broken LCHS geometry. Can happen for two reasons: either the backend
4937 * mixes up PCHS and LCHS, or the application used to create the source
4938 * image has put garbage in it. The fix in this case is to clear the LCHS
4939 * geometry to trigger autodetection when it is used next. If the geometry
4940 * already says "please autodetect" (cylinders=0) keep it. */
4941 if ( ( pLCHS->cHeads > 255
4942 || pLCHS->cHeads == 0
4943 || pLCHS->cSectors > 63
4944 || pLCHS->cSectors == 0)
4945 && pLCHS->cCylinders != 0)
4946 {
4947 pLCHS->cCylinders = 0;
4948 pLCHS->cHeads = 0;
4949 pLCHS->cSectors = 0;
4950 }
4951 /* Always recompute the number of cylinders stored in the LCHS
4952 * geometry if it isn't set to "autotedetect" at the moment.
4953 * This is very useful if the destination image size is
4954 * larger or smaller than the source image size. Do not modify
4955 * the number of heads and sectors. Windows guests hate it. */
4956 if ( pLCHS->cCylinders != 0
4957 && pLCHS->cHeads != 0 /* paranoia */
4958 && pLCHS->cSectors != 0 /* paranoia */)
4959 {
4960 Assert(!(RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024) - (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024)));
4961 pLCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024);
4962 }
4963}
4964
4965/**
4966 * Sets the I/O callbacks of the given interface to the fallback methods
4967 *
4968 * @returns nothing.
4969 * @param pIfIo The I/O interface to setup.
4970 */
4971static void vdIfIoFallbackCallbacksSetup(PVDINTERFACEIO pIfIo)
4972{
4973 pIfIo->pfnOpen = vdIOOpenFallback;
4974 pIfIo->pfnClose = vdIOCloseFallback;
4975 pIfIo->pfnDelete = vdIODeleteFallback;
4976 pIfIo->pfnMove = vdIOMoveFallback;
4977 pIfIo->pfnGetFreeSpace = vdIOGetFreeSpaceFallback;
4978 pIfIo->pfnGetModificationTime = vdIOGetModificationTimeFallback;
4979 pIfIo->pfnGetSize = vdIOGetSizeFallback;
4980 pIfIo->pfnSetSize = vdIOSetSizeFallback;
4981 pIfIo->pfnSetAllocationSize = vdIOSetAllocationSizeFallback;
4982 pIfIo->pfnReadSync = vdIOReadSyncFallback;
4983 pIfIo->pfnWriteSync = vdIOWriteSyncFallback;
4984 pIfIo->pfnFlushSync = vdIOFlushSyncFallback;
4985 pIfIo->pfnReadAsync = vdIOReadAsyncFallback;
4986 pIfIo->pfnWriteAsync = vdIOWriteAsyncFallback;
4987 pIfIo->pfnFlushAsync = vdIOFlushAsyncFallback;
4988}
4989
4990/**
4991 * Sets the internal I/O callbacks of the given interface.
4992 *
4993 * @returns nothing.
4994 * @param pIfIoInt The internal I/O interface to setup.
4995 */
4996static void vdIfIoIntCallbacksSetup(PVDINTERFACEIOINT pIfIoInt)
4997{
4998 pIfIoInt->pfnOpen = vdIOIntOpen;
4999 pIfIoInt->pfnClose = vdIOIntClose;
5000 pIfIoInt->pfnDelete = vdIOIntDelete;
5001 pIfIoInt->pfnMove = vdIOIntMove;
5002 pIfIoInt->pfnGetFreeSpace = vdIOIntGetFreeSpace;
5003 pIfIoInt->pfnGetModificationTime = vdIOIntGetModificationTime;
5004 pIfIoInt->pfnGetSize = vdIOIntGetSize;
5005 pIfIoInt->pfnSetSize = vdIOIntSetSize;
5006 pIfIoInt->pfnSetAllocationSize = vdIOIntSetAllocationSize;
5007 pIfIoInt->pfnReadUser = vdIOIntReadUser;
5008 pIfIoInt->pfnWriteUser = vdIOIntWriteUser;
5009 pIfIoInt->pfnReadMeta = vdIOIntReadMeta;
5010 pIfIoInt->pfnWriteMeta = vdIOIntWriteMeta;
5011 pIfIoInt->pfnMetaXferRelease = vdIOIntMetaXferRelease;
5012 pIfIoInt->pfnFlush = vdIOIntFlush;
5013 pIfIoInt->pfnIoCtxCopyFrom = vdIOIntIoCtxCopyFrom;
5014 pIfIoInt->pfnIoCtxCopyTo = vdIOIntIoCtxCopyTo;
5015 pIfIoInt->pfnIoCtxSet = vdIOIntIoCtxSet;
5016 pIfIoInt->pfnIoCtxSegArrayCreate = vdIOIntIoCtxSegArrayCreate;
5017 pIfIoInt->pfnIoCtxCompleted = vdIOIntIoCtxCompleted;
5018 pIfIoInt->pfnIoCtxIsSynchronous = vdIOIntIoCtxIsSynchronous;
5019 pIfIoInt->pfnIoCtxIsZero = vdIOIntIoCtxIsZero;
5020 pIfIoInt->pfnIoCtxGetDataUnitSize = vdIOIntIoCtxGetDataUnitSize;
5021}
5022
5023/**
5024 * Internally used completion handler for synchronous I/O contexts.
5025 */
5026static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq)
5027{
5028 RT_NOREF2(pvUser1, rcReq);
5029 RTSEMEVENT hEvent = (RTSEMEVENT)pvUser2;
5030
5031 RTSemEventSignal(hEvent);
5032}
5033
5034
5035VBOXDDU_DECL(int) VDInit(void)
5036{
5037 int rc = vdPluginInit();
5038 LogRel(("VD: VDInit finished with %Rrc\n", rc));
5039 return rc;
5040}
5041
5042
/**
 * Shuts the VD library down again, tearing down the plugin infrastructure
 * set up by VDInit().
 *
 * @returns VBox status code from the plugin termination.
 */
VBOXDDU_DECL(int) VDShutdown(void)
{
    return vdPluginTerm();
}
5047
5048
5049VBOXDDU_DECL(int) VDPluginLoadFromFilename(const char *pszFilename)
5050{
5051 if (!vdPluginIsInitialized())
5052 {
5053 int rc = VDInit();
5054 if (RT_FAILURE(rc))
5055 return rc;
5056 }
5057
5058 return vdPluginLoadFromFilename(pszFilename);
5059}
5060
5061/**
5062 * Load all plugins from a given path.
5063 *
5064 * @returns VBox statuse code.
5065 * @param pszPath The path to load plugins from.
5066 */
5067VBOXDDU_DECL(int) VDPluginLoadFromPath(const char *pszPath)
5068{
5069 if (!vdPluginIsInitialized())
5070 {
5071 int rc = VDInit();
5072 if (RT_FAILURE(rc))
5073 return rc;
5074 }
5075
5076 return vdPluginLoadFromPath(pszPath);
5077}
5078
5079
5080VBOXDDU_DECL(int) VDPluginUnloadFromFilename(const char *pszFilename)
5081{
5082 if (!vdPluginIsInitialized())
5083 {
5084 int rc = VDInit();
5085 if (RT_FAILURE(rc))
5086 return rc;
5087 }
5088
5089 return vdPluginUnloadFromFilename(pszFilename);
5090}
5091
5092
5093VBOXDDU_DECL(int) VDPluginUnloadFromPath(const char *pszPath)
5094{
5095 if (!vdPluginIsInitialized())
5096 {
5097 int rc = VDInit();
5098 if (RT_FAILURE(rc))
5099 return rc;
5100 }
5101
5102 return vdPluginUnloadFromPath(pszPath);
5103}
5104
5105
5106VBOXDDU_DECL(int) VDBackendInfo(unsigned cEntriesAlloc, PVDBACKENDINFO pEntries,
5107 unsigned *pcEntriesUsed)
5108{
5109 int rc = VINF_SUCCESS;
5110
5111 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5112 /* Check arguments. */
5113 AssertMsgReturn(cEntriesAlloc, ("cEntriesAlloc=%u\n", cEntriesAlloc), VERR_INVALID_PARAMETER);
5114 AssertPtrReturn(pEntries, VERR_INVALID_POINTER);
5115 AssertPtrReturn(pcEntriesUsed, VERR_INVALID_POINTER);
5116 if (!vdPluginIsInitialized())
5117 VDInit();
5118
5119 uint32_t cBackends = vdGetImageBackendCount();
5120 if (cEntriesAlloc < cBackends)
5121 {
5122 *pcEntriesUsed = cBackends;
5123 return VERR_BUFFER_OVERFLOW;
5124 }
5125
5126 for (unsigned i = 0; i < cBackends; i++)
5127 {
5128 PCVDIMAGEBACKEND pBackend;
5129 rc = vdQueryImageBackend(i, &pBackend);
5130 AssertRC(rc);
5131
5132 pEntries[i].pszBackend = pBackend->pszBackendName;
5133 pEntries[i].uBackendCaps = pBackend->uBackendCaps;
5134 pEntries[i].paFileExtensions = pBackend->paFileExtensions;
5135 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5136 pEntries[i].pfnComposeLocation = pBackend->pfnComposeLocation;
5137 pEntries[i].pfnComposeName = pBackend->pfnComposeName;
5138 }
5139
5140 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5141 *pcEntriesUsed = cBackends;
5142 return rc;
5143}
5144
5145
5146VBOXDDU_DECL(int) VDBackendInfoOne(const char *pszBackend, PVDBACKENDINFO pEntry)
5147{
5148 LogFlowFunc(("pszBackend=%#p pEntry=%#p\n", pszBackend, pEntry));
5149 /* Check arguments. */
5150 AssertPtrReturn(pszBackend, VERR_INVALID_POINTER);
5151 AssertPtrReturn(pEntry, VERR_INVALID_POINTER);
5152 if (!vdPluginIsInitialized())
5153 VDInit();
5154
5155 PCVDIMAGEBACKEND pBackend;
5156 int rc = vdFindImageBackend(pszBackend, &pBackend);
5157 if (RT_SUCCESS(rc))
5158 {
5159 pEntry->pszBackend = pBackend->pszBackendName;
5160 pEntry->uBackendCaps = pBackend->uBackendCaps;
5161 pEntry->paFileExtensions = pBackend->paFileExtensions;
5162 pEntry->paConfigInfo = pBackend->paConfigInfo;
5163 }
5164
5165 return rc;
5166}
5167
5168
5169VBOXDDU_DECL(int) VDFilterInfo(unsigned cEntriesAlloc, PVDFILTERINFO pEntries,
5170 unsigned *pcEntriesUsed)
5171{
5172 int rc = VINF_SUCCESS;
5173
5174 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5175 /* Check arguments. */
5176 AssertMsgReturn(cEntriesAlloc,
5177 ("cEntriesAlloc=%u\n", cEntriesAlloc),
5178 VERR_INVALID_PARAMETER);
5179 AssertPtrReturn(pEntries, VERR_INVALID_POINTER);
5180 AssertPtrReturn(pcEntriesUsed, VERR_INVALID_POINTER);
5181 if (!vdPluginIsInitialized())
5182 VDInit();
5183
5184 uint32_t cBackends = vdGetFilterBackendCount();
5185 if (cEntriesAlloc < cBackends)
5186 {
5187 *pcEntriesUsed = cBackends;
5188 return VERR_BUFFER_OVERFLOW;
5189 }
5190
5191 for (unsigned i = 0; i < cBackends; i++)
5192 {
5193 PCVDFILTERBACKEND pBackend;
5194 rc = vdQueryFilterBackend(i, &pBackend);
5195 pEntries[i].pszFilter = pBackend->pszBackendName;
5196 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5197 }
5198
5199 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5200 *pcEntriesUsed = cBackends;
5201 return rc;
5202}
5203
5204
5205VBOXDDU_DECL(int) VDFilterInfoOne(const char *pszFilter, PVDFILTERINFO pEntry)
5206{
5207 LogFlowFunc(("pszFilter=%#p pEntry=%#p\n", pszFilter, pEntry));
5208 /* Check arguments. */
5209 AssertPtrReturn(pszFilter, VERR_INVALID_POINTER);
5210 AssertPtrReturn(pEntry, VERR_INVALID_POINTER);
5211 if (!vdPluginIsInitialized())
5212 VDInit();
5213
5214 PCVDFILTERBACKEND pBackend;
5215 int rc = vdFindFilterBackend(pszFilter, &pBackend);
5216 if (RT_SUCCESS(rc))
5217 {
5218 pEntry->pszFilter = pBackend->pszBackendName;
5219 pEntry->paConfigInfo = pBackend->paConfigInfo;
5220 }
5221
5222 return rc;
5223}
5224
5225
5226VBOXDDU_DECL(int) VDCreate(PVDINTERFACE pVDIfsDisk, VDTYPE enmType, PVDISK *ppDisk)
5227{
5228 int rc = VINF_SUCCESS;
5229 PVDISK pDisk = NULL;
5230
5231 LogFlowFunc(("pVDIfsDisk=%#p\n", pVDIfsDisk));
5232 /* Check arguments. */
5233 AssertPtrReturn(ppDisk, VERR_INVALID_POINTER);
5234
5235 do
5236 {
5237 pDisk = (PVDISK)RTMemAllocZ(sizeof(VDISK));
5238 if (pDisk)
5239 {
5240 pDisk->u32Signature = VDISK_SIGNATURE;
5241 pDisk->enmType = enmType;
5242 pDisk->cImages = 0;
5243 pDisk->pBase = NULL;
5244 pDisk->pLast = NULL;
5245 pDisk->cbSize = 0;
5246 pDisk->PCHSGeometry.cCylinders = 0;
5247 pDisk->PCHSGeometry.cHeads = 0;
5248 pDisk->PCHSGeometry.cSectors = 0;
5249 pDisk->LCHSGeometry.cCylinders = 0;
5250 pDisk->LCHSGeometry.cHeads = 0;
5251 pDisk->LCHSGeometry.cSectors = 0;
5252 pDisk->pVDIfsDisk = pVDIfsDisk;
5253 pDisk->pInterfaceError = NULL;
5254 pDisk->pInterfaceThreadSync = NULL;
5255 pDisk->pIoCtxLockOwner = NULL;
5256 pDisk->pIoCtxHead = NULL;
5257 pDisk->fLocked = false;
5258 pDisk->hMemCacheIoCtx = NIL_RTMEMCACHE;
5259 pDisk->hMemCacheIoTask = NIL_RTMEMCACHE;
5260 RTListInit(&pDisk->ListFilterChainWrite);
5261 RTListInit(&pDisk->ListFilterChainRead);
5262
5263 /* Create the I/O ctx cache */
5264 rc = RTMemCacheCreate(&pDisk->hMemCacheIoCtx, sizeof(VDIOCTX), 0, UINT32_MAX,
5265 NULL, NULL, NULL, 0);
5266 if (RT_FAILURE(rc))
5267 break;
5268
5269 /* Create the I/O task cache */
5270 rc = RTMemCacheCreate(&pDisk->hMemCacheIoTask, sizeof(VDIOTASK), 0, UINT32_MAX,
5271 NULL, NULL, NULL, 0);
5272 if (RT_FAILURE(rc))
5273 break;
5274
5275 pDisk->pInterfaceError = VDIfErrorGet(pVDIfsDisk);
5276 pDisk->pInterfaceThreadSync = VDIfThreadSyncGet(pVDIfsDisk);
5277
5278 *ppDisk = pDisk;
5279 }
5280 else
5281 {
5282 rc = VERR_NO_MEMORY</