VirtualBox

source: vbox/trunk/src/VBox/Storage/VD.cpp@ 98187

Last change on this file since 98187 was 98103, checked in by vboxsync, 2 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 339.3 KB
<
Line 
1/* $Id: VD.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * VD - Virtual disk container implementation.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_VD
33#include <VBox/vd.h>
34#include <VBox/err.h>
35#include <VBox/sup.h>
36#include <VBox/log.h>
37
38#include <iprt/alloc.h>
39#include <iprt/assert.h>
40#include <iprt/uuid.h>
41#include <iprt/file.h>
42#include <iprt/string.h>
43#include <iprt/asm.h>
44#include <iprt/param.h>
45#include <iprt/path.h>
46#include <iprt/sg.h>
47#include <iprt/semaphore.h>
48#include <iprt/vector.h>
49
50#include "VDInternal.h"
51
/** Buffer size used for merging images (16 MiB chunks). */
#define VD_MERGE_BUFFER_SIZE    (16 * _1M)

/** Maximum number of segments in one I/O task. */
#define VD_IO_TASK_SEGMENTS_MAX 64

/** Threshold after which not recently used blocks are removed from the discard list. */
#define VD_DISCARD_REMOVE_THRESHOLD (10 * _1M) /** @todo experiment */
60
/**
 * VD async I/O interface storage descriptor.
 *
 * State for the built-in fallback I/O implementation: a plain file handle
 * plus the callback/thread used to emulate asynchronous access.
 */
typedef struct VDIIOFALLBACKSTORAGE
{
    /** File handle. */
    RTFILE              File;
    /** Completion callback. */
    PFNVDCOMPLETED      pfnCompleted;
    /** Thread for async access. */
    RTTHREAD            ThreadAsync;
} VDIIOFALLBACKSTORAGE, *PVDIIOFALLBACKSTORAGE;
73
/**
 * uModified bit flags.
 */
/** The image is marked as modified. */
#define VD_IMAGE_MODIFIED_FLAG                  RT_BIT(0)
/** First modification since the flag tracking started (presumably since open
 *  or last flush — confirm against the users of uModified). */
#define VD_IMAGE_MODIFIED_FIRST                 RT_BIT(1)
/** Suppress updating the image UUID on modification. */
#define VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE   RT_BIT(2)
80
81
/** Asserts that the disk's fLocked flag is set.
 *  NOREF() keeps the parameter referenced in builds where AssertMsg compiles
 *  to nothing, avoiding unused-variable warnings. */
# define VD_IS_LOCKED(a_pDisk) \
    do \
    { \
        NOREF(a_pDisk); \
        AssertMsg((a_pDisk)->fLocked, \
                  ("Lock not held\n"));\
    } while(0)
89
/**
 * VBox parent read descriptor, used internally for compaction.
 *
 * Bundles the disk and the image acting as parent so both can be passed
 * through a single user pointer.
 */
typedef struct VDPARENTSTATEDESC
{
    /** Pointer to disk descriptor. */
    PVDISK   pDisk;
    /** Pointer to image descriptor. */
    PVDIMAGE pImage;
} VDPARENTSTATEDESC, *PVDPARENTSTATEDESC;
100
/**
 * Transfer direction of an I/O context.
 */
typedef enum VDIOCTXTXDIR
{
    /** Read. */
    VDIOCTXTXDIR_READ = 0,
    /** Write. */
    VDIOCTXTXDIR_WRITE,
    /** Flush. */
    VDIOCTXTXDIR_FLUSH,
    /** Discard. */
    VDIOCTXTXDIR_DISCARD,
    /** 32bit hack (forces the compiler to use a 32-bit type for the enum). */
    VDIOCTXTXDIR_32BIT_HACK = 0x7fffffff
} VDIOCTXTXDIR, *PVDIOCTXTXDIR;
117
/** Transfer function — performs (part of) the data transfer for an I/O context. */
typedef DECLCALLBACKTYPE(int, FNVDIOCTXTRANSFER ,(PVDIOCTX pIoCtx));
/** Pointer to a transfer function. */
typedef FNVDIOCTXTRANSFER *PFNVDIOCTXTRANSFER;
122
/**
 * I/O context.
 *
 * Tracks one virtual-disk request (read/write/flush/discard) while it
 * traverses the image chain. Root contexts complete back to the user;
 * child contexts feed their result into a parent context.
 */
typedef struct VDIOCTX
{
    /** Pointer to the next I/O context. */
    struct VDIOCTX * volatile    pIoCtxNext;
    /** Disk this request is for. */
    PVDISK                       pDisk;
    /** Return code. */
    int                          rcReq;
    /** Various flags for the I/O context (VDIOCTX_FLAGS_XXX). */
    uint32_t                     fFlags;
    /** Number of data transfers currently pending. */
    volatile uint32_t            cDataTransfersPending;
    /** How many meta data transfers are pending. */
    volatile uint32_t            cMetaTransfersPending;
    /** Flag whether the request finished. */
    volatile bool                fComplete;
    /** Temporary allocated memory which is freed
     * when the context completes. */
    void                        *pvAllocation;
    /** Transfer function. */
    PFNVDIOCTXTRANSFER           pfnIoCtxTransfer;
    /** Next transfer part after the current one completed. */
    PFNVDIOCTXTRANSFER           pfnIoCtxTransferNext;
    /** Transfer direction. */
    VDIOCTXTXDIR                 enmTxDir;
    /** Request type dependent data. */
    union
    {
        /** I/O request (read/write). */
        struct
        {
            /** Number of bytes left until this context completes. */
            volatile uint32_t    cbTransferLeft;
            /** Current offset. */
            volatile uint64_t    uOffset;
            /** Number of bytes to transfer. */
            volatile size_t      cbTransfer;
            /** Current image in the chain. */
            PVDIMAGE             pImageCur;
            /** Start image to read from. pImageCur is reset to this
             * value after it reached the first image in the chain. */
            PVDIMAGE             pImageStart;
            /** S/G buffer. */
            RTSGBUF              SgBuf;
            /** Number of bytes to clear in the buffer before the current read. */
            size_t               cbBufClear;
            /** Number of images to read. */
            unsigned             cImagesRead;
            /** Override for the parent image to start reading from. */
            PVDIMAGE             pImageParentOverride;
            /** Original offset of the transfer - required for filtering read requests. */
            uint64_t             uOffsetXferOrig;
            /** Original size of the transfer - required for filtering read requests. */
            size_t               cbXferOrig;
        } Io;
        /** Discard requests. */
        struct
        {
            /** Pointer to the range descriptor array. */
            PCRTRANGE            paRanges;
            /** Number of ranges in the array. */
            unsigned             cRanges;
            /** Range descriptor index which is processed. */
            unsigned             idxRange;
            /** Start offset to discard currently. */
            uint64_t             offCur;
            /** How many bytes left to discard in the current range. */
            size_t               cbDiscardLeft;
            /** How many bytes to discard in the current block (<= cbDiscardLeft). */
            size_t               cbThisDiscard;
            /** Discard block handled currently. */
            PVDDISCARDBLOCK      pBlock;
        } Discard;
    } Req;
    /** Parent I/O context if any. Sets the type of the context (root/child). */
    PVDIOCTX                     pIoCtxParent;
    /** Type dependent data (root/child). */
    union
    {
        /** Root data. */
        struct
        {
            /** Completion callback. */
            PFNVDASYNCTRANSFERCOMPLETE pfnComplete;
            /** User argument 1 passed on completion. */
            void                *pvUser1;
            /** User argument 2 passed on completion. */
            void                *pvUser2;
        } Root;
        /** Child data. */
        struct
        {
            /** Saved start offset. */
            uint64_t             uOffsetSaved;
            /** Saved transfer size. */
            size_t               cbTransferLeftSaved;
            /** Number of bytes transferred from the parent if this context completes. */
            size_t               cbTransferParent;
            /** Number of bytes to pre read. */
            size_t               cbPreRead;
            /** Number of bytes to post read. */
            size_t               cbPostRead;
            /** Number of bytes to write left in the parent. */
            size_t               cbWriteParent;
            /** Write type dependent data. */
            union
            {
                /** Optimized write. */
                struct
                {
                    /** Bytes to fill to satisfy the block size. Not part of the virtual disk. */
                    size_t       cbFill;
                    /** Bytes to copy instead of reading from the parent. */
                    size_t       cbWriteCopy;
                    /** Bytes to read from the image. */
                    size_t       cbReadImage;
                } Optimized;
            } Write;
        } Child;
    } Type;
} VDIOCTX;
247
/** Default flags for an I/O context, i.e. unblocked and async. */
#define VDIOCTX_FLAGS_DEFAULT                   (0)
/** Flag whether the context is blocked. */
#define VDIOCTX_FLAGS_BLOCKED                   RT_BIT_32(0)
/** Flag whether the I/O context is using synchronous I/O. */
#define VDIOCTX_FLAGS_SYNC                      RT_BIT_32(1)
/** Flag whether the read should update the cache. */
#define VDIOCTX_FLAGS_READ_UPDATE_CACHE         RT_BIT_32(2)
/** Flag whether free blocks should be zeroed.
 * If false and no image has data for the specified
 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
 * Note that unallocated blocks are still zeroed
 * if at least one image has valid data for a part
 * of the range.
 */
#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS          RT_BIT_32(3)
/** Don't free the I/O context when complete because
 * it was allocated elsewhere (stack, ...). */
#define VDIOCTX_FLAGS_DONT_FREE                 RT_BIT_32(4)
/** Don't set the modified flag for this I/O context when writing. */
#define VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG    RT_BIT_32(5)
/** The write filter was applied already and shouldn't be applied a second time.
 * Used at the beginning of vdWriteHelperAsync() because it might be called
 * multiple times.
 */
#define VDIOCTX_FLAGS_WRITE_FILTER_APPLIED      RT_BIT_32(6)

/** NIL I/O context pointer value. */
#define NIL_VDIOCTX ((PVDIOCTX)0)
277
/**
 * List node for deferred I/O contexts.
 *
 * Wraps an I/O context so it can sit on an RTLISTNODE based wait list.
 */
typedef struct VDIOCTXDEFERRED
{
    /** Node in the list of deferred requests.
     * A request can be deferred if the image is growing
     * and the request accesses the same range or if
     * the backend needs to read or write metadata from the disk
     * before it can continue. */
    RTLISTNODE NodeDeferred;
    /** I/O context this entry points to. */
    PVDIOCTX   pIoCtx;
} VDIOCTXDEFERRED, *PVDIOCTXDEFERRED;
292
/**
 * I/O task.
 *
 * One queued unit of work against a VDIOSTORAGE; either a user data
 * transfer (tied to an I/O context) or a metadata transfer.
 */
typedef struct VDIOTASK
{
    /** Next I/O task waiting in the list. */
    struct VDIOTASK * volatile pNext;
    /** Storage this task belongs to. */
    PVDIOSTORAGE               pIoStorage;
    /** Optional completion callback. */
    PFNVDXFERCOMPLETED         pfnComplete;
    /** Opaque user data. */
    void                      *pvUser;
    /** Completion status code for the task. */
    int                        rcReq;
    /** Flag whether this is a meta data transfer. */
    bool                       fMeta;
    /** Type dependent data (selected by fMeta). */
    union
    {
        /** User data transfer. */
        struct
        {
            /** Number of bytes this task transferred. */
            uint32_t cbTransfer;
            /** Pointer to the I/O context the task belongs. */
            PVDIOCTX pIoCtx;
        } User;
        /** Meta data transfer. */
        struct
        {
            /** Meta transfer this task is for. */
            PVDMETAXFER pMetaXfer;
        } Meta;
    } Type;
} VDIOTASK;
329
/**
 * Storage handle.
 *
 * Wraps a backend storage handle together with the per-image I/O state and
 * the tree of pending metadata transfers keyed by file offset.
 */
typedef struct VDIOSTORAGE
{
    /** Image I/O state this storage handle belongs to. */
    PVDIO         pVDIo;
    /** AVL tree for pending async metadata transfers. */
    PAVLRFOFFTREE pTreeMetaXfers;
    /** Storage handle (opaque, owned by the I/O backend). */
    void         *pStorage;
} VDIOSTORAGE;
342
/**
 * Metadata transfer.
 *
 * @note This entry can't be freed if either the list is not empty or
 * the reference counter is not 0.
 * The assumption is that the backends don't need to read huge amounts of
 * metadata to complete a transfer so the additional memory overhead should
 * be relatively small.
 */
typedef struct VDMETAXFER
{
    /** AVL core for fast search (the file offset is the key). */
    AVLRFOFFNODECORE Core;
    /** I/O storage for this transfer. */
    PVDIOSTORAGE     pIoStorage;
    /** Flags (transfer direction in the low bits, see VDMETAXFER_TXDIR_XXX). */
    uint32_t         fFlags;
    /** List of I/O contexts waiting for this metadata transfer to complete. */
    RTLISTNODE       ListIoCtxWaiting;
    /** Number of references to this entry. */
    unsigned         cRefs;
    /** Size of the data stored with this entry. */
    size_t           cbMeta;
    /** Shadow buffer which is used in case a write is still active and other
     * writes update the shadow buffer. */
    uint8_t         *pbDataShw;
    /** List of I/O contexts updating the shadow buffer while there is a write
     * in progress. */
    RTLISTNODE       ListIoCtxShwWrites;
    /** Data stored - variable size (must remain the last member). */
    uint8_t          abData[1];
} VDMETAXFER;

/**
 * The transfer direction for the metadata, stored in the low two bits of fFlags.
 */
#define VDMETAXFER_TXDIR_MASK  0x3
#define VDMETAXFER_TXDIR_NONE  0x0
#define VDMETAXFER_TXDIR_WRITE 0x1
#define VDMETAXFER_TXDIR_READ  0x2
#define VDMETAXFER_TXDIR_FLUSH 0x3
/** Extracts the transfer direction from the flags. */
#define VDMETAXFER_TXDIR_GET(flags)      ((flags) & VDMETAXFER_TXDIR_MASK)
/** Replaces the transfer direction in the flags. */
#define VDMETAXFER_TXDIR_SET(flags, dir) ((flags) = (flags & ~VDMETAXFER_TXDIR_MASK) | (dir))
386
387/** Forward declaration of the async discard helper. */
388static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx);
389static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx);
390static void vdDiskProcessBlockedIoCtx(PVDISK pDisk);
391static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc);
392static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
393
394/**
395 * internal: issue error message.
396 */
397static int vdError(PVDISK pDisk, int rc, RT_SRC_POS_DECL,
398 const char *pszFormat, ...)
399{
400 va_list va;
401 va_start(va, pszFormat);
402 if (pDisk->pInterfaceError)
403 pDisk->pInterfaceError->pfnError(pDisk->pInterfaceError->Core.pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va);
404 va_end(va);
405 return rc;
406}
407
408/**
409 * internal: thread synchronization, start read.
410 */
411DECLINLINE(int) vdThreadStartRead(PVDISK pDisk)
412{
413 int rc = VINF_SUCCESS;
414 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
415 rc = pDisk->pInterfaceThreadSync->pfnStartRead(pDisk->pInterfaceThreadSync->Core.pvUser);
416 return rc;
417}
418
419/**
420 * internal: thread synchronization, finish read.
421 */
422DECLINLINE(int) vdThreadFinishRead(PVDISK pDisk)
423{
424 int rc = VINF_SUCCESS;
425 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
426 rc = pDisk->pInterfaceThreadSync->pfnFinishRead(pDisk->pInterfaceThreadSync->Core.pvUser);
427 return rc;
428}
429
430/**
431 * internal: thread synchronization, start write.
432 */
433DECLINLINE(int) vdThreadStartWrite(PVDISK pDisk)
434{
435 int rc = VINF_SUCCESS;
436 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
437 rc = pDisk->pInterfaceThreadSync->pfnStartWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
438 return rc;
439}
440
441/**
442 * internal: thread synchronization, finish write.
443 */
444DECLINLINE(int) vdThreadFinishWrite(PVDISK pDisk)
445{
446 int rc = VINF_SUCCESS;
447 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
448 rc = pDisk->pInterfaceThreadSync->pfnFinishWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
449 return rc;
450}
451
452/**
453 * internal: add image structure to the end of images list.
454 */
455static void vdAddImageToList(PVDISK pDisk, PVDIMAGE pImage)
456{
457 pImage->pPrev = NULL;
458 pImage->pNext = NULL;
459
460 if (pDisk->pBase)
461 {
462 Assert(pDisk->cImages > 0);
463 pImage->pPrev = pDisk->pLast;
464 pDisk->pLast->pNext = pImage;
465 pDisk->pLast = pImage;
466 }
467 else
468 {
469 Assert(pDisk->cImages == 0);
470 pDisk->pBase = pImage;
471 pDisk->pLast = pImage;
472 }
473
474 pDisk->cImages++;
475}
476
477/**
478 * internal: remove image structure from the images list.
479 */
480static void vdRemoveImageFromList(PVDISK pDisk, PVDIMAGE pImage)
481{
482 Assert(pDisk->cImages > 0);
483
484 if (pImage->pPrev)
485 pImage->pPrev->pNext = pImage->pNext;
486 else
487 pDisk->pBase = pImage->pNext;
488
489 if (pImage->pNext)
490 pImage->pNext->pPrev = pImage->pPrev;
491 else
492 pDisk->pLast = pImage->pPrev;
493
494 pImage->pPrev = NULL;
495 pImage->pNext = NULL;
496
497 pDisk->cImages--;
498}
499
500/**
501 * Release a referene to the filter decrementing the counter and destroying the filter
502 * when the counter reaches zero.
503 *
504 * @returns The new reference count.
505 * @param pFilter The filter to release.
506 */
507static uint32_t vdFilterRelease(PVDFILTER pFilter)
508{
509 uint32_t cRefs = ASMAtomicDecU32(&pFilter->cRefs);
510 if (!cRefs)
511 {
512 pFilter->pBackend->pfnDestroy(pFilter->pvBackendData);
513 RTMemFree(pFilter);
514 }
515
516 return cRefs;
517}
518
519/**
520 * Increments the reference counter of the given filter.
521 *
522 * @return The new reference count.
523 * @param pFilter The filter.
524 */
525static uint32_t vdFilterRetain(PVDFILTER pFilter)
526{
527 return ASMAtomicIncU32(&pFilter->cRefs);
528}
529
530/**
531 * internal: find image by index into the images list.
532 */
533static PVDIMAGE vdGetImageByNumber(PVDISK pDisk, unsigned nImage)
534{
535 PVDIMAGE pImage = pDisk->pBase;
536 if (nImage == VD_LAST_IMAGE)
537 return pDisk->pLast;
538 while (pImage && nImage)
539 {
540 pImage = pImage->pNext;
541 nImage--;
542 }
543 return pImage;
544}
545
546/**
547 * Creates a new region list from the given one converting to match the flags if necessary.
548 *
549 * @returns VBox status code.
550 * @param pRegionList The region list to convert from.
551 * @param fFlags The flags for the new region list.
552 * @param ppRegionList Where to store the new region list on success.
553 */
554static int vdRegionListConv(PCVDREGIONLIST pRegionList, uint32_t fFlags, PPVDREGIONLIST ppRegionList)
555{
556 int rc = VINF_SUCCESS;
557 PVDREGIONLIST pRegionListNew = (PVDREGIONLIST)RTMemDup(pRegionList,
558 RT_UOFFSETOF_DYN(VDREGIONLIST, aRegions[pRegionList->cRegions]));
559 if (RT_LIKELY(pRegionListNew))
560 {
561 /* Do we have to convert anything? */
562 if (pRegionList->fFlags != fFlags)
563 {
564 uint64_t offRegionNext = 0;
565
566 pRegionListNew->fFlags = fFlags;
567 for (unsigned i = 0; i < pRegionListNew->cRegions; i++)
568 {
569 PVDREGIONDESC pRegion = &pRegionListNew->aRegions[i];
570
571 if ( (fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
572 && !(pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS))
573 {
574 Assert(!(pRegion->cRegionBlocksOrBytes % pRegion->cbBlock));
575
576 /* Convert from bytes to logical blocks. */
577 pRegion->offRegion = offRegionNext;
578 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes / pRegion->cbBlock;
579 offRegionNext += pRegion->cRegionBlocksOrBytes;
580 }
581 else
582 {
583 /* Convert from logical blocks to bytes. */
584 pRegion->offRegion = offRegionNext;
585 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes * pRegion->cbBlock;
586 offRegionNext += pRegion->cRegionBlocksOrBytes;
587 }
588 }
589 }
590
591 *ppRegionList = pRegionListNew;
592 }
593 else
594 rc = VERR_NO_MEMORY;
595
596 return rc;
597}
598
/**
 * Returns the virtual size of the image in bytes.
 *
 * Queries the backend's region list on first use, sums the region sizes and
 * caches the result in pImage->cbImage; subsequent calls return the cache.
 *
 * @returns Size of the given image in bytes; 0 if the backend query fails.
 * @param   pImage    The image to get the size from.
 */
static uint64_t vdImageGetSize(PVDIMAGE pImage)
{
    uint64_t cbImage = 0;

    if (pImage->cbImage == VD_IMAGE_SIZE_UNINITIALIZED)
    {
        PCVDREGIONLIST pRegionList = NULL;
        int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
        if (RT_SUCCESS(rc))
        {
            if (pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
            {
                /* Region sizes are in blocks; convert to a byte based list first. */
                PVDREGIONLIST pRegionListConv = NULL;
                rc = vdRegionListConv(pRegionList, 0, &pRegionListConv);
                if (RT_SUCCESS(rc))
                {
                    for (uint32_t i = 0; i < pRegionListConv->cRegions; i++)
                        cbImage += pRegionListConv->aRegions[i].cRegionBlocksOrBytes;

                    VDRegionListFree(pRegionListConv);
                }
                /* NOTE(review): if the conversion fails, 0 gets cached below and
                 * later calls keep returning 0 - confirm this is intended. */
            }
            else
                for (uint32_t i = 0; i < pRegionList->cRegions; i++)
                    cbImage += pRegionList->aRegions[i].cRegionBlocksOrBytes;

            AssertPtr(pImage->Backend->pfnRegionListRelease);
            pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
            pImage->cbImage = cbImage; /* Cache the value. */
        }
    }
    else
        cbImage = pImage->cbImage;

    return cbImage;
}
641
642/**
643 * Applies the filter chain to the given write request.
644 *
645 * @returns VBox status code.
646 * @param pDisk The HDD container.
647 * @param uOffset The start offset of the write.
648 * @param cbWrite Number of bytes to write.
649 * @param pIoCtx The I/O context associated with the request.
650 */
651static int vdFilterChainApplyWrite(PVDISK pDisk, uint64_t uOffset, size_t cbWrite,
652 PVDIOCTX pIoCtx)
653{
654 int rc = VINF_SUCCESS;
655
656 VD_IS_LOCKED(pDisk);
657
658 PVDFILTER pFilter;
659 RTListForEach(&pDisk->ListFilterChainWrite, pFilter, VDFILTER, ListNodeChainWrite)
660 {
661 rc = pFilter->pBackend->pfnFilterWrite(pFilter->pvBackendData, uOffset, cbWrite, pIoCtx);
662 if (RT_FAILURE(rc))
663 break;
664 /* Reset S/G buffer for the next filter. */
665 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
666 }
667
668 return rc;
669}
670
671/**
672 * Applies the filter chain to the given read request.
673 *
674 * @returns VBox status code.
675 * @param pDisk The HDD container.
676 * @param uOffset The start offset of the read.
677 * @param cbRead Number of bytes read.
678 * @param pIoCtx The I/O context associated with the request.
679 */
680static int vdFilterChainApplyRead(PVDISK pDisk, uint64_t uOffset, size_t cbRead,
681 PVDIOCTX pIoCtx)
682{
683 int rc = VINF_SUCCESS;
684
685 VD_IS_LOCKED(pDisk);
686
687 /* Reset buffer before starting. */
688 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
689
690 PVDFILTER pFilter;
691 RTListForEach(&pDisk->ListFilterChainRead, pFilter, VDFILTER, ListNodeChainRead)
692 {
693 rc = pFilter->pBackend->pfnFilterRead(pFilter->pvBackendData, uOffset, cbRead, pIoCtx);
694 if (RT_FAILURE(rc))
695 break;
696 /* Reset S/G buffer for the next filter. */
697 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
698 }
699
700 return rc;
701}
702
/**
 * Completes a root I/O context: runs the read filter chain over the data of
 * a successful read, then invokes the user supplied completion callback.
 */
DECLINLINE(void) vdIoCtxRootComplete(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    /* Filters only see successfully read data; the original offset/size are
     * used because the context's running values were consumed by the transfer. */
    if (   RT_SUCCESS(pIoCtx->rcReq)
        && pIoCtx->enmTxDir == VDIOCTXTXDIR_READ)
        pIoCtx->rcReq = vdFilterChainApplyRead(pDisk, pIoCtx->Req.Io.uOffsetXferOrig,
                                               pIoCtx->Req.Io.cbXferOrig, pIoCtx);

    pIoCtx->Type.Root.pfnComplete(pIoCtx->Type.Root.pvUser1,
                                  pIoCtx->Type.Root.pvUser2,
                                  pIoCtx->rcReq);
}
714
715/**
716 * Initialize the structure members of a given I/O context.
717 */
718DECLINLINE(void) vdIoCtxInit(PVDIOCTX pIoCtx, PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
719 uint64_t uOffset, size_t cbTransfer, PVDIMAGE pImageStart,
720 PCRTSGBUF pSgBuf, void *pvAllocation,
721 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
722{
723 pIoCtx->pDisk = pDisk;
724 pIoCtx->enmTxDir = enmTxDir;
725 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTransfer; Assert((uint32_t)cbTransfer == cbTransfer);
726 pIoCtx->Req.Io.uOffset = uOffset;
727 pIoCtx->Req.Io.cbTransfer = cbTransfer;
728 pIoCtx->Req.Io.pImageStart = pImageStart;
729 pIoCtx->Req.Io.pImageCur = pImageStart;
730 pIoCtx->Req.Io.cbBufClear = 0;
731 pIoCtx->Req.Io.pImageParentOverride = NULL;
732 pIoCtx->Req.Io.uOffsetXferOrig = uOffset;
733 pIoCtx->Req.Io.cbXferOrig = cbTransfer;
734 pIoCtx->cDataTransfersPending = 0;
735 pIoCtx->cMetaTransfersPending = 0;
736 pIoCtx->fComplete = false;
737 pIoCtx->fFlags = fFlags;
738 pIoCtx->pvAllocation = pvAllocation;
739 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
740 pIoCtx->pfnIoCtxTransferNext = NULL;
741 pIoCtx->rcReq = VINF_SUCCESS;
742 pIoCtx->pIoCtxParent = NULL;
743
744 /* There is no S/G list for a flush request. */
745 if ( enmTxDir != VDIOCTXTXDIR_FLUSH
746 && enmTxDir != VDIOCTXTXDIR_DISCARD)
747 RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pSgBuf);
748 else
749 memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF));
750}
751
752/**
753 * Internal: Tries to read the desired range from the given cache.
754 *
755 * @returns VBox status code.
756 * @retval VERR_VD_BLOCK_FREE if the block is not in the cache.
757 * pcbRead will be set to the number of bytes not in the cache.
758 * Everything thereafter might be in the cache.
759 * @param pCache The cache to read from.
760 * @param uOffset Offset of the virtual disk to read.
761 * @param cbRead How much to read.
762 * @param pIoCtx The I/O context to read into.
763 * @param pcbRead Where to store the number of bytes actually read.
764 * On success this indicates the number of bytes read from the cache.
765 * If VERR_VD_BLOCK_FREE is returned this gives the number of bytes
766 * which are not in the cache.
767 * In both cases everything beyond this value
768 * might or might not be in the cache.
769 */
770static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
771 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
772{
773 int rc = VINF_SUCCESS;
774
775 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbRead=%zu pcbRead=%#p\n",
776 pCache, uOffset, pIoCtx, cbRead, pcbRead));
777
778 AssertPtr(pCache);
779 AssertPtr(pcbRead);
780
781 rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, cbRead,
782 pIoCtx, pcbRead);
783
784 LogFlowFunc(("returns rc=%Rrc pcbRead=%zu\n", rc, *pcbRead));
785 return rc;
786}
787
788/**
789 * Internal: Writes data for the given block into the cache.
790 *
791 * @returns VBox status code.
792 * @param pCache The cache to write to.
793 * @param uOffset Offset of the virtual disk to write to the cache.
794 * @param cbWrite How much to write.
795 * @param pIoCtx The I/O context to write from.
796 * @param pcbWritten How much data could be written, optional.
797 */
798static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, size_t cbWrite,
799 PVDIOCTX pIoCtx, size_t *pcbWritten)
800{
801 int rc = VINF_SUCCESS;
802
803 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbWrite=%zu pcbWritten=%#p\n",
804 pCache, uOffset, pIoCtx, cbWrite, pcbWritten));
805
806 AssertPtr(pCache);
807 AssertPtr(pIoCtx);
808 Assert(cbWrite > 0);
809
810 if (pcbWritten)
811 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
812 pIoCtx, pcbWritten);
813 else
814 {
815 size_t cbWritten = 0;
816
817 do
818 {
819 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
820 pIoCtx, &cbWritten);
821 uOffset += cbWritten;
822 cbWrite -= cbWritten;
823 } while ( cbWrite
824 && ( RT_SUCCESS(rc)
825 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
826 }
827
828 LogFlowFunc(("returns rc=%Rrc pcbWritten=%zu\n",
829 rc, pcbWritten ? *pcbWritten : cbWrite));
830 return rc;
831}
832
833/**
834 * Creates a new empty discard state.
835 *
836 * @returns Pointer to the new discard state or NULL if out of memory.
837 */
838static PVDDISCARDSTATE vdDiscardStateCreate(void)
839{
840 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
841
842 if (pDiscard)
843 {
844 RTListInit(&pDiscard->ListLru);
845 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
846 if (!pDiscard->pTreeBlocks)
847 {
848 RTMemFree(pDiscard);
849 pDiscard = NULL;
850 }
851 }
852
853 return pDiscard;
854}
855
/**
 * Removes the least recently used blocks from the waiting list until
 * the new value is reached.
 *
 * For each evicted block the still-unallocated sector runs are handed to the
 * topmost image's backend with VD_DISCARD_MARK_UNUSED before the block is
 * removed from the AVL tree and LRU list and freed.
 *
 * @returns VBox status code.
 * @param   pDisk            VD disk container.
 * @param   pDiscard         The discard state.
 * @param   cbDiscardingNew  How many bytes should be waiting on success.
 *                           The number of bytes waiting can be less.
 */
static int vdDiscardRemoveBlocks(PVDISK pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
{
    int rc = VINF_SUCCESS;

    LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
                 pDisk, pDiscard, cbDiscardingNew));

    while (pDiscard->cbDiscarding > cbDiscardingNew)
    {
        /* Evict from the cold end of the LRU list. */
        PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);

        Assert(!RTListIsEmpty(&pDiscard->ListLru));

        /* Go over the allocation bitmap and mark all discarded sectors as unused.
         * The bitmap tracks one bit per 512 byte sector of the block. */
        uint64_t offStart = pBlock->Core.Key;
        uint32_t idxStart = 0;
        size_t cbLeft = pBlock->cbDiscard;
        bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
        uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);

        while (cbLeft > 0)
        {
            int32_t idxEnd;
            size_t cbThis = cbLeft;

            if (fAllocated)
            {
                /* Check for the first unallocated bit; allocated runs are skipped. */
                idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
                if (idxEnd != -1)
                {
                    cbThis = (idxEnd - idxStart) * 512;
                    fAllocated = false;
                }
            }
            else
            {
                /* Mark as unused and check for the first set bit. */
                idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
                if (idxEnd != -1)
                    cbThis = (idxEnd - idxStart) * 512;

                /* Synchronous discard of the unallocated run on the top image. */
                VDIOCTX IoCtx;
                vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
                            NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
                rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
                                                       &IoCtx, offStart, cbThis, NULL,
                                                       NULL, &cbThis, NULL,
                                                       VD_DISCARD_MARK_UNUSED);
                if (RT_FAILURE(rc))
                    break;

                fAllocated = true;
            }

            idxStart = idxEnd;
            offStart += cbThis;
            cbLeft -= cbThis;
        }

        if (RT_FAILURE(rc))
            break;

        /* Block fully processed: drop it from the range tree, the LRU list
         * and the accounting, then free its memory. */
        PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
        Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
        RTListNodeRemove(&pBlock->NodeLru);

        pDiscard->cbDiscarding -= pBlock->cbDiscard;
        RTMemFree(pBlock->pbmAllocated);
        RTMemFree(pBlock);
    }

    Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
944
945/**
946 * Destroys the current discard state, writing any waiting blocks to the image.
947 *
948 * @returns VBox status code.
949 * @param pDisk VD disk container.
950 */
951static int vdDiscardStateDestroy(PVDISK pDisk)
952{
953 int rc = VINF_SUCCESS;
954
955 if (pDisk->pDiscard)
956 {
957 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
958 AssertRC(rc);
959 RTMemFree(pDisk->pDiscard->pTreeBlocks);
960 RTMemFree(pDisk->pDiscard);
961 pDisk->pDiscard = NULL;
962 }
963
964 return rc;
965}
966
/**
 * Marks the given range as allocated in the image.
 * Required if there are discards in progress and a write to a block which can get discarded
 * is written to.
 *
 * Walks the discard block tree; for blocks covering (part of) the range the
 * per-sector allocation bitmap bits are set, for gaps the walk skips ahead to
 * the next tracked block.
 *
 * @returns VBox status code (always VINF_SUCCESS in the current implementation).
 * @param   pDisk      VD container data.
 * @param   uOffset    First byte to mark as allocated.
 * @param   cbRange    Number of bytes to mark as allocated.
 */
static int vdDiscardSetRangeAllocated(PVDISK pDisk, uint64_t uOffset, size_t cbRange)
{
    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
    int rc = VINF_SUCCESS;

    if (pDiscard)
    {
        do
        {
            size_t cbThisRange = cbRange;
            PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);

            if (pBlock)
            {
                int32_t idxStart, idxEnd;

                /* The bitmap tracks 512 byte sectors relative to the block start. */
                Assert(!(cbThisRange % 512));
                Assert(!((uOffset - pBlock->Core.Key) % 512));

                /* Clamp to the part of the range inside this block. */
                cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);

                idxStart = (uOffset - pBlock->Core.Key) / 512;
                idxEnd = idxStart + (int32_t)(cbThisRange / 512);
                ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
            }
            else
            {
                /* No block covers uOffset; skip ahead to the next tracked block
                 * (or consume the rest of the range if there is none). */
                pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
                if (pBlock)
                    cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
            }

            Assert(cbRange >= cbThisRange);

            uOffset += cbThisRange;
            cbRange -= cbThisRange;
        } while (cbRange != 0);
    }

    return rc;
}
1018
1019DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1020 uint64_t uOffset, size_t cbTransfer,
1021 PVDIMAGE pImageStart,PCRTSGBUF pSgBuf,
1022 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1023 uint32_t fFlags)
1024{
1025 PVDIOCTX pIoCtx = NULL;
1026
1027 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1028 if (RT_LIKELY(pIoCtx))
1029 {
1030 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1031 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1032 }
1033
1034 return pIoCtx;
1035}
1036
1037DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1038 uint64_t uOffset, size_t cbTransfer,
1039 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
1040 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1041 void *pvUser1, void *pvUser2,
1042 void *pvAllocation,
1043 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1044 uint32_t fFlags)
1045{
1046 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1047 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
1048
1049 if (RT_LIKELY(pIoCtx))
1050 {
1051 pIoCtx->pIoCtxParent = NULL;
1052 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1053 pIoCtx->Type.Root.pvUser1 = pvUser1;
1054 pIoCtx->Type.Root.pvUser2 = pvUser2;
1055 }
1056
1057 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
1058 return pIoCtx;
1059}
1060
1061DECLINLINE(void) vdIoCtxDiscardInit(PVDIOCTX pIoCtx, PVDISK pDisk, PCRTRANGE paRanges,
1062 unsigned cRanges, PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1063 void *pvUser1, void *pvUser2, void *pvAllocation,
1064 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
1065{
1066 pIoCtx->pIoCtxNext = NULL;
1067 pIoCtx->pDisk = pDisk;
1068 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;
1069 pIoCtx->cDataTransfersPending = 0;
1070 pIoCtx->cMetaTransfersPending = 0;
1071 pIoCtx->fComplete = false;
1072 pIoCtx->fFlags = fFlags;
1073 pIoCtx->pvAllocation = pvAllocation;
1074 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
1075 pIoCtx->pfnIoCtxTransferNext = NULL;
1076 pIoCtx->rcReq = VINF_SUCCESS;
1077 pIoCtx->Req.Discard.paRanges = paRanges;
1078 pIoCtx->Req.Discard.cRanges = cRanges;
1079 pIoCtx->Req.Discard.idxRange = 0;
1080 pIoCtx->Req.Discard.cbDiscardLeft = 0;
1081 pIoCtx->Req.Discard.offCur = 0;
1082 pIoCtx->Req.Discard.cbThisDiscard = 0;
1083
1084 pIoCtx->pIoCtxParent = NULL;
1085 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1086 pIoCtx->Type.Root.pvUser1 = pvUser1;
1087 pIoCtx->Type.Root.pvUser2 = pvUser2;
1088}
1089
1090DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVDISK pDisk, PCRTRANGE paRanges,
1091 unsigned cRanges,
1092 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1093 void *pvUser1, void *pvUser2,
1094 void *pvAllocation,
1095 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1096 uint32_t fFlags)
1097{
1098 PVDIOCTX pIoCtx = NULL;
1099
1100 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1101 if (RT_LIKELY(pIoCtx))
1102 {
1103 vdIoCtxDiscardInit(pIoCtx, pDisk, paRanges, cRanges, pfnComplete, pvUser1,
1104 pvUser2, pvAllocation, pfnIoCtxTransfer, fFlags);
1105 }
1106
1107 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
1108 return pIoCtx;
1109}
1110
1111DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
1112 uint64_t uOffset, size_t cbTransfer,
1113 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
1114 PVDIOCTX pIoCtxParent, size_t cbTransferParent,
1115 size_t cbWriteParent, void *pvAllocation,
1116 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
1117{
1118 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
1119 pSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE);
1120
1121 AssertPtr(pIoCtxParent);
1122 Assert(!pIoCtxParent->pIoCtxParent);
1123
1124 if (RT_LIKELY(pIoCtx))
1125 {
1126 pIoCtx->pIoCtxParent = pIoCtxParent;
1127 pIoCtx->Type.Child.uOffsetSaved = uOffset;
1128 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
1129 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;
1130 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;
1131 }
1132
1133 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
1134 return pIoCtx;
1135}
1136
1137DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
1138{
1139 PVDIOTASK pIoTask = NULL;
1140
1141 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1142 if (pIoTask)
1143 {
1144 pIoTask->pIoStorage = pIoStorage;
1145 pIoTask->pfnComplete = pfnComplete;
1146 pIoTask->pvUser = pvUser;
1147 pIoTask->fMeta = false;
1148 pIoTask->Type.User.cbTransfer = cbTransfer;
1149 pIoTask->Type.User.pIoCtx = pIoCtx;
1150 }
1151
1152 return pIoTask;
1153}
1154
1155DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
1156{
1157 PVDIOTASK pIoTask = NULL;
1158
1159 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
1160 if (pIoTask)
1161 {
1162 pIoTask->pIoStorage = pIoStorage;
1163 pIoTask->pfnComplete = pfnComplete;
1164 pIoTask->pvUser = pvUser;
1165 pIoTask->fMeta = true;
1166 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
1167 }
1168
1169 return pIoTask;
1170}
1171
1172DECLINLINE(void) vdIoCtxFree(PVDISK pDisk, PVDIOCTX pIoCtx)
1173{
1174 Log(("Freeing I/O context %#p\n", pIoCtx));
1175
1176 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
1177 {
1178 if (pIoCtx->pvAllocation)
1179 RTMemFree(pIoCtx->pvAllocation);
1180#ifdef DEBUG
1181 memset(&pIoCtx->pDisk, 0xff, sizeof(void *));
1182#endif
1183 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
1184 }
1185}
1186
/**
 * Frees an I/O task, returning it to the disk's task memory cache.
 *
 * @param   pDisk     The disk container owning the task cache.
 * @param   pIoTask   The I/O task to free.
 */
DECLINLINE(void) vdIoTaskFree(PVDISK pDisk, PVDIOTASK pIoTask)
{
#ifdef DEBUG
    memset(pIoTask, 0xff, sizeof(VDIOTASK)); /* Poison to catch use-after-free in debug builds. */
#endif
    RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
}
1194
/**
 * Resets a child I/O context to the transfer state saved at allocation time
 * (vdIoCtxChildAlloc) so the request can be restarted from the beginning.
 *
 * @param   pIoCtx   The child I/O context to reset.
 */
DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
{
    AssertPtr(pIoCtx->pIoCtxParent); /* Must be a child context. */

    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;
    /* cbTransferLeft is 32-bit; assert the saved size_t value fits. */
    pIoCtx->Req.Io.cbTransferLeft = (uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved;
    Assert((uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved == pIoCtx->Type.Child.cbTransferLeftSaved);
}
1204
1205DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
1206{
1207 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_UOFFSETOF_DYN(VDMETAXFER, abData[cb]));
1208
1209 if (RT_LIKELY(pMetaXfer))
1210 {
1211 pMetaXfer->Core.Key = uOffset;
1212 pMetaXfer->Core.KeyLast = uOffset + cb - 1;
1213 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;
1214 pMetaXfer->cbMeta = cb;
1215 pMetaXfer->pIoStorage = pIoStorage;
1216 pMetaXfer->cRefs = 0;
1217 pMetaXfer->pbDataShw = NULL;
1218 RTListInit(&pMetaXfer->ListIoCtxWaiting);
1219 RTListInit(&pMetaXfer->ListIoCtxShwWrites);
1220 }
1221 return pMetaXfer;
1222}
1223
/**
 * Pushes the given I/O context onto a lock-free singly linked list (LIFO).
 *
 * Multiple producers may race here; a consumer takes the whole list with an
 * atomic exchange and reverses it to restore FIFO order (see
 * vdDiskProcessWaitingIoCtx / vdDiskProcessBlockedIoCtx).
 *
 * @param   ppList   Head pointer of the list to push onto.
 * @param   pIoCtx   The I/O context to add.
 */
DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
{
    /* Put it on the waiting list. */
    PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
    PVDIOCTX pHeadOld;
    pIoCtx->pIoCtxNext = pNext;
    while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
    {
        /* Lost the CAS race against another producer: retry with the head we just observed. */
        pNext = pHeadOld;
        Assert(pNext != pIoCtx);
        pIoCtx->pIoCtxNext = pNext;
        ASMNopPause();
    }
}
1238
/**
 * Marks the given root I/O context as blocked and queues it on the disk's
 * blocked list; it is resumed later by vdDiskProcessBlockedIoCtx().
 *
 * @param   pDisk    The disk container.
 * @param   pIoCtx   The (root, not yet blocked) I/O context to defer.
 */
DECLINLINE(void) vdIoCtxDefer(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    LogFlowFunc(("Deferring I/O context pIoCtx=%#p\n", pIoCtx));

    Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
    pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
    vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
}
1247
/** Copies up to cbData bytes from the source context's S/G buffer into the
 *  destination context's S/G buffer, advancing both; returns bytes copied. */
static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
{
    return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
}
1252
#if 0 /* unused */
/** Compares cbData bytes of the two contexts' S/G buffers (memcmp semantics). */
static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
{
    return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
}
#endif
1259
/** Copies cbData bytes from the flat buffer pbData INTO the context's S/G
 *  buffer, advancing it; returns bytes copied. */
static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
{
    return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
}
1264
/** Copies cbData bytes OUT of the context's S/G buffer into the flat buffer
 *  pbData, advancing it; returns bytes copied. */
static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
{
    return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
}
1269
/** Fills cbData bytes of the context's S/G buffer with the byte ch,
 *  advancing it; returns bytes filled. */
static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
{
    return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
}
1274
1275/**
1276 * Returns whether the given I/O context has completed.
1277 *
1278 * @returns Flag whether the I/O context is complete.
1279 * @param pIoCtx The I/O context to check.
1280 */
1281DECLINLINE(bool) vdIoCtxIsComplete(PVDIOCTX pIoCtx)
1282{
1283 if ( !pIoCtx->cMetaTransfersPending
1284 && !pIoCtx->cDataTransfersPending
1285 && !pIoCtx->pfnIoCtxTransfer)
1286 return true;
1287
1288 /*
1289 * We complete the I/O context in case of an error
1290 * if there is no I/O task pending.
1291 */
1292 if ( RT_FAILURE(pIoCtx->rcReq)
1293 && !pIoCtx->cMetaTransfersPending
1294 && !pIoCtx->cDataTransfersPending)
1295 return true;
1296
1297 return false;
1298}
1299
1300/**
1301 * Returns whether the given I/O context is blocked due to a metadata transfer
1302 * or because the backend blocked it.
1303 *
1304 * @returns Flag whether the I/O context is blocked.
1305 * @param pIoCtx The I/O context to check.
1306 */
1307DECLINLINE(bool) vdIoCtxIsBlocked(PVDIOCTX pIoCtx)
1308{
1309 /* Don't change anything if there is a metadata transfer pending or we are blocked. */
1310 if ( pIoCtx->cMetaTransfersPending
1311 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1312 return true;
1313
1314 return false;
1315}
1316
1317/**
1318 * Process the I/O context, core method which assumes that the I/O context
1319 * acquired the lock.
1320 *
1321 * @returns VBox status code.
1322 * @param pIoCtx I/O context to process.
1323 */
1324static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
1325{
1326 int rc = VINF_SUCCESS;
1327
1328 VD_IS_LOCKED(pIoCtx->pDisk);
1329
1330 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
1331
1332 if (!vdIoCtxIsComplete(pIoCtx))
1333 {
1334 if (!vdIoCtxIsBlocked(pIoCtx))
1335 {
1336 if (pIoCtx->pfnIoCtxTransfer)
1337 {
1338 /* Call the transfer function advancing to the next while there is no error. */
1339 while ( pIoCtx->pfnIoCtxTransfer
1340 && !pIoCtx->cMetaTransfersPending
1341 && RT_SUCCESS(rc))
1342 {
1343 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
1344 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
1345
1346 /* Advance to the next part of the transfer if the current one succeeded. */
1347 if (RT_SUCCESS(rc))
1348 {
1349 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
1350 pIoCtx->pfnIoCtxTransferNext = NULL;
1351 }
1352 }
1353 }
1354
1355 if ( RT_SUCCESS(rc)
1356 && !pIoCtx->cMetaTransfersPending
1357 && !pIoCtx->cDataTransfersPending
1358 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1359 rc = VINF_VD_ASYNC_IO_FINISHED;
1360 else if ( RT_SUCCESS(rc)
1361 || rc == VERR_VD_NOT_ENOUGH_METADATA
1362 || rc == VERR_VD_IOCTX_HALT)
1363 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1364 else if ( RT_FAILURE(rc)
1365 && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
1366 {
1367 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
1368
1369 /*
1370 * The I/O context completed if we have an error and there is no data
1371 * or meta data transfer pending.
1372 */
1373 if ( !pIoCtx->cMetaTransfersPending
1374 && !pIoCtx->cDataTransfersPending)
1375 rc = VINF_VD_ASYNC_IO_FINISHED;
1376 else
1377 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1378 }
1379 }
1380 else
1381 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1382 }
1383 else
1384 rc = VINF_VD_ASYNC_IO_FINISHED;
1385
1386 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
1387 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
1388 pIoCtx->fComplete));
1389
1390 return rc;
1391}
1392
1393/**
1394 * Processes the list of waiting I/O contexts.
1395 *
1396 * @returns VBox status code, only valid if pIoCtxRc is not NULL, treat as void
1397 * function otherwise.
1398 * @param pDisk The disk structure.
1399 * @param pIoCtxRc An I/O context handle which waits on the list. When processed
1400 * The status code is returned. NULL if there is no I/O context
1401 * to return the status code for.
1402 */
1403static int vdDiskProcessWaitingIoCtx(PVDISK pDisk, PVDIOCTX pIoCtxRc)
1404{
1405 int rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1406
1407 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
1408
1409 VD_IS_LOCKED(pDisk);
1410
1411 /* Get the waiting list and process it in FIFO order. */
1412 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
1413
1414 /* Reverse it. */
1415 PVDIOCTX pCur = pIoCtxHead;
1416 pIoCtxHead = NULL;
1417 while (pCur)
1418 {
1419 PVDIOCTX pInsert = pCur;
1420 pCur = pCur->pIoCtxNext;
1421 pInsert->pIoCtxNext = pIoCtxHead;
1422 pIoCtxHead = pInsert;
1423 }
1424
1425 /* Process now. */
1426 pCur = pIoCtxHead;
1427 while (pCur)
1428 {
1429 int rcTmp;
1430 PVDIOCTX pTmp = pCur;
1431
1432 pCur = pCur->pIoCtxNext;
1433 pTmp->pIoCtxNext = NULL;
1434
1435 /*
1436 * Need to clear the sync flag here if there is a new I/O context
1437 * with it set and the context is not given in pIoCtxRc.
1438 * This happens most likely on a different thread and that one shouldn't
1439 * process the context synchronously.
1440 *
1441 * The thread who issued the context will wait on the event semaphore
1442 * anyway which is signalled when the completion handler is called.
1443 */
1444 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC
1445 && pTmp != pIoCtxRc)
1446 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
1447
1448 rcTmp = vdIoCtxProcessLocked(pTmp);
1449 if (pTmp == pIoCtxRc)
1450 {
1451 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1452 && RT_SUCCESS(pTmp->rcReq)
1453 && pTmp->enmTxDir == VDIOCTXTXDIR_READ)
1454 {
1455 int rc2 = vdFilterChainApplyRead(pDisk, pTmp->Req.Io.uOffsetXferOrig,
1456 pTmp->Req.Io.cbXferOrig, pTmp);
1457 if (RT_FAILURE(rc2))
1458 rcTmp = rc2;
1459 }
1460
1461 /* The given I/O context was processed, pass the return code to the caller. */
1462 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1463 && (pTmp->fFlags & VDIOCTX_FLAGS_SYNC))
1464 rc = pTmp->rcReq;
1465 else
1466 rc = rcTmp;
1467 }
1468 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1469 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
1470 {
1471 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1472 vdThreadFinishWrite(pDisk);
1473
1474 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
1475 vdIoCtxRootComplete(pDisk, pTmp);
1476
1477 if (fFreeCtx)
1478 vdIoCtxFree(pDisk, pTmp);
1479 }
1480 }
1481
1482 LogFlowFunc(("returns rc=%Rrc\n", rc));
1483 return rc;
1484}
1485
1486/**
1487 * Processes the list of blocked I/O contexts.
1488 *
1489 * @returns nothing.
1490 * @param pDisk The disk structure.
1491 */
1492static void vdDiskProcessBlockedIoCtx(PVDISK pDisk)
1493{
1494 LogFlowFunc(("pDisk=%#p\n", pDisk));
1495
1496 VD_IS_LOCKED(pDisk);
1497
1498 /* Get the waiting list and process it in FIFO order. */
1499 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
1500
1501 /* Reverse it. */
1502 PVDIOCTX pCur = pIoCtxHead;
1503 pIoCtxHead = NULL;
1504 while (pCur)
1505 {
1506 PVDIOCTX pInsert = pCur;
1507 pCur = pCur->pIoCtxNext;
1508 pInsert->pIoCtxNext = pIoCtxHead;
1509 pIoCtxHead = pInsert;
1510 }
1511
1512 /* Process now. */
1513 pCur = pIoCtxHead;
1514 while (pCur)
1515 {
1516 int rc;
1517 PVDIOCTX pTmp = pCur;
1518
1519 pCur = pCur->pIoCtxNext;
1520 pTmp->pIoCtxNext = NULL;
1521
1522 Assert(!pTmp->pIoCtxParent);
1523 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
1524 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
1525
1526 rc = vdIoCtxProcessLocked(pTmp);
1527 if ( rc == VINF_VD_ASYNC_IO_FINISHED
1528 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
1529 {
1530 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1531 vdThreadFinishWrite(pDisk);
1532
1533 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
1534 vdIoCtxRootComplete(pDisk, pTmp);
1535 if (fFreeCtx)
1536 vdIoCtxFree(pDisk, pTmp);
1537 }
1538 }
1539
1540 LogFlowFunc(("returns\n"));
1541}
1542
1543/**
1544 * Processes the I/O context trying to lock the criticial section.
1545 * The context is deferred if the critical section is busy.
1546 *
1547 * @returns VBox status code.
1548 * @param pIoCtx The I/O context to process.
1549 */
1550static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
1551{
1552 int rc = VINF_SUCCESS;
1553 PVDISK pDisk = pIoCtx->pDisk;
1554
1555 Log(("Defer pIoCtx=%#p\n", pIoCtx));
1556
1557 /* Put it on the waiting list first. */
1558 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
1559
1560 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
1561 {
1562 /* Leave it again, the context will be processed just before leaving the lock. */
1563 LogFlowFunc(("Successfully acquired the lock\n"));
1564 rc = vdDiskUnlock(pDisk, pIoCtx);
1565 }
1566 else
1567 {
1568 LogFlowFunc(("Lock is held\n"));
1569 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1570 }
1571
1572 return rc;
1573}
1574
1575/**
1576 * Process the I/O context in a synchronous manner, waiting
1577 * for it to complete.
1578 *
1579 * @returns VBox status code of the completed request.
1580 * @param pIoCtx The sync I/O context.
1581 * @param hEventComplete Event sempahore to wait on for completion.
1582 */
1583static int vdIoCtxProcessSync(PVDIOCTX pIoCtx, RTSEMEVENT hEventComplete)
1584{
1585 int rc = VINF_SUCCESS;
1586 PVDISK pDisk = pIoCtx->pDisk;
1587
1588 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
1589
1590 AssertMsg(pIoCtx->fFlags & (VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE),
1591 ("I/O context is not marked as synchronous\n"));
1592
1593 rc = vdIoCtxProcessTryLockDefer(pIoCtx);
1594 if (rc == VINF_VD_ASYNC_IO_FINISHED)
1595 rc = VINF_SUCCESS;
1596
1597 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1598 {
1599 rc = RTSemEventWait(hEventComplete, RT_INDEFINITE_WAIT);
1600 AssertRC(rc);
1601 }
1602
1603 rc = pIoCtx->rcReq;
1604 vdIoCtxFree(pDisk, pIoCtx);
1605
1606 return rc;
1607}
1608
/** Returns whether the given I/O context currently owns the disk-wide
 *  (growing/allocation) lock. */
DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    return pDisk->pIoCtxLockOwner == pIoCtx;
}
1613
/**
 * Tries to acquire the disk-wide lock for the given I/O context.
 *
 * @returns VINF_SUCCESS if the lock was acquired, or
 *          VERR_VD_ASYNC_IO_IN_PROGRESS if another context holds it — in that
 *          case the context is deferred and resumed on unlock.
 * @param   pDisk    The disk container.
 * @param   pIoCtx   The I/O context requesting the lock.
 */
static int vdIoCtxLockDisk(PVDISK pDisk, PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));

    /* Atomically claim ownership if currently unowned. */
    if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
    {
        Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
        vdIoCtxDefer(pDisk, pIoCtx);
        rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
    }

    LogFlowFunc(("returns -> %Rrc\n", rc));
    return rc;
}
1632
/**
 * Releases the disk-wide lock held by the given I/O context and optionally
 * resumes contexts that were deferred while the lock was held.
 *
 * @param   pDisk                The disk container.
 * @param   pIoCtx               The I/O context owning the lock (asserted).
 * @param   fProcessBlockedReqs  Whether to process the blocked-context list now.
 */
static void vdIoCtxUnlockDisk(PVDISK pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
{
    RT_NOREF1(pIoCtx);
    LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
                 pDisk, pIoCtx, fProcessBlockedReqs));

    VD_IS_LOCKED(pDisk);

    LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
    Assert(pDisk->pIoCtxLockOwner == pIoCtx);
    ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);

    if (fProcessBlockedReqs)
    {
        /* Process any blocked writes if the current request didn't caused another growing. */
        vdDiskProcessBlockedIoCtx(pDisk);
    }

    LogFlowFunc(("returns\n"));
}
1653
1654/**
1655 * Internal: Reads a given amount of data from the image chain of the disk.
1656 **/
1657static int vdDiskReadHelper(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1658 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
1659{
1660 RT_NOREF1(pDisk);
1661 int rc = VINF_SUCCESS;
1662 size_t cbThisRead = cbRead;
1663
1664 AssertPtr(pcbThisRead);
1665
1666 *pcbThisRead = 0;
1667
1668 /*
1669 * Try to read from the given image.
1670 * If the block is not allocated read from override chain if present.
1671 */
1672 rc = pImage->Backend->pfnRead(pImage->pBackendData,
1673 uOffset, cbThisRead, pIoCtx,
1674 &cbThisRead);
1675
1676 if (rc == VERR_VD_BLOCK_FREE)
1677 {
1678 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
1679 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
1680 pCurrImage = pCurrImage->pPrev)
1681 {
1682 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1683 uOffset, cbThisRead, pIoCtx,
1684 &cbThisRead);
1685 }
1686 }
1687
1688 if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE)
1689 *pcbThisRead = cbThisRead;
1690
1691 return rc;
1692}
1693
1694/**
1695 * internal: read the specified amount of data in whatever blocks the backend
1696 * will give us - async version.
1697 */
1698static DECLCALLBACK(int) vdReadHelperAsync(PVDIOCTX pIoCtx)
1699{
1700 int rc;
1701 PVDISK pDisk = pIoCtx->pDisk;
1702 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
1703 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
1704 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;
1705 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
1706 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;
1707 size_t cbThisRead;
1708
1709 /*
1710 * Check whether there is a full block write in progress which was not allocated.
1711 * Defer I/O if the range interferes but only if it does not belong to the
1712 * write doing the allocation.
1713 */
1714 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
1715 && uOffset >= pDisk->uOffsetStartLocked
1716 && uOffset < pDisk->uOffsetEndLocked
1717 && ( !pIoCtx->pIoCtxParent
1718 || pIoCtx->pIoCtxParent != pDisk->pIoCtxLockOwner))
1719 {
1720 Log(("Interferring read while allocating a new block => deferring read\n"));
1721 vdIoCtxDefer(pDisk, pIoCtx);
1722 return VERR_VD_ASYNC_IO_IN_PROGRESS;
1723 }
1724
1725 /* Loop until all reads started or we have a backend which needs to read metadata. */
1726 do
1727 {
1728 /* Search for image with allocated block. Do not attempt to read more
1729 * than the previous reads marked as valid. Otherwise this would return
1730 * stale data when different block sizes are used for the images. */
1731 cbThisRead = cbToRead;
1732
1733 if ( pDisk->pCache
1734 && !pImageParentOverride)
1735 {
1736 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
1737 pIoCtx, &cbThisRead);
1738 if (rc == VERR_VD_BLOCK_FREE)
1739 {
1740 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
1741 pIoCtx, &cbThisRead);
1742
1743 /* If the read was successful, write the data back into the cache. */
1744 if ( RT_SUCCESS(rc)
1745 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UPDATE_CACHE)
1746 {
1747 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
1748 pIoCtx, NULL);
1749 }
1750 }
1751 }
1752 else
1753 {
1754 /*
1755 * Try to read from the given image.
1756 * If the block is not allocated read from override chain if present.
1757 */
1758 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1759 uOffset, cbThisRead, pIoCtx,
1760 &cbThisRead);
1761
1762 if ( rc == VERR_VD_BLOCK_FREE
1763 && cImagesRead != 1)
1764 {
1765 unsigned cImagesToProcess = cImagesRead;
1766
1767 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
1768 pIoCtx->Req.Io.pImageParentOverride = NULL;
1769
1770 while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
1771 {
1772 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1773 uOffset, cbThisRead,
1774 pIoCtx, &cbThisRead);
1775 if (cImagesToProcess == 1)
1776 break;
1777 else if (cImagesToProcess > 0)
1778 cImagesToProcess--;
1779
1780 if (rc == VERR_VD_BLOCK_FREE)
1781 pCurrImage = pCurrImage->pPrev;
1782 }
1783 }
1784 }
1785
1786 /* The task state will be updated on success already, don't do it here!. */
1787 if (rc == VERR_VD_BLOCK_FREE)
1788 {
1789 /* No image in the chain contains the data for the block. */
1790 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisRead); Assert(cbThisRead == (uint32_t)cbThisRead);
1791
1792 /* Fill the free space with 0 if we are told to do so
1793 * or a previous read returned valid data. */
1794 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
1795 vdIoCtxSet(pIoCtx, '\0', cbThisRead);
1796 else
1797 pIoCtx->Req.Io.cbBufClear += cbThisRead;
1798
1799 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
1800 rc = VINF_VD_NEW_ZEROED_BLOCK;
1801 else
1802 rc = VINF_SUCCESS;
1803 }
1804 else if (rc == VERR_VD_IOCTX_HALT)
1805 {
1806 uOffset += cbThisRead;
1807 cbToRead -= cbThisRead;
1808 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
1809 }
1810 else if ( RT_SUCCESS(rc)
1811 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1812 {
1813 /* First not free block, fill the space before with 0. */
1814 if ( pIoCtx->Req.Io.cbBufClear
1815 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1816 {
1817 RTSGBUF SgBuf;
1818 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
1819 RTSgBufReset(&SgBuf);
1820 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
1821 pIoCtx->Req.Io.cbBufClear = 0;
1822 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1823 }
1824 rc = VINF_SUCCESS;
1825 }
1826
1827 if (RT_FAILURE(rc))
1828 break;
1829
1830 cbToRead -= cbThisRead;
1831 uOffset += cbThisRead;
1832 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
1833 } while (cbToRead != 0 && RT_SUCCESS(rc));
1834
1835 if ( rc == VERR_VD_NOT_ENOUGH_METADATA
1836 || rc == VERR_VD_IOCTX_HALT)
1837 {
1838 /* Save the current state. */
1839 pIoCtx->Req.Io.uOffset = uOffset;
1840 pIoCtx->Req.Io.cbTransfer = cbToRead;
1841 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
1842 }
1843
1844 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1845 ? VERR_VD_BLOCK_FREE
1846 : rc;
1847}
1848
1849/**
1850 * internal: parent image read wrapper for compacting.
1851 */
1852static DECLCALLBACK(int) vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
1853 size_t cbRead)
1854{
1855 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
1856
1857 /** @todo
1858 * Only used for compaction so far which is not possible to mix with async I/O.
1859 * Needs to be changed if we want to support online compaction of images.
1860 */
1861 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
1862 AssertMsgReturn(!fLocked,
1863 ("Calling synchronous parent read while another thread holds the disk lock\n"),
1864 VERR_VD_INVALID_STATE);
1865
1866 /* Fake an I/O context. */
1867 RTSGSEG Segment;
1868 RTSGBUF SgBuf;
1869 VDIOCTX IoCtx;
1870
1871 Segment.pvSeg = pvBuf;
1872 Segment.cbSeg = cbRead;
1873 RTSgBufInit(&SgBuf, &Segment, 1);
1874 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
1875 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
1876 int rc = vdReadHelperAsync(&IoCtx);
1877 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
1878 return rc;
1879}
1880
1881/**
1882 * Extended version of vdReadHelper(), implementing certain optimizations
1883 * for image cloning.
1884 *
1885 * @returns VBox status code.
1886 * @param pDisk The disk to read from.
1887 * @param pImage The image to start reading from.
1888 * @param pImageParentOverride The parent image to read from
1889 * if the starting image returns a free block.
1890 * If NULL is passed the real parent of the image
1891 * in the chain is used.
1892 * @param uOffset Offset in the disk to start reading from.
1893 * @param pvBuf Where to store the read data.
1894 * @param cbRead How much to read.
1895 * @param fZeroFreeBlocks Flag whether free blocks should be zeroed.
1896 * If false and no image has data for sepcified
1897 * range VERR_VD_BLOCK_FREE is returned.
1898 * Note that unallocated blocks are still zeroed
1899 * if at least one image has valid data for a part
1900 * of the range.
1901 * @param fUpdateCache Flag whether to update the attached cache if
1902 * available.
1903 * @param cImagesRead Number of images in the chain to read until
1904 * the read is cut off. A value of 0 disables the cut off.
1905 */
1906static int vdReadHelperEx(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
1907 uint64_t uOffset, void *pvBuf, size_t cbRead,
1908 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
1909{
1910 int rc = VINF_SUCCESS;
1911 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
1912 RTSGSEG Segment;
1913 RTSGBUF SgBuf;
1914 VDIOCTX IoCtx;
1915 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
1916
1917 rc = RTSemEventCreate(&hEventComplete);
1918 if (RT_FAILURE(rc))
1919 return rc;
1920
1921 if (fZeroFreeBlocks)
1922 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1923 if (fUpdateCache)
1924 fFlags |= VDIOCTX_FLAGS_READ_UPDATE_CACHE;
1925
1926 Segment.pvSeg = pvBuf;
1927 Segment.cbSeg = cbRead;
1928 RTSgBufInit(&SgBuf, &Segment, 1);
1929 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
1930 NULL, vdReadHelperAsync, fFlags);
1931
1932 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
1933 IoCtx.Req.Io.cImagesRead = cImagesRead;
1934 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
1935 IoCtx.Type.Root.pvUser1 = pDisk;
1936 IoCtx.Type.Root.pvUser2 = hEventComplete;
1937 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
1938 RTSemEventDestroy(hEventComplete);
1939 return rc;
1940}
1941
1942/**
1943 * internal: read the specified amount of data in whatever blocks the backend
1944 * will give us.
1945 */
1946static int vdReadHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
1947 void *pvBuf, size_t cbRead, bool fUpdateCache)
1948{
1949 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
1950 true /* fZeroFreeBlocks */, fUpdateCache, 0);
1951}
1952
1953/**
1954 * internal: mark the disk as not modified.
1955 */
1956static void vdResetModifiedFlag(PVDISK pDisk)
1957{
1958 if (pDisk->uModified & VD_IMAGE_MODIFIED_FLAG)
1959 {
1960 /* generate new last-modified uuid */
1961 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
1962 {
1963 RTUUID Uuid;
1964
1965 RTUuidCreate(&Uuid);
1966 pDisk->pLast->Backend->pfnSetModificationUuid(pDisk->pLast->pBackendData,
1967 &Uuid);
1968
1969 if (pDisk->pCache)
1970 pDisk->pCache->Backend->pfnSetModificationUuid(pDisk->pCache->pBackendData,
1971 &Uuid);
1972 }
1973
1974 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FLAG;
1975 }
1976}
1977
1978/**
1979 * internal: mark the disk as modified.
1980 */
1981static void vdSetModifiedFlag(PVDISK pDisk)
1982{
1983 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
1984 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
1985 {
1986 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
1987
1988 /* First modify, so create a UUID and ensure it's written to disk. */
1989 vdResetModifiedFlag(pDisk);
1990
1991 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
1992 {
1993 VDIOCTX IoCtx;
1994 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, NULL,
1995 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
1996 pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData, &IoCtx);
1997 }
1998 }
1999}
2000
2001/**
2002 * internal: write buffer to the image, taking care of block boundaries and
2003 * write optimizations.
2004 */
2005static int vdWriteHelperEx(PVDISK pDisk, PVDIMAGE pImage,
2006 PVDIMAGE pImageParentOverride, uint64_t uOffset,
2007 const void *pvBuf, size_t cbWrite,
2008 uint32_t fFlags, unsigned cImagesRead)
2009{
2010 int rc = VINF_SUCCESS;
2011 RTSGSEG Segment;
2012 RTSGBUF SgBuf;
2013 VDIOCTX IoCtx;
2014 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
2015
2016 rc = RTSemEventCreate(&hEventComplete);
2017 if (RT_FAILURE(rc))
2018 return rc;
2019
2020 fFlags |= VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
2021
2022 Segment.pvSeg = (void *)pvBuf;
2023 Segment.cbSeg = cbWrite;
2024 RTSgBufInit(&SgBuf, &Segment, 1);
2025 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
2026 NULL, vdWriteHelperAsync, fFlags);
2027
2028 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
2029 IoCtx.Req.Io.cImagesRead = cImagesRead;
2030 IoCtx.pIoCtxParent = NULL;
2031 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
2032 IoCtx.Type.Root.pvUser1 = pDisk;
2033 IoCtx.Type.Root.pvUser2 = hEventComplete;
2034 if (RT_SUCCESS(rc))
2035 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
2036
2037 RTSemEventDestroy(hEventComplete);
2038 return rc;
2039}
2040
2041/**
2042 * internal: write buffer to the image, taking care of block boundaries and
2043 * write optimizations.
2044 */
2045static int vdWriteHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
2046 const void *pvBuf, size_t cbWrite, uint32_t fFlags)
2047{
2048 return vdWriteHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbWrite,
2049 fFlags, 0);
2050}
2051
2052/**
2053 * Internal: Copies the content of one disk to another one applying optimizations
2054 * to speed up the copy process if possible.
2055 */
2056static int vdCopyHelper(PVDISK pDiskFrom, PVDIMAGE pImageFrom, PVDISK pDiskTo,
2057 uint64_t cbSize, unsigned cImagesFromRead, unsigned cImagesToRead,
2058 bool fSuppressRedundantIo, PVDINTERFACEPROGRESS pIfProgress,
2059 PVDINTERFACEPROGRESS pDstIfProgress)
2060{
2061 int rc = VINF_SUCCESS;
2062 int rc2;
2063 uint64_t uOffset = 0;
2064 uint64_t cbRemaining = cbSize;
2065 void *pvBuf = NULL;
2066 bool fLockReadFrom = false;
2067 bool fLockWriteTo = false;
2068 bool fBlockwiseCopy = false;
2069 unsigned uProgressOld = 0;
2070
2071 LogFlowFunc(("pDiskFrom=%#p pImageFrom=%#p pDiskTo=%#p cbSize=%llu cImagesFromRead=%u cImagesToRead=%u fSuppressRedundantIo=%RTbool pIfProgress=%#p pDstIfProgress=%#p\n",
2072 pDiskFrom, pImageFrom, pDiskTo, cbSize, cImagesFromRead, cImagesToRead, fSuppressRedundantIo, pDstIfProgress, pDstIfProgress));
2073
2074 if ( (fSuppressRedundantIo || (cImagesFromRead > 0))
2075 && RTListIsEmpty(&pDiskFrom->ListFilterChainRead))
2076 fBlockwiseCopy = true;
2077
2078 /* Allocate tmp buffer. */
2079 pvBuf = RTMemTmpAlloc(VD_MERGE_BUFFER_SIZE);
2080 if (!pvBuf)
2081 return rc;
2082
2083 do
2084 {
2085 size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining);
2086
2087 /* Note that we don't attempt to synchronize cross-disk accesses.
2088 * It wouldn't be very difficult to do, just the lock order would
2089 * need to be defined somehow to prevent deadlocks. Postpone such
2090 * magic as there is no use case for this. */
2091
2092 rc2 = vdThreadStartRead(pDiskFrom);
2093 AssertRC(rc2);
2094 fLockReadFrom = true;
2095
2096 if (fBlockwiseCopy)
2097 {
2098 RTSGSEG SegmentBuf;
2099 RTSGBUF SgBuf;
2100 VDIOCTX IoCtx;
2101
2102 SegmentBuf.pvSeg = pvBuf;
2103 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
2104 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2105 vdIoCtxInit(&IoCtx, pDiskFrom, VDIOCTXTXDIR_READ, 0, 0, NULL,
2106 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2107
2108 /* Read the source data. */
2109 rc = pImageFrom->Backend->pfnRead(pImageFrom->pBackendData,
2110 uOffset, cbThisRead, &IoCtx,
2111 &cbThisRead);
2112
2113 if ( rc == VERR_VD_BLOCK_FREE
2114 && cImagesFromRead != 1)
2115 {
2116 unsigned cImagesToProcess = cImagesFromRead;
2117
2118 for (PVDIMAGE pCurrImage = pImageFrom->pPrev;
2119 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
2120 pCurrImage = pCurrImage->pPrev)
2121 {
2122 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
2123 uOffset, cbThisRead,
2124 &IoCtx, &cbThisRead);
2125 if (cImagesToProcess == 1)
2126 break;
2127 else if (cImagesToProcess > 0)
2128 cImagesToProcess--;
2129 }
2130 }
2131 }
2132 else
2133 rc = vdReadHelper(pDiskFrom, pImageFrom, uOffset, pvBuf, cbThisRead,
2134 false /* fUpdateCache */);
2135
2136 if (RT_FAILURE(rc) && rc != VERR_VD_BLOCK_FREE)
2137 break;
2138
2139 rc2 = vdThreadFinishRead(pDiskFrom);
2140 AssertRC(rc2);
2141 fLockReadFrom = false;
2142
2143 if (rc != VERR_VD_BLOCK_FREE)
2144 {
2145 rc2 = vdThreadStartWrite(pDiskTo);
2146 AssertRC(rc2);
2147 fLockWriteTo = true;
2148
2149 /* Only do collapsed I/O if we are copying the data blockwise. */
2150 rc = vdWriteHelperEx(pDiskTo, pDiskTo->pLast, NULL, uOffset, pvBuf,
2151 cbThisRead, VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG /* fFlags */,
2152 fBlockwiseCopy ? cImagesToRead : 0);
2153 if (RT_FAILURE(rc))
2154 break;
2155
2156 rc2 = vdThreadFinishWrite(pDiskTo);
2157 AssertRC(rc2);
2158 fLockWriteTo = false;
2159 }
2160 else /* Don't propagate the error to the outside */
2161 rc = VINF_SUCCESS;
2162
2163 uOffset += cbThisRead;
2164 cbRemaining -= cbThisRead;
2165
2166 unsigned uProgressNew = uOffset * 99 / cbSize;
2167 if (uProgressNew != uProgressOld)
2168 {
2169 uProgressOld = uProgressNew;
2170
2171 if (pIfProgress && pIfProgress->pfnProgress)
2172 {
2173 rc = pIfProgress->pfnProgress(pIfProgress->Core.pvUser,
2174 uProgressOld);
2175 if (RT_FAILURE(rc))
2176 break;
2177 }
2178 if (pDstIfProgress && pDstIfProgress->pfnProgress)
2179 {
2180 rc = pDstIfProgress->pfnProgress(pDstIfProgress->Core.pvUser,
2181 uProgressOld);
2182 if (RT_FAILURE(rc))
2183 break;
2184 }
2185 }
2186 } while (uOffset < cbSize);
2187
2188 RTMemFree(pvBuf);
2189
2190 if (fLockReadFrom)
2191 {
2192 rc2 = vdThreadFinishRead(pDiskFrom);
2193 AssertRC(rc2);
2194 }
2195
2196 if (fLockWriteTo)
2197 {
2198 rc2 = vdThreadFinishWrite(pDiskTo);
2199 AssertRC(rc2);
2200 }
2201
2202 LogFlowFunc(("returns rc=%Rrc\n", rc));
2203 return rc;
2204}
2205
2206/**
2207 * Flush helper async version.
2208 */
2209static DECLCALLBACK(int) vdSetModifiedHelperAsync(PVDIOCTX pIoCtx)
2210{
2211 int rc = VINF_SUCCESS;
2212 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2213
2214 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2215 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2216 rc = VINF_SUCCESS;
2217
2218 return rc;
2219}
2220
2221/**
2222 * internal: mark the disk as modified - async version.
2223 */
2224static int vdSetModifiedFlagAsync(PVDISK pDisk, PVDIOCTX pIoCtx)
2225{
2226 int rc = VINF_SUCCESS;
2227
2228 VD_IS_LOCKED(pDisk);
2229
2230 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
2231 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
2232 {
2233 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2234 if (RT_SUCCESS(rc))
2235 {
2236 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
2237
2238 /* First modify, so create a UUID and ensure it's written to disk. */
2239 vdResetModifiedFlag(pDisk);
2240
2241 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
2242 {
2243 PVDIOCTX pIoCtxFlush = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_FLUSH,
2244 0, 0, pDisk->pLast,
2245 NULL, pIoCtx, 0, 0, NULL,
2246 vdSetModifiedHelperAsync);
2247
2248 if (pIoCtxFlush)
2249 {
2250 rc = vdIoCtxProcessLocked(pIoCtxFlush);
2251 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2252 {
2253 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs */);
2254 vdIoCtxFree(pDisk, pIoCtxFlush);
2255 }
2256 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2257 {
2258 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2259 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2260 }
2261 else /* Another error */
2262 vdIoCtxFree(pDisk, pIoCtxFlush);
2263 }
2264 else
2265 rc = VERR_NO_MEMORY;
2266 }
2267 }
2268 }
2269
2270 return rc;
2271}
2272
2273static DECLCALLBACK(int) vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
2274{
2275 int rc = VINF_SUCCESS;
2276 PVDIMAGE pImage = pIoCtx->Req.Io.pImageStart;
2277 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2278 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2279 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2280
2281 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2282 rc = pImage->Backend->pfnWrite(pImage->pBackendData,
2283 pIoCtx->Req.Io.uOffset - cbPreRead,
2284 cbPreRead + cbThisWrite + cbPostRead,
2285 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
2286 Assert(rc != VERR_VD_BLOCK_FREE);
2287 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
2288 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPostRead == 0);
2289 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2290 rc = VINF_SUCCESS;
2291 else if (rc == VERR_VD_IOCTX_HALT)
2292 {
2293 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2294 rc = VINF_SUCCESS;
2295 }
2296
2297 LogFlowFunc(("returns rc=%Rrc\n", rc));
2298 return rc;
2299}
2300
/**
 * Optimized block write, step 2: compares the freshly read block against the
 * data to be written and only assembles/commits the block if it would change.
 *
 * Runs after vdWriteHelperOptimizedPreReadAsync read the whole block into this
 * child context's buffer. The parent context's S/G buffer holds the caller's
 * write data; it must not be advanced here except via clone or on the
 * unchanged-block exit path.
 *
 * @returns VBox status code; VINF_VD_ASYNC_IO_FINISHED when the block is
 *          unchanged and nothing needs writing.
 * @param   pIoCtx  The child write I/O context.
 */
static DECLCALLBACK(int) vdWriteHelperOptimizedCmpAndWriteAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    size_t cbThisWrite = 0;
    size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
    size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
    size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    AssertPtr(pIoCtxParent);
    Assert(!pIoCtxParent->pIoCtxParent);
    Assert(!pIoCtx->Req.Io.cbTransferLeft && !pIoCtx->cMetaTransfersPending);

    /* Rewind this context's S/G buffer to the block start, then position the
     * cursor at the part the caller wants to overwrite. */
    vdIoCtxChildReset(pIoCtx);
    cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
    RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);

    /* Check if the write would modify anything in this block. */
    if (!RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &pIoCtxParent->Req.Io.SgBuf, cbThisWrite))
    {
        RTSGBUF SgBufSrcTmp;

        /* Payload identical; also compare the data after the payload that
         * would be copied from the parent (cbWriteCopy bytes). */
        RTSgBufClone(&SgBufSrcTmp, &pIoCtxParent->Req.Io.SgBuf);
        RTSgBufAdvance(&SgBufSrcTmp, cbThisWrite);
        RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbThisWrite);

        if (!cbWriteCopy || !RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &SgBufSrcTmp, cbWriteCopy))
        {
            /* Block is completely unchanged, so no need to write anything. */
            LogFlowFunc(("Block didn't changed\n"));
            ASMAtomicWriteU32(&pIoCtx->Req.Io.cbTransferLeft, 0);
            /* Consume the parent's data as if it had been written. */
            RTSgBufAdvance(&pIoCtxParent->Req.Io.SgBuf, cbThisWrite);
            return VINF_VD_ASYNC_IO_FINISHED;
        }
    }

    /* Copy the data to the right place in the buffer. */
    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);

    /* Handle the data that goes after the write to fill the block. */
    if (cbPostRead)
    {
        /* Now assemble the remaining data. */
        if (cbWriteCopy)
        {
            /*
             * The S/G buffer of the parent needs to be cloned because
             * it is not allowed to modify the state.
             */
            RTSGBUF SgBufParentTmp;

            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
        }

        /* Zero out the remainder of this block. Will never be visible, as this
         * is beyond the limit of the image. */
        if (cbFill)
        {
            RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
            vdIoCtxSet(pIoCtx, '\0', cbFill);
        }
    }

    /* Write the full block to the virtual disk. */
    RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
    pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;

    return rc;
}
2377
2378static DECLCALLBACK(int) vdWriteHelperOptimizedPreReadAsync(PVDIOCTX pIoCtx)
2379{
2380 int rc = VINF_SUCCESS;
2381
2382 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2383
2384 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2385
2386 if ( pIoCtx->Req.Io.cbTransferLeft
2387 && !pIoCtx->cDataTransfersPending)
2388 rc = vdReadHelperAsync(pIoCtx);
2389
2390 if ( ( RT_SUCCESS(rc)
2391 || (rc == VERR_VD_ASYNC_IO_IN_PROGRESS))
2392 && ( pIoCtx->Req.Io.cbTransferLeft
2393 || pIoCtx->cMetaTransfersPending))
2394 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2395 else
2396 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCmpAndWriteAsync;
2397
2398 return rc;
2399}
2400
2401/**
2402 * internal: write a complete block (only used for diff images), taking the
2403 * remaining data from parent images. This implementation optimizes out writes
2404 * that do not change the data relative to the state as of the parent images.
2405 * All backends which support differential/growing images support this - async version.
2406 */
2407static DECLCALLBACK(int) vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx)
2408{
2409 PVDISK pDisk = pIoCtx->pDisk;
2410 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2411 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2412 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2413 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2414 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2415 size_t cbFill = 0;
2416 size_t cbWriteCopy = 0;
2417 size_t cbReadImage = 0;
2418
2419 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2420
2421 AssertPtr(pIoCtx->pIoCtxParent);
2422 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2423
2424 if (cbPostRead)
2425 {
2426 /* Figure out how much we cannot read from the image, because
2427 * the last block to write might exceed the nominal size of the
2428 * image for technical reasons. */
2429 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2430 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2431
2432 /* If we have data to be written, use that instead of reading
2433 * data from the image. */
2434 if (cbWrite > cbThisWrite)
2435 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2436
2437 /* The rest must be read from the image. */
2438 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2439 }
2440
2441 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2442 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2443 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2444
2445 /* Read the entire data of the block so that we can compare whether it will
2446 * be modified by the write or not. */
2447 size_t cbTmp = cbPreRead + cbThisWrite + cbPostRead - cbFill; Assert(cbTmp == (uint32_t)cbTmp);
2448 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTmp;
2449 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2450 pIoCtx->Req.Io.uOffset -= cbPreRead;
2451
2452 /* Next step */
2453 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
2454 return VINF_SUCCESS;
2455}
2456
2457static DECLCALLBACK(int) vdWriteHelperStandardReadImageAsync(PVDIOCTX pIoCtx)
2458{
2459 int rc = VINF_SUCCESS;
2460
2461 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2462
2463 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2464
2465 if ( pIoCtx->Req.Io.cbTransferLeft
2466 && !pIoCtx->cDataTransfersPending)
2467 rc = vdReadHelperAsync(pIoCtx);
2468
2469 if ( RT_SUCCESS(rc)
2470 && ( pIoCtx->Req.Io.cbTransferLeft
2471 || pIoCtx->cMetaTransfersPending))
2472 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2473 else
2474 {
2475 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2476
2477 /* Zero out the remainder of this block. Will never be visible, as this
2478 * is beyond the limit of the image. */
2479 if (cbFill)
2480 vdIoCtxSet(pIoCtx, '\0', cbFill);
2481
2482 /* Write the full block to the virtual disk. */
2483 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2484
2485 vdIoCtxChildReset(pIoCtx);
2486 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2487 }
2488
2489 return rc;
2490}
2491
/**
 * Standard block write: assembles the block in the child context's buffer
 * from the parent's write data, optionally schedules a read of the trailing
 * image data, then chains to the commit step.
 *
 * @returns VBox status code.
 * @param   pIoCtx  The child write I/O context.
 */
static DECLCALLBACK(int) vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
    PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    /* Copy the caller's payload into the block buffer. */
    vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
    if (cbPostRead)
    {
        size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
        size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
        size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;

        /* Now assemble the remaining data. */
        if (cbWriteCopy)
        {
            /*
             * The S/G buffer of the parent needs to be cloned because
             * it is not allowed to modify the state.
             */
            RTSGBUF SgBufParentTmp;

            RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
            RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
        }

        if (cbReadImage)
        {
            /* Read remaining data. */
            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardReadImageAsync;

            /* Read the data that goes before the write to fill the block. */
            pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbReadImage; Assert(cbReadImage == (uint32_t)cbReadImage);
            pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
            pIoCtx->Req.Io.uOffset += cbWriteCopy;
        }
        else
        {
            /* Zero out the remainder of this block. Will never be visible, as this
             * is beyond the limit of the image. */
            if (cbFill)
                vdIoCtxSet(pIoCtx, '\0', cbFill);

            /* Write the full block to the virtual disk. */
            RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
            vdIoCtxChildReset(pIoCtx);
            pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
        }
    }
    else
    {
        /* Nothing to fill after the payload; commit directly. */
        /* Write the full block to the virtual disk. */
        RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
        vdIoCtxChildReset(pIoCtx);
        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
    }

    return rc;
}
2554
2555static DECLCALLBACK(int) vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
2556{
2557 int rc = VINF_SUCCESS;
2558
2559 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2560
2561 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2562
2563 if ( pIoCtx->Req.Io.cbTransferLeft
2564 && !pIoCtx->cDataTransfersPending)
2565 rc = vdReadHelperAsync(pIoCtx);
2566
2567 if ( RT_SUCCESS(rc)
2568 && ( pIoCtx->Req.Io.cbTransferLeft
2569 || pIoCtx->cMetaTransfersPending))
2570 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2571 else
2572 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2573
2574 return rc;
2575}
2576
/**
 * Standard (non-optimized) block write entry point - async version.
 *
 * Unlike vdWriteHelperOptimizedAsync, no compare-before-write is done; the
 * block is always assembled and written. Computes the block layout
 * (cbFill/cbWriteCopy/cbReadImage), stores it in the child context and either
 * schedules the pre-read of the leading data or goes straight to assembling.
 *
 * @returns VINF_SUCCESS (work continues in the chained step).
 * @param   pIoCtx  The child write I/O context.
 */
static DECLCALLBACK(int) vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
{
    PVDISK pDisk = pIoCtx->pDisk;
    uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
    size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
    size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
    size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
    size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
    size_t cbFill = 0;        /* Bytes past the nominal disk size, zero filled. */
    size_t cbWriteCopy = 0;   /* Bytes after the payload taken from the parent write. */
    size_t cbReadImage = 0;   /* Bytes after the payload read from the image. */

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    AssertPtr(pIoCtx->pIoCtxParent);
    Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);

    /* Calculate the amount of data to read that goes after the write to fill the block. */
    if (cbPostRead)
    {
        /* If we have data to be written, use that instead of reading
         * data from the image. */
        if (cbWrite > cbThisWrite)
            cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
        else
            cbWriteCopy = 0;

        /* Figure out how much we cannot read from the image, because
         * the last block to write might exceed the nominal size of the
         * image for technical reasons. */
        if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
            cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;

        /* The rest must be read from the image. */
        cbReadImage = cbPostRead - cbWriteCopy - cbFill;
    }

    pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
    pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
    pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;

    /* Next step */
    if (cbPreRead)
    {
        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;

        /* Read the data that goes before the write to fill the block. */
        pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbPreRead; Assert(cbPreRead == (uint32_t)cbPreRead);
        pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
        pIoCtx->Req.Io.uOffset -= cbPreRead;
    }
    else
        pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;

    return VINF_SUCCESS;
}
2633
2634/**
2635 * internal: write buffer to the image, taking care of block boundaries and
2636 * write optimizations - async version.
2637 */
2638static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx)
2639{
2640 int rc;
2641 size_t cbWrite = pIoCtx->Req.Io.cbTransfer;
2642 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
2643 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2644 PVDISK pDisk = pIoCtx->pDisk;
2645 unsigned fWrite;
2646 size_t cbThisWrite;
2647 size_t cbPreRead, cbPostRead;
2648
2649 /* Apply write filter chain here if it was not done already. */
2650 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_WRITE_FILTER_APPLIED))
2651 {
2652 rc = vdFilterChainApplyWrite(pDisk, uOffset, cbWrite, pIoCtx);
2653 if (RT_FAILURE(rc))
2654 return rc;
2655 pIoCtx->fFlags |= VDIOCTX_FLAGS_WRITE_FILTER_APPLIED;
2656 }
2657
2658 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG))
2659 {
2660 rc = vdSetModifiedFlagAsync(pDisk, pIoCtx);
2661 if (RT_FAILURE(rc)) /* Includes I/O in progress. */
2662 return rc;
2663 }
2664
2665 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
2666 if (RT_FAILURE(rc))
2667 return rc;
2668
2669 /* Loop until all written. */
2670 do
2671 {
2672 /* Try to write the possibly partial block to the last opened image.
2673 * This works when the block is already allocated in this image or
2674 * if it is a full-block write (and allocation isn't suppressed below).
2675 * For image formats which don't support zero blocks, it's beneficial
2676 * to avoid unnecessarily allocating unchanged blocks. This prevents
2677 * unwanted expanding of images. VMDK is an example. */
2678 cbThisWrite = cbWrite;
2679
2680 /*
2681 * Check whether there is a full block write in progress which was not allocated.
2682 * Defer I/O if the range interferes.
2683 */
2684 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
2685 && uOffset >= pDisk->uOffsetStartLocked
2686 && uOffset < pDisk->uOffsetEndLocked)
2687 {
2688 Log(("Interferring write while allocating a new block => deferring write\n"));
2689 vdIoCtxDefer(pDisk, pIoCtx);
2690 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2691 break;
2692 }
2693
2694 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2695 ? 0 : VD_WRITE_NO_ALLOC;
2696 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset, cbThisWrite,
2697 pIoCtx, &cbThisWrite, &cbPreRead, &cbPostRead,
2698 fWrite);
2699 if (rc == VERR_VD_BLOCK_FREE)
2700 {
2701 /* Lock the disk .*/
2702 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2703 if (RT_SUCCESS(rc))
2704 {
2705 /*
2706 * Allocate segment and buffer in one go.
2707 * A bit hackish but avoids the need to allocate memory twice.
2708 */
2709 PRTSGBUF pTmp = (PRTSGBUF)RTMemAlloc(cbPreRead + cbThisWrite + cbPostRead + sizeof(RTSGSEG) + sizeof(RTSGBUF));
2710 AssertBreakStmt(pTmp, rc = VERR_NO_MEMORY);
2711 PRTSGSEG pSeg = (PRTSGSEG)(pTmp + 1);
2712
2713 pSeg->pvSeg = pSeg + 1;
2714 pSeg->cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2715 RTSgBufInit(pTmp, pSeg, 1);
2716
2717 PVDIOCTX pIoCtxWrite = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_WRITE,
2718 uOffset, pSeg->cbSeg, pImage,
2719 pTmp,
2720 pIoCtx, cbThisWrite,
2721 cbWrite,
2722 pTmp,
2723 (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2724 ? vdWriteHelperStandardAsync
2725 : vdWriteHelperOptimizedAsync);
2726 if (!pIoCtxWrite)
2727 {
2728 RTMemTmpFree(pTmp);
2729 rc = VERR_NO_MEMORY;
2730 break;
2731 }
2732
2733 LogFlowFunc(("Disk is growing because of pIoCtx=%#p pIoCtxWrite=%#p\n",
2734 pIoCtx, pIoCtxWrite));
2735
2736 /* Save the current range for the growing operation to check for intersecting requests later. */
2737 pDisk->uOffsetStartLocked = uOffset - cbPreRead;
2738 pDisk->uOffsetEndLocked = uOffset + cbThisWrite + cbPostRead;
2739
2740 pIoCtxWrite->Type.Child.cbPreRead = cbPreRead;
2741 pIoCtxWrite->Type.Child.cbPostRead = cbPostRead;
2742 pIoCtxWrite->Req.Io.pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
2743
2744 /* Process the write request */
2745 rc = vdIoCtxProcessLocked(pIoCtxWrite);
2746
2747 if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
2748 {
2749 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2750 vdIoCtxFree(pDisk, pIoCtxWrite);
2751 break;
2752 }
2753 else if ( rc == VINF_VD_ASYNC_IO_FINISHED
2754 && ASMAtomicCmpXchgBool(&pIoCtxWrite->fComplete, true, false))
2755 {
2756 LogFlow(("Child write request completed\n"));
2757 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbThisWrite);
2758 Assert(cbThisWrite == (uint32_t)cbThisWrite);
2759 rc = pIoCtxWrite->rcReq;
2760 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisWrite);
2761 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
2762 vdIoCtxFree(pDisk, pIoCtxWrite);
2763 }
2764 else
2765 {
2766 LogFlow(("Child write pending\n"));
2767 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
2768 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2769 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2770 cbWrite -= cbThisWrite;
2771 uOffset += cbThisWrite;
2772 break;
2773 }
2774 }
2775 else
2776 {
2777 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2778 break;
2779 }
2780 }
2781
2782 if (rc == VERR_VD_IOCTX_HALT)
2783 {
2784 cbWrite -= cbThisWrite;
2785 uOffset += cbThisWrite;
2786 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
2787 break;
2788 }
2789 else if (rc == VERR_VD_NOT_ENOUGH_METADATA)
2790 break;
2791
2792 cbWrite -= cbThisWrite;
2793 uOffset += cbThisWrite;
2794 } while (cbWrite != 0 && (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
2795
2796 if ( rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2797 || rc == VERR_VD_NOT_ENOUGH_METADATA
2798 || rc == VERR_VD_IOCTX_HALT)
2799 {
2800 /*
2801 * Tell the caller that we don't need to go back here because all
2802 * writes are initiated.
2803 */
2804 if ( !cbWrite
2805 && rc != VERR_VD_IOCTX_HALT)
2806 rc = VINF_SUCCESS;
2807
2808 pIoCtx->Req.Io.uOffset = uOffset;
2809 pIoCtx->Req.Io.cbTransfer = cbWrite;
2810 }
2811
2812 return rc;
2813}
2814
2815/**
2816 * Flush helper async version.
2817 */
2818static DECLCALLBACK(int) vdFlushHelperAsync(PVDIOCTX pIoCtx)
2819{
2820 int rc = VINF_SUCCESS;
2821 PVDISK pDisk = pIoCtx->pDisk;
2822 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
2823
2824 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2825 if (RT_SUCCESS(rc))
2826 {
2827 /* Mark the whole disk as locked. */
2828 pDisk->uOffsetStartLocked = 0;
2829 pDisk->uOffsetEndLocked = UINT64_C(0xffffffffffffffff);
2830
2831 vdResetModifiedFlag(pDisk);
2832 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
2833 if ( ( RT_SUCCESS(rc)
2834 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2835 || rc == VERR_VD_IOCTX_HALT)
2836 && pDisk->pCache)
2837 {
2838 rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData, pIoCtx);
2839 if ( RT_SUCCESS(rc)
2840 || ( rc != VERR_VD_ASYNC_IO_IN_PROGRESS
2841 && rc != VERR_VD_IOCTX_HALT))
2842 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2843 else if (rc != VERR_VD_IOCTX_HALT)
2844 rc = VINF_SUCCESS;
2845 }
2846 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2847 rc = VINF_SUCCESS;
2848 else if (rc != VERR_VD_IOCTX_HALT)/* Some other error. */
2849 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
2850 }
2851
2852 return rc;
2853}
2854
2855/**
2856 * Async discard helper - discards a whole block which is recorded in the block
2857 * tree.
2858 *
2859 * @returns VBox status code.
2860 * @param pIoCtx The I/O context to operate on.
2861 */
2862static DECLCALLBACK(int) vdDiscardWholeBlockAsync(PVDIOCTX pIoCtx)
2863{
2864 int rc = VINF_SUCCESS;
2865 PVDISK pDisk = pIoCtx->pDisk;
2866 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2867 PVDDISCARDBLOCK pBlock = pIoCtx->Req.Discard.pBlock;
2868 size_t cbPreAllocated, cbPostAllocated, cbActuallyDiscarded;
2869
2870 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2871
2872 AssertPtr(pBlock);
2873
2874 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2875 pBlock->Core.Key, pBlock->cbDiscard,
2876 &cbPreAllocated, &cbPostAllocated,
2877 &cbActuallyDiscarded, NULL, 0);
2878 Assert(rc != VERR_VD_DISCARD_ALIGNMENT_NOT_MET);
2879 Assert(!cbPreAllocated);
2880 Assert(!cbPostAllocated);
2881 Assert(cbActuallyDiscarded == pBlock->cbDiscard || RT_FAILURE(rc));
2882
2883 /* Remove the block on success. */
2884 if ( RT_SUCCESS(rc)
2885 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2886 {
2887 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2888 Assert(pBlockRemove == pBlock); RT_NOREF1(pBlockRemove);
2889
2890 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2891 RTListNodeRemove(&pBlock->NodeLru);
2892 RTMemFree(pBlock->pbmAllocated);
2893 RTMemFree(pBlock);
2894 pIoCtx->Req.Discard.pBlock = NULL;/* Safety precaution. */
2895 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
2896 rc = VINF_SUCCESS;
2897 }
2898
2899 LogFlowFunc(("returns rc=%Rrc\n", rc));
2900 return rc;
2901}
2902
2903/**
2904 * Removes the least recently used blocks from the waiting list until
2905 * the new value is reached - version for async I/O.
2906 *
2907 * @returns VBox status code.
2908 * @param pDisk VD disk container.
2909 * @param pIoCtx The I/O context associated with this discard operation.
2910 * @param cbDiscardingNew How many bytes should be waiting on success.
2911 * The number of bytes waiting can be less.
2912 */
2913static int vdDiscardRemoveBlocksAsync(PVDISK pDisk, PVDIOCTX pIoCtx, size_t cbDiscardingNew)
2914{
2915 int rc = VINF_SUCCESS;
2916 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2917
2918 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
2919 pDisk, pDiscard, cbDiscardingNew));
2920
2921 while (pDiscard->cbDiscarding > cbDiscardingNew)
2922 {
2923 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
2924
2925 Assert(!RTListIsEmpty(&pDiscard->ListLru));
2926
2927 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
2928 uint64_t offStart = pBlock->Core.Key;
2929 uint32_t idxStart = 0;
2930 size_t cbLeft = pBlock->cbDiscard;
2931 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
2932 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
2933
2934 while (cbLeft > 0)
2935 {
2936 int32_t idxEnd;
2937 size_t cbThis = cbLeft;
2938
2939 if (fAllocated)
2940 {
2941 /* Check for the first unallocated bit. */
2942 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
2943 if (idxEnd != -1)
2944 {
2945 cbThis = (idxEnd - idxStart) * 512;
2946 fAllocated = false;
2947 }
2948 }
2949 else
2950 {
2951 /* Mark as unused and check for the first set bit. */
2952 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
2953 if (idxEnd != -1)
2954 cbThis = (idxEnd - idxStart) * 512;
2955
2956 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
2957 offStart, cbThis, NULL, NULL, &cbThis,
2958 NULL, VD_DISCARD_MARK_UNUSED);
2959 if ( RT_FAILURE(rc)
2960 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2961 break;
2962
2963 fAllocated = true;
2964 }
2965
2966 idxStart = idxEnd;
2967 offStart += cbThis;
2968 cbLeft -= cbThis;
2969 }
2970
2971 if ( RT_FAILURE(rc)
2972 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2973 break;
2974
2975 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
2976 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
2977 RTListNodeRemove(&pBlock->NodeLru);
2978
2979 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2980 RTMemFree(pBlock->pbmAllocated);
2981 RTMemFree(pBlock);
2982 }
2983
2984 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2985 rc = VINF_SUCCESS;
2986
2987 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
2988
2989 LogFlowFunc(("returns rc=%Rrc\n", rc));
2990 return rc;
2991}
2992
2993/**
2994 * Async discard helper - discards the current range if there is no matching
2995 * block in the tree.
2996 *
2997 * @returns VBox status code.
2998 * @param pIoCtx The I/O context to operate on.
2999 */
3000static DECLCALLBACK(int) vdDiscardCurrentRangeAsync(PVDIOCTX pIoCtx)
3001{
3002 PVDISK pDisk = pIoCtx->pDisk;
3003 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
3004 uint64_t offStart = pIoCtx->Req.Discard.offCur;
3005 size_t cbThisDiscard = pIoCtx->Req.Discard.cbThisDiscard;
3006 void *pbmAllocated = NULL;
3007 size_t cbPreAllocated, cbPostAllocated;
3008 int rc = VINF_SUCCESS;
3009
3010 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
3011
3012 /* No block found, try to discard using the backend first. */
3013 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
3014 offStart, cbThisDiscard, &cbPreAllocated,
3015 &cbPostAllocated, &cbThisDiscard,
3016 &pbmAllocated, 0);
3017 if (rc == VERR_VD_DISCARD_ALIGNMENT_NOT_MET)
3018 {
3019 /* Create new discard block. */
3020 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTMemAllocZ(sizeof(VDDISCARDBLOCK));
3021 if (pBlock)
3022 {
3023 pBlock->Core.Key = offStart - cbPreAllocated;
3024 pBlock->Core.KeyLast = offStart + cbThisDiscard + cbPostAllocated - 1;
3025 pBlock->cbDiscard = cbPreAllocated + cbThisDiscard + cbPostAllocated;
3026 pBlock->pbmAllocated = pbmAllocated;
3027 bool fInserted = RTAvlrU64Insert(pDiscard->pTreeBlocks, &pBlock->Core);
3028 Assert(fInserted); NOREF(fInserted);
3029
3030 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3031 pDiscard->cbDiscarding += pBlock->cbDiscard;
3032
3033 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3034 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3035 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3036 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3037
3038 if (pDiscard->cbDiscarding > VD_DISCARD_REMOVE_THRESHOLD)
3039 rc = vdDiscardRemoveBlocksAsync(pDisk, pIoCtx, VD_DISCARD_REMOVE_THRESHOLD);
3040 else
3041 rc = VINF_SUCCESS;
3042
3043 if (RT_SUCCESS(rc))
3044 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
3045 }
3046 else
3047 {
3048 RTMemFree(pbmAllocated);
3049 rc = VERR_NO_MEMORY;
3050 }
3051 }
3052 else if ( RT_SUCCESS(rc)
3053 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) /* Save state and andvance to next range. */
3054 {
3055 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3056 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3057 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3058 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3059 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3060 rc = VINF_SUCCESS;
3061 }
3062
3063 LogFlowFunc(("returns rc=%Rrc\n", rc));
3064 return rc;
3065}
3066
/**
 * Async discard helper - entry point.
 *
 * Processes one chunk of the discard request per invocation: loads the next
 * range descriptor when the current one is exhausted, consults the discard
 * block cache (AVL tree) and dispatches either to the whole-block or the
 * current-range discard continuation.
 *
 * @returns VBox status code.
 * @param   pIoCtx    The I/O context to operate on.
 */
static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;
    PCRTRANGE paRanges = pIoCtx->Req.Discard.paRanges;
    unsigned cRanges = pIoCtx->Req.Discard.cRanges;
    PVDDISCARDSTATE pDiscard = pDisk->pDiscard;

    LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));

    /* Check if the I/O context processed all ranges. */
    if (   pIoCtx->Req.Discard.idxRange == cRanges
        && !pIoCtx->Req.Discard.cbDiscardLeft)
    {
        LogFlowFunc(("All ranges discarded, completing\n"));
        vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs*/);
        return VINF_SUCCESS;
    }

    /* Acquire the disk lock unless this context already owns it. */
    if (pDisk->pIoCtxLockOwner != pIoCtx)
        rc = vdIoCtxLockDisk(pDisk, pIoCtx);

    if (RT_SUCCESS(rc))
    {
        uint64_t offStart = pIoCtx->Req.Discard.offCur;
        size_t cbDiscardLeft = pIoCtx->Req.Discard.cbDiscardLeft;
        size_t cbThisDiscard;

        pDisk->uOffsetStartLocked = offStart;
        pDisk->uOffsetEndLocked = offStart + cbDiscardLeft;

        /* Lazily create the discard state on first use. */
        if (RT_UNLIKELY(!pDiscard))
        {
            pDiscard = vdDiscardStateCreate();
            if (!pDiscard)
                return VERR_NO_MEMORY; /* NOTE(review): returns while holding the disk lock taken above - confirm the caller recovers. */

            pDisk->pDiscard = pDiscard;
        }

        /* Current range exhausted - load the next range descriptor. */
        if (!pIoCtx->Req.Discard.cbDiscardLeft)
        {
            offStart = paRanges[pIoCtx->Req.Discard.idxRange].offStart;
            cbDiscardLeft = paRanges[pIoCtx->Req.Discard.idxRange].cbRange;
            LogFlowFunc(("New range descriptor loaded (%u) offStart=%llu cbDiscard=%zu\n",
                         pIoCtx->Req.Discard.idxRange, offStart, cbDiscardLeft));
            pIoCtx->Req.Discard.idxRange++;
        }

        /* Look for a matching block in the AVL tree first. */
        PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, false);
        if (!pBlock || pBlock->Core.KeyLast < offStart)
        {
            PVDDISCARDBLOCK pBlockAbove = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, true);

            /* Clip range to remain in the current block. */
            if (pBlockAbove)
                cbThisDiscard = RT_MIN(cbDiscardLeft, pBlockAbove->Core.KeyLast - offStart + 1);
            else
                cbThisDiscard = cbDiscardLeft;

            Assert(!(cbThisDiscard % 512));
            pIoCtx->Req.Discard.pBlock = NULL;
            pIoCtx->pfnIoCtxTransferNext = vdDiscardCurrentRangeAsync;
        }
        else
        {
            /* Range lies partly in the block, update allocation bitmap. */
            int32_t idxStart, idxEnd;

            cbThisDiscard = RT_MIN(cbDiscardLeft, pBlock->Core.KeyLast - offStart + 1);

            AssertPtr(pBlock);

            /* Offsets/sizes are expected to be 512-byte aligned here. */
            Assert(!(cbThisDiscard % 512));
            Assert(!((offStart - pBlock->Core.Key) % 512));

            idxStart = (offStart - pBlock->Core.Key) / 512;
            idxEnd = idxStart + (int32_t)(cbThisDiscard / 512);

            ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd);

            cbDiscardLeft -= cbThisDiscard;
            offStart += cbThisDiscard;

            /* Call the backend to discard the block if it is completely unallocated now. */
            if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, (uint32_t)(pBlock->cbDiscard / 512)) == -1)
            {
                pIoCtx->Req.Discard.pBlock = pBlock;
                pIoCtx->pfnIoCtxTransferNext = vdDiscardWholeBlockAsync;
                rc = VINF_SUCCESS;
            }
            else
            {
                /* Still partially allocated: move the block to the LRU head. */
                RTListNodeRemove(&pBlock->NodeLru);
                RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);

                /* Start with next range. */
                pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
                rc = VINF_SUCCESS;
            }
        }

        /* Save state in the context. */
        pIoCtx->Req.Discard.offCur = offStart;
        pIoCtx->Req.Discard.cbDiscardLeft = cbDiscardLeft;
        pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
3185
3186/**
3187 * VD async I/O interface open callback.
3188 */
3189static DECLCALLBACK(int) vdIOOpenFallback(void *pvUser, const char *pszLocation,
3190 uint32_t fOpen, PFNVDCOMPLETED pfnCompleted,
3191 void **ppStorage)
3192{
3193 RT_NOREF1(pvUser);
3194 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)RTMemAllocZ(sizeof(VDIIOFALLBACKSTORAGE));
3195
3196 if (!pStorage)
3197 return VERR_NO_MEMORY;
3198
3199 pStorage->pfnCompleted = pfnCompleted;
3200
3201 /* Open the file. */
3202 int rc = RTFileOpen(&pStorage->File, pszLocation, fOpen);
3203 if (RT_SUCCESS(rc))
3204 {
3205 *ppStorage = pStorage;
3206 return VINF_SUCCESS;
3207 }
3208
3209 RTMemFree(pStorage);
3210 return rc;
3211}
3212
3213/**
3214 * VD async I/O interface close callback.
3215 */
3216static DECLCALLBACK(int) vdIOCloseFallback(void *pvUser, void *pvStorage)
3217{
3218 RT_NOREF1(pvUser);
3219 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3220
3221 RTFileClose(pStorage->File);
3222 RTMemFree(pStorage);
3223 return VINF_SUCCESS;
3224}
3225
3226static DECLCALLBACK(int) vdIODeleteFallback(void *pvUser, const char *pcszFilename)
3227{
3228 RT_NOREF1(pvUser);
3229 return RTFileDelete(pcszFilename);
3230}
3231
3232static DECLCALLBACK(int) vdIOMoveFallback(void *pvUser, const char *pcszSrc, const char *pcszDst, unsigned fMove)
3233{
3234 RT_NOREF1(pvUser);
3235 return RTFileMove(pcszSrc, pcszDst, fMove);
3236}
3237
3238static DECLCALLBACK(int) vdIOGetFreeSpaceFallback(void *pvUser, const char *pcszFilename, int64_t *pcbFreeSpace)
3239{
3240 RT_NOREF1(pvUser);
3241 return RTFsQuerySizes(pcszFilename, NULL, pcbFreeSpace, NULL, NULL);
3242}
3243
3244static DECLCALLBACK(int) vdIOGetModificationTimeFallback(void *pvUser, const char *pcszFilename, PRTTIMESPEC pModificationTime)
3245{
3246 RT_NOREF1(pvUser);
3247 RTFSOBJINFO info;
3248 int rc = RTPathQueryInfo(pcszFilename, &info, RTFSOBJATTRADD_NOTHING);
3249 if (RT_SUCCESS(rc))
3250 *pModificationTime = info.ModificationTime;
3251 return rc;
3252}
3253
3254/**
3255 * VD async I/O interface callback for retrieving the file size.
3256 */
3257static DECLCALLBACK(int) vdIOGetSizeFallback(void *pvUser, void *pvStorage, uint64_t *pcbSize)
3258{
3259 RT_NOREF1(pvUser);
3260 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3261
3262 return RTFileQuerySize(pStorage->File, pcbSize);
3263}
3264
3265/**
3266 * VD async I/O interface callback for setting the file size.
3267 */
3268static DECLCALLBACK(int) vdIOSetSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize)
3269{
3270 RT_NOREF1(pvUser);
3271 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3272
3273 return RTFileSetSize(pStorage->File, cbSize);
3274}
3275
3276/**
3277 * VD async I/O interface callback for setting the file allocation size.
3278 */
3279static DECLCALLBACK(int) vdIOSetAllocationSizeFallback(void *pvUser, void *pvStorage, uint64_t cbSize,
3280 uint32_t fFlags)
3281{
3282 RT_NOREF2(pvUser, fFlags);
3283 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3284
3285 return RTFileSetAllocationSize(pStorage->File, cbSize, RTFILE_ALLOC_SIZE_F_DEFAULT);
3286}
3287
3288/**
3289 * VD async I/O interface callback for a synchronous write to the file.
3290 */
3291static DECLCALLBACK(int) vdIOWriteSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3292 const void *pvBuf, size_t cbWrite, size_t *pcbWritten)
3293{
3294 RT_NOREF1(pvUser);
3295 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3296
3297 return RTFileWriteAt(pStorage->File, uOffset, pvBuf, cbWrite, pcbWritten);
3298}
3299
3300/**
3301 * VD async I/O interface callback for a synchronous read from the file.
3302 */
3303static DECLCALLBACK(int) vdIOReadSyncFallback(void *pvUser, void *pvStorage, uint64_t uOffset,
3304 void *pvBuf, size_t cbRead, size_t *pcbRead)
3305{
3306 RT_NOREF1(pvUser);
3307 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3308
3309 return RTFileReadAt(pStorage->File, uOffset, pvBuf, cbRead, pcbRead);
3310}
3311
3312/**
3313 * VD async I/O interface callback for a synchronous flush of the file data.
3314 */
3315static DECLCALLBACK(int) vdIOFlushSyncFallback(void *pvUser, void *pvStorage)
3316{
3317 RT_NOREF1(pvUser);
3318 PVDIIOFALLBACKSTORAGE pStorage = (PVDIIOFALLBACKSTORAGE)pvStorage;
3319
3320 return RTFileFlush(pStorage->File);
3321}
3322
/**
 * Internal - Continues an I/O context after
 * it was halted because of an active transfer.
 *
 * Propagates the request status, resumes processing, and on completion
 * unwinds the parent context (for child write/flush contexts created while
 * growing the image) or completes the root context directly.
 *
 * @returns VBox status code (always VINF_SUCCESS currently).
 * @param   pIoCtx    The I/O context to continue.
 * @param   rcReq     Status of the transfer which halted the context.
 */
static int vdIoCtxContinue(PVDIOCTX pIoCtx, int rcReq)
{
    PVDISK pDisk = pIoCtx->pDisk;
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    /* Record the first failure only; later errors don't overwrite it. */
    if (RT_FAILURE(rcReq))
        ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);

    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
    {
        /* Continue the transfer */
        rc = vdIoCtxProcessLocked(pIoCtx);

        /* The CmpXchg guarantees only one caller performs the completion. */
        if (   rc == VINF_VD_ASYNC_IO_FINISHED
            && ASMAtomicCmpXchgBool(&pIoCtx->fComplete, true, false))
        {
            LogFlowFunc(("I/O context completed pIoCtx=%#p\n", pIoCtx));
            bool fFreeCtx = RT_BOOL(!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE));
            if (pIoCtx->pIoCtxParent)
            {
                PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;

                /* Contexts nest at most one level deep. */
                Assert(!pIoCtxParent->pIoCtxParent);
                if (RT_FAILURE(pIoCtx->rcReq))
                    ASMAtomicCmpXchgS32(&pIoCtxParent->rcReq, pIoCtx->rcReq, VINF_SUCCESS);

                ASMAtomicDecU32(&pIoCtxParent->cDataTransfersPending);

                if (pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE)
                {
                    LogFlowFunc(("I/O context transferred %u bytes for the parent pIoCtxParent=%p\n",
                                 pIoCtx->Type.Child.cbTransferParent, pIoCtxParent));

                    /* Update the parent state. */
                    Assert(pIoCtxParent->Req.Io.cbTransferLeft >= pIoCtx->Type.Child.cbTransferParent);
                    ASMAtomicSubU32(&pIoCtxParent->Req.Io.cbTransferLeft, (uint32_t)pIoCtx->Type.Child.cbTransferParent);
                }
                else
                    Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH);

                /*
                 * A completed child write means that we finished growing the image.
                 * We have to process any pending writes now.
                 */
                vdIoCtxUnlockDisk(pDisk, pIoCtxParent, false /* fProcessDeferredReqs */);

                /* Unblock the parent */
                pIoCtxParent->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;

                rc = vdIoCtxProcessLocked(pIoCtxParent);

                if (   rc == VINF_VD_ASYNC_IO_FINISHED
                    && ASMAtomicCmpXchgBool(&pIoCtxParent->fComplete, true, false))
                {
                    LogFlowFunc(("Parent I/O context completed pIoCtxParent=%#p rcReq=%Rrc\n", pIoCtxParent, pIoCtxParent->rcReq));
                    bool fFreeParentCtx = RT_BOOL(!(pIoCtxParent->fFlags & VDIOCTX_FLAGS_DONT_FREE));
                    vdIoCtxRootComplete(pDisk, pIoCtxParent);
                    vdThreadFinishWrite(pDisk);

                    if (fFreeParentCtx)
                        vdIoCtxFree(pDisk, pIoCtxParent);
                    vdDiskProcessBlockedIoCtx(pDisk);
                }
                else if (!vdIoCtxIsDiskLockOwner(pDisk, pIoCtx))
                {
                    /* Process any pending writes if the current request didn't cause another growing. */
                    vdDiskProcessBlockedIoCtx(pDisk);
                }
            }
            else
            {
                /* Root context: release the matching read/write accounting. */
                if (pIoCtx->enmTxDir == VDIOCTXTXDIR_FLUSH)
                {
                    vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs */);
                    vdThreadFinishWrite(pDisk);
                }
                else if (   pIoCtx->enmTxDir == VDIOCTXTXDIR_WRITE
                         || pIoCtx->enmTxDir == VDIOCTXTXDIR_DISCARD)
                    vdThreadFinishWrite(pDisk);
                else
                {
                    Assert(pIoCtx->enmTxDir == VDIOCTXTXDIR_READ);
                    vdThreadFinishRead(pDisk);
                }

                LogFlowFunc(("I/O context completed pIoCtx=%#p rcReq=%Rrc\n", pIoCtx, pIoCtx->rcReq));
                vdIoCtxRootComplete(pDisk, pIoCtx);
            }

            if (fFreeCtx)
                vdIoCtxFree(pDisk, pIoCtx);
        }
    }

    return VINF_SUCCESS;
}
3425
/**
 * Internal - Called when user transfer completed.
 *
 * Updates the transfer accounting of the I/O context, invokes the backend
 * completion callback (if any) and continues the context.
 *
 * @returns VBox status code.
 * @param   pIoStorage    The storage the transfer was for.
 * @param   pIoCtx        The I/O context the transfer belongs to.
 * @param   pfnComplete   Optional backend completion callback.
 * @param   pvUser        Opaque user data for the completion callback.
 * @param   cbTransfer    Number of bytes transferred.
 * @param   rcReq         Status of the request.
 */
static int vdUserXferCompleted(PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
                               PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
                               size_t cbTransfer, int rcReq)
{
    int rc = VINF_SUCCESS;
    PVDISK pDisk = pIoCtx->pDisk;

    LogFlowFunc(("pIoStorage=%#p pIoCtx=%#p pfnComplete=%#p pvUser=%#p cbTransfer=%zu rcReq=%Rrc\n",
                 pIoStorage, pIoCtx, pfnComplete, pvUser, cbTransfer, rcReq));

    VD_IS_LOCKED(pDisk);

    /* Account the completed bytes and drop the pending-transfer reference. */
    Assert(pIoCtx->Req.Io.cbTransferLeft >= cbTransfer);
    ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTransfer); Assert(cbTransfer == (uint32_t)cbTransfer);
    ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);

    if (pfnComplete)
        rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);

    /* Only continue when the completion callback didn't start another async transfer. */
    if (RT_SUCCESS(rc))
        rc = vdIoCtxContinue(pIoCtx, rcReq);
    else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        rc = VINF_SUCCESS;

    return rc;
}
3455
/**
 * Internal - Continues all I/O contexts on the given deferred list.
 *
 * Each deferred entry is removed, its meta-transfer reference dropped,
 * the optional backend completion callback run, and the context continued.
 *
 * @param   pIoStorage    The storage the transfers were for.
 * @param   pListWaiting  List of deferred I/O contexts to resume (emptied).
 * @param   pfnComplete   Optional backend completion callback.
 * @param   pvUser        Opaque user data for the completion callback.
 * @param   rcReq         Status of the request.
 */
static void vdIoCtxContinueDeferredList(PVDIOSTORAGE pIoStorage, PRTLISTANCHOR pListWaiting,
                                        PFNVDXFERCOMPLETED pfnComplete, void *pvUser, int rcReq)
{
    LogFlowFunc(("pIoStorage=%#p pListWaiting=%#p pfnComplete=%#p pvUser=%#p rcReq=%Rrc\n",
                 pIoStorage, pListWaiting, pfnComplete, pvUser, rcReq));

    /* Go through the waiting list and continue the I/O contexts. */
    while (!RTListIsEmpty(pListWaiting))
    {
        int rc = VINF_SUCCESS;
        PVDIOCTXDEFERRED pDeferred = RTListGetFirst(pListWaiting, VDIOCTXDEFERRED, NodeDeferred);
        PVDIOCTX pIoCtx = pDeferred->pIoCtx;
        RTListNodeRemove(&pDeferred->NodeDeferred);

        RTMemFree(pDeferred);
        ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);

        if (pfnComplete)
            rc = pfnComplete(pIoStorage->pVDIo->pBackendData, pIoCtx, pvUser, rcReq);

        LogFlow(("Completion callback for I/O context %#p returned %Rrc\n", pIoCtx, rc));

        if (RT_SUCCESS(rc))
        {
            rc = vdIoCtxContinue(pIoCtx, rcReq);
            AssertRC(rc);
        }
        else
            /* The callback started another async transfer; the context resumes later. */
            Assert(rc == VERR_VD_ASYNC_IO_IN_PROGRESS);
    }
}
3487
/**
 * Internal - Called when a meta transfer completed.
 *
 * Resumes all contexts waiting on the transfer, handles the shadow buffer
 * (data written while the original write was still in flight) by triggering
 * a follow-up write, and releases the transfer descriptor when unused.
 *
 * @returns VBox status code (always VINF_SUCCESS currently).
 * @param   pIoStorage    The storage the transfer was for.
 * @param   pfnComplete   Optional backend completion callback.
 * @param   pvUser        Opaque user data for the completion callback.
 * @param   pMetaXfer     The meta transfer descriptor.
 * @param   rcReq         Status of the request.
 */
static int vdMetaXferCompleted(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser,
                               PVDMETAXFER pMetaXfer, int rcReq)
{
    PVDISK pDisk = pIoStorage->pVDIo->pDisk;
    RTLISTANCHOR ListIoCtxWaiting;
    bool fFlush;

    LogFlowFunc(("pIoStorage=%#p pfnComplete=%#p pvUser=%#p pMetaXfer=%#p rcReq=%Rrc\n",
                 pIoStorage, pfnComplete, pvUser, pMetaXfer, rcReq));

    VD_IS_LOCKED(pDisk);

    fFlush = VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_FLUSH;

    if (!fFlush)
    {
        /* Detach the waiter list so continuations can't race onto it. */
        RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);

        if (RT_FAILURE(rcReq))
        {
            /* Remove from the AVL tree. */
            LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
            bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
            Assert(fRemoved); NOREF(fRemoved);
            /* If this was a write check if there is a shadow buffer with updated data. */
            if (pMetaXfer->pbDataShw)
            {
                Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
                Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));
                /* Fail the shadow writers together with the primary waiters. */
                RTListConcatenate(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
                RTMemFree(pMetaXfer->pbDataShw);
                pMetaXfer->pbDataShw = NULL;
            }
            RTMemFree(pMetaXfer);
        }
        else
        {
            /* Increase the reference counter to make sure it doesn't go away before the last context is processed. */
            pMetaXfer->cRefs++;
        }
    }
    else
        RTListMove(&ListIoCtxWaiting, &pMetaXfer->ListIoCtxWaiting);

    VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
    vdIoCtxContinueDeferredList(pIoStorage, &ListIoCtxWaiting, pfnComplete, pvUser, rcReq);

    /*
     * If there is a shadow buffer and the previous write was successful update with the
     * new data and trigger a new write.
     */
    if (   pMetaXfer->pbDataShw
        && RT_SUCCESS(rcReq)
        && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
    {
        LogFlowFunc(("pMetaXfer=%#p Updating from shadow buffer and triggering new write\n", pMetaXfer));
        memcpy(pMetaXfer->abData, pMetaXfer->pbDataShw, pMetaXfer->cbMeta);
        RTMemFree(pMetaXfer->pbDataShw);
        pMetaXfer->pbDataShw = NULL;
        Assert(!RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites));

        /* Setup a new I/O write. */
        PVDIOTASK pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
        if (RT_LIKELY(pIoTask))
        {
            void *pvTask = NULL;
            RTSGSEG Seg;

            Seg.cbSeg = pMetaXfer->cbMeta;
            Seg.pvSeg = pMetaXfer->abData;

            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
            rcReq = pIoStorage->pVDIo->pInterfaceIo->pfnWriteAsync(pIoStorage->pVDIo->pInterfaceIo->Core.pvUser,
                                                                   pIoStorage->pStorage,
                                                                   pMetaXfer->Core.Key, &Seg, 1,
                                                                   pMetaXfer->cbMeta, pIoTask,
                                                                   &pvTask);
            /* Completed synchronously (success or error other than "in progress"). */
            if (   RT_SUCCESS(rcReq)
                || rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else
                /* Async: shadow writers now wait for the new write to complete. */
                RTListMove(&pMetaXfer->ListIoCtxWaiting, &pMetaXfer->ListIoCtxShwWrites);
        }
        else
            rcReq = VERR_NO_MEMORY;

        /* Cleanup if there was an error or the request completed already. */
        if (rcReq != VERR_VD_ASYNC_IO_IN_PROGRESS)
            vdIoCtxContinueDeferredList(pIoStorage, &pMetaXfer->ListIoCtxShwWrites, pfnComplete, pvUser, rcReq);
    }

    /* Remove if not used anymore. */
    if (!fFlush)
    {
        pMetaXfer->cRefs--;
        if (!pMetaXfer->cRefs && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting))
        {
            /* Remove from the AVL tree. */
            LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
            bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
            Assert(fRemoved); NOREF(fRemoved);
            RTMemFree(pMetaXfer);
        }
    }
    else if (fFlush) /* Condition is redundant (fFlush is true here); flush transfers are never in the tree. */
        RTMemFree(pMetaXfer);

    return VINF_SUCCESS;
}
3603
3604/**
3605 * Processes a list of waiting I/O tasks. The disk lock must be held by caller.
3606 *
3607 * @returns nothing.
3608 * @param pDisk The disk to process the list for.
3609 */
3610static void vdIoTaskProcessWaitingList(PVDISK pDisk)
3611{
3612 LogFlowFunc(("pDisk=%#p\n", pDisk));
3613
3614 VD_IS_LOCKED(pDisk);
3615
3616 PVDIOTASK pHead = ASMAtomicXchgPtrT(&pDisk->pIoTasksPendingHead, NULL, PVDIOTASK);
3617
3618 Log(("I/O task list cleared\n"));
3619
3620 /* Reverse order. */
3621 PVDIOTASK pCur = pHead;
3622 pHead = NULL;
3623 while (pCur)
3624 {
3625 PVDIOTASK pInsert = pCur;
3626 pCur = pCur->pNext;
3627 pInsert->pNext = pHead;
3628 pHead = pInsert;
3629 }
3630
3631 while (pHead)
3632 {
3633 PVDIOSTORAGE pIoStorage = pHead->pIoStorage;
3634
3635 if (!pHead->fMeta)
3636 vdUserXferCompleted(pIoStorage, pHead->Type.User.pIoCtx,
3637 pHead->pfnComplete, pHead->pvUser,
3638 pHead->Type.User.cbTransfer, pHead->rcReq);
3639 else
3640 vdMetaXferCompleted(pIoStorage, pHead->pfnComplete, pHead->pvUser,
3641 pHead->Type.Meta.pMetaXfer, pHead->rcReq);
3642
3643 pCur = pHead;
3644 pHead = pHead->pNext;
3645 vdIoTaskFree(pDisk, pCur);
3646 }
3647}
3648
3649/**
3650 * Process any I/O context on the halted list.
3651 *
3652 * @returns nothing.
3653 * @param pDisk The disk.
3654 */
3655static void vdIoCtxProcessHaltedList(PVDISK pDisk)
3656{
3657 LogFlowFunc(("pDisk=%#p\n", pDisk));
3658
3659 VD_IS_LOCKED(pDisk);
3660
3661 /* Get the waiting list and process it in FIFO order. */
3662 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHaltedHead, NULL, PVDIOCTX);
3663
3664 /* Reverse it. */
3665 PVDIOCTX pCur = pIoCtxHead;
3666 pIoCtxHead = NULL;
3667 while (pCur)
3668 {
3669 PVDIOCTX pInsert = pCur;
3670 pCur = pCur->pIoCtxNext;
3671 pInsert->pIoCtxNext = pIoCtxHead;
3672 pIoCtxHead = pInsert;
3673 }
3674
3675 /* Process now. */
3676 pCur = pIoCtxHead;
3677 while (pCur)
3678 {
3679 PVDIOCTX pTmp = pCur;
3680
3681 pCur = pCur->pIoCtxNext;
3682 pTmp->pIoCtxNext = NULL;
3683
3684 /* Continue */
3685 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
3686 vdIoCtxContinue(pTmp, pTmp->rcReq);
3687 }
3688}
3689
/**
 * Unlock the disk and process pending tasks.
 *
 * Drains the waiting task, halted context and waiting context lists before
 * releasing the lock, then re-checks the lists to close the race with other
 * threads that queued work while we were draining.
 *
 * @returns VBox status code.
 * @param   pDisk      The disk to unlock.
 * @param   pIoCtxRc   The I/O context to get the status code from, optional.
 */
static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc)
{
    int rc = VINF_SUCCESS;

    VD_IS_LOCKED(pDisk);

    /*
     * Process the list of waiting I/O tasks first
     * because they might complete I/O contexts.
     * Same for the list of halted I/O contexts.
     * Afterwards comes the list of new I/O contexts.
     */
    vdIoTaskProcessWaitingList(pDisk);
    vdIoCtxProcessHaltedList(pDisk);
    rc = vdDiskProcessWaitingIoCtx(pDisk, pIoCtxRc);
    ASMAtomicXchgBool(&pDisk->fLocked, false);

    /*
     * Need to check for new I/O tasks and waiting I/O contexts now
     * again as other threads might added them while we processed
     * previous lists.
     */
    while (   ASMAtomicUoReadPtrT(&pDisk->pIoCtxHead, PVDIOCTX) != NULL
           || ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK) != NULL
           || ASMAtomicUoReadPtrT(&pDisk->pIoCtxHaltedHead, PVDIOCTX) != NULL)
    {
        /* Try lock disk again. */
        if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
        {
            vdIoTaskProcessWaitingList(pDisk);
            vdIoCtxProcessHaltedList(pDisk);
            vdDiskProcessWaitingIoCtx(pDisk, NULL);
            ASMAtomicXchgBool(&pDisk->fLocked, false);
        }
        else /* Let the other thread process everything when it unlocks the disk. */
            break;
    }

    return rc;
}
3737
/**
 * Try to lock the disk to complete processing of the I/O task.
 * The completion is deferred if the disk is locked already.
 *
 * The task is pushed onto the lock-free pending list with a CAS loop; if the
 * disk lock can be taken, unlocking it drains all pending lists including
 * the task just queued.
 *
 * @param   pIoTask    The I/O task to complete.
 */
static void vdXferTryLockDiskDeferIoTask(PVDIOTASK pIoTask)
{
    PVDIOSTORAGE pIoStorage = pIoTask->pIoStorage;
    PVDISK pDisk = pIoStorage->pVDIo->pDisk;

    Log(("Deferring I/O task pIoTask=%p\n", pIoTask));

    /* Put it on the waiting list. */
    PVDIOTASK pNext = ASMAtomicUoReadPtrT(&pDisk->pIoTasksPendingHead, PVDIOTASK);
    PVDIOTASK pHeadOld;
    pIoTask->pNext = pNext;
    /* Lock-free push: retry with the observed head until the CAS succeeds. */
    while (!ASMAtomicCmpXchgExPtr(&pDisk->pIoTasksPendingHead, pIoTask, pNext, &pHeadOld))
    {
        pNext = pHeadOld;
        Assert(pNext != pIoTask);
        pIoTask->pNext = pNext;
        ASMNopPause();
    }

    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    {
        /* Release disk lock, it will take care of processing all lists. */
        vdDiskUnlock(pDisk, NULL);
    }
}
3770
3771static DECLCALLBACK(int) vdIOIntReqCompleted(void *pvUser, int rcReq)
3772{
3773 PVDIOTASK pIoTask = (PVDIOTASK)pvUser;
3774
3775 LogFlowFunc(("Task completed pIoTask=%#p\n", pIoTask));
3776
3777 pIoTask->rcReq = rcReq;
3778 vdXferTryLockDiskDeferIoTask(pIoTask);
3779 return VINF_SUCCESS;
3780}
3781
3782/**
3783 * VD I/O interface callback for opening a file.
3784 */
3785static DECLCALLBACK(int) vdIOIntOpen(void *pvUser, const char *pszLocation,
3786 unsigned uOpenFlags, PPVDIOSTORAGE ppIoStorage)
3787{
3788 int rc = VINF_SUCCESS;
3789 PVDIO pVDIo = (PVDIO)pvUser;
3790 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
3791
3792 if (!pIoStorage)
3793 return VERR_NO_MEMORY;
3794
3795 /* Create the AVl tree. */
3796 pIoStorage->pTreeMetaXfers = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
3797 if (pIoStorage->pTreeMetaXfers)
3798 {
3799 rc = pVDIo->pInterfaceIo->pfnOpen(pVDIo->pInterfaceIo->Core.pvUser,
3800 pszLocation, uOpenFlags,
3801 vdIOIntReqCompleted,
3802 &pIoStorage->pStorage);
3803 if (RT_SUCCESS(rc))
3804 {
3805 pIoStorage->pVDIo = pVDIo;
3806 *ppIoStorage = pIoStorage;
3807 return VINF_SUCCESS;
3808 }
3809
3810 RTMemFree(pIoStorage->pTreeMetaXfers);
3811 }
3812 else
3813 rc = VERR_NO_MEMORY;
3814
3815 RTMemFree(pIoStorage);
3816 return rc;
3817}
3818
3819static DECLCALLBACK(int) vdIOIntTreeMetaXferDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
3820{
3821 RT_NOREF2(pNode, pvUser);
3822 AssertMsgFailed(("Tree should be empty at this point!\n"));
3823 return VINF_SUCCESS;
3824}
3825
3826static DECLCALLBACK(int) vdIOIntClose(void *pvUser, PVDIOSTORAGE pIoStorage)
3827{
3828 int rc = VINF_SUCCESS;
3829 PVDIO pVDIo = (PVDIO)pvUser;
3830
3831 /* We free everything here, even if closing the file failed for some reason. */
3832 rc = pVDIo->pInterfaceIo->pfnClose(pVDIo->pInterfaceIo->Core.pvUser, pIoStorage->pStorage);
3833 RTAvlrFileOffsetDestroy(pIoStorage->pTreeMetaXfers, vdIOIntTreeMetaXferDestroy, NULL);
3834 RTMemFree(pIoStorage->pTreeMetaXfers);
3835 RTMemFree(pIoStorage);
3836 return rc;
3837}
3838
3839static DECLCALLBACK(int) vdIOIntDelete(void *pvUser, const char *pcszFilename)
3840{
3841 PVDIO pVDIo = (PVDIO)pvUser;
3842 return pVDIo->pInterfaceIo->pfnDelete(pVDIo->pInterfaceIo->Core.pvUser,
3843 pcszFilename);
3844}
3845
3846static DECLCALLBACK(int) vdIOIntMove(void *pvUser, const char *pcszSrc, const char *pcszDst,
3847 unsigned fMove)
3848{
3849 PVDIO pVDIo = (PVDIO)pvUser;
3850 return pVDIo->pInterfaceIo->pfnMove(pVDIo->pInterfaceIo->Core.pvUser,
3851 pcszSrc, pcszDst, fMove);
3852}
3853
3854static DECLCALLBACK(int) vdIOIntGetFreeSpace(void *pvUser, const char *pcszFilename,
3855 int64_t *pcbFreeSpace)
3856{
3857 PVDIO pVDIo = (PVDIO)pvUser;
3858 return pVDIo->pInterfaceIo->pfnGetFreeSpace(pVDIo->pInterfaceIo->Core.pvUser,
3859 pcszFilename, pcbFreeSpace);
3860}
3861
3862static DECLCALLBACK(int) vdIOIntGetModificationTime(void *pvUser, const char *pcszFilename,
3863 PRTTIMESPEC pModificationTime)
3864{
3865 PVDIO pVDIo = (PVDIO)pvUser;
3866 return pVDIo->pInterfaceIo->pfnGetModificationTime(pVDIo->pInterfaceIo->Core.pvUser,
3867 pcszFilename, pModificationTime);
3868}
3869
3870static DECLCALLBACK(int) vdIOIntGetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3871 uint64_t *pcbSize)
3872{
3873 PVDIO pVDIo = (PVDIO)pvUser;
3874 return pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
3875 pIoStorage->pStorage, pcbSize);
3876}
3877
3878static DECLCALLBACK(int) vdIOIntSetSize(void *pvUser, PVDIOSTORAGE pIoStorage,
3879 uint64_t cbSize)
3880{
3881 PVDIO pVDIo = (PVDIO)pvUser;
3882 return pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
3883 pIoStorage->pStorage, cbSize);
3884}
3885
/**
 * VD I/O interface callback for setting the allocation size of a storage
 * handle.
 *
 * If the lower level interface does not support optimized allocation
 * (VERR_NOT_SUPPORTED) the space is allocated manually: grows are performed
 * by writing zero-filled chunks up to the requested size (reporting
 * progress), shrinks fall back to a plain set-size.
 *
 * @returns VBox status code.
 * @param   pvUser          The VDIO instance.
 * @param   pIoStorage      The storage handle.
 * @param   cbSize          Requested allocation size in bytes.
 * @param   fFlags          Allocation flags passed to the lower level interface.
 * @param   pIfProgress     Optional progress interface.
 * @param   uPercentStart   Progress range start.
 * @param   uPercentSpan    Progress range span.
 */
static DECLCALLBACK(int) vdIOIntSetAllocationSize(void *pvUser, PVDIOSTORAGE pIoStorage,
                                                  uint64_t cbSize, uint32_t fFlags,
                                                  PVDINTERFACEPROGRESS pIfProgress,
                                                  unsigned uPercentStart, unsigned uPercentSpan)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    int rc = pVDIo->pInterfaceIo->pfnSetAllocationSize(pVDIo->pInterfaceIo->Core.pvUser,
                                                       pIoStorage->pStorage, cbSize, fFlags);
    if (rc == VERR_NOT_SUPPORTED)
    {
        /* Fallback if the underlying medium does not support optimized storage allocation. */
        uint64_t cbSizeCur = 0;
        rc = pVDIo->pInterfaceIo->pfnGetSize(pVDIo->pInterfaceIo->Core.pvUser,
                                             pIoStorage->pStorage, &cbSizeCur);
        if (RT_SUCCESS(rc))
        {
            if (cbSizeCur < cbSize)
            {
                /* Grow: append zero-filled chunks until the target size is reached. */
                const size_t cbBuf = 128 * _1K;
                void *pvBuf = RTMemTmpAllocZ(cbBuf);
                if (RT_LIKELY(pvBuf))
                {
                    uint64_t cbFill = cbSize - cbSizeCur;
                    uint64_t uOff = 0;

                    /* Write data to all blocks. */
                    while (   uOff < cbFill
                           && RT_SUCCESS(rc))
                    {
                        size_t cbChunk = (size_t)RT_MIN(cbFill - uOff, cbBuf);

                        rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
                                                               pIoStorage->pStorage, cbSizeCur + uOff,
                                                               pvBuf, cbChunk, NULL);
                        if (RT_SUCCESS(rc))
                        {
                            uOff += cbChunk;

                            /* Report progress proportional to the filled amount. */
                            rc = vdIfProgress(pIfProgress, uPercentStart + uOff * uPercentSpan / cbFill);
                        }
                    }

                    RTMemTmpFree(pvBuf);
                }
                else
                    rc = VERR_NO_MEMORY;
            }
            else if (cbSizeCur > cbSize)
                /* Shrink: a plain truncation is sufficient. */
                rc = pVDIo->pInterfaceIo->pfnSetSize(pVDIo->pInterfaceIo->Core.pvUser,
                                                     pIoStorage->pStorage, cbSize);
        }
    }

    if (RT_SUCCESS(rc))
        rc = vdIfProgress(pIfProgress, uPercentStart + uPercentSpan);

    return rc;
}
3944
/**
 * VD I/O interface callback for reading user data.
 *
 * Synchronous contexts (or interfaces without async support) read directly;
 * otherwise the request is split into I/O tasks with scatter/gather arrays
 * built from the context's S/G buffer and handed to the async interface.
 *
 * @returns VBox status code.
 * @param   pvUser        The VDIO instance.
 * @param   pIoStorage    The storage handle to read from.
 * @param   uOffset       Byte offset to start reading at.
 * @param   pIoCtx        The I/O context providing the destination buffer.
 * @param   cbRead        Number of bytes to read.
 */
static DECLCALLBACK(int) vdIOIntReadUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                         PVDIOCTX pIoCtx, size_t cbRead)
{
    int rc = VINF_SUCCESS;
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbRead=%u\n",
                 pvUser, pIoStorage, uOffset, pIoCtx, cbRead));

    /** @todo Enable check for sync I/O later. */
    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    Assert(cbRead > 0);

    if (   (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
        || !pVDIo->pInterfaceIo->pfnReadAsync)
    {
        RTSGSEG Seg;
        unsigned cSegments = 1;
        size_t cbTaskRead = 0;

        /* Synchronous I/O contexts only have one buffer segment. */
        AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
                        ("Invalid number of buffer segments for synchronous I/O context"),
                        VERR_INVALID_PARAMETER);

        cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbRead);
        Assert(cbRead == cbTaskRead);
        Assert(cSegments == 1);
        rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
                                              pIoStorage->pStorage, uOffset,
                                              Seg.pvSeg, cbRead, NULL);
        if (RT_SUCCESS(rc))
        {
            Assert(cbRead == (uint32_t)cbRead);
            ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbRead);
        }
    }
    else
    {
        /* Build the S/G array and spawn a new I/O task */
        while (cbRead)
        {
            RTSGSEG  aSeg[VD_IO_TASK_SEGMENTS_MAX];
            unsigned cSegments  = VD_IO_TASK_SEGMENTS_MAX;
            size_t   cbTaskRead = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbRead);

            Assert(cSegments > 0);
            Assert(cbTaskRead > 0);
            AssertMsg(cbTaskRead <= cbRead, ("Invalid number of bytes to read\n"));

            LogFlow(("Reading %u bytes into %u segments\n", cbTaskRead, cSegments));

#ifdef RT_STRICT
            for (unsigned i = 0; i < cSegments; i++)
                AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
                          ("Segment %u is invalid\n", i));
#endif

            Assert(cbTaskRead == (uint32_t)cbTaskRead);
            PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, NULL, NULL, pIoCtx, (uint32_t)cbTaskRead);

            if (!pIoTask)
                return VERR_NO_MEMORY;

            ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);

            void *pvTask;
            Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
            rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                   pIoStorage->pStorage, uOffset,
                                                   aSeg, cSegments, cbTaskRead, pIoTask,
                                                   &pvTask);
            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously: account the bytes and free the task. */
                AssertMsg(cbTaskRead <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
                ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskRead);
                ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                /* Real error: undo the pending-transfer accounting and stop. */
                ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
                break;
            }

            uOffset += cbTaskRead;
            cbRead  -= cbTaskRead;
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4042
4043static DECLCALLBACK(int) vdIOIntWriteUser(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
4044 PVDIOCTX pIoCtx, size_t cbWrite, PFNVDXFERCOMPLETED pfnComplete,
4045 void *pvCompleteUser)
4046{
4047 int rc = VINF_SUCCESS;
4048 PVDIO pVDIo = (PVDIO)pvUser;
4049 PVDISK pDisk = pVDIo->pDisk;
4050
4051 LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pIoCtx=%#p cbWrite=%u\n",
4052 pvUser, pIoStorage, uOffset, pIoCtx, cbWrite));
4053
4054 /** @todo Enable check for sync I/O later. */
4055 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4056 VD_IS_LOCKED(pDisk);
4057
4058 Assert(cbWrite > 0);
4059
4060 if ( (pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC)
4061 || !pVDIo->pInterfaceIo->pfnWriteAsync)
4062 {
4063 RTSGSEG Seg;
4064 unsigned cSegments = 1;
4065 size_t cbTaskWrite = 0;
4066
4067 /* Synchronous I/O contexts only have one buffer segment. */
4068 AssertMsgReturn(pIoCtx->Req.Io.SgBuf.cSegs == 1,
4069 ("Invalid number of buffer segments for synchronous I/O context"),
4070 VERR_INVALID_PARAMETER);
4071
4072 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, &Seg, &cSegments, cbWrite);
4073 Assert(cbWrite == cbTaskWrite);
4074 Assert(cSegments == 1);
4075 rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
4076 pIoStorage->pStorage, uOffset,
4077 Seg.pvSeg, cbWrite, NULL);
4078 if (RT_SUCCESS(rc))
4079 {
4080 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbWrite);
4081 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbWrite);
4082 }
4083 }
4084 else
4085 {
4086 /* Build the S/G array and spawn a new I/O task */
4087 while (cbWrite)
4088 {
4089 RTSGSEG aSeg[VD_IO_TASK_SEGMENTS_MAX];
4090 unsigned cSegments = VD_IO_TASK_SEGMENTS_MAX;
4091 size_t cbTaskWrite = 0;
4092
4093 cbTaskWrite = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, aSeg, &cSegments, cbWrite);
4094
4095 Assert(cSegments > 0);
4096 Assert(cbTaskWrite > 0);
4097 AssertMsg(cbTaskWrite <= cbWrite, ("Invalid number of bytes to write\n"));
4098
4099 LogFlow(("Writing %u bytes from %u segments\n", cbTaskWrite, cSegments));
4100
4101#ifdef DEBUG
4102 for (unsigned i = 0; i < cSegments; i++)
4103 AssertMsg(aSeg[i].pvSeg && !(aSeg[i].cbSeg % 512),
4104 ("Segment %u is invalid\n", i));
4105#endif
4106
4107 Assert(cbTaskWrite == (uint32_t)cbTaskWrite);
4108 PVDIOTASK pIoTask = vdIoTaskUserAlloc(pIoStorage, pfnComplete, pvCompleteUser, pIoCtx, (uint32_t)cbTaskWrite);
4109
4110 if (!pIoTask)
4111 return VERR_NO_MEMORY;
4112
4113 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
4114
4115 void *pvTask;
4116 Log(("Spawning pIoTask=%p pIoCtx=%p\n", pIoTask, pIoCtx));
4117 rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
4118 pIoStorage->pStorage,
4119 uOffset, aSeg, cSegments,
4120 cbTaskWrite, pIoTask, &pvTask);
4121 if (RT_SUCCESS(rc))
4122 {
4123 AssertMsg(cbTaskWrite <= pIoCtx->Req.Io.cbTransferLeft, ("Impossible!\n"));
4124 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbTaskWrite);
4125 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4126 vdIoTaskFree(pDisk, pIoTask);
4127 }
4128 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4129 {
4130 ASMAtomicDecU32(&pIoCtx->cDataTransfersPending);
4131 vdIoTaskFree(pDisk, pIoTask);
4132 break;
4133 }
4134
4135 uOffset += cbTaskWrite;
4136 cbWrite -= cbTaskWrite;
4137 }
4138 }
4139
4140 LogFlowFunc(("returns rc=%Rrc\n", rc));
4141 return rc;
4142}
4143
/**
 * VD I/O interface callback for reading metadata, with caching of in-flight
 * transfers in the per-storage AVL tree (pTreeMetaXfers) keyed by offset.
 *
 * @returns VBox status code. VERR_VD_NOT_ENOUGH_METADATA indicates the read
 *          was queued behind a pending transfer; the I/O context will be
 *          resumed when the data arrives.
 * @param   pvUser          Opaque user data, points to the VDIO instance.
 * @param   pIoStorage      The storage to read from.
 * @param   uOffset         Byte offset of the metadata.
 * @param   pvBuf           Destination buffer.
 * @param   cbRead          Number of bytes to read.
 * @param   pIoCtx          I/O context, NULL for a purely synchronous read.
 * @param   ppMetaXfer      Where to store the meta transfer handle on success
 *                          (released later via vdIOIntMetaXferRelease).
 * @param   pfnComplete     Completion callback for the async transfer, optional.
 * @param   pvCompleteUser  Opaque user data passed to pfnComplete.
 */
static DECLCALLBACK(int) vdIOIntReadMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                         void *pvBuf, size_t cbRead, PVDIOCTX pIoCtx,
                                         PPVDMETAXFER ppMetaXfer, PFNVDXFERCOMPLETED pfnComplete,
                                         void *pvCompleteUser)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    int rc = VINF_SUCCESS;
    RTSGSEG Seg;
    PVDIOTASK pIoTask;
    PVDMETAXFER pMetaXfer = NULL;
    void *pvTask = NULL;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbRead=%u\n",
                 pvUser, pIoStorage, uOffset, pvBuf, cbRead));

    /* Without an I/O context the call must be fully synchronous, i.e. no
     * async-only out parameters may be supplied. */
    AssertMsgReturn(   pIoCtx
                    || (!ppMetaXfer && !pfnComplete && !pvCompleteUser),
                    ("A synchronous metadata read is requested but the parameters are wrong\n"),
                    VERR_INVALID_POINTER);

    /** @todo Enable check for sync I/O later. */
    if (   pIoCtx
        && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    if (   !pIoCtx
        || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC
        || !pVDIo->pInterfaceIo->pfnReadAsync)
    {
        /* Handle synchronous metadata I/O. */
        /** @todo Integrate with metadata transfers below. */
        rc = pVDIo->pInterfaceIo->pfnReadSync(pVDIo->pInterfaceIo->Core.pvUser,
                                              pIoStorage->pStorage, uOffset,
                                              pvBuf, cbRead, NULL);
        if (ppMetaXfer)
            *ppMetaXfer = NULL;
    }
    else
    {
        /* Look up a cached/in-flight transfer for this offset first. */
        pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
        if (!pMetaXfer)
        {
#ifdef RT_STRICT
            /* Sanity: the nearest transfer below must not overlap [uOffset, uOffset+cbRead). */
            pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGetBestFit(pIoStorage->pTreeMetaXfers, uOffset, false /* fAbove */);
            AssertMsg(!pMetaXfer || (pMetaXfer->Core.Key + (RTFOFF)pMetaXfer->cbMeta <= (RTFOFF)uOffset),
                      ("Overlapping meta transfers!\n"));
#endif

            /* Allocate a new meta transfer. */
            pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbRead);
            if (!pMetaXfer)
                return VERR_NO_MEMORY;

            pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
            if (!pIoTask)
            {
                RTMemFree(pMetaXfer);
                return VERR_NO_MEMORY;
            }

            /* The transfer reads into the buffer embedded in the meta transfer. */
            Seg.cbSeg = cbRead;
            Seg.pvSeg = pMetaXfer->abData;

            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_READ);
            rc = pVDIo->pInterfaceIo->pfnReadAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                   pIoStorage->pStorage,
                                                   uOffset, &Seg, 1,
                                                   cbRead, pIoTask, &pvTask);

            /* Insert into the tree both on synchronous completion and while
             * still pending, so concurrent readers of the same offset find it. */
            if (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
                Assert(fInserted); NOREF(fInserted);
            }
            else
                RTMemFree(pMetaXfer);

            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously: data is available right away. */
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                vdIoTaskFree(pDisk, pIoTask);
            }
            else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS && !pfnComplete)
                rc = VERR_VD_NOT_ENOUGH_METADATA;
        }

        Assert(RT_VALID_PTR(pMetaXfer) || RT_FAILURE(rc));

        if (RT_SUCCESS(rc) || rc == VERR_VD_NOT_ENOUGH_METADATA || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
        {
            /* If it is pending add the request to the list. */
            if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_READ)
            {
                /* NOTE(review): an RTMemAllocZ failure here is only caught by
                 * AssertPtr; the deferred node would be dereferenced below. */
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                AssertPtr(pDeferred);

                RTListInit(&pDeferred->NodeDeferred);
                pDeferred->pIoCtx = pIoCtx;

                ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
                RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
                rc = VERR_VD_NOT_ENOUGH_METADATA;
            }
            else
            {
                /* Transfer the data. Prefer the shadow buffer which holds the
                 * most recent (not yet written back) content when present. */
                pMetaXfer->cRefs++;
                Assert(pMetaXfer->cbMeta >= cbRead);
                Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
                if (pMetaXfer->pbDataShw)
                    memcpy(pvBuf, pMetaXfer->pbDataShw, cbRead);
                else
                    memcpy(pvBuf, pMetaXfer->abData, cbRead);
                *ppMetaXfer = pMetaXfer;
            }
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4266
/**
 * VD I/O interface callback for writing metadata. Coalesces writes to the
 * same offset through the meta transfer tree: while a write is in flight,
 * subsequent writes go into a shadow buffer that is flushed afterwards.
 *
 * @returns VBox status code, VERR_VD_ASYNC_IO_IN_PROGRESS if the write was
 *          submitted asynchronously and is still pending.
 * @param   pvUser          Opaque user data, points to the VDIO instance.
 * @param   pIoStorage      The storage to write to.
 * @param   uOffset         Byte offset of the metadata.
 * @param   pvBuf           Source buffer.
 * @param   cbWrite         Number of bytes to write.
 * @param   pIoCtx          I/O context, NULL for a purely synchronous write.
 * @param   pfnComplete     Completion callback for the async transfer, optional.
 * @param   pvCompleteUser  Opaque user data passed to pfnComplete.
 */
static DECLCALLBACK(int) vdIOIntWriteMeta(void *pvUser, PVDIOSTORAGE pIoStorage, uint64_t uOffset,
                                          const void *pvBuf, size_t cbWrite, PVDIOCTX pIoCtx,
                                          PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    int rc = VINF_SUCCESS;
    RTSGSEG Seg;
    PVDIOTASK pIoTask;
    PVDMETAXFER pMetaXfer = NULL;
    bool fInTree = false;          /* Whether pMetaXfer is already linked in the AVL tree. */
    void *pvTask = NULL;

    LogFlowFunc(("pvUser=%#p pIoStorage=%#p uOffset=%llu pvBuf=%#p cbWrite=%u\n",
                 pvUser, pIoStorage, uOffset, pvBuf, cbWrite));

    /* Without an I/O context the call must be fully synchronous. */
    AssertMsgReturn(   pIoCtx
                    || (!pfnComplete && !pvCompleteUser),
                    ("A synchronous metadata write is requested but the parameters are wrong\n"),
                    VERR_INVALID_POINTER);

    /** @todo Enable check for sync I/O later. */
    if (   pIoCtx
        && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);

    if (   !pIoCtx
        || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC
        || !pVDIo->pInterfaceIo->pfnWriteAsync)
    {
        /* Handle synchronous metadata I/O. */
        /** @todo Integrate with metadata transfers below. */
        rc = pVDIo->pInterfaceIo->pfnWriteSync(pVDIo->pInterfaceIo->Core.pvUser,
                                               pIoStorage->pStorage, uOffset,
                                               pvBuf, cbWrite, NULL);
    }
    else
    {
        /* Reuse an existing transfer for this offset if there is one. */
        pMetaXfer = (PVDMETAXFER)RTAvlrFileOffsetGet(pIoStorage->pTreeMetaXfers, uOffset);
        if (!pMetaXfer)
        {
            /* Allocate a new meta transfer. */
            pMetaXfer = vdMetaXferAlloc(pIoStorage, uOffset, cbWrite);
            if (!pMetaXfer)
                return VERR_NO_MEMORY;
        }
        else
        {
            Assert(pMetaXfer->cbMeta >= cbWrite);
            Assert(pMetaXfer->Core.Key == (RTFOFF)uOffset);
            fInTree = true;
        }

        if (VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
        {
            /* No transfer in flight for this offset: submit the write now. */
            pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvCompleteUser, pMetaXfer);
            if (!pIoTask)
            {
                RTMemFree(pMetaXfer);
                return VERR_NO_MEMORY;
            }

            memcpy(pMetaXfer->abData, pvBuf, cbWrite);
            Seg.cbSeg = cbWrite;
            Seg.pvSeg = pMetaXfer->abData;

            ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);

            VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_WRITE);
            rc = pVDIo->pInterfaceIo->pfnWriteAsync(pVDIo->pInterfaceIo->Core.pvUser,
                                                    pIoStorage->pStorage,
                                                    uOffset, &Seg, 1, cbWrite, pIoTask,
                                                    &pvTask);
            if (RT_SUCCESS(rc))
            {
                /* Completed synchronously: undo accounting; drop the transfer
                 * from the tree if nobody holds a reference to it. */
                VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
                ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
                vdIoTaskFree(pDisk, pIoTask);
                if (fInTree && !pMetaXfer->cRefs)
                {
                    LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
                    bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
                    AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);
                    RTMemFree(pMetaXfer);
                    pMetaXfer = NULL;
                }
            }
            else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
            {
                /* Pending: park the I/O context on the transfer's wait list.
                 * NOTE(review): RTMemAllocZ failure is only caught by AssertPtr. */
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                AssertPtr(pDeferred);

                RTListInit(&pDeferred->NodeDeferred);
                pDeferred->pIoCtx = pIoCtx;

                if (!fInTree)
                {
                    bool fInserted = RTAvlrFileOffsetInsert(pIoStorage->pTreeMetaXfers, &pMetaXfer->Core);
                    Assert(fInserted); NOREF(fInserted);
                }

                RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
            }
            else
            {
                RTMemFree(pMetaXfer);
                pMetaXfer = NULL;
            }
        }
        else
        {
            /* I/O is in progress, update shadow buffer and add to waiting list. */
            Assert(VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
            if (!pMetaXfer->pbDataShw)
            {
                /* Allocate shadow buffer and set initial state. */
                LogFlowFunc(("pMetaXfer=%#p Creating shadow buffer\n", pMetaXfer));
                pMetaXfer->pbDataShw = (uint8_t *)RTMemAlloc(pMetaXfer->cbMeta);
                if (RT_LIKELY(pMetaXfer->pbDataShw))
                    memcpy(pMetaXfer->pbDataShw, pMetaXfer->abData, pMetaXfer->cbMeta);
                else
                    rc = VERR_NO_MEMORY;
            }

            if (RT_SUCCESS(rc))
            {
                /* Update with written data and append to waiting list. */
                PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
                if (pDeferred)
                {
                    LogFlowFunc(("pMetaXfer=%#p Updating shadow buffer\n", pMetaXfer));

                    RTListInit(&pDeferred->NodeDeferred);
                    pDeferred->pIoCtx = pIoCtx;
                    ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
                    memcpy(pMetaXfer->pbDataShw, pvBuf, cbWrite);
                    RTListAppend(&pMetaXfer->ListIoCtxShwWrites, &pDeferred->NodeDeferred);
                }
                else
                {
                    /*
                     * Free shadow buffer if there is no one depending on it, i.e.
                     * we just allocated it.
                     */
                    if (RTListIsEmpty(&pMetaXfer->ListIoCtxShwWrites))
                    {
                        RTMemFree(pMetaXfer->pbDataShw);
                        pMetaXfer->pbDataShw = NULL;
                    }
                    rc = VERR_NO_MEMORY;
                }
            }
        }
    }

    LogFlowFunc(("returns rc=%Rrc\n", rc));
    return rc;
}
4425
/**
 * VD I/O interface callback releasing a reference to a metadata transfer
 * obtained from vdIOIntReadMeta; frees the transfer and removes it from the
 * per-storage tree when the last reference is dropped and no I/O is pending.
 *
 * @param   pvUser     Opaque user data, points to the VDIO instance.
 * @param   pMetaXfer  The meta transfer to release, NULL is allowed.
 */
static DECLCALLBACK(void) vdIOIntMetaXferRelease(void *pvUser, PVDMETAXFER pMetaXfer)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    PVDIOSTORAGE pIoStorage;

    /*
     * It is possible that we get called with a NULL metadata xfer handle
     * for synchronous I/O. Just exit.
     */
    if (!pMetaXfer)
        return;

    pIoStorage = pMetaXfer->pIoStorage;

    VD_IS_LOCKED(pDisk);

    /* A referenced transfer may be idle or have a write-back in flight, but
     * never a read (reads complete before a reference is handed out). */
    Assert(   VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE
           || VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_WRITE);
    Assert(pMetaXfer->cRefs > 0);

    pMetaXfer->cRefs--;
    /* Only destroy when idle: no references, no waiters, no transfer in flight. */
    if (   !pMetaXfer->cRefs
        && RTListIsEmpty(&pMetaXfer->ListIoCtxWaiting)
        && VDMETAXFER_TXDIR_GET(pMetaXfer->fFlags) == VDMETAXFER_TXDIR_NONE)
    {
        /* Free the meta data entry. */
        LogFlow(("Removing meta xfer=%#p\n", pMetaXfer));
        bool fRemoved = RTAvlrFileOffsetRemove(pIoStorage->pTreeMetaXfers, pMetaXfer->Core.Key) != NULL;
        AssertMsg(fRemoved, ("Metadata transfer wasn't removed\n")); NOREF(fRemoved);

        RTMemFree(pMetaXfer);
    }
}
4460
4461static DECLCALLBACK(int) vdIOIntFlush(void *pvUser, PVDIOSTORAGE pIoStorage, PVDIOCTX pIoCtx,
4462 PFNVDXFERCOMPLETED pfnComplete, void *pvCompleteUser)
4463{
4464 PVDIO pVDIo = (PVDIO)pvUser;
4465 PVDISK pDisk = pVDIo->pDisk;
4466 int rc = VINF_SUCCESS;
4467 PVDIOTASK pIoTask;
4468 PVDMETAXFER pMetaXfer = NULL;
4469 void *pvTask = NULL;
4470
4471 LogFlowFunc(("pvUser=%#p pIoStorage=%#p pIoCtx=%#p\n",
4472 pvUser, pIoStorage, pIoCtx));
4473
4474 AssertMsgReturn( pIoCtx
4475 || (!pfnComplete && !pvCompleteUser),
4476 ("A synchronous metadata write is requested but the parameters are wrong\n"),
4477 VERR_INVALID_POINTER);
4478
4479 /** @todo Enable check for sync I/O later. */
4480 if ( pIoCtx
4481 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4482 VD_IS_LOCKED(pDisk);
4483
4484 if (pVDIo->fIgnoreFlush)
4485 return VINF_SUCCESS;
4486
4487 if ( !pIoCtx
4488 || pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC
4489 || !pVDIo->pInterfaceIo->pfnFlushAsync)
4490 {
4491 /* Handle synchronous flushes. */
4492 /** @todo Integrate with metadata transfers below. */
4493 rc = pVDIo->pInterfaceIo->pfnFlushSync(pVDIo->pInterfaceIo->Core.pvUser,
4494 pIoStorage->pStorage);
4495 }
4496 else
4497 {
4498 /* Allocate a new meta transfer. */
4499 pMetaXfer = vdMetaXferAlloc(pIoStorage, 0, 0);
4500 if (!pMetaXfer)
4501 return VERR_NO_MEMORY;
4502
4503 pIoTask = vdIoTaskMetaAlloc(pIoStorage, pfnComplete, pvUser, pMetaXfer);
4504 if (!pIoTask)
4505 {
4506 RTMemFree(pMetaXfer);
4507 return VERR_NO_MEMORY;
4508 }
4509
4510 ASMAtomicIncU32(&pIoCtx->cMetaTransfersPending);
4511
4512 PVDIOCTXDEFERRED pDeferred = (PVDIOCTXDEFERRED)RTMemAllocZ(sizeof(VDIOCTXDEFERRED));
4513 AssertPtr(pDeferred);
4514
4515 RTListInit(&pDeferred->NodeDeferred);
4516 pDeferred->pIoCtx = pIoCtx;
4517
4518 RTListAppend(&pMetaXfer->ListIoCtxWaiting, &pDeferred->NodeDeferred);
4519 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_FLUSH);
4520 rc = pVDIo->pInterfaceIo->pfnFlushAsync(pVDIo->pInterfaceIo->Core.pvUser,
4521 pIoStorage->pStorage,
4522 pIoTask, &pvTask);
4523 if (RT_SUCCESS(rc))
4524 {
4525 VDMETAXFER_TXDIR_SET(pMetaXfer->fFlags, VDMETAXFER_TXDIR_NONE);
4526 ASMAtomicDecU32(&pIoCtx->cMetaTransfersPending);
4527 vdIoTaskFree(pDisk, pIoTask);
4528 RTMemFree(pDeferred);
4529 RTMemFree(pMetaXfer);
4530 }
4531 else if (rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
4532 RTMemFree(pMetaXfer);
4533 }
4534
4535 LogFlowFunc(("returns rc=%Rrc\n", rc));
4536 return rc;
4537}
4538
4539static DECLCALLBACK(size_t) vdIOIntIoCtxCopyTo(void *pvUser, PVDIOCTX pIoCtx,
4540 const void *pvBuf, size_t cbBuf)
4541{
4542 PVDIO pVDIo = (PVDIO)pvUser;
4543 PVDISK pDisk = pVDIo->pDisk;
4544 size_t cbCopied = 0;
4545
4546 /** @todo Enable check for sync I/O later. */
4547 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4548 VD_IS_LOCKED(pDisk);
4549
4550 cbCopied = vdIoCtxCopyTo(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4551 Assert(cbCopied == cbBuf);
4552
4553 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCopied); - triggers with vdCopyHelper/dmgRead.
4554 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4555
4556 return cbCopied;
4557}
4558
4559static DECLCALLBACK(size_t) vdIOIntIoCtxCopyFrom(void *pvUser, PVDIOCTX pIoCtx,
4560 void *pvBuf, size_t cbBuf)
4561{
4562 PVDIO pVDIo = (PVDIO)pvUser;
4563 PVDISK pDisk = pVDIo->pDisk;
4564 size_t cbCopied = 0;
4565
4566 /** @todo Enable check for sync I/O later. */
4567 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4568 VD_IS_LOCKED(pDisk);
4569
4570 cbCopied = vdIoCtxCopyFrom(pIoCtx, (uint8_t *)pvBuf, cbBuf);
4571 Assert(cbCopied == cbBuf);
4572
4573 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft > cbCopied); - triggers with vdCopyHelper/dmgRead.
4574 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCopied);
4575
4576 return cbCopied;
4577}
4578
4579static DECLCALLBACK(size_t) vdIOIntIoCtxSet(void *pvUser, PVDIOCTX pIoCtx, int ch, size_t cb)
4580{
4581 PVDIO pVDIo = (PVDIO)pvUser;
4582 PVDISK pDisk = pVDIo->pDisk;
4583 size_t cbSet = 0;
4584
4585 /** @todo Enable check for sync I/O later. */
4586 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
4587 VD_IS_LOCKED(pDisk);
4588
4589 cbSet = vdIoCtxSet(pIoCtx, ch, cb);
4590 Assert(cbSet == cb);
4591
4592 /// @todo Assert(pIoCtx->Req.Io.cbTransferLeft >= cbSet); - triggers with vdCopyHelper/dmgRead.
4593 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbSet);
4594
4595 return cbSet;
4596}
4597
/**
 * VD I/O interface callback creating a segment array view over the I/O
 * context's S/G buffer (delegates to RTSgBufSegArrayCreate).
 *
 * @returns Number of bytes covered by the created segment array.
 */
static DECLCALLBACK(size_t) vdIOIntIoCtxSegArrayCreate(void *pvUser, PVDIOCTX pIoCtx,
                                                       PRTSGSEG paSeg, unsigned *pcSeg,
                                                       size_t cbData)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;
    size_t cbCreated = 0;

    /** @todo It is possible that this gets called from a filter plugin
     * outside of the disk lock. Refine assertion or remove completely. */
#if 0
    /** @todo Enable check for sync I/O later. */
    if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC))
        VD_IS_LOCKED(pDisk);
#else
    NOREF(pDisk);
#endif

    /* With paSeg == NULL the call only counts segments; only then may the
     * byte count differ from the request. */
    cbCreated = RTSgBufSegArrayCreate(&pIoCtx->Req.Io.SgBuf, paSeg, pcSeg, cbData);
    Assert(!paSeg || cbData == cbCreated);

    return cbCreated;
}
4621
/**
 * VD I/O interface callback notifying that part of an I/O context completed;
 * records the status, updates the transfer accounting and hands the context
 * to the halted list for later processing under the disk lock.
 *
 * @param   pvUser       Opaque user data, points to the VDIO instance.
 * @param   pIoCtx       The I/O context a part of which completed.
 * @param   rcReq        Status of the completed part.
 * @param   cbCompleted  Number of bytes that completed.
 */
static DECLCALLBACK(void) vdIOIntIoCtxCompleted(void *pvUser, PVDIOCTX pIoCtx, int rcReq,
                                                size_t cbCompleted)
{
    PVDIO pVDIo = (PVDIO)pvUser;
    PVDISK pDisk = pVDIo->pDisk;

    LogFlowFunc(("pvUser=%#p pIoCtx=%#p rcReq=%Rrc cbCompleted=%zu\n",
                 pvUser, pIoCtx, rcReq, cbCompleted));

    /*
     * Grab the disk critical section to avoid races with other threads which
     * might still modify the I/O context.
     * Example is that iSCSI is doing an asynchronous write but calls us already
     * while the other thread is still hanging in vdWriteHelperAsync and couldn't update
     * the blocked state yet.
     * It can overwrite the state to true before we call vdIoCtxContinue and the
     * the request would hang indefinite.
     */
    /* Record the first error only; later successes must not overwrite it. */
    ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rcReq, VINF_SUCCESS);
    Assert(pIoCtx->Req.Io.cbTransferLeft >= cbCompleted);
    ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbCompleted);

    /* Set next transfer function if the current one finished.
     * @todo: Find a better way to prevent vdIoCtxContinue from calling the current helper again. */
    if (!pIoCtx->Req.Io.cbTransferLeft)
    {
        pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
        pIoCtx->pfnIoCtxTransferNext = NULL;
    }

    vdIoCtxAddToWaitingList(&pDisk->pIoCtxHaltedHead, pIoCtx);
    if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
    {
        /* Immediately drop the lock again, it will take care of processing the list. */
        vdDiskUnlock(pDisk, NULL);
    }
}
4659
4660static DECLCALLBACK(bool) vdIOIntIoCtxIsSynchronous(void *pvUser, PVDIOCTX pIoCtx)
4661{
4662 NOREF(pvUser);
4663 return !!(pIoCtx->fFlags & VDIOCTX_FLAGS_SYNC);
4664}
4665
4666static DECLCALLBACK(bool) vdIOIntIoCtxIsZero(void *pvUser, PVDIOCTX pIoCtx, size_t cbCheck,
4667 bool fAdvance)
4668{
4669 NOREF(pvUser);
4670
4671 bool fIsZero = RTSgBufIsZero(&pIoCtx->Req.Io.SgBuf, cbCheck);
4672 if (fIsZero && fAdvance)
4673 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbCheck);
4674
4675 return fIsZero;
4676}
4677
4678static DECLCALLBACK(size_t) vdIOIntIoCtxGetDataUnitSize(void *pvUser, PVDIOCTX pIoCtx)
4679{
4680 RT_NOREF1(pIoCtx);
4681 PVDIO pVDIo = (PVDIO)pvUser;
4682 PVDISK pDisk = pVDIo->pDisk;
4683 size_t cbSector = 0;
4684
4685 PVDIMAGE pImage = vdGetImageByNumber(pDisk, VD_LAST_IMAGE);
4686 AssertPtrReturn(pImage, 0);
4687
4688 PCVDREGIONLIST pRegionList = NULL;
4689 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
4690 if (RT_SUCCESS(rc))
4691 {
4692 cbSector = pRegionList->aRegions[0].cbBlock;
4693
4694 AssertPtr(pImage->Backend->pfnRegionListRelease);
4695 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
4696 }
4697
4698 return cbSector;
4699}
4700
4701/**
4702 * VD I/O interface callback for opening a file (limited version for VDGetFormat).
4703 */
4704static DECLCALLBACK(int) vdIOIntOpenLimited(void *pvUser, const char *pszLocation,
4705 uint32_t fOpen, PPVDIOSTORAGE ppIoStorage)
4706{
4707 int rc = VINF_SUCCESS;
4708 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4709 PVDIOSTORAGE pIoStorage = (PVDIOSTORAGE)RTMemAllocZ(sizeof(VDIOSTORAGE));
4710
4711 if (!pIoStorage)
4712 return VERR_NO_MEMORY;
4713
4714 rc = pInterfaceIo->pfnOpen(NULL, pszLocation, fOpen, NULL, &pIoStorage->pStorage);
4715 if (RT_SUCCESS(rc))
4716 *ppIoStorage = pIoStorage;
4717 else
4718 RTMemFree(pIoStorage);
4719
4720 return rc;
4721}
4722
4723static DECLCALLBACK(int) vdIOIntCloseLimited(void *pvUser, PVDIOSTORAGE pIoStorage)
4724{
4725 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4726 int rc = pInterfaceIo->pfnClose(NULL, pIoStorage->pStorage);
4727
4728 RTMemFree(pIoStorage);
4729 return rc;
4730}
4731
4732static DECLCALLBACK(int) vdIOIntDeleteLimited(void *pvUser, const char *pcszFilename)
4733{
4734 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4735 return pInterfaceIo->pfnDelete(NULL, pcszFilename);
4736}
4737
4738static DECLCALLBACK(int) vdIOIntMoveLimited(void *pvUser, const char *pcszSrc,
4739 const char *pcszDst, unsigned fMove)
4740{
4741 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4742 return pInterfaceIo->pfnMove(NULL, pcszSrc, pcszDst, fMove);
4743}
4744
4745static DECLCALLBACK(int) vdIOIntGetFreeSpaceLimited(void *pvUser, const char *pcszFilename,
4746 int64_t *pcbFreeSpace)
4747{
4748 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4749 return pInterfaceIo->pfnGetFreeSpace(NULL, pcszFilename, pcbFreeSpace);
4750}
4751
4752static DECLCALLBACK(int) vdIOIntGetModificationTimeLimited(void *pvUser,
4753 const char *pcszFilename,
4754 PRTTIMESPEC pModificationTime)
4755{
4756 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4757 return pInterfaceIo->pfnGetModificationTime(NULL, pcszFilename, pModificationTime);
4758}
4759
4760static DECLCALLBACK(int) vdIOIntGetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4761 uint64_t *pcbSize)
4762{
4763 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4764 return pInterfaceIo->pfnGetSize(NULL, pIoStorage->pStorage, pcbSize);
4765}
4766
4767static DECLCALLBACK(int) vdIOIntSetSizeLimited(void *pvUser, PVDIOSTORAGE pIoStorage,
4768 uint64_t cbSize)
4769{
4770 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4771 return pInterfaceIo->pfnSetSize(NULL, pIoStorage->pStorage, cbSize);
4772}
4773
4774static DECLCALLBACK(int) vdIOIntWriteUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4775 uint64_t uOffset, PVDIOCTX pIoCtx,
4776 size_t cbWrite,
4777 PFNVDXFERCOMPLETED pfnComplete,
4778 void *pvCompleteUser)
4779{
4780 NOREF(pvUser);
4781 NOREF(pStorage);
4782 NOREF(uOffset);
4783 NOREF(pIoCtx);
4784 NOREF(cbWrite);
4785 NOREF(pfnComplete);
4786 NOREF(pvCompleteUser);
4787 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4788}
4789
4790static DECLCALLBACK(int) vdIOIntReadUserLimited(void *pvUser, PVDIOSTORAGE pStorage,
4791 uint64_t uOffset, PVDIOCTX pIoCtx,
4792 size_t cbRead)
4793{
4794 NOREF(pvUser);
4795 NOREF(pStorage);
4796 NOREF(uOffset);
4797 NOREF(pIoCtx);
4798 NOREF(cbRead);
4799 AssertMsgFailedReturn(("This needs to be implemented when called\n"), VERR_NOT_IMPLEMENTED);
4800}
4801
4802static DECLCALLBACK(int) vdIOIntWriteMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4803 uint64_t uOffset, const void *pvBuffer,
4804 size_t cbBuffer, PVDIOCTX pIoCtx,
4805 PFNVDXFERCOMPLETED pfnComplete,
4806 void *pvCompleteUser)
4807{
4808 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4809
4810 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4811 ("Async I/O not implemented for the limited interface"),
4812 VERR_NOT_SUPPORTED);
4813
4814 return pInterfaceIo->pfnWriteSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4815}
4816
4817static DECLCALLBACK(int) vdIOIntReadMetaLimited(void *pvUser, PVDIOSTORAGE pStorage,
4818 uint64_t uOffset, void *pvBuffer,
4819 size_t cbBuffer, PVDIOCTX pIoCtx,
4820 PPVDMETAXFER ppMetaXfer,
4821 PFNVDXFERCOMPLETED pfnComplete,
4822 void *pvCompleteUser)
4823{
4824 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4825
4826 AssertMsgReturn(!pIoCtx && !ppMetaXfer && !pfnComplete && !pvCompleteUser,
4827 ("Async I/O not implemented for the limited interface"),
4828 VERR_NOT_SUPPORTED);
4829
4830 return pInterfaceIo->pfnReadSync(NULL, pStorage->pStorage, uOffset, pvBuffer, cbBuffer, NULL);
4831}
4832
#if 0 /* unused */
/** Limited-interface variant of the meta transfer release callback; a no-op
 * since the limited interface performs no cached metadata transfers. */
static int vdIOIntMetaXferReleaseLimited(void *pvUser, PVDMETAXFER pMetaXfer)
{
    /* This is a NOP in this case. */
    NOREF(pvUser);
    NOREF(pMetaXfer);
    return VINF_SUCCESS;
}
#endif
4842
4843static DECLCALLBACK(int) vdIOIntFlushLimited(void *pvUser, PVDIOSTORAGE pStorage,
4844 PVDIOCTX pIoCtx,
4845 PFNVDXFERCOMPLETED pfnComplete,
4846 void *pvCompleteUser)
4847{
4848 PVDINTERFACEIO pInterfaceIo = (PVDINTERFACEIO)pvUser;
4849
4850 AssertMsgReturn(!pIoCtx && !pfnComplete && !pvCompleteUser,
4851 ("Async I/O not implemented for the limited interface"),
4852 VERR_NOT_SUPPORTED);
4853
4854 return pInterfaceIo->pfnFlushSync(NULL, pStorage->pStorage);
4855}
4856
4857/**
4858 * internal: send output to the log (unconditionally).
4859 */
4860static DECLCALLBACK(int) vdLogMessage(void *pvUser, const char *pszFormat, va_list args)
4861{
4862 NOREF(pvUser);
4863 RTLogPrintfV(pszFormat, args);
4864 return VINF_SUCCESS;
4865}
4866
4867DECLINLINE(int) vdMessageWrapper(PVDISK pDisk, const char *pszFormat, ...)
4868{
4869 va_list va;
4870 va_start(va, pszFormat);
4871 int rc = pDisk->pInterfaceError->pfnMessage(pDisk->pInterfaceError->Core.pvUser,
4872 pszFormat, va);
4873 va_end(va);
4874 return rc;
4875}
4876
4877
4878/**
4879 * internal: adjust PCHS geometry
4880 */
4881static void vdFixupPCHSGeometry(PVDGEOMETRY pPCHS, uint64_t cbSize)
4882{
4883 /* Fix broken PCHS geometry. Can happen for two reasons: either the backend
4884 * mixes up PCHS and LCHS, or the application used to create the source
4885 * image has put garbage in it. Additionally, if the PCHS geometry covers
4886 * more than the image size, set it back to the default. */
4887 if ( pPCHS->cHeads > 16
4888 || pPCHS->cSectors > 63
4889 || pPCHS->cCylinders == 0
4890 || (uint64_t)pPCHS->cHeads * pPCHS->cSectors * pPCHS->cCylinders * 512 > cbSize)
4891 {
4892 Assert(!(RT_MIN(cbSize / 512 / 16 / 63, 16383) - (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383)));
4893 pPCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / 16 / 63, 16383);
4894 pPCHS->cHeads = 16;
4895 pPCHS->cSectors = 63;
4896 }
4897}
4898
4899/**
4900 * internal: adjust LCHS geometry
4901 */
4902static void vdFixupLCHSGeometry(PVDGEOMETRY pLCHS, uint64_t cbSize)
4903{
4904 /* Fix broken LCHS geometry. Can happen for two reasons: either the backend
4905 * mixes up PCHS and LCHS, or the application used to create the source
4906 * image has put garbage in it. The fix in this case is to clear the LCHS
4907 * geometry to trigger autodetection when it is used next. If the geometry
4908 * already says "please autodetect" (cylinders=0) keep it. */
4909 if ( ( pLCHS->cHeads > 255
4910 || pLCHS->cHeads == 0
4911 || pLCHS->cSectors > 63
4912 || pLCHS->cSectors == 0)
4913 && pLCHS->cCylinders != 0)
4914 {
4915 pLCHS->cCylinders = 0;
4916 pLCHS->cHeads = 0;
4917 pLCHS->cSectors = 0;
4918 }
4919 /* Always recompute the number of cylinders stored in the LCHS
4920 * geometry if it isn't set to "autotedetect" at the moment.
4921 * This is very useful if the destination image size is
4922 * larger or smaller than the source image size. Do not modify
4923 * the number of heads and sectors. Windows guests hate it. */
4924 if ( pLCHS->cCylinders != 0
4925 && pLCHS->cHeads != 0 /* paranoia */
4926 && pLCHS->cSectors != 0 /* paranoia */)
4927 {
4928 Assert(!(RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024) - (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024)));
4929 pLCHS->cCylinders = (uint32_t)RT_MIN(cbSize / 512 / pLCHS->cHeads / pLCHS->cSectors, 1024);
4930 }
4931}
4932
/**
 * Sets the I/O callbacks of the given interface to the fallback methods
 * (plain IPRT file I/O; no asynchronous support).
 *
 * @returns nothing.
 * @param pIfIo The I/O interface to setup.
 */
static void vdIfIoFallbackCallbacksSetup(PVDINTERFACEIO pIfIo)
{
    pIfIo->pfnOpen                = vdIOOpenFallback;
    pIfIo->pfnClose               = vdIOCloseFallback;
    pIfIo->pfnDelete              = vdIODeleteFallback;
    pIfIo->pfnMove                = vdIOMoveFallback;
    pIfIo->pfnGetFreeSpace        = vdIOGetFreeSpaceFallback;
    pIfIo->pfnGetModificationTime = vdIOGetModificationTimeFallback;
    pIfIo->pfnGetSize             = vdIOGetSizeFallback;
    pIfIo->pfnSetSize             = vdIOSetSizeFallback;
    pIfIo->pfnSetAllocationSize   = vdIOSetAllocationSizeFallback;
    pIfIo->pfnReadSync            = vdIOReadSyncFallback;
    pIfIo->pfnWriteSync           = vdIOWriteSyncFallback;
    pIfIo->pfnFlushSync           = vdIOFlushSyncFallback;
    /* The fallback has no async capability; NULL routes callers to the sync paths. */
    pIfIo->pfnReadAsync           = NULL;
    pIfIo->pfnWriteAsync          = NULL;
    pIfIo->pfnFlushAsync          = NULL;
}
4957
/**
 * Sets the internal I/O callbacks of the given interface
 * (full vdIOInt* implementation incl. metadata transfers and I/O contexts).
 *
 * @returns nothing.
 * @param pIfIoInt The internal I/O interface to setup.
 */
static void vdIfIoIntCallbacksSetup(PVDINTERFACEIOINT pIfIoInt)
{
    pIfIoInt->pfnOpen                = vdIOIntOpen;
    pIfIoInt->pfnClose               = vdIOIntClose;
    pIfIoInt->pfnDelete              = vdIOIntDelete;
    pIfIoInt->pfnMove                = vdIOIntMove;
    pIfIoInt->pfnGetFreeSpace        = vdIOIntGetFreeSpace;
    pIfIoInt->pfnGetModificationTime = vdIOIntGetModificationTime;
    pIfIoInt->pfnGetSize             = vdIOIntGetSize;
    pIfIoInt->pfnSetSize             = vdIOIntSetSize;
    pIfIoInt->pfnSetAllocationSize   = vdIOIntSetAllocationSize;
    pIfIoInt->pfnReadUser            = vdIOIntReadUser;
    pIfIoInt->pfnWriteUser           = vdIOIntWriteUser;
    pIfIoInt->pfnReadMeta            = vdIOIntReadMeta;
    pIfIoInt->pfnWriteMeta           = vdIOIntWriteMeta;
    pIfIoInt->pfnMetaXferRelease     = vdIOIntMetaXferRelease;
    pIfIoInt->pfnFlush               = vdIOIntFlush;
    pIfIoInt->pfnIoCtxCopyFrom       = vdIOIntIoCtxCopyFrom;
    pIfIoInt->pfnIoCtxCopyTo         = vdIOIntIoCtxCopyTo;
    pIfIoInt->pfnIoCtxSet            = vdIOIntIoCtxSet;
    pIfIoInt->pfnIoCtxSegArrayCreate = vdIOIntIoCtxSegArrayCreate;
    pIfIoInt->pfnIoCtxCompleted      = vdIOIntIoCtxCompleted;
    pIfIoInt->pfnIoCtxIsSynchronous  = vdIOIntIoCtxIsSynchronous;
    pIfIoInt->pfnIoCtxIsZero         = vdIOIntIoCtxIsZero;
    pIfIoInt->pfnIoCtxGetDataUnitSize = vdIOIntIoCtxGetDataUnitSize;
}
4990
4991/**
4992 * Internally used completion handler for synchronous I/O contexts.
4993 */
4994static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq)
4995{
4996 RT_NOREF2(pvUser1, rcReq);
4997 RTSEMEVENT hEvent = (RTSEMEVENT)pvUser2;
4998
4999 RTSemEventSignal(hEvent);
5000}
5001
5002
5003VBOXDDU_DECL(int) VDInit(void)
5004{
5005 int rc = vdPluginInit();
5006 LogRel(("VD: VDInit finished with %Rrc\n", rc));
5007 return rc;
5008}
5009
5010
5011VBOXDDU_DECL(int) VDShutdown(void)
5012{
5013 return vdPluginTerm();
5014}
5015
5016
5017VBOXDDU_DECL(int) VDPluginLoadFromFilename(const char *pszFilename)
5018{
5019 if (!vdPluginIsInitialized())
5020 {
5021 int rc = VDInit();
5022 if (RT_FAILURE(rc))
5023 return rc;
5024 }
5025
5026 return vdPluginLoadFromFilename(pszFilename);
5027}
5028
5029/**
5030 * Load all plugins from a given path.
5031 *
5032 * @returns VBox statuse code.
5033 * @param pszPath The path to load plugins from.
5034 */
5035VBOXDDU_DECL(int) VDPluginLoadFromPath(const char *pszPath)
5036{
5037 if (!vdPluginIsInitialized())
5038 {
5039 int rc = VDInit();
5040 if (RT_FAILURE(rc))
5041 return rc;
5042 }
5043
5044 return vdPluginLoadFromPath(pszPath);
5045}
5046
5047
5048VBOXDDU_DECL(int) VDPluginUnloadFromFilename(const char *pszFilename)
5049{
5050 if (!vdPluginIsInitialized())
5051 {
5052 int rc = VDInit();
5053 if (RT_FAILURE(rc))
5054 return rc;
5055 }
5056
5057 return vdPluginUnloadFromFilename(pszFilename);
5058}
5059
5060
5061VBOXDDU_DECL(int) VDPluginUnloadFromPath(const char *pszPath)
5062{
5063 if (!vdPluginIsInitialized())
5064 {
5065 int rc = VDInit();
5066 if (RT_FAILURE(rc))
5067 return rc;
5068 }
5069
5070 return vdPluginUnloadFromPath(pszPath);
5071}
5072
5073
5074VBOXDDU_DECL(int) VDBackendInfo(unsigned cEntriesAlloc, PVDBACKENDINFO pEntries,
5075 unsigned *pcEntriesUsed)
5076{
5077 int rc = VINF_SUCCESS;
5078
5079 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5080 /* Check arguments. */
5081 AssertMsgReturn(cEntriesAlloc, ("cEntriesAlloc=%u\n", cEntriesAlloc), VERR_INVALID_PARAMETER);
5082 AssertPtrReturn(pEntries, VERR_INVALID_POINTER);
5083 AssertPtrReturn(pcEntriesUsed, VERR_INVALID_POINTER);
5084 if (!vdPluginIsInitialized())
5085 VDInit();
5086
5087 uint32_t cBackends = vdGetImageBackendCount();
5088 if (cEntriesAlloc < cBackends)
5089 {
5090 *pcEntriesUsed = cBackends;
5091 return VERR_BUFFER_OVERFLOW;
5092 }
5093
5094 for (unsigned i = 0; i < cBackends; i++)
5095 {
5096 PCVDIMAGEBACKEND pBackend;
5097 rc = vdQueryImageBackend(i, &pBackend);
5098 AssertRC(rc);
5099
5100 pEntries[i].pszBackend = pBackend->pszBackendName;
5101 pEntries[i].uBackendCaps = pBackend->uBackendCaps;
5102 pEntries[i].paFileExtensions = pBackend->paFileExtensions;
5103 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5104 pEntries[i].pfnComposeLocation = pBackend->pfnComposeLocation;
5105 pEntries[i].pfnComposeName = pBackend->pfnComposeName;
5106 }
5107
5108 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5109 *pcEntriesUsed = cBackends;
5110 return rc;
5111}
5112
5113
5114VBOXDDU_DECL(int) VDBackendInfoOne(const char *pszBackend, PVDBACKENDINFO pEntry)
5115{
5116 LogFlowFunc(("pszBackend=%#p pEntry=%#p\n", pszBackend, pEntry));
5117 /* Check arguments. */
5118 AssertPtrReturn(pszBackend, VERR_INVALID_POINTER);
5119 AssertPtrReturn(pEntry, VERR_INVALID_POINTER);
5120 if (!vdPluginIsInitialized())
5121 VDInit();
5122
5123 PCVDIMAGEBACKEND pBackend;
5124 int rc = vdFindImageBackend(pszBackend, &pBackend);
5125 if (RT_SUCCESS(rc))
5126 {
5127 pEntry->pszBackend = pBackend->pszBackendName;
5128 pEntry->uBackendCaps = pBackend->uBackendCaps;
5129 pEntry->paFileExtensions = pBackend->paFileExtensions;
5130 pEntry->paConfigInfo = pBackend->paConfigInfo;
5131 }
5132
5133 return rc;
5134}
5135
5136
5137VBOXDDU_DECL(int) VDFilterInfo(unsigned cEntriesAlloc, PVDFILTERINFO pEntries,
5138 unsigned *pcEntriesUsed)
5139{
5140 int rc = VINF_SUCCESS;
5141
5142 LogFlowFunc(("cEntriesAlloc=%u pEntries=%#p pcEntriesUsed=%#p\n", cEntriesAlloc, pEntries, pcEntriesUsed));
5143 /* Check arguments. */
5144 AssertMsgReturn(cEntriesAlloc,
5145 ("cEntriesAlloc=%u\n", cEntriesAlloc),
5146 VERR_INVALID_PARAMETER);
5147 AssertPtrReturn(pEntries, VERR_INVALID_POINTER);
5148 AssertPtrReturn(pcEntriesUsed, VERR_INVALID_POINTER);
5149 if (!vdPluginIsInitialized())
5150 VDInit();
5151
5152 uint32_t cBackends = vdGetFilterBackendCount();
5153 if (cEntriesAlloc < cBackends)
5154 {
5155 *pcEntriesUsed = cBackends;
5156 return VERR_BUFFER_OVERFLOW;
5157 }
5158
5159 for (unsigned i = 0; i < cBackends; i++)
5160 {
5161 PCVDFILTERBACKEND pBackend;
5162 rc = vdQueryFilterBackend(i, &pBackend);
5163 pEntries[i].pszFilter = pBackend->pszBackendName;
5164 pEntries[i].paConfigInfo = pBackend->paConfigInfo;
5165 }
5166
5167 LogFlowFunc(("returns %Rrc *pcEntriesUsed=%u\n", rc, cBackends));
5168 *pcEntriesUsed = cBackends;
5169 return rc;
5170}
5171
5172
5173VBOXDDU_DECL(int) VDFilterInfoOne(const char *pszFilter, PVDFILTERINFO pEntry)
5174{
5175 LogFlowFunc(("pszFilter=%#p pEntry=%#p\n", pszFilter, pEntry));
5176 /* Check arguments. */
5177 AssertPtrReturn(pszFilter, VERR_INVALID_POINTER);
5178 AssertPtrReturn(pEntry, VERR_INVALID_POINTER);
5179 if (!vdPluginIsInitialized())
5180 VDInit();
5181
5182 PCVDFILTERBACKEND pBackend;
5183 int rc = vdFindFilterBackend(pszFilter, &pBackend);
5184 if (RT_SUCCESS(rc))
5185 {
5186 pEntry->pszFilter = pBackend->pszBackendName;
5187 pEntry->paConfigInfo = pBackend->paConfigInfo;
5188 }
5189
5190 return rc;
5191}
5192
5193
5194VBOXDDU_DECL(int) VDCreate(PVDINTERFACE pVDIfsDisk, VDTYPE enmType, PVDISK *ppDisk)
5195{
5196 int rc = VINF_SUCCESS;
5197 PVDISK pDisk = NULL;
5198
5199 LogFlowFunc(("pVDIfsDisk=%#p\n", pVDIfsDisk));
5200 /* Check arguments. */
5201 AssertPtrReturn(ppDisk, VERR_INVALID_POINTER);
5202
5203 do
5204 {
5205 pDisk = (PVDISK)RTMemAllocZ(sizeof(VDISK));
5206 if (pDisk)
5207 {
5208 pDisk->u32Signature = VDISK_SIGNATURE;
5209 pDisk->enmType = enmType;
5210 pDisk->cImages = 0;
5211 pDisk->pBase = NULL;
5212 pDisk->pLast = NULL;
5213 pDisk->cbSize = 0;
5214 pDisk->PCHSGeometry.cCylinders = 0;
5215 pDisk->PCHSGeometry.cHeads = 0;
5216 pDisk->PCHSGeometry.cSectors = 0;
5217 pDisk->LCHSGeometry.cCylinders = 0;
5218 pDisk->LCHSGeometry.cHeads = 0;
5219 pDisk->LCHSGeometry.cSectors = 0;
5220 pDisk->pVDIfsDisk = pVDIfsDisk;
5221 pDisk->pInterfaceError = NULL;
5222 pDisk->pInterfaceThreadSync = NULL;
5223 pDisk->pIoCtxLockOwner = NULL;
5224 pDisk->pIoCtxHead = NULL;
5225 pDisk->fLocked = false;
5226 pDisk->hMemCacheIoCtx = NIL_RTMEMCACHE;
5227 pDisk->hMemCacheIoTask = NIL_RTMEMCACHE;
5228 RTListInit(&pDisk->ListFilterChainWrite);
5229 RTListInit(&pDisk->ListFilterChainRead);
5230
5231 /* Create the I/O ctx cache */
5232 rc = RTMemCacheCreate(&pDisk->hMemCacheIoCtx, sizeof(VDIOCTX), 0, UINT32_MAX,
5233 NULL, NULL, NULL, 0);
5234 if (RT_FAILURE(rc))
5235 break;
5236
5237 /* Create the I/O task cache */
5238 rc = RTMemCacheCreate(&pDisk->hMemCacheIoTask, sizeof(VDIOTASK), 0, UINT32_MAX,
5239 NULL, NULL, NULL, 0);
5240 if (RT_FAILURE(rc))
5241 break;
5242
5243 pDisk->pInterfaceError = VDIfErrorGet(pVDIfsDisk);
5244 pDisk->pInterfaceThreadSync = VDIfThreadSyncGet(pVDIfsDisk);
5245
5246 *ppDisk = pDisk;
5247 }
5248 else
5249 {
5250 rc = VERR_NO_MEMORY;
5251 break;
5252 }
5253 } while (0);
5254
5255 if ( RT_FAILURE(rc)
5256 && pDisk)
5257 {
5258 if (pDisk->hMemCacheIoCtx != NIL_RTMEMCACHE)
5259 RTMemCacheDestroy(pDisk->hMemCacheIoCtx);
5260 if (pDisk->hMemCacheIoTask != NIL_RTMEMCACHE)
5261 RTMemCacheDestroy(pDisk->hMemCacheIoTask);
5262 }
5263
5264 LogFlowFunc(("returns %Rrc (pDisk=%#p)\n", rc, pDisk));
5265 return rc;
5266}
5267
5268
5269VBOXDDU_DECL(int) VDDestroy(PVDISK pDisk)
5270{
5271 int rc = VINF_SUCCESS;
5272 LogFlowFunc(("pDisk=%#p\n", pDisk));
5273 do
5274 {
5275 /* sanity check */
5276 AssertPtrBreak(pDisk);
5277 AssertMsg(pDisk->u32Signature == VDISK_SIGNATURE, ("u32Signature=%08x\n", pDisk->u32Signature));
5278 Assert(!pDisk->fLocked);
5279
5280 rc = VDCloseAll(pDisk);
5281 int rc2 = VDFilterRemoveAll(pDisk);
5282 if (RT_SUCCESS(rc))
5283 rc = rc2;