VirtualBox

source: vbox/trunk/src/VBox/Storage/VD.cpp@ 103131

Last change on this file since 103131 was 100078, checked in by vboxsync, 15 months ago

Main/src-server and Storage: Immutable media handling flexibility added bugref:5995

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 339.5 KB
RevLine 
[12639]1/* $Id: VD.cpp 100078 2023-06-06 05:15:22Z vboxsync $ */
[2379]2/** @file
[66378]3 * VD - Virtual disk container implementation.
[2379]4 */
5
6/*
[98103]7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
[2379]8 *
[96407]9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
[2379]26 */
27
[57358]28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
[2379]32#define LOG_GROUP LOG_GROUP_VD
[33567]33#include <VBox/vd.h>
[2379]34#include <VBox/err.h>
[12423]35#include <VBox/sup.h>
36#include <VBox/log.h>
[2379]37
38#include <iprt/alloc.h>
39#include <iprt/assert.h>
40#include <iprt/uuid.h>
41#include <iprt/file.h>
42#include <iprt/string.h>
43#include <iprt/asm.h>
[66378]44#include <iprt/param.h>
[5101]45#include <iprt/path.h>
[28065]46#include <iprt/sg.h>
[44400]47#include <iprt/semaphore.h>
[75373]48#include <iprt/vector.h>
[2379]49
[66378]50#include "VDInternal.h"
[2379]51
[26987]52/** Buffer size used for merging images. */
53#define VD_MERGE_BUFFER_SIZE (16 * _1M)
54
[27977]55/** Maximum number of segments in one I/O task. */
56#define VD_IO_TASK_SEGMENTS_MAX 64
57
[38621]58/** Threshold after not recently used blocks are removed from the list. */
[63567]59#define VD_DISCARD_REMOVE_THRESHOLD (10 * _1M) /** @todo experiment */
[38621]60
[26987]61/**
62 * VD async I/O interface storage descriptor.
63 */
[32553]64typedef struct VDIIOFALLBACKSTORAGE
[26987]65{
66 /** File handle. */
[38469]67 RTFILE File;
[26987]68 /** Completion callback. */
[38469]69 PFNVDCOMPLETED pfnCompleted;
[26987]70 /** Thread for async access. */
[38469]71 RTTHREAD ThreadAsync;
[32553]72} VDIIOFALLBACKSTORAGE, *PVDIIOFALLBACKSTORAGE;
[26987]73
74/**
75 * uModified bit flags.
76 */
77#define VD_IMAGE_MODIFIED_FLAG RT_BIT(0)
78#define VD_IMAGE_MODIFIED_FIRST RT_BIT(1)
79#define VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE RT_BIT(2)
80
81
[44396]82# define VD_IS_LOCKED(a_pDisk) \
[30555]83 do \
84 { \
[62738]85 NOREF(a_pDisk); \
86 AssertMsg((a_pDisk)->fLocked, \
[44396]87 ("Lock not held\n"));\
[30555]88 } while(0)
[26987]89
90/**
91 * VBox parent read descriptor, used internally for compaction.
92 */
93typedef struct VDPARENTSTATEDESC
94{
95 /** Pointer to disk descriptor. */
[66250]96 PVDISK pDisk;
[26987]97 /** Pointer to image descriptor. */
98 PVDIMAGE pImage;
99} VDPARENTSTATEDESC, *PVDPARENTSTATEDESC;
100
[27808]101/**
102 * Transfer direction.
103 */
104typedef enum VDIOCTXTXDIR
105{
106 /** Read */
107 VDIOCTXTXDIR_READ = 0,
108 /** Write */
109 VDIOCTXTXDIR_WRITE,
110 /** Flush */
111 VDIOCTXTXDIR_FLUSH,
[38876]112 /** Discard */
113 VDIOCTXTXDIR_DISCARD,
[27808]114 /** 32bit hack */
115 VDIOCTXTXDIR_32BIT_HACK = 0x7fffffff
116} VDIOCTXTXDIR, *PVDIOCTXTXDIR;
[26987]117
[28620]118/** Transfer function */
[85121]119typedef DECLCALLBACKTYPE(int, FNVDIOCTXTRANSFER ,(PVDIOCTX pIoCtx));
[28620]120/** Pointer to a transfer function. */
121typedef FNVDIOCTXTRANSFER *PFNVDIOCTXTRANSFER;
122
[27808]123/**
124 * I/O context
125 */
126typedef struct VDIOCTX
127{
[39893]128 /** Pointer to the next I/O context. */
129 struct VDIOCTX * volatile pIoCtxNext;
[27977]130 /** Disk this is request is for. */
[66378]131 PVDISK pDisk;
[27977]132 /** Return code. */
133 int rcReq;
[44242]134 /** Various flags for the I/O context. */
135 uint32_t fFlags;
[29240]136 /** Number of data transfers currently pending. */
137 volatile uint32_t cDataTransfersPending;
[27977]138 /** How many meta data transfers are pending. */
139 volatile uint32_t cMetaTransfersPending;
[28065]140 /** Flag whether the request finished */
141 volatile bool fComplete;
[28154]142 /** Temporary allocated memory which is freed
143 * when the context completes. */
144 void *pvAllocation;
[28620]145 /** Transfer function. */
146 PFNVDIOCTXTRANSFER pfnIoCtxTransfer;
147 /** Next transfer part after the current one completed. */
148 PFNVDIOCTXTRANSFER pfnIoCtxTransferNext;
[38876]149 /** Transfer direction */
150 VDIOCTXTXDIR enmTxDir;
151 /** Request type dependent data. */
152 union
153 {
154 /** I/O request (read/write). */
155 struct
156 {
157 /** Number of bytes left until this context completes. */
158 volatile uint32_t cbTransferLeft;
159 /** Current offset */
160 volatile uint64_t uOffset;
161 /** Number of bytes to transfer */
162 volatile size_t cbTransfer;
163 /** Current image in the chain. */
164 PVDIMAGE pImageCur;
165 /** Start image to read from. pImageCur is reset to this
166 * value after it reached the first image in the chain. */
167 PVDIMAGE pImageStart;
168 /** S/G buffer */
169 RTSGBUF SgBuf;
[45155]170 /** Number of bytes to clear in the buffer before the current read. */
171 size_t cbBufClear;
172 /** Number of images to read. */
173 unsigned cImagesRead;
174 /** Override for the parent image to start reading from. */
175 PVDIMAGE pImageParentOverride;
[50991]176 /** Original offset of the transfer - required for filtering read requests. */
177 uint64_t uOffsetXferOrig;
178 /** Original size of the transfer - required for fitlering read requests. */
179 size_t cbXferOrig;
[38876]180 } Io;
181 /** Discard requests. */
182 struct
183 {
184 /** Pointer to the range descriptor array. */
185 PCRTRANGE paRanges;
186 /** Number of ranges in the array. */
187 unsigned cRanges;
188 /** Range descriptor index which is processed. */
189 unsigned idxRange;
190 /** Start offset to discard currently. */
191 uint64_t offCur;
192 /** How many bytes left to discard in the current range. */
193 size_t cbDiscardLeft;
194 /** How many bytes to discard in the current block (<= cbDiscardLeft). */
195 size_t cbThisDiscard;
196 /** Discard block handled currently. */
197 PVDDISCARDBLOCK pBlock;
198 } Discard;
199 } Req;
[28065]200 /** Parent I/O context if any. Sets the type of the context (root/child) */
201 PVDIOCTX pIoCtxParent;
202 /** Type dependent data (root/child) */
203 union
204 {
205 /** Root data */
206 struct
207 {
208 /** Completion callback */
209 PFNVDASYNCTRANSFERCOMPLETE pfnComplete;
210 /** User argument 1 passed on completion. */
211 void *pvUser1;
[38876]212 /** User argument 2 passed on completion. */
[28065]213 void *pvUser2;
214 } Root;
215 /** Child data */
216 struct
217 {
218 /** Saved start offset */
219 uint64_t uOffsetSaved;
220 /** Saved transfer size */
221 size_t cbTransferLeftSaved;
[33540]222 /** Number of bytes transferred from the parent if this context completes. */
[28065]223 size_t cbTransferParent;
[28620]224 /** Number of bytes to pre read */
225 size_t cbPreRead;
226 /** Number of bytes to post read. */
227 size_t cbPostRead;
[30044]228 /** Number of bytes to write left in the parent. */
229 size_t cbWriteParent;
[28620]230 /** Write type dependent data. */
231 union
232 {
233 /** Optimized */
234 struct
235 {
236 /** Bytes to fill to satisfy the block size. Not part of the virtual disk. */
237 size_t cbFill;
238 /** Bytes to copy instead of reading from the parent */
239 size_t cbWriteCopy;
240 /** Bytes to read from the image. */
241 size_t cbReadImage;
242 } Optimized;
243 } Write;
[28065]244 } Child;
245 } Type;
[27808]246} VDIOCTX;
247
[44415]248/** Default flags for an I/O context, i.e. unblocked and async. */
[46712]249#define VDIOCTX_FLAGS_DEFAULT (0)
[44242]250/** Flag whether the context is blocked. */
[46712]251#define VDIOCTX_FLAGS_BLOCKED RT_BIT_32(0)
[44242]252/** Flag whether the I/O context is using synchronous I/O. */
[46712]253#define VDIOCTX_FLAGS_SYNC RT_BIT_32(1)
[45155]254/** Flag whether the read should update the cache. */
[46712]255#define VDIOCTX_FLAGS_READ_UPDATE_CACHE RT_BIT_32(2)
[45155]256/** Flag whether free blocks should be zeroed.
257 * If false and no image has data for sepcified
258 * range VERR_VD_BLOCK_FREE is returned for the I/O context.
259 * Note that unallocated blocks are still zeroed
260 * if at least one image has valid data for a part
261 * of the range.
262 */
[46712]263#define VDIOCTX_FLAGS_ZERO_FREE_BLOCKS RT_BIT_32(3)
[45155]264/** Don't free the I/O context when complete because
265 * it was alloacted elsewhere (stack, ...). */
[46712]266#define VDIOCTX_FLAGS_DONT_FREE RT_BIT_32(4)
[51750]267/** Don't set the modified flag for this I/O context when writing. */
[46712]268#define VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG RT_BIT_32(5)
[51750]269/** The write filter was applied already and shouldn't be applied a second time.
270 * Used at the beginning of vdWriteHelperAsync() because it might be called
271 * multiple times.
272 */
273#define VDIOCTX_FLAGS_WRITE_FILTER_APPLIED RT_BIT_32(6)
[44242]274
[44396]275/** NIL I/O context pointer value. */
276#define NIL_VDIOCTX ((PVDIOCTX)0)
277
[38876]278/**
279 * List node for deferred I/O contexts.
280 */
[30555]281typedef struct VDIOCTXDEFERRED
282{
283 /** Node in the list of deferred requests.
284 * A request can be deferred if the image is growing
285 * and the request accesses the same range or if
286 * the backend needs to read or write metadata from the disk
287 * before it can continue. */
288 RTLISTNODE NodeDeferred;
289 /** I/O context this entry points to. */
290 PVDIOCTX pIoCtx;
291} VDIOCTXDEFERRED, *PVDIOCTXDEFERRED;
292
[27808]293/**
[27977]294 * I/O task.
295 */
296typedef struct VDIOTASK
297{
[44396]298 /** Next I/O task waiting in the list. */
299 struct VDIOTASK * volatile pNext;
[30555]300 /** Storage this task belongs to. */
301 PVDIOSTORAGE pIoStorage;
302 /** Optional completion callback. */
303 PFNVDXFERCOMPLETED pfnComplete;
304 /** Opaque user data. */
305 void *pvUser;
[44396]306 /** Completion status code for the task. */
307 int rcReq;
[27977]308 /** Flag whether this is a meta data transfer. */
309 bool fMeta;
310 /** Type dependent data. */
311 union
312 {
313 /** User data transfer. */
314 struct
315 {
[33540]316 /** Number of bytes this task transferred. */
[27977]317 uint32_t cbTransfer;
[30555]318 /** Pointer to the I/O context the task belongs. */
319 PVDIOCTX pIoCtx;
[27977]320 } User;
321 /** Meta data transfer. */
322 struct
323 {
[30555]324 /** Meta transfer this task is for. */
325 PVDMETAXFER pMetaXfer;
[27977]326 } Meta;
327 } Type;
[44396]328} VDIOTASK;
[27977]329
330/**
[27808]331 * Storage handle.
332 */
333typedef struct VDIOSTORAGE
334{
[33745]335 /** Image I/O state this storage handle belongs to. */
336 PVDIO pVDIo;
[30555]337 /** AVL tree for pending async metadata transfers. */
338 PAVLRFOFFTREE pTreeMetaXfers;
[32553]339 /** Storage handle */
340 void *pStorage;
[27808]341} VDIOSTORAGE;
342
[30555]343/**
344 * Metadata transfer.
345 *
346 * @note This entry can't be freed if either the list is not empty or
347 * the reference counter is not 0.
348 * The assumption is that the backends don't need to read huge amounts of
349 * metadata to complete a transfer so the additional memory overhead should
350 * be relatively small.
351 */
352typedef struct VDMETAXFER
353{
354 /** AVL core for fast search (the file offset is the key) */
355 AVLRFOFFNODECORE Core;
356 /** I/O storage for this transfer. */
357 PVDIOSTORAGE pIoStorage;
358 /** Flags. */
359 uint32_t fFlags;
360 /** List of I/O contexts waiting for this metadata transfer to complete. */
361 RTLISTNODE ListIoCtxWaiting;
362 /** Number of references to this entry. */
363 unsigned cRefs;
364 /** Size of the data stored with this entry. */
365 size_t cbMeta;
[52358]366 /** Shadow buffer which is used in case a write is still active and other
367 * writes update the shadow buffer. */
368 uint8_t *pbDataShw;
369 /** List of I/O contexts updating the shadow buffer while there is a write
370 * in progress. */
371 RTLISTNODE ListIoCtxShwWrites;
[30555]372 /** Data stored - variable size. */
373 uint8_t abData[1];
374} VDMETAXFER;
375
376/**
377 * The transfer direction for the metadata.
378 */
379#define VDMETAXFER_TXDIR_MASK 0x3
380#define VDMETAXFER_TXDIR_NONE 0x0
381#define VDMETAXFER_TXDIR_WRITE 0x1
382#define VDMETAXFER_TXDIR_READ 0x2
383#define VDMETAXFER_TXDIR_FLUSH 0x3
384#define VDMETAXFER_TXDIR_GET(flags) ((flags) & VDMETAXFER_TXDIR_MASK)
385#define VDMETAXFER_TXDIR_SET(flags, dir) ((flags) = (flags & ~VDMETAXFER_TXDIR_MASK) | (dir))
386
[38876]387/** Forward declaration of the async discard helper. */
[57388]388static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx);
389static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx);
[66250]390static void vdDiskProcessBlockedIoCtx(PVDISK pDisk);
391static int vdDiskUnlock(PVDISK pDisk, PVDIOCTX pIoCtxRc);
[59295]392static DECLCALLBACK(void) vdIoCtxSyncComplete(void *pvUser1, void *pvUser2, int rcReq);
[38876]393
[32370]394/**
[7277]395 * internal: issue error message.
[2379]396 */
[66250]397static int vdError(PVDISK pDisk, int rc, RT_SRC_POS_DECL,
[7277]398 const char *pszFormat, ...)
[2379]399{
400 va_list va;
401 va_start(va, pszFormat);
[38469]402 if (pDisk->pInterfaceError)
403 pDisk->pInterfaceError->pfnError(pDisk->pInterfaceError->Core.pvUser, rc, RT_SRC_POS_ARGS, pszFormat, va);
[2379]404 va_end(va);
405 return rc;
406}
407
408/**
[27232]409 * internal: thread synchronization, start read.
410 */
[66250]411DECLINLINE(int) vdThreadStartRead(PVDISK pDisk)
[27232]412{
413 int rc = VINF_SUCCESS;
[38469]414 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
415 rc = pDisk->pInterfaceThreadSync->pfnStartRead(pDisk->pInterfaceThreadSync->Core.pvUser);
[27232]416 return rc;
417}
418
419/**
420 * internal: thread synchronization, finish read.
421 */
[66250]422DECLINLINE(int) vdThreadFinishRead(PVDISK pDisk)
[27232]423{
424 int rc = VINF_SUCCESS;
[38469]425 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
426 rc = pDisk->pInterfaceThreadSync->pfnFinishRead(pDisk->pInterfaceThreadSync->Core.pvUser);
[27232]427 return rc;
428}
429
430/**
431 * internal: thread synchronization, start write.
432 */
[66250]433DECLINLINE(int) vdThreadStartWrite(PVDISK pDisk)
[27232]434{
435 int rc = VINF_SUCCESS;
[38469]436 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
437 rc = pDisk->pInterfaceThreadSync->pfnStartWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
[27232]438 return rc;
439}
440
441/**
442 * internal: thread synchronization, finish write.
443 */
[66250]444DECLINLINE(int) vdThreadFinishWrite(PVDISK pDisk)
[27232]445{
446 int rc = VINF_SUCCESS;
[38469]447 if (RT_UNLIKELY(pDisk->pInterfaceThreadSync))
448 rc = pDisk->pInterfaceThreadSync->pfnFinishWrite(pDisk->pInterfaceThreadSync->Core.pvUser);
[27232]449 return rc;
450}
451
452/**
[2379]453 * internal: add image structure to the end of images list.
454 */
[66250]455static void vdAddImageToList(PVDISK pDisk, PVDIMAGE pImage)
[2379]456{
457 pImage->pPrev = NULL;
458 pImage->pNext = NULL;
459
460 if (pDisk->pBase)
461 {
462 Assert(pDisk->cImages > 0);
463 pImage->pPrev = pDisk->pLast;
464 pDisk->pLast->pNext = pImage;
465 pDisk->pLast = pImage;
466 }
467 else
468 {
469 Assert(pDisk->cImages == 0);
470 pDisk->pBase = pImage;
471 pDisk->pLast = pImage;
472 }
473
474 pDisk->cImages++;
475}
476
477/**
478 * internal: remove image structure from the images list.
479 */
[66250]480static void vdRemoveImageFromList(PVDISK pDisk, PVDIMAGE pImage)
[2379]481{
482 Assert(pDisk->cImages > 0);
483
484 if (pImage->pPrev)
485 pImage->pPrev->pNext = pImage->pNext;
486 else
487 pDisk->pBase = pImage->pNext;
488
489 if (pImage->pNext)
490 pImage->pNext->pPrev = pImage->pPrev;
491 else
492 pDisk->pLast = pImage->pPrev;
493
494 pImage->pPrev = NULL;
495 pImage->pNext = NULL;
496
497 pDisk->cImages--;
498}
499
500/**
[54340]501 * Release a referene to the filter decrementing the counter and destroying the filter
502 * when the counter reaches zero.
503 *
504 * @returns The new reference count.
505 * @param pFilter The filter to release.
[50991]506 */
[54340]507static uint32_t vdFilterRelease(PVDFILTER pFilter)
[50991]508{
[54340]509 uint32_t cRefs = ASMAtomicDecU32(&pFilter->cRefs);
510 if (!cRefs)
[50991]511 {
[54340]512 pFilter->pBackend->pfnDestroy(pFilter->pvBackendData);
513 RTMemFree(pFilter);
[50991]514 }
[54340]515
516 return cRefs;
[50991]517}
518
519/**
[54340]520 * Increments the reference counter of the given filter.
521 *
522 * @return The new reference count.
523 * @param pFilter The filter.
[50991]524 */
[54340]525static uint32_t vdFilterRetain(PVDFILTER pFilter)
[50991]526{
[54340]527 return ASMAtomicIncU32(&pFilter->cRefs);
[50991]528}
529
530/**
[2379]531 * internal: find image by index into the images list.
532 */
[66250]533static PVDIMAGE vdGetImageByNumber(PVDISK pDisk, unsigned nImage)
[2379]534{
[2590]535 PVDIMAGE pImage = pDisk->pBase;
[6291]536 if (nImage == VD_LAST_IMAGE)
537 return pDisk->pLast;
[2379]538 while (pImage && nImage)
539 {
540 pImage = pImage->pNext;
541 nImage--;
542 }
543 return pImage;
544}
545
546/**
[66486]547 * Creates a new region list from the given one converting to match the flags if necessary.
548 *
549 * @returns VBox status code.
550 * @param pRegionList The region list to convert from.
551 * @param fFlags The flags for the new region list.
552 * @param ppRegionList Where to store the new region list on success.
553 */
554static int vdRegionListConv(PCVDREGIONLIST pRegionList, uint32_t fFlags, PPVDREGIONLIST ppRegionList)
555{
556 int rc = VINF_SUCCESS;
[73097]557 PVDREGIONLIST pRegionListNew = (PVDREGIONLIST)RTMemDup(pRegionList,
558 RT_UOFFSETOF_DYN(VDREGIONLIST, aRegions[pRegionList->cRegions]));
[66486]559 if (RT_LIKELY(pRegionListNew))
560 {
561 /* Do we have to convert anything? */
562 if (pRegionList->fFlags != fFlags)
563 {
564 uint64_t offRegionNext = 0;
565
566 pRegionListNew->fFlags = fFlags;
567 for (unsigned i = 0; i < pRegionListNew->cRegions; i++)
568 {
569 PVDREGIONDESC pRegion = &pRegionListNew->aRegions[i];
570
571 if ( (fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
572 && !(pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS))
573 {
574 Assert(!(pRegion->cRegionBlocksOrBytes % pRegion->cbBlock));
575
576 /* Convert from bytes to logical blocks. */
577 pRegion->offRegion = offRegionNext;
578 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes / pRegion->cbBlock;
579 offRegionNext += pRegion->cRegionBlocksOrBytes;
580 }
581 else
582 {
583 /* Convert from logical blocks to bytes. */
584 pRegion->offRegion = offRegionNext;
585 pRegion->cRegionBlocksOrBytes = pRegion->cRegionBlocksOrBytes * pRegion->cbBlock;
586 offRegionNext += pRegion->cRegionBlocksOrBytes;
587 }
588 }
589 }
590
591 *ppRegionList = pRegionListNew;
592 }
593 else
594 rc = VERR_NO_MEMORY;
595
596 return rc;
597}
598
599/**
600 * Returns the virtual size of the image in bytes.
601 *
602 * @returns Size of the given image in bytes.
[66488]603 * @param pImage The image to get the size from.
[66486]604 */
605static uint64_t vdImageGetSize(PVDIMAGE pImage)
606{
607 uint64_t cbImage = 0;
[75349]608
609 if (pImage->cbImage == VD_IMAGE_SIZE_UNINITIALIZED)
[66486]610 {
[75349]611 PCVDREGIONLIST pRegionList = NULL;
612 int rc = pImage->Backend->pfnQueryRegions(pImage->pBackendData, &pRegionList);
613 if (RT_SUCCESS(rc))
[66486]614 {
[75349]615 if (pRegionList->fFlags & VD_REGION_LIST_F_LOC_SIZE_BLOCKS)
[66486]616 {
[75349]617 PVDREGIONLIST pRegionListConv = NULL;
618 rc = vdRegionListConv(pRegionList, 0, &pRegionListConv);
619 if (RT_SUCCESS(rc))
620 {
621 for (uint32_t i = 0; i < pRegionListConv->cRegions; i++)
622 cbImage += pRegionListConv->aRegions[i].cRegionBlocksOrBytes;
[66486]623
[75349]624 VDRegionListFree(pRegionListConv);
625 }
[66486]626 }
[75349]627 else
628 for (uint32_t i = 0; i < pRegionList->cRegions; i++)
629 cbImage += pRegionList->aRegions[i].cRegionBlocksOrBytes;
630
631 AssertPtr(pImage->Backend->pfnRegionListRelease);
632 pImage->Backend->pfnRegionListRelease(pImage->pBackendData, pRegionList);
633 pImage->cbImage = cbImage; /* Cache the value. */
[66486]634 }
635 }
[75349]636 else
637 cbImage = pImage->cbImage;
[66486]638
639 return cbImage;
640}
641
642/**
[50991]643 * Applies the filter chain to the given write request.
644 *
645 * @returns VBox status code.
646 * @param pDisk The HDD container.
647 * @param uOffset The start offset of the write.
648 * @param cbWrite Number of bytes to write.
649 * @param pIoCtx The I/O context associated with the request.
650 */
[66250]651static int vdFilterChainApplyWrite(PVDISK pDisk, uint64_t uOffset, size_t cbWrite,
[50991]652 PVDIOCTX pIoCtx)
653{
654 int rc = VINF_SUCCESS;
655
[51750]656 VD_IS_LOCKED(pDisk);
657
[54340]658 PVDFILTER pFilter;
659 RTListForEach(&pDisk->ListFilterChainWrite, pFilter, VDFILTER, ListNodeChainWrite)
[50991]660 {
[54340]661 rc = pFilter->pBackend->pfnFilterWrite(pFilter->pvBackendData, uOffset, cbWrite, pIoCtx);
662 if (RT_FAILURE(rc))
663 break;
664 /* Reset S/G buffer for the next filter. */
665 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
[50991]666 }
667
668 return rc;
669}
670
671/**
672 * Applies the filter chain to the given read request.
673 *
674 * @returns VBox status code.
675 * @param pDisk The HDD container.
676 * @param uOffset The start offset of the read.
677 * @param cbRead Number of bytes read.
678 * @param pIoCtx The I/O context associated with the request.
679 */
[66250]680static int vdFilterChainApplyRead(PVDISK pDisk, uint64_t uOffset, size_t cbRead,
[50991]681 PVDIOCTX pIoCtx)
682{
683 int rc = VINF_SUCCESS;
684
[51750]685 VD_IS_LOCKED(pDisk);
686
[54340]687 /* Reset buffer before starting. */
688 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
689
690 PVDFILTER pFilter;
691 RTListForEach(&pDisk->ListFilterChainRead, pFilter, VDFILTER, ListNodeChainRead)
[50991]692 {
[54340]693 rc = pFilter->pBackend->pfnFilterRead(pFilter->pvBackendData, uOffset, cbRead, pIoCtx);
694 if (RT_FAILURE(rc))
695 break;
696 /* Reset S/G buffer for the next filter. */
[51073]697 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
[50991]698 }
699
700 return rc;
701}
702
[66250]703DECLINLINE(void) vdIoCtxRootComplete(PVDISK pDisk, PVDIOCTX pIoCtx)
[50991]704{
705 if ( RT_SUCCESS(pIoCtx->rcReq)
706 && pIoCtx->enmTxDir == VDIOCTXTXDIR_READ)
707 pIoCtx->rcReq = vdFilterChainApplyRead(pDisk, pIoCtx->Req.Io.uOffsetXferOrig,
708 pIoCtx->Req.Io.cbXferOrig, pIoCtx);
709
710 pIoCtx->Type.Root.pfnComplete(pIoCtx->Type.Root.pvUser1,
711 pIoCtx->Type.Root.pvUser2,
712 pIoCtx->rcReq);
713}
714
715/**
[44242]716 * Initialize the structure members of a given I/O context.
717 */
[66250]718DECLINLINE(void) vdIoCtxInit(PVDIOCTX pIoCtx, PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
[44242]719 uint64_t uOffset, size_t cbTransfer, PVDIMAGE pImageStart,
[90815]720 PCRTSGBUF pSgBuf, void *pvAllocation,
[44242]721 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
722{
723 pIoCtx->pDisk = pDisk;
724 pIoCtx->enmTxDir = enmTxDir;
[48848]725 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTransfer; Assert((uint32_t)cbTransfer == cbTransfer);
[44242]726 pIoCtx->Req.Io.uOffset = uOffset;
727 pIoCtx->Req.Io.cbTransfer = cbTransfer;
728 pIoCtx->Req.Io.pImageStart = pImageStart;
729 pIoCtx->Req.Io.pImageCur = pImageStart;
[45155]730 pIoCtx->Req.Io.cbBufClear = 0;
731 pIoCtx->Req.Io.pImageParentOverride = NULL;
[50991]732 pIoCtx->Req.Io.uOffsetXferOrig = uOffset;
733 pIoCtx->Req.Io.cbXferOrig = cbTransfer;
[44242]734 pIoCtx->cDataTransfersPending = 0;
735 pIoCtx->cMetaTransfersPending = 0;
736 pIoCtx->fComplete = false;
737 pIoCtx->fFlags = fFlags;
738 pIoCtx->pvAllocation = pvAllocation;
739 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
740 pIoCtx->pfnIoCtxTransferNext = NULL;
741 pIoCtx->rcReq = VINF_SUCCESS;
[45155]742 pIoCtx->pIoCtxParent = NULL;
[44242]743
744 /* There is no S/G list for a flush request. */
745 if ( enmTxDir != VDIOCTXTXDIR_FLUSH
746 && enmTxDir != VDIOCTXTXDIR_DISCARD)
[90815]747 RTSgBufClone(&pIoCtx->Req.Io.SgBuf, pSgBuf);
[44242]748 else
749 memset(&pIoCtx->Req.Io.SgBuf, 0, sizeof(RTSGBUF));
750}
751
752/**
[32370]753 * Internal: Tries to read the desired range from the given cache.
754 *
755 * @returns VBox status code.
756 * @retval VERR_VD_BLOCK_FREE if the block is not in the cache.
757 * pcbRead will be set to the number of bytes not in the cache.
758 * Everything thereafter might be in the cache.
759 * @param pCache The cache to read from.
760 * @param uOffset Offset of the virtual disk to read.
761 * @param cbRead How much to read.
[44412]762 * @param pIoCtx The I/O context to read into.
[32370]763 * @param pcbRead Where to store the number of bytes actually read.
764 * On success this indicates the number of bytes read from the cache.
765 * If VERR_VD_BLOCK_FREE is returned this gives the number of bytes
[33540]766 * which are not in the cache.
[32370]767 * In both cases everything beyond this value
768 * might or might not be in the cache.
769 */
770static int vdCacheReadHelper(PVDCACHE pCache, uint64_t uOffset,
[45155]771 size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbRead)
[32370]772{
773 int rc = VINF_SUCCESS;
774
[44412]775 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbRead=%zu pcbRead=%#p\n",
776 pCache, uOffset, pIoCtx, cbRead, pcbRead));
[32370]777
778 AssertPtr(pCache);
779 AssertPtr(pcbRead);
780
[44412]781 rc = pCache->Backend->pfnRead(pCache->pBackendData, uOffset, cbRead,
782 pIoCtx, pcbRead);
[32370]783
784 LogFlowFunc(("returns rc=%Rrc pcbRead=%zu\n", rc, *pcbRead));
785 return rc;
786}
787
788/**
789 * Internal: Writes data for the given block into the cache.
790 *
791 * @returns VBox status code.
792 * @param pCache The cache to write to.
[41785]793 * @param uOffset Offset of the virtual disk to write to the cache.
[32370]794 * @param cbWrite How much to write.
[60032]795 * @param pIoCtx The I/O context to write from.
[32370]796 * @param pcbWritten How much data could be written, optional.
797 */
[44412]798static int vdCacheWriteHelper(PVDCACHE pCache, uint64_t uOffset, size_t cbWrite,
799 PVDIOCTX pIoCtx, size_t *pcbWritten)
[32370]800{
801 int rc = VINF_SUCCESS;
802
[44412]803 LogFlowFunc(("pCache=%#p uOffset=%llu pIoCtx=%p cbWrite=%zu pcbWritten=%#p\n",
804 pCache, uOffset, pIoCtx, cbWrite, pcbWritten));
[32370]805
806 AssertPtr(pCache);
[44412]807 AssertPtr(pIoCtx);
[32370]808 Assert(cbWrite > 0);
809
810 if (pcbWritten)
[44412]811 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
812 pIoCtx, pcbWritten);
[32370]813 else
814 {
815 size_t cbWritten = 0;
816
817 do
818 {
[44412]819 rc = pCache->Backend->pfnWrite(pCache->pBackendData, uOffset, cbWrite,
820 pIoCtx, &cbWritten);
[32370]821 uOffset += cbWritten;
822 cbWrite -= cbWritten;
823 } while ( cbWrite
[44412]824 && ( RT_SUCCESS(rc)
825 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
[32370]826 }
827
828 LogFlowFunc(("returns rc=%Rrc pcbWritten=%zu\n",
829 rc, pcbWritten ? *pcbWritten : cbWrite));
830 return rc;
831}
832
833/**
[38621]834 * Creates a new empty discard state.
835 *
836 * @returns Pointer to the new discard state or NULL if out of memory.
837 */
838static PVDDISCARDSTATE vdDiscardStateCreate(void)
839{
840 PVDDISCARDSTATE pDiscard = (PVDDISCARDSTATE)RTMemAllocZ(sizeof(VDDISCARDSTATE));
841
842 if (pDiscard)
843 {
844 RTListInit(&pDiscard->ListLru);
845 pDiscard->pTreeBlocks = (PAVLRU64TREE)RTMemAllocZ(sizeof(AVLRU64TREE));
846 if (!pDiscard->pTreeBlocks)
847 {
848 RTMemFree(pDiscard);
849 pDiscard = NULL;
850 }
851 }
852
853 return pDiscard;
854}
855
856/**
857 * Removes the least recently used blocks from the waiting list until
858 * the new value is reached.
859 *
860 * @returns VBox status code.
861 * @param pDisk VD disk container.
862 * @param pDiscard The discard state.
863 * @param cbDiscardingNew How many bytes should be waiting on success.
864 * The number of bytes waiting can be less.
865 */
[66250]866static int vdDiscardRemoveBlocks(PVDISK pDisk, PVDDISCARDSTATE pDiscard, size_t cbDiscardingNew)
[38621]867{
868 int rc = VINF_SUCCESS;
869
[38657]870 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
871 pDisk, pDiscard, cbDiscardingNew));
872
[38621]873 while (pDiscard->cbDiscarding > cbDiscardingNew)
874 {
875 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
876
877 Assert(!RTListIsEmpty(&pDiscard->ListLru));
878
879 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
880 uint64_t offStart = pBlock->Core.Key;
881 uint32_t idxStart = 0;
882 size_t cbLeft = pBlock->cbDiscard;
883 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
[48848]884 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
[38621]885
886 while (cbLeft > 0)
887 {
888 int32_t idxEnd;
889 size_t cbThis = cbLeft;
890
891 if (fAllocated)
892 {
893 /* Check for the first unallocated bit. */
894 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
895 if (idxEnd != -1)
896 {
897 cbThis = (idxEnd - idxStart) * 512;
898 fAllocated = false;
899 }
900 }
901 else
902 {
903 /* Mark as unused and check for the first set bit. */
904 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
905 if (idxEnd != -1)
906 cbThis = (idxEnd - idxStart) * 512;
907
[44242]908
909 VDIOCTX IoCtx;
910 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_DISCARD, 0, 0, NULL,
911 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
[44252]912 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData,
[44242]913 &IoCtx, offStart, cbThis, NULL,
914 NULL, &cbThis, NULL,
915 VD_DISCARD_MARK_UNUSED);
[38621]916 if (RT_FAILURE(rc))
917 break;
918
919 fAllocated = true;
920 }
921
922 idxStart = idxEnd;
923 offStart += cbThis;
924 cbLeft -= cbThis;
925 }
926
927 if (RT_FAILURE(rc))
928 break;
929
930 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
[62738]931 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
[38621]932 RTListNodeRemove(&pBlock->NodeLru);
933
934 pDiscard->cbDiscarding -= pBlock->cbDiscard;
935 RTMemFree(pBlock->pbmAllocated);
936 RTMemFree(pBlock);
937 }
938
939 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
940
[38657]941 LogFlowFunc(("returns rc=%Rrc\n", rc));
[38621]942 return rc;
943}
944
945/**
946 * Destroys the current discard state, writing any waiting blocks to the image.
947 *
948 * @returns VBox status code.
949 * @param pDisk VD disk container.
950 */
[66250]951static int vdDiscardStateDestroy(PVDISK pDisk)
[38621]952{
953 int rc = VINF_SUCCESS;
954
955 if (pDisk->pDiscard)
956 {
957 rc = vdDiscardRemoveBlocks(pDisk, pDisk->pDiscard, 0 /* Remove all blocks. */);
958 AssertRC(rc);
959 RTMemFree(pDisk->pDiscard->pTreeBlocks);
960 RTMemFree(pDisk->pDiscard);
961 pDisk->pDiscard = NULL;
962 }
963
964 return rc;
965}
966
967/**
968 * Marks the given range as allocated in the image.
969 * Required if there are discards in progress and a write to a block which can get discarded
970 * is written to.
971 *
972 * @returns VBox status code.
973 * @param pDisk VD container data.
974 * @param uOffset First byte to mark as allocated.
975 * @param cbRange Number of bytes to mark as allocated.
976 */
[66250]977static int vdDiscardSetRangeAllocated(PVDISK pDisk, uint64_t uOffset, size_t cbRange)
[38621]978{
979 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
980 int rc = VINF_SUCCESS;
981
982 if (pDiscard)
983 {
984 do
985 {
986 size_t cbThisRange = cbRange;
987 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64RangeGet(pDiscard->pTreeBlocks, uOffset);
988
989 if (pBlock)
990 {
991 int32_t idxStart, idxEnd;
992
993 Assert(!(cbThisRange % 512));
994 Assert(!((uOffset - pBlock->Core.Key) % 512));
995
[38657]996 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.KeyLast - uOffset + 1);
997
[38621]998 idxStart = (uOffset - pBlock->Core.Key) / 512;
[48848]999 idxEnd = idxStart + (int32_t)(cbThisRange / 512);
[38621]1000 ASMBitSetRange(pBlock->pbmAllocated, idxStart, idxEnd);
1001 }
1002 else
1003 {
1004 pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, uOffset, true);
1005 if (pBlock)
1006 cbThisRange = RT_MIN(cbThisRange, pBlock->Core.Key - uOffset);
1007 }
1008
[38657]1009 Assert(cbRange >= cbThisRange);
1010
[38621]1011 uOffset += cbThisRange;
1012 cbRange -= cbThisRange;
1013 } while (cbRange != 0);
1014 }
1015
1016 return rc;
1017}
1018
[66250]1019DECLINLINE(PVDIOCTX) vdIoCtxAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
[28065]1020 uint64_t uOffset, size_t cbTransfer,
[90815]1021 PVDIMAGE pImageStart,PCRTSGBUF pSgBuf,
[44242]1022 void *pvAllocation, PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1023 uint32_t fFlags)
[27808]1024{
1025 PVDIOCTX pIoCtx = NULL;
1026
1027 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
[28065]1028 if (RT_LIKELY(pIoCtx))
[27808]1029 {
[44242]1030 vdIoCtxInit(pIoCtx, pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
[90815]1031 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
[27808]1032 }
1033
1034 return pIoCtx;
1035}
1036
[66250]1037DECLINLINE(PVDIOCTX) vdIoCtxRootAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
[28620]1038 uint64_t uOffset, size_t cbTransfer,
[90815]1039 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
[28620]1040 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1041 void *pvUser1, void *pvUser2,
1042 void *pvAllocation,
[44415]1043 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1044 uint32_t fFlags)
[28065]1045{
[35321]1046 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
[90815]1047 pSgBuf, pvAllocation, pfnIoCtxTransfer, fFlags);
[28065]1048
1049 if (RT_LIKELY(pIoCtx))
1050 {
1051 pIoCtx->pIoCtxParent = NULL;
1052 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1053 pIoCtx->Type.Root.pvUser1 = pvUser1;
1054 pIoCtx->Type.Root.pvUser2 = pvUser2;
1055 }
1056
[30863]1057 LogFlow(("Allocated root I/O context %#p\n", pIoCtx));
[28065]1058 return pIoCtx;
1059}
1060
[66250]1061DECLINLINE(void) vdIoCtxDiscardInit(PVDIOCTX pIoCtx, PVDISK pDisk, PCRTRANGE paRanges,
[49387]1062 unsigned cRanges, PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1063 void *pvUser1, void *pvUser2, void *pvAllocation,
1064 PFNVDIOCTXTRANSFER pfnIoCtxTransfer, uint32_t fFlags)
1065{
1066 pIoCtx->pIoCtxNext = NULL;
1067 pIoCtx->pDisk = pDisk;
1068 pIoCtx->enmTxDir = VDIOCTXTXDIR_DISCARD;
1069 pIoCtx->cDataTransfersPending = 0;
1070 pIoCtx->cMetaTransfersPending = 0;
1071 pIoCtx->fComplete = false;
1072 pIoCtx->fFlags = fFlags;
1073 pIoCtx->pvAllocation = pvAllocation;
1074 pIoCtx->pfnIoCtxTransfer = pfnIoCtxTransfer;
1075 pIoCtx->pfnIoCtxTransferNext = NULL;
1076 pIoCtx->rcReq = VINF_SUCCESS;
1077 pIoCtx->Req.Discard.paRanges = paRanges;
1078 pIoCtx->Req.Discard.cRanges = cRanges;
1079 pIoCtx->Req.Discard.idxRange = 0;
1080 pIoCtx->Req.Discard.cbDiscardLeft = 0;
1081 pIoCtx->Req.Discard.offCur = 0;
1082 pIoCtx->Req.Discard.cbThisDiscard = 0;
1083
1084 pIoCtx->pIoCtxParent = NULL;
1085 pIoCtx->Type.Root.pfnComplete = pfnComplete;
1086 pIoCtx->Type.Root.pvUser1 = pvUser1;
1087 pIoCtx->Type.Root.pvUser2 = pvUser2;
1088}
1089
[66250]1090DECLINLINE(PVDIOCTX) vdIoCtxDiscardAlloc(PVDISK pDisk, PCRTRANGE paRanges,
[38876]1091 unsigned cRanges,
1092 PFNVDASYNCTRANSFERCOMPLETE pfnComplete,
1093 void *pvUser1, void *pvUser2,
1094 void *pvAllocation,
[44400]1095 PFNVDIOCTXTRANSFER pfnIoCtxTransfer,
1096 uint32_t fFlags)
[38876]1097{
1098 PVDIOCTX pIoCtx = NULL;
1099
1100 pIoCtx = (PVDIOCTX)RTMemCacheAlloc(pDisk->hMemCacheIoCtx);
1101 if (RT_LIKELY(pIoCtx))
1102 {
[49387]1103 vdIoCtxDiscardInit(pIoCtx, pDisk, paRanges, cRanges, pfnComplete, pvUser1,
1104 pvUser2, pvAllocation, pfnIoCtxTransfer, fFlags);
[38876]1105 }
1106
1107 LogFlow(("Allocated discard I/O context %#p\n", pIoCtx));
1108 return pIoCtx;
1109}
1110
[66250]1111DECLINLINE(PVDIOCTX) vdIoCtxChildAlloc(PVDISK pDisk, VDIOCTXTXDIR enmTxDir,
[28620]1112 uint64_t uOffset, size_t cbTransfer,
[90815]1113 PVDIMAGE pImageStart, PCRTSGBUF pSgBuf,
[28620]1114 PVDIOCTX pIoCtxParent, size_t cbTransferParent,
[30044]1115 size_t cbWriteParent, void *pvAllocation,
[28620]1116 PFNVDIOCTXTRANSFER pfnIoCtxTransfer)
[28065]1117{
[35321]1118 PVDIOCTX pIoCtx = vdIoCtxAlloc(pDisk, enmTxDir, uOffset, cbTransfer, pImageStart,
[90815]1119 pSgBuf, pvAllocation, pfnIoCtxTransfer, pIoCtxParent->fFlags & ~VDIOCTX_FLAGS_DONT_FREE);
[28065]1120
[30555]1121 AssertPtr(pIoCtxParent);
1122 Assert(!pIoCtxParent->pIoCtxParent);
1123
[28065]1124 if (RT_LIKELY(pIoCtx))
1125 {
1126 pIoCtx->pIoCtxParent = pIoCtxParent;
1127 pIoCtx->Type.Child.uOffsetSaved = uOffset;
1128 pIoCtx->Type.Child.cbTransferLeftSaved = cbTransfer;
1129 pIoCtx->Type.Child.cbTransferParent = cbTransferParent;
[30044]1130 pIoCtx->Type.Child.cbWriteParent = cbWriteParent;
[28065]1131 }
1132
[30863]1133 LogFlow(("Allocated child I/O context %#p\n", pIoCtx));
[28065]1134 return pIoCtx;
1135}
1136
[30555]1137DECLINLINE(PVDIOTASK) vdIoTaskUserAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDIOCTX pIoCtx, uint32_t cbTransfer)
[27977]1138{
1139 PVDIOTASK pIoTask = NULL;
1140
[33745]1141 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
[27977]1142 if (pIoTask)
1143 {
[30555]1144 pIoTask->pIoStorage = pIoStorage;
1145 pIoTask->pfnComplete = pfnComplete;
1146 pIoTask->pvUser = pvUser;
[27977]1147 pIoTask->fMeta = false;
1148 pIoTask->Type.User.cbTransfer = cbTransfer;
[30555]1149 pIoTask->Type.User.pIoCtx = pIoCtx;
[27977]1150 }
1151
1152 return pIoTask;
1153}
1154
[30555]1155DECLINLINE(PVDIOTASK) vdIoTaskMetaAlloc(PVDIOSTORAGE pIoStorage, PFNVDXFERCOMPLETED pfnComplete, void *pvUser, PVDMETAXFER pMetaXfer)
[27977]1156{
1157 PVDIOTASK pIoTask = NULL;
1158
[33745]1159 pIoTask = (PVDIOTASK)RTMemCacheAlloc(pIoStorage->pVDIo->pDisk->hMemCacheIoTask);
[27977]1160 if (pIoTask)
1161 {
[30555]1162 pIoTask->pIoStorage = pIoStorage;
1163 pIoTask->pfnComplete = pfnComplete;
1164 pIoTask->pvUser = pvUser;
1165 pIoTask->fMeta = true;
1166 pIoTask->Type.Meta.pMetaXfer = pMetaXfer;
[27977]1167 }
1168
1169 return pIoTask;
1170}
1171
[66250]1172DECLINLINE(void) vdIoCtxFree(PVDISK pDisk, PVDIOCTX pIoCtx)
[27808]1173{
[45155]1174 Log(("Freeing I/O context %#p\n", pIoCtx));
1175
1176 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_FREE))
1177 {
1178 if (pIoCtx->pvAllocation)
1179 RTMemFree(pIoCtx->pvAllocation);
[30863]1180#ifdef DEBUG
[45155]1181 memset(&pIoCtx->pDisk, 0xff, sizeof(void *));
[30863]1182#endif
[45155]1183 RTMemCacheFree(pDisk->hMemCacheIoCtx, pIoCtx);
1184 }
[27808]1185}
1186
[66250]1187DECLINLINE(void) vdIoTaskFree(PVDISK pDisk, PVDIOTASK pIoTask)
[27977]1188{
[50991]1189#ifdef DEBUG
[45155]1190 memset(pIoTask, 0xff, sizeof(VDIOTASK));
[50991]1191#endif
[27977]1192 RTMemCacheFree(pDisk->hMemCacheIoTask, pIoTask);
1193}
1194
[28620]1195DECLINLINE(void) vdIoCtxChildReset(PVDIOCTX pIoCtx)
[27808]1196{
[28065]1197 AssertPtr(pIoCtx->pIoCtxParent);
[27808]1198
[38876]1199 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
1200 pIoCtx->Req.Io.uOffset = pIoCtx->Type.Child.uOffsetSaved;
[48848]1201 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved;
1202 Assert((uint32_t)pIoCtx->Type.Child.cbTransferLeftSaved == pIoCtx->Type.Child.cbTransferLeftSaved);
[28065]1203}
[27808]1204
[33745]1205DECLINLINE(PVDMETAXFER) vdMetaXferAlloc(PVDIOSTORAGE pIoStorage, uint64_t uOffset, size_t cb)
[30555]1206{
[73097]1207 PVDMETAXFER pMetaXfer = (PVDMETAXFER)RTMemAlloc(RT_UOFFSETOF_DYN(VDMETAXFER, abData[cb]));
[30555]1208
1209 if (RT_LIKELY(pMetaXfer))
1210 {
1211 pMetaXfer->Core.Key = uOffset;
1212 pMetaXfer->Core.KeyLast = uOffset + cb - 1;
1213 pMetaXfer->fFlags = VDMETAXFER_TXDIR_NONE;
1214 pMetaXfer->cbMeta = cb;
1215 pMetaXfer->pIoStorage = pIoStorage;
1216 pMetaXfer->cRefs = 0;
[52358]1217 pMetaXfer->pbDataShw = NULL;
[30555]1218 RTListInit(&pMetaXfer->ListIoCtxWaiting);
[52358]1219 RTListInit(&pMetaXfer->ListIoCtxShwWrites);
[30555]1220 }
1221 return pMetaXfer;
1222}
1223
[44396]1224DECLINLINE(void) vdIoCtxAddToWaitingList(volatile PVDIOCTX *ppList, PVDIOCTX pIoCtx)
[33094]1225{
[44396]1226 /* Put it on the waiting list. */
1227 PVDIOCTX pNext = ASMAtomicUoReadPtrT(ppList, PVDIOCTX);
1228 PVDIOCTX pHeadOld;
1229 pIoCtx->pIoCtxNext = pNext;
1230 while (!ASMAtomicCmpXchgExPtr(ppList, pIoCtx, pNext, &pHeadOld))
1231 {
1232 pNext = pHeadOld;
1233 Assert(pNext != pIoCtx);
1234 pIoCtx->pIoCtxNext = pNext;
1235 ASMNopPause();
1236 }
1237}
[33094]1238
[66250]1239DECLINLINE(void) vdIoCtxDefer(PVDISK pDisk, PVDIOCTX pIoCtx)
[44396]1240{
[51623]1241 LogFlowFunc(("Deferring I/O context pIoCtx=%#p\n", pIoCtx));
[33094]1242
[44242]1243 Assert(!pIoCtx->pIoCtxParent && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED));
1244 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[44396]1245 vdIoCtxAddToWaitingList(&pDisk->pIoCtxBlockedHead, pIoCtx);
[33094]1246}
1247
[28065]1248static size_t vdIoCtxCopy(PVDIOCTX pIoCtxDst, PVDIOCTX pIoCtxSrc, size_t cbData)
1249{
[38876]1250 return RTSgBufCopy(&pIoCtxDst->Req.Io.SgBuf, &pIoCtxSrc->Req.Io.SgBuf, cbData);
[28065]1251}
[27808]1252
[62873]1253#if 0 /* unused */
[28065]1254static int vdIoCtxCmp(PVDIOCTX pIoCtx1, PVDIOCTX pIoCtx2, size_t cbData)
1255{
[38876]1256 return RTSgBufCmp(&pIoCtx1->Req.Io.SgBuf, &pIoCtx2->Req.Io.SgBuf, cbData);
[27808]1257}
[62873]1258#endif
[27808]1259
[44252]1260static size_t vdIoCtxCopyTo(PVDIOCTX pIoCtx, const uint8_t *pbData, size_t cbData)
[27808]1261{
[44252]1262 return RTSgBufCopyFromBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
[27808]1263}
1264
1265static size_t vdIoCtxCopyFrom(PVDIOCTX pIoCtx, uint8_t *pbData, size_t cbData)
1266{
[44252]1267 return RTSgBufCopyToBuf(&pIoCtx->Req.Io.SgBuf, pbData, cbData);
[27808]1268}
1269
[28065]1270static size_t vdIoCtxSet(PVDIOCTX pIoCtx, uint8_t ch, size_t cbData)
[27808]1271{
[38876]1272 return RTSgBufSet(&pIoCtx->Req.Io.SgBuf, ch, cbData);
[27808]1273}
1274
[39893]1275/**
[63830]1276 * Returns whether the given I/O context has completed.
[39893]1277 *
[63830]1278 * @returns Flag whether the I/O context is complete.
1279 * @param pIoCtx The I/O context to check.
[39893]1280 */
[63830]1281DECLINLINE(bool) vdIoCtxIsComplete(PVDIOCTX pIoCtx)
[28620]1282{
[38876]1283 if ( !pIoCtx->cMetaTransfersPending
[29240]1284 && !pIoCtx->cDataTransfersPending
[28693]1285 && !pIoCtx->pfnIoCtxTransfer)
[63830]1286 return true;
[28620]1287
[29240]1288 /*
1289 * We complete the I/O context in case of an error
1290 * if there is no I/O task pending.
1291 */
1292 if ( RT_FAILURE(pIoCtx->rcReq)
1293 && !pIoCtx->cMetaTransfersPending
1294 && !pIoCtx->cDataTransfersPending)
[63830]1295 return true;
[29240]1296
[63830]1297 return false;
1298}
1299
1300/**
1301 * Returns whether the given I/O context is blocked due to a metadata transfer
1302 * or because the backend blocked it.
1303 *
1304 * @returns Flag whether the I/O context is blocked.
1305 * @param pIoCtx The I/O context to check.
1306 */
1307DECLINLINE(bool) vdIoCtxIsBlocked(PVDIOCTX pIoCtx)
1308{
[30863]1309 /* Don't change anything if there is a metadata transfer pending or we are blocked. */
1310 if ( pIoCtx->cMetaTransfersPending
[44242]1311 || (pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
[63830]1312 return true;
[30555]1313
[63830]1314 return false;
1315}
1316
1317/**
1318 * Process the I/O context, core method which assumes that the I/O context
1319 * acquired the lock.
1320 *
1321 * @returns VBox status code.
1322 * @param pIoCtx I/O context to process.
1323 */
1324static int vdIoCtxProcessLocked(PVDIOCTX pIoCtx)
1325{
1326 int rc = VINF_SUCCESS;
1327
1328 VD_IS_LOCKED(pIoCtx->pDisk);
1329
1330 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
1331
1332 if (!vdIoCtxIsComplete(pIoCtx))
[28620]1333 {
[63830]1334 if (!vdIoCtxIsBlocked(pIoCtx))
[28620]1335 {
[63830]1336 if (pIoCtx->pfnIoCtxTransfer)
1337 {
1338 /* Call the transfer function advancing to the next while there is no error. */
1339 while ( pIoCtx->pfnIoCtxTransfer
1340 && !pIoCtx->cMetaTransfersPending
1341 && RT_SUCCESS(rc))
1342 {
1343 LogFlowFunc(("calling transfer function %#p\n", pIoCtx->pfnIoCtxTransfer));
1344 rc = pIoCtx->pfnIoCtxTransfer(pIoCtx);
[28620]1345
[63830]1346 /* Advance to the next part of the transfer if the current one succeeded. */
1347 if (RT_SUCCESS(rc))
1348 {
1349 pIoCtx->pfnIoCtxTransfer = pIoCtx->pfnIoCtxTransferNext;
1350 pIoCtx->pfnIoCtxTransferNext = NULL;
1351 }
1352 }
1353 }
1354
1355 if ( RT_SUCCESS(rc)
1356 && !pIoCtx->cMetaTransfersPending
1357 && !pIoCtx->cDataTransfersPending
1358 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_BLOCKED))
1359 rc = VINF_VD_ASYNC_IO_FINISHED;
1360 else if ( RT_SUCCESS(rc)
1361 || rc == VERR_VD_NOT_ENOUGH_METADATA
1362 || rc == VERR_VD_IOCTX_HALT)
1363 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1364 else if ( RT_FAILURE(rc)
1365 && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
[28620]1366 {
[63830]1367 ASMAtomicCmpXchgS32(&pIoCtx->rcReq, rc, VINF_SUCCESS);
1368
1369 /*
1370 * The I/O context completed if we have an error and there is no data
1371 * or meta data transfer pending.
1372 */
1373 if ( !pIoCtx->cMetaTransfersPending
1374 && !pIoCtx->cDataTransfersPending)
1375 rc = VINF_VD_ASYNC_IO_FINISHED;
1376 else
1377 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
[28620]1378 }
1379 }
[49387]1380 else
1381 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
[29240]1382 }
[63830]1383 else
1384 rc = VINF_VD_ASYNC_IO_FINISHED;
[28620]1385
[38876]1386 LogFlowFunc(("pIoCtx=%#p rc=%Rrc cDataTransfersPending=%u cMetaTransfersPending=%u fComplete=%RTbool\n",
1387 pIoCtx, rc, pIoCtx->cDataTransfersPending, pIoCtx->cMetaTransfersPending,
[28620]1388 pIoCtx->fComplete));
1389
1390 return rc;
1391}
1392
[39893]1393/**
1394 * Processes the list of waiting I/O contexts.
1395 *
[49387]1396 * @returns VBox status code, only valid if pIoCtxRc is not NULL, treat as void
1397 * function otherwise.
[39893]1398 * @param pDisk The disk structure.
1399 * @param pIoCtxRc An I/O context handle which waits on the list. When processed
1400 * The status code is returned. NULL if there is no I/O context
1401 * to return the status code for.
1402 */
[66250]1403static int vdDiskProcessWaitingIoCtx(PVDISK pDisk, PVDIOCTX pIoCtxRc)
[39893]1404{
[49387]1405 int rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
[39893]1406
1407 LogFlowFunc(("pDisk=%#p pIoCtxRc=%#p\n", pDisk, pIoCtxRc));
1408
[44396]1409 VD_IS_LOCKED(pDisk);
[39893]1410
1411 /* Get the waiting list and process it in FIFO order. */
1412 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxHead, NULL, PVDIOCTX);
1413
1414 /* Reverse it. */
1415 PVDIOCTX pCur = pIoCtxHead;
1416 pIoCtxHead = NULL;
1417 while (pCur)
1418 {
1419 PVDIOCTX pInsert = pCur;
1420 pCur = pCur->pIoCtxNext;
1421 pInsert->pIoCtxNext = pIoCtxHead;
1422 pIoCtxHead = pInsert;
1423 }
1424
1425 /* Process now. */
1426 pCur = pIoCtxHead;
1427 while (pCur)
1428 {
1429 int rcTmp;
1430 PVDIOCTX pTmp = pCur;
1431
1432 pCur = pCur->pIoCtxNext;
1433 pTmp->pIoCtxNext = NULL;
1434
[45155]1435 /*
1436 * Need to clear the sync flag here if there is a new I/O context
1437 * with it set and the context is not given in pIoCtxRc.
1438 * This happens most likely on a different thread and that one shouldn't
1439 * process the context synchronously.
1440 *
1441 * The thread who issued the context will wait on the event semaphore
1442 * anyway which is signalled when the completion handler is called.
1443 */
1444 if ( pTmp->fFlags & VDIOCTX_FLAGS_SYNC
1445 && pTmp != pIoCtxRc)
1446 pTmp->fFlags &= ~VDIOCTX_FLAGS_SYNC;
1447
[39893]1448 rcTmp = vdIoCtxProcessLocked(pTmp);
1449 if (pTmp == pIoCtxRc)
1450 {
[51750]1451 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1452 && RT_SUCCESS(pTmp->rcReq)
1453 && pTmp->enmTxDir == VDIOCTXTXDIR_READ)
1454 {
1455 int rc2 = vdFilterChainApplyRead(pDisk, pTmp->Req.Io.uOffsetXferOrig,
1456 pTmp->Req.Io.cbXferOrig, pTmp);
1457 if (RT_FAILURE(rc2))
1458 rcTmp = rc2;
1459 }
1460
[39893]1461 /* The given I/O context was processed, pass the return code to the caller. */
[49388]1462 if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1463 && (pTmp->fFlags & VDIOCTX_FLAGS_SYNC))
[49387]1464 rc = pTmp->rcReq;
1465 else
1466 rc = rcTmp;
[39893]1467 }
1468 else if ( rcTmp == VINF_VD_ASYNC_IO_FINISHED
1469 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
1470 {
1471 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1472 vdThreadFinishWrite(pDisk);
[66665]1473
1474 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
[50991]1475 vdIoCtxRootComplete(pDisk, pTmp);
[66665]1476
1477 if (fFreeCtx)
1478 vdIoCtxFree(pDisk, pTmp);
[39893]1479 }
1480 }
1481
1482 LogFlowFunc(("returns rc=%Rrc\n", rc));
1483 return rc;
1484}
1485
1486/**
[44396]1487 * Processes the list of blocked I/O contexts.
[39893]1488 *
[44396]1489 * @param pDisk The disk structure.
[39893]1490 */
[66250]1491static void vdDiskProcessBlockedIoCtx(PVDISK pDisk)
[39893]1492{
[44396]1493 LogFlowFunc(("pDisk=%#p\n", pDisk));
[39893]1494
[44396]1495 VD_IS_LOCKED(pDisk);
[39893]1496
[44396]1497 /* Get the waiting list and process it in FIFO order. */
1498 PVDIOCTX pIoCtxHead = ASMAtomicXchgPtrT(&pDisk->pIoCtxBlockedHead, NULL, PVDIOCTX);
[39893]1499
[44396]1500 /* Reverse it. */
1501 PVDIOCTX pCur = pIoCtxHead;
1502 pIoCtxHead = NULL;
1503 while (pCur)
1504 {
1505 PVDIOCTX pInsert = pCur;
1506 pCur = pCur->pIoCtxNext;
1507 pInsert->pIoCtxNext = pIoCtxHead;
1508 pIoCtxHead = pInsert;
1509 }
[39893]1510
[44396]1511 /* Process now. */
1512 pCur = pIoCtxHead;
1513 while (pCur)
[39893]1514 {
[44396]1515 int rc;
1516 PVDIOCTX pTmp = pCur;
[39893]1517
[44396]1518 pCur = pCur->pIoCtxNext;
1519 pTmp->pIoCtxNext = NULL;
1520
1521 Assert(!pTmp->pIoCtxParent);
1522 Assert(pTmp->fFlags & VDIOCTX_FLAGS_BLOCKED);
1523 pTmp->fFlags &= ~VDIOCTX_FLAGS_BLOCKED;
1524
1525 rc = vdIoCtxProcessLocked(pTmp);
1526 if ( rc == VINF_VD_ASYNC_IO_FINISHED
1527 && ASMAtomicCmpXchgBool(&pTmp->fComplete, true, false))
[39893]1528 {
[44396]1529 LogFlowFunc(("Waiting I/O context completed pTmp=%#p\n", pTmp));
1530 vdThreadFinishWrite(pDisk);
[66665]1531
1532 bool fFreeCtx = RT_BOOL(!(pTmp->fFlags & VDIOCTX_FLAGS_DONT_FREE));
[50991]1533 vdIoCtxRootComplete(pDisk, pTmp);
[66665]1534 if (fFreeCtx)
1535 vdIoCtxFree(pDisk, pTmp);
[39893]1536 }
1537 }
1538
[44396]1539 LogFlowFunc(("returns\n"));
[39893]1540}
1541
1542/**
1543 * Processes the I/O context trying to lock the criticial section.
1544 * The context is deferred if the critical section is busy.
1545 *
1546 * @returns VBox status code.
1547 * @param pIoCtx The I/O context to process.
1548 */
1549static int vdIoCtxProcessTryLockDefer(PVDIOCTX pIoCtx)
1550{
1551 int rc = VINF_SUCCESS;
[66250]1552 PVDISK pDisk = pIoCtx->pDisk;
[39893]1553
[45155]1554 Log(("Defer pIoCtx=%#p\n", pIoCtx));
[39893]1555
1556 /* Put it on the waiting list first. */
[44396]1557 vdIoCtxAddToWaitingList(&pDisk->pIoCtxHead, pIoCtx);
[39893]1558
[44396]1559 if (ASMAtomicCmpXchgBool(&pDisk->fLocked, true, false))
[39893]1560 {
1561 /* Leave it again, the context will be processed just before leaving the lock. */
[44396]1562 LogFlowFunc(("Successfully acquired the lock\n"));
1563 rc = vdDiskUnlock(pDisk, pIoCtx);
[39893]1564 }
1565 else
1566 {
[44396]1567 LogFlowFunc(("Lock is held\n"));
[39893]1568 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
1569 }
1570
1571 return rc;
1572}
1573
[44415]1574/**
1575 * Process the I/O context in a synchronous manner, waiting
1576 * for it to complete.
1577 *
1578 * @returns VBox status code of the completed request.
[49387]1579 * @param pIoCtx The sync I/O context.
1580 * @param hEventComplete Event sempahore to wait on for completion.
[44415]1581 */
[49387]1582static int vdIoCtxProcessSync(PVDIOCTX pIoCtx, RTSEMEVENT hEventComplete)
[44415]1583{
1584 int rc = VINF_SUCCESS;
[66250]1585 PVDISK pDisk = pIoCtx->pDisk;
[44415]1586
1587 LogFlowFunc(("pIoCtx=%p\n", pIoCtx));
1588
[49387]1589 AssertMsg(pIoCtx->fFlags & (VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE),
[44415]1590 ("I/O context is not marked as synchronous\n"));
1591
1592 rc = vdIoCtxProcessTryLockDefer(pIoCtx);
1593 if (rc == VINF_VD_ASYNC_IO_FINISHED)
1594 rc = VINF_SUCCESS;
1595
1596 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1597 {
[49387]1598 rc = RTSemEventWait(hEventComplete, RT_INDEFINITE_WAIT);
[44415]1599 AssertRC(rc);
1600 }
1601
[49387]1602 rc = pIoCtx->rcReq;
1603 vdIoCtxFree(pDisk, pIoCtx);
1604
[44415]1605 return rc;
1606}
1607
[66250]1608DECLINLINE(bool) vdIoCtxIsDiskLockOwner(PVDISK pDisk, PVDIOCTX pIoCtx)
[36132]1609{
[44396]1610 return pDisk->pIoCtxLockOwner == pIoCtx;
[36132]1611}
1612
[66250]1613static int vdIoCtxLockDisk(PVDISK pDisk, PVDIOCTX pIoCtx)
[33094]1614{
1615 int rc = VINF_SUCCESS;
1616
[44396]1617 VD_IS_LOCKED(pDisk);
1618
[36221]1619 LogFlowFunc(("pDisk=%#p pIoCtx=%#p\n", pDisk, pIoCtx));
1620
[44396]1621 if (!ASMAtomicCmpXchgPtr(&pDisk->pIoCtxLockOwner, pIoCtx, NIL_VDIOCTX))
[33094]1622 {
[36132]1623 Assert(pDisk->pIoCtxLockOwner != pIoCtx); /* No nesting allowed. */
[44396]1624 vdIoCtxDefer(pDisk, pIoCtx);
1625 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
[33094]1626 }
1627
[36221]1628 LogFlowFunc(("returns -> %Rrc\n", rc));
[33094]1629 return rc;
1630}
1631
[66250]1632static void vdIoCtxUnlockDisk(PVDISK pDisk, PVDIOCTX pIoCtx, bool fProcessBlockedReqs)
[33094]1633{
[62738]1634 RT_NOREF1(pIoCtx);
[44396]1635 LogFlowFunc(("pDisk=%#p pIoCtx=%#p fProcessBlockedReqs=%RTbool\n",
1636 pDisk, pIoCtx, fProcessBlockedReqs));
[36221]1637
[44396]1638 VD_IS_LOCKED(pDisk);
1639
[36221]1640 LogFlow(("Unlocking disk lock owner is %#p\n", pDisk->pIoCtxLockOwner));
[36132]1641 Assert(pDisk->pIoCtxLockOwner == pIoCtx);
[44396]1642 ASMAtomicXchgPtrT(&pDisk->pIoCtxLockOwner, NIL_VDIOCTX, PVDIOCTX);
[33094]1643
[44396]1644 if (fProcessBlockedReqs)
[33094]1645 {
[44396]1646 /* Process any blocked writes if the current request didn't caused another growing. */
1647 vdDiskProcessBlockedIoCtx(pDisk);
[33094]1648 }
[36221]1649
1650 LogFlowFunc(("returns\n"));
[33094]1651}
1652
[2379]1653/**
[45155]1654 * Internal: Reads a given amount of data from the image chain of the disk.
1655 **/
[66250]1656static int vdDiskReadHelper(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
[45155]1657 uint64_t uOffset, size_t cbRead, PVDIOCTX pIoCtx, size_t *pcbThisRead)
1658{
[62738]1659 RT_NOREF1(pDisk);
[45155]1660 int rc = VINF_SUCCESS;
1661 size_t cbThisRead = cbRead;
1662
1663 AssertPtr(pcbThisRead);
1664
1665 *pcbThisRead = 0;
1666
1667 /*
1668 * Try to read from the given image.
1669 * If the block is not allocated read from override chain if present.
1670 */
1671 rc = pImage->Backend->pfnRead(pImage->pBackendData,
1672 uOffset, cbThisRead, pIoCtx,
1673 &cbThisRead);
1674
1675 if (rc == VERR_VD_BLOCK_FREE)
1676 {
1677 for (PVDIMAGE pCurrImage = pImageParentOverride ? pImageParentOverride : pImage->pPrev;
1678 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
1679 pCurrImage = pCurrImage->pPrev)
1680 {
1681 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1682 uOffset, cbThisRead, pIoCtx,
1683 &cbThisRead);
1684 }
1685 }
1686
1687 if (RT_SUCCESS(rc) || rc == VERR_VD_BLOCK_FREE)
1688 *pcbThisRead = cbThisRead;
1689
1690 return rc;
1691}
1692
1693/**
[27808]1694 * internal: read the specified amount of data in whatever blocks the backend
1695 * will give us - async version.
1696 */
[57388]1697static DECLCALLBACK(int) vdReadHelperAsync(PVDIOCTX pIoCtx)
[27808]1698{
1699 int rc;
[66250]1700 PVDISK pDisk = pIoCtx->pDisk;
[45155]1701 size_t cbToRead = pIoCtx->Req.Io.cbTransfer;
1702 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
1703 PVDIMAGE pCurrImage = pIoCtx->Req.Io.pImageCur;
1704 PVDIMAGE pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
1705 unsigned cImagesRead = pIoCtx->Req.Io.cImagesRead;
[27808]1706 size_t cbThisRead;
1707
[51623]1708 /*
1709 * Check whether there is a full block write in progress which was not allocated.
1710 * Defer I/O if the range interferes but only if it does not belong to the
1711 * write doing the allocation.
1712 */
1713 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
1714 && uOffset >= pDisk->uOffsetStartLocked
1715 && uOffset < pDisk->uOffsetEndLocked
1716 && ( !pIoCtx->pIoCtxParent
1717 || pIoCtx->pIoCtxParent != pDisk->pIoCtxLockOwner))
1718 {
1719 Log(("Interferring read while allocating a new block => deferring read\n"));
1720 vdIoCtxDefer(pDisk, pIoCtx);
[52268]1721 return VERR_VD_ASYNC_IO_IN_PROGRESS;
[51623]1722 }
1723
[27808]1724 /* Loop until all reads started or we have a backend which needs to read metadata. */
1725 do
1726 {
1727 /* Search for image with allocated block. Do not attempt to read more
1728 * than the previous reads marked as valid. Otherwise this would return
1729 * stale data when different block sizes are used for the images. */
[28620]1730 cbThisRead = cbToRead;
[27808]1731
[45155]1732 if ( pDisk->pCache
1733 && !pImageParentOverride)
1734 {
1735 rc = vdCacheReadHelper(pDisk->pCache, uOffset, cbThisRead,
1736 pIoCtx, &cbThisRead);
1737 if (rc == VERR_VD_BLOCK_FREE)
1738 {
1739 rc = vdDiskReadHelper(pDisk, pCurrImage, NULL, uOffset, cbThisRead,
1740 pIoCtx, &cbThisRead);
[27808]1741
[45155]1742 /* If the read was successful, write the data back into the cache. */
1743 if ( RT_SUCCESS(rc)
[46712]1744 && pIoCtx->fFlags & VDIOCTX_FLAGS_READ_UPDATE_CACHE)
[45155]1745 {
1746 rc = vdCacheWriteHelper(pDisk->pCache, uOffset, cbThisRead,
1747 pIoCtx, NULL);
1748 }
1749 }
1750 }
1751 else
[27808]1752 {
[45155]1753 /*
1754 * Try to read from the given image.
1755 * If the block is not allocated read from override chain if present.
1756 */
1757 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1758 uOffset, cbThisRead, pIoCtx,
1759 &cbThisRead);
1760
1761 if ( rc == VERR_VD_BLOCK_FREE
1762 && cImagesRead != 1)
[27808]1763 {
[45155]1764 unsigned cImagesToProcess = cImagesRead;
1765
1766 pCurrImage = pImageParentOverride ? pImageParentOverride : pCurrImage->pPrev;
1767 pIoCtx->Req.Io.pImageParentOverride = NULL;
1768
1769 while (pCurrImage && rc == VERR_VD_BLOCK_FREE)
1770 {
1771 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
1772 uOffset, cbThisRead,
1773 pIoCtx, &cbThisRead);
1774 if (cImagesToProcess == 1)
1775 break;
1776 else if (cImagesToProcess > 0)
1777 cImagesToProcess--;
1778
1779 if (rc == VERR_VD_BLOCK_FREE)
1780 pCurrImage = pCurrImage->pPrev;
1781 }
[27808]1782 }
1783 }
1784
[31573]1785 /* The task state will be updated on success already, don't do it here!. */
1786 if (rc == VERR_VD_BLOCK_FREE)
[27808]1787 {
1788 /* No image in the chain contains the data for the block. */
[48848]1789 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisRead); Assert(cbThisRead == (uint32_t)cbThisRead);
[45155]1790
1791 /* Fill the free space with 0 if we are told to do so
1792 * or a previous read returned valid data. */
1793 if (pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS)
1794 vdIoCtxSet(pIoCtx, '\0', cbThisRead);
1795 else
1796 pIoCtx->Req.Io.cbBufClear += cbThisRead;
1797
1798 if (pIoCtx->Req.Io.pImageCur->uOpenFlags & VD_OPEN_FLAGS_INFORM_ABOUT_ZERO_BLOCKS)
1799 rc = VINF_VD_NEW_ZEROED_BLOCK;
1800 else
1801 rc = VINF_SUCCESS;
[27808]1802 }
[31586]1803 else if (rc == VERR_VD_IOCTX_HALT)
1804 {
1805 uOffset += cbThisRead;
1806 cbToRead -= cbThisRead;
[44242]1807 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[31586]1808 }
[45155]1809 else if ( RT_SUCCESS(rc)
1810 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
1811 {
1812 /* First not free block, fill the space before with 0. */
1813 if ( pIoCtx->Req.Io.cbBufClear
1814 && !(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1815 {
1816 RTSGBUF SgBuf;
1817 RTSgBufClone(&SgBuf, &pIoCtx->Req.Io.SgBuf);
1818 RTSgBufReset(&SgBuf);
1819 RTSgBufSet(&SgBuf, 0, pIoCtx->Req.Io.cbBufClear);
1820 pIoCtx->Req.Io.cbBufClear = 0;
1821 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1822 }
1823 rc = VINF_SUCCESS;
1824 }
[27808]1825
[28065]1826 if (RT_FAILURE(rc))
1827 break;
[27808]1828
[28620]1829 cbToRead -= cbThisRead;
1830 uOffset += cbThisRead;
[39413]1831 pCurrImage = pIoCtx->Req.Io.pImageStart; /* Start with the highest image in the chain. */
[28620]1832 } while (cbToRead != 0 && RT_SUCCESS(rc));
[27808]1833
[31586]1834 if ( rc == VERR_VD_NOT_ENOUGH_METADATA
1835 || rc == VERR_VD_IOCTX_HALT)
[27808]1836 {
[28620]1837 /* Save the current state. */
[38876]1838 pIoCtx->Req.Io.uOffset = uOffset;
1839 pIoCtx->Req.Io.cbTransfer = cbToRead;
1840 pIoCtx->Req.Io.pImageCur = pCurrImage ? pCurrImage : pIoCtx->Req.Io.pImageStart;
[27808]1841 }
1842
[45155]1843 return (!(pIoCtx->fFlags & VDIOCTX_FLAGS_ZERO_FREE_BLOCKS))
1844 ? VERR_VD_BLOCK_FREE
1845 : rc;
[27808]1846}
1847
1848/**
[19176]1849 * internal: parent image read wrapper for compacting.
1850 */
[57388]1851static DECLCALLBACK(int) vdParentRead(void *pvUser, uint64_t uOffset, void *pvBuf,
1852 size_t cbRead)
[19176]1853{
1854 PVDPARENTSTATEDESC pParentState = (PVDPARENTSTATEDESC)pvUser;
[45155]1855
1856 /** @todo
1857 * Only used for compaction so far which is not possible to mix with async I/O.
1858 * Needs to be changed if we want to support online compaction of images.
1859 */
1860 bool fLocked = ASMAtomicXchgBool(&pParentState->pDisk->fLocked, true);
1861 AssertMsgReturn(!fLocked,
1862 ("Calling synchronous parent read while another thread holds the disk lock\n"),
1863 VERR_VD_INVALID_STATE);
1864
1865 /* Fake an I/O context. */
1866 RTSGSEG Segment;
1867 RTSGBUF SgBuf;
1868 VDIOCTX IoCtx;
1869
1870 Segment.pvSeg = pvBuf;
1871 Segment.cbSeg = cbRead;
1872 RTSgBufInit(&SgBuf, &Segment, 1);
1873 vdIoCtxInit(&IoCtx, pParentState->pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pParentState->pImage,
[50852]1874 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_ZERO_FREE_BLOCKS);
[45155]1875 int rc = vdReadHelperAsync(&IoCtx);
1876 ASMAtomicXchgBool(&pParentState->pDisk->fLocked, false);
1877 return rc;
[19176]1878}
1879
1880/**
[45155]1881 * Extended version of vdReadHelper(), implementing certain optimizations
1882 * for image cloning.
1883 *
1884 * @returns VBox status code.
1885 * @param pDisk The disk to read from.
1886 * @param pImage The image to start reading from.
1887 * @param pImageParentOverride The parent image to read from
1888 * if the starting image returns a free block.
1889 * If NULL is passed the real parent of the image
1890 * in the chain is used.
1891 * @param uOffset Offset in the disk to start reading from.
1892 * @param pvBuf Where to store the read data.
1893 * @param cbRead How much to read.
1894 * @param fZeroFreeBlocks Flag whether free blocks should be zeroed.
1895 * If false and no image has data for sepcified
1896 * range VERR_VD_BLOCK_FREE is returned.
1897 * Note that unallocated blocks are still zeroed
1898 * if at least one image has valid data for a part
1899 * of the range.
1900 * @param fUpdateCache Flag whether to update the attached cache if
1901 * available.
1902 * @param cImagesRead Number of images in the chain to read until
1903 * the read is cut off. A value of 0 disables the cut off.
1904 */
[66250]1905static int vdReadHelperEx(PVDISK pDisk, PVDIMAGE pImage, PVDIMAGE pImageParentOverride,
[45155]1906 uint64_t uOffset, void *pvBuf, size_t cbRead,
1907 bool fZeroFreeBlocks, bool fUpdateCache, unsigned cImagesRead)
1908{
[49387]1909 int rc = VINF_SUCCESS;
[45155]1910 uint32_t fFlags = VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
1911 RTSGSEG Segment;
1912 RTSGBUF SgBuf;
1913 VDIOCTX IoCtx;
[49387]1914 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
[45155]1915
[49387]1916 rc = RTSemEventCreate(&hEventComplete);
1917 if (RT_FAILURE(rc))
1918 return rc;
1919
[45155]1920 if (fZeroFreeBlocks)
1921 fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
1922 if (fUpdateCache)
[46712]1923 fFlags |= VDIOCTX_FLAGS_READ_UPDATE_CACHE;
[45155]1924
1925 Segment.pvSeg = pvBuf;
1926 Segment.cbSeg = cbRead;
1927 RTSgBufInit(&SgBuf, &Segment, 1);
1928 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_READ, uOffset, cbRead, pImage, &SgBuf,
1929 NULL, vdReadHelperAsync, fFlags);
1930
1931 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
1932 IoCtx.Req.Io.cImagesRead = cImagesRead;
1933 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
1934 IoCtx.Type.Root.pvUser1 = pDisk;
[49387]1935 IoCtx.Type.Root.pvUser2 = hEventComplete;
1936 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
1937 RTSemEventDestroy(hEventComplete);
1938 return rc;
[45155]1939}
1940
1941/**
1942 * internal: read the specified amount of data in whatever blocks the backend
1943 * will give us.
1944 */
[66250]1945static int vdReadHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
[45155]1946 void *pvBuf, size_t cbRead, bool fUpdateCache)
1947{
1948 return vdReadHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbRead,
1949 true /* fZeroFreeBlocks */, fUpdateCache, 0);
1950}
1951
1952/**
[2379]1953 * internal: mark the disk as not modified.
1954 */
[66250]1955static void vdResetModifiedFlag(PVDISK pDisk)
[2379]1956{
1957 if (pDisk->uModified & VD_IMAGE_MODIFIED_FLAG)
1958 {
1959 /* generate new last-modified uuid */
[7900]1960 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
[2379]1961 {
1962 RTUUID Uuid;
1963
1964 RTUuidCreate(&Uuid);
[32536]1965 pDisk->pLast->Backend->pfnSetModificationUuid(pDisk->pLast->pBackendData,
[7277]1966 &Uuid);
[32370]1967
1968 if (pDisk->pCache)
[32536]1969 pDisk->pCache->Backend->pfnSetModificationUuid(pDisk->pCache->pBackendData,
[32370]1970 &Uuid);
[2379]1971 }
1972
1973 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FLAG;
1974 }
1975}
1976
1977/**
1978 * internal: mark the disk as modified.
1979 */
[66250]1980static void vdSetModifiedFlag(PVDISK pDisk)
[2379]1981{
1982 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
1983 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
1984 {
1985 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
1986
1987 /* First modify, so create a UUID and ensure it's written to disk. */
1988 vdResetModifiedFlag(pDisk);
1989
[32370]1990 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
[44242]1991 {
1992 VDIOCTX IoCtx;
1993 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_FLUSH, 0, 0, NULL,
1994 NULL, NULL, NULL, VDIOCTX_FLAGS_SYNC);
[44252]1995 pDisk->pLast->Backend->pfnFlush(pDisk->pLast->pBackendData, &IoCtx);
[44242]1996 }
[2379]1997 }
1998}
1999
2000/**
[6291]2001 * internal: write buffer to the image, taking care of block boundaries and
2002 * write optimizations.
2003 */
[66250]2004static int vdWriteHelperEx(PVDISK pDisk, PVDIMAGE pImage,
[38203]2005 PVDIMAGE pImageParentOverride, uint64_t uOffset,
2006 const void *pvBuf, size_t cbWrite,
[46712]2007 uint32_t fFlags, unsigned cImagesRead)
[6291]2008{
[49387]2009 int rc = VINF_SUCCESS;
[45155]2010 RTSGSEG Segment;
[44242]2011 RTSGBUF SgBuf;
2012 VDIOCTX IoCtx;
[49387]2013 RTSEMEVENT hEventComplete = NIL_RTSEMEVENT;
[6291]2014
[49387]2015 rc = RTSemEventCreate(&hEventComplete);
2016 if (RT_FAILURE(rc))
2017 return rc;
2018
[46712]2019 fFlags |= VDIOCTX_FLAGS_SYNC | VDIOCTX_FLAGS_DONT_FREE;
[44242]2020
[45155]2021 Segment.pvSeg = (void *)pvBuf;
2022 Segment.cbSeg = cbWrite;
2023 RTSgBufInit(&SgBuf, &Segment, 1);
2024 vdIoCtxInit(&IoCtx, pDisk, VDIOCTXTXDIR_WRITE, uOffset, cbWrite, pImage, &SgBuf,
2025 NULL, vdWriteHelperAsync, fFlags);
[6291]2026
[45155]2027 IoCtx.Req.Io.pImageParentOverride = pImageParentOverride;
2028 IoCtx.Req.Io.cImagesRead = cImagesRead;
2029 IoCtx.pIoCtxParent = NULL;
2030 IoCtx.Type.Root.pfnComplete = vdIoCtxSyncComplete;
2031 IoCtx.Type.Root.pvUser1 = pDisk;
[49387]2032 IoCtx.Type.Root.pvUser2 = hEventComplete;
[50991]2033 if (RT_SUCCESS(rc))
2034 rc = vdIoCtxProcessSync(&IoCtx, hEventComplete);
[49387]2035
2036 RTSemEventDestroy(hEventComplete);
2037 return rc;
[6291]2038}
2039
[27977]2040/**
[38203]2041 * internal: write buffer to the image, taking care of block boundaries and
2042 * write optimizations.
2043 */
[66250]2044static int vdWriteHelper(PVDISK pDisk, PVDIMAGE pImage, uint64_t uOffset,
[46712]2045 const void *pvBuf, size_t cbWrite, uint32_t fFlags)
[38203]2046{
2047 return vdWriteHelperEx(pDisk, pImage, NULL, uOffset, pvBuf, cbWrite,
[46712]2048 fFlags, 0);
[38203]2049}
2050
2051/**
2052 * Internal: Copies the content of one disk to another one applying optimizations
2053 * to speed up the copy process if possible.
2054 */
[100078]2055static int vdCopyHelper(PVDISK pDiskFrom, PVDIMAGE pImageFrom, PVDISK pDiskTo, PVDIMAGE pImageTo,
[38203]2056 uint64_t cbSize, unsigned cImagesFromRead, unsigned cImagesToRead,
[38469]2057 bool fSuppressRedundantIo, PVDINTERFACEPROGRESS pIfProgress,
2058 PVDINTERFACEPROGRESS pDstIfProgress)
[38203]2059{
2060 int rc = VINF_SUCCESS;
2061 int rc2;
2062 uint64_t uOffset = 0;
2063 uint64_t cbRemaining = cbSize;
2064 void *pvBuf = NULL;
2065 bool fLockReadFrom = false;
2066 bool fLockWriteTo = false;
[54340]2067 bool fBlockwiseCopy = false;
[38203]2068 unsigned uProgressOld = 0;
2069
[100078]2070 LogFlowFunc(("pDiskFrom=%#p pImageFrom=%#p pDiskTo=%#p pImageTo=%#p cbSize=%llu cImagesFromRead=%u cImagesToRead=%u fSuppressRedundantIo=%RTbool pIfProgress=%#p pDstIfProgress=%#p\n",
2071 pDiskFrom, pImageFrom, pDiskTo, pImageTo, cbSize, cImagesFromRead, cImagesToRead, fSuppressRedundantIo, pDstIfProgress, pDstIfProgress));
[38203]2072
[54340]2073 if ( (fSuppressRedundantIo || (cImagesFromRead > 0))
2074 && RTListIsEmpty(&pDiskFrom->ListFilterChainRead))
2075 fBlockwiseCopy = true;
2076
[38203]2077 /* Allocate tmp buffer. */
2078 pvBuf = RTMemTmpAlloc(VD_MERGE_BUFFER_SIZE);
2079 if (!pvBuf)
2080 return rc;
2081
2082 do
2083 {
2084 size_t cbThisRead = RT_MIN(VD_MERGE_BUFFER_SIZE, cbRemaining);
2085
2086 /* Note that we don't attempt to synchronize cross-disk accesses.
2087 * It wouldn't be very difficult to do, just the lock order would
2088 * need to be defined somehow to prevent deadlocks. Postpone such
2089 * magic as there is no use case for this. */
2090
2091 rc2 = vdThreadStartRead(pDiskFrom);
2092 AssertRC(rc2);
2093 fLockReadFrom = true;
2094
2095 if (fBlockwiseCopy)
2096 {
[44242]2097 RTSGSEG SegmentBuf;
2098 RTSGBUF SgBuf;
2099 VDIOCTX IoCtx;
2100
2101 SegmentBuf.pvSeg = pvBuf;
2102 SegmentBuf.cbSeg = VD_MERGE_BUFFER_SIZE;
2103 RTSgBufInit(&SgBuf, &SegmentBuf, 1);
2104 vdIoCtxInit(&IoCtx, pDiskFrom, VDIOCTXTXDIR_READ, 0, 0, NULL,
2105 &SgBuf, NULL, NULL, VDIOCTX_FLAGS_SYNC);
2106
[38203]2107 /* Read the source data. */
[44252]2108 rc = pImageFrom->Backend->pfnRead(pImageFrom->pBackendData,
[46613]2109 uOffset, cbThisRead, &IoCtx,
2110 &cbThisRead);
[38203]2111
2112 if ( rc == VERR_VD_BLOCK_FREE
2113 && cImagesFromRead != 1)
2114 {
2115 unsigned cImagesToProcess = cImagesFromRead;
2116
2117 for (PVDIMAGE pCurrImage = pImageFrom->pPrev;
2118 pCurrImage != NULL && rc == VERR_VD_BLOCK_FREE;
2119 pCurrImage = pCurrImage->pPrev)
2120 {
[44252]2121 rc = pCurrImage->Backend->pfnRead(pCurrImage->pBackendData,
[44242]2122 uOffset, cbThisRead,
2123 &IoCtx, &cbThisRead);
[38203]2124 if (cImagesToProcess == 1)
2125 break;
2126 else if (cImagesToProcess > 0)
2127 cImagesToProcess--;
2128 }
2129 }
2130 }
2131 else
2132 rc = vdReadHelper(pDiskFrom, pImageFrom, uOffset, pvBuf, cbThisRead,
2133 false /* fUpdateCache */);
2134
2135 if (RT_FAILURE(rc) && rc != VERR_VD_BLOCK_FREE)
2136 break;
2137
2138 rc2 = vdThreadFinishRead(pDiskFrom);
2139 AssertRC(rc2);
2140 fLockReadFrom = false;
2141
2142 if (rc != VERR_VD_BLOCK_FREE)
2143 {
2144 rc2 = vdThreadStartWrite(pDiskTo);
2145 AssertRC(rc2);
2146 fLockWriteTo = true;
2147
2148 /* Only do collapsed I/O if we are copying the data blockwise. */
[100078]2149 rc = vdWriteHelperEx(pDiskTo, pImageTo, NULL, uOffset, pvBuf,
[46712]2150 cbThisRead, VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG /* fFlags */,
[38203]2151 fBlockwiseCopy ? cImagesToRead : 0);
2152 if (RT_FAILURE(rc))
2153 break;
2154
2155 rc2 = vdThreadFinishWrite(pDiskTo);
2156 AssertRC(rc2);
2157 fLockWriteTo = false;
2158 }
2159 else /* Don't propagate the error to the outside */
2160 rc = VINF_SUCCESS;
2161
2162 uOffset += cbThisRead;
2163 cbRemaining -= cbThisRead;
2164
2165 unsigned uProgressNew = uOffset * 99 / cbSize;
2166 if (uProgressNew != uProgressOld)
2167 {
2168 uProgressOld = uProgressNew;
2169
[38469]2170 if (pIfProgress && pIfProgress->pfnProgress)
[38203]2171 {
[38469]2172 rc = pIfProgress->pfnProgress(pIfProgress->Core.pvUser,
[38203]2173 uProgressOld);
2174 if (RT_FAILURE(rc))
2175 break;
2176 }
[38469]2177 if (pDstIfProgress && pDstIfProgress->pfnProgress)
[38203]2178 {
[38469]2179 rc = pDstIfProgress->pfnProgress(pDstIfProgress->Core.pvUser,
[38203]2180 uProgressOld);
2181 if (RT_FAILURE(rc))
2182 break;
2183 }
2184 }
2185 } while (uOffset < cbSize);
2186
2187 RTMemFree(pvBuf);
2188
2189 if (fLockReadFrom)
2190 {
2191 rc2 = vdThreadFinishRead(pDiskFrom);
2192 AssertRC(rc2);
2193 }
2194
2195 if (fLockWriteTo)
2196 {
2197 rc2 = vdThreadFinishWrite(pDiskTo);
2198 AssertRC(rc2);
2199 }
2200
2201 LogFlowFunc(("returns rc=%Rrc\n", rc));
2202 return rc;
2203}
2204
2205/**
[36132]2206 * Flush helper async version.
2207 */
[57388]2208static DECLCALLBACK(int) vdSetModifiedHelperAsync(PVDIOCTX pIoCtx)
[36132]2209{
2210 int rc = VINF_SUCCESS;
[38876]2211 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
[36132]2212
[44252]2213 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
[36132]2214 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2215 rc = VINF_SUCCESS;
2216
2217 return rc;
2218}
2219
2220/**
2221 * internal: mark the disk as modified - async version.
2222 */
[66250]2223static int vdSetModifiedFlagAsync(PVDISK pDisk, PVDIOCTX pIoCtx)
[36132]2224{
2225 int rc = VINF_SUCCESS;
2226
[44396]2227 VD_IS_LOCKED(pDisk);
2228
[36132]2229 pDisk->uModified |= VD_IMAGE_MODIFIED_FLAG;
2230 if (pDisk->uModified & VD_IMAGE_MODIFIED_FIRST)
2231 {
2232 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2233 if (RT_SUCCESS(rc))
2234 {
2235 pDisk->uModified &= ~VD_IMAGE_MODIFIED_FIRST;
2236
2237 /* First modify, so create a UUID and ensure it's written to disk. */
2238 vdResetModifiedFlag(pDisk);
2239
2240 if (!(pDisk->uModified & VD_IMAGE_MODIFIED_DISABLE_UUID_UPDATE))
2241 {
2242 PVDIOCTX pIoCtxFlush = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_FLUSH,
2243 0, 0, pDisk->pLast,
2244 NULL, pIoCtx, 0, 0, NULL,
2245 vdSetModifiedHelperAsync);
2246
2247 if (pIoCtxFlush)
2248 {
[44396]2249 rc = vdIoCtxProcessLocked(pIoCtxFlush);
[36132]2250 if (rc == VINF_VD_ASYNC_IO_FINISHED)
2251 {
[37073]2252 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs */);
[36132]2253 vdIoCtxFree(pDisk, pIoCtxFlush);
2254 }
2255 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2256 {
[38876]2257 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
[44242]2258 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[36132]2259 }
2260 else /* Another error */
2261 vdIoCtxFree(pDisk, pIoCtxFlush);
2262 }
2263 else
2264 rc = VERR_NO_MEMORY;
2265 }
2266 }
2267 }
2268
2269 return rc;
2270}
2271
[57388]2272static DECLCALLBACK(int) vdWriteHelperCommitAsync(PVDIOCTX pIoCtx)
[28065]2273{
[38876]2274 int rc = VINF_SUCCESS;
2275 PVDIMAGE pImage = pIoCtx->Req.Io.pImageStart;
2276 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2277 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2278 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
[35321]2279
2280 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
[44252]2281 rc = pImage->Backend->pfnWrite(pImage->pBackendData,
[45155]2282 pIoCtx->Req.Io.uOffset - cbPreRead,
2283 cbPreRead + cbThisWrite + cbPostRead,
2284 pIoCtx, NULL, &cbPreRead, &cbPostRead, 0);
[35321]2285 Assert(rc != VERR_VD_BLOCK_FREE);
2286 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPreRead == 0);
2287 Assert(rc == VERR_VD_NOT_ENOUGH_METADATA || cbPostRead == 0);
2288 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2289 rc = VINF_SUCCESS;
2290 else if (rc == VERR_VD_IOCTX_HALT)
2291 {
[44242]2292 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[35321]2293 rc = VINF_SUCCESS;
2294 }
2295
2296 LogFlowFunc(("returns rc=%Rrc\n", rc));
2297 return rc;
2298}
2299
[57388]2300static DECLCALLBACK(int) vdWriteHelperOptimizedCmpAndWriteAsync(PVDIOCTX pIoCtx)
[28065]2301{
[28620]2302 int rc = VINF_SUCCESS;
2303 size_t cbThisWrite = 0;
2304 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2305 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2306 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
2307 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2308 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
2309 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
[28065]2310
[28693]2311 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2312
[28620]2313 AssertPtr(pIoCtxParent);
[30555]2314 Assert(!pIoCtxParent->pIoCtxParent);
[38876]2315 Assert(!pIoCtx->Req.Io.cbTransferLeft && !pIoCtx->cMetaTransfersPending);
[28065]2316
[28620]2317 vdIoCtxChildReset(pIoCtx);
2318 cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
[38876]2319 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
[28065]2320
2321 /* Check if the write would modify anything in this block. */
[38876]2322 if (!RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &pIoCtxParent->Req.Io.SgBuf, cbThisWrite))
[28065]2323 {
[28154]2324 RTSGBUF SgBufSrcTmp;
2325
[38876]2326 RTSgBufClone(&SgBufSrcTmp, &pIoCtxParent->Req.Io.SgBuf);
[28154]2327 RTSgBufAdvance(&SgBufSrcTmp, cbThisWrite);
[38876]2328 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbThisWrite);
[28154]2329
[38876]2330 if (!cbWriteCopy || !RTSgBufCmp(&pIoCtx->Req.Io.SgBuf, &SgBufSrcTmp, cbWriteCopy))
[28154]2331 {
2332 /* Block is completely unchanged, so no need to write anything. */
2333 LogFlowFunc(("Block didn't changed\n"));
[38876]2334 ASMAtomicWriteU32(&pIoCtx->Req.Io.cbTransferLeft, 0);
2335 RTSgBufAdvance(&pIoCtxParent->Req.Io.SgBuf, cbThisWrite);
[28620]2336 return VINF_VD_ASYNC_IO_FINISHED;
[28154]2337 }
[28065]2338 }
2339
2340 /* Copy the data to the right place in the buffer. */
[38876]2341 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2342 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbPreRead);
[28620]2343 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
[28065]2344
2345 /* Handle the data that goes after the write to fill the block. */
2346 if (cbPostRead)
2347 {
2348 /* Now assemble the remaining data. */
2349 if (cbWriteCopy)
[30044]2350 {
2351 /*
2352 * The S/G buffer of the parent needs to be cloned because
2353 * it is not allowed to modify the state.
2354 */
2355 RTSGBUF SgBufParentTmp;
2356
[38876]2357 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
2358 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
[30044]2359 }
2360
[28065]2361 /* Zero out the remainder of this block. Will never be visible, as this
2362 * is beyond the limit of the image. */
2363 if (cbFill)
[28154]2364 {
[38876]2365 RTSgBufAdvance(&pIoCtx->Req.Io.SgBuf, cbReadImage);
[28620]2366 vdIoCtxSet(pIoCtx, '\0', cbFill);
[28154]2367 }
[28065]2368 }
2369
2370 /* Write the full block to the virtual disk. */
[38876]2371 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
[45155]2372 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
[28154]2373
[28065]2374 return rc;
2375}
2376
[57388]2377static DECLCALLBACK(int) vdWriteHelperOptimizedPreReadAsync(PVDIOCTX pIoCtx)
[28620]2378{
[28693]2379 int rc = VINF_SUCCESS;
2380
2381 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2382
[45155]2383 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2384
[49880]2385 if ( pIoCtx->Req.Io.cbTransferLeft
2386 && !pIoCtx->cDataTransfersPending)
[28693]2387 rc = vdReadHelperAsync(pIoCtx);
2388
[52268]2389 if ( ( RT_SUCCESS(rc)
2390 || (rc == VERR_VD_ASYNC_IO_IN_PROGRESS))
[38876]2391 && ( pIoCtx->Req.Io.cbTransferLeft
[28693]2392 || pIoCtx->cMetaTransfersPending))
2393 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
[52268]2394 else
[28693]2395 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedCmpAndWriteAsync;
2396
2397 return rc;
[28620]2398}
2399
[28065]2400/**
[28620]2401 * internal: write a complete block (only used for diff images), taking the
2402 * remaining data from parent images. This implementation optimizes out writes
2403 * that do not change the data relative to the state as of the parent images.
2404 * All backends which support differential/growing images support this - async version.
2405 */
[57388]2406static DECLCALLBACK(int) vdWriteHelperOptimizedAsync(PVDIOCTX pIoCtx)
[28620]2407{
[66250]2408 PVDISK pDisk = pIoCtx->pDisk;
[28620]2409 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2410 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2411 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2412 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
[30044]2413 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
[28620]2414 size_t cbFill = 0;
2415 size_t cbWriteCopy = 0;
2416 size_t cbReadImage = 0;
2417
[28693]2418 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2419
[28620]2420 AssertPtr(pIoCtx->pIoCtxParent);
[30555]2421 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
[28620]2422
2423 if (cbPostRead)
2424 {
[33540]2425 /* Figure out how much we cannot read from the image, because
[28620]2426 * the last block to write might exceed the nominal size of the
2427 * image for technical reasons. */
2428 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2429 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2430
2431 /* If we have data to be written, use that instead of reading
2432 * data from the image. */
2433 if (cbWrite > cbThisWrite)
2434 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
2435
2436 /* The rest must be read from the image. */
2437 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2438 }
2439
2440 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2441 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2442 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2443
2444 /* Read the entire data of the block so that we can compare whether it will
2445 * be modified by the write or not. */
[48848]2446 size_t cbTmp = cbPreRead + cbThisWrite + cbPostRead - cbFill; Assert(cbTmp == (uint32_t)cbTmp);
2447 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbTmp;
[38876]2448 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2449 pIoCtx->Req.Io.uOffset -= cbPreRead;
[28620]2450
2451 /* Next step */
2452 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperOptimizedPreReadAsync;
2453 return VINF_SUCCESS;
2454}
2455
[57388]2456static DECLCALLBACK(int) vdWriteHelperStandardReadImageAsync(PVDIOCTX pIoCtx)
[49944]2457{
2458 int rc = VINF_SUCCESS;
2459
2460 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2461
2462 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2463
2464 if ( pIoCtx->Req.Io.cbTransferLeft
2465 && !pIoCtx->cDataTransfersPending)
2466 rc = vdReadHelperAsync(pIoCtx);
2467
2468 if ( RT_SUCCESS(rc)
2469 && ( pIoCtx->Req.Io.cbTransferLeft
2470 || pIoCtx->cMetaTransfersPending))
2471 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2472 else
2473 {
2474 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2475
2476 /* Zero out the remainder of this block. Will never be visible, as this
2477 * is beyond the limit of the image. */
2478 if (cbFill)
2479 vdIoCtxSet(pIoCtx, '\0', cbFill);
2480
2481 /* Write the full block to the virtual disk. */
2482 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
2483
2484 vdIoCtxChildReset(pIoCtx);
2485 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2486 }
2487
2488 return rc;
2489}
2490
[57388]2491static DECLCALLBACK(int) vdWriteHelperStandardAssemble(PVDIOCTX pIoCtx)
[45155]2492{
2493 int rc = VINF_SUCCESS;
2494 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2495 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2496 PVDIOCTX pIoCtxParent = pIoCtx->pIoCtxParent;
2497
2498 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2499
2500 vdIoCtxCopy(pIoCtx, pIoCtxParent, cbThisWrite);
2501 if (cbPostRead)
2502 {
2503 size_t cbFill = pIoCtx->Type.Child.Write.Optimized.cbFill;
2504 size_t cbWriteCopy = pIoCtx->Type.Child.Write.Optimized.cbWriteCopy;
2505 size_t cbReadImage = pIoCtx->Type.Child.Write.Optimized.cbReadImage;
2506
2507 /* Now assemble the remaining data. */
2508 if (cbWriteCopy)
2509 {
2510 /*
2511 * The S/G buffer of the parent needs to be cloned because
2512 * it is not allowed to modify the state.
2513 */
2514 RTSGBUF SgBufParentTmp;
2515
2516 RTSgBufClone(&SgBufParentTmp, &pIoCtxParent->Req.Io.SgBuf);
2517 RTSgBufCopy(&pIoCtx->Req.Io.SgBuf, &SgBufParentTmp, cbWriteCopy);
2518 }
2519
2520 if (cbReadImage)
2521 {
2522 /* Read remaining data. */
[49944]2523 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardReadImageAsync;
2524
2525 /* Read the data that goes before the write to fill the block. */
2526 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbReadImage; Assert(cbReadImage == (uint32_t)cbReadImage);
2527 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2528 pIoCtx->Req.Io.uOffset += cbWriteCopy;
[45155]2529 }
2530 else
2531 {
[49944]2532 /* Zero out the remainder of this block. Will never be visible, as this
2533 * is beyond the limit of the image. */
2534 if (cbFill)
2535 vdIoCtxSet(pIoCtx, '\0', cbFill);
2536
[45155]2537 /* Write the full block to the virtual disk. */
2538 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
[49944]2539 vdIoCtxChildReset(pIoCtx);
[45155]2540 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2541 }
2542 }
2543 else
2544 {
2545 /* Write the full block to the virtual disk. */
2546 RTSgBufReset(&pIoCtx->Req.Io.SgBuf);
[49944]2547 vdIoCtxChildReset(pIoCtx);
[45155]2548 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperCommitAsync;
2549 }
2550
2551 return rc;
2552}
2553
[57388]2554static DECLCALLBACK(int) vdWriteHelperStandardPreReadAsync(PVDIOCTX pIoCtx)
[45155]2555{
2556 int rc = VINF_SUCCESS;
2557
2558 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2559
2560 pIoCtx->fFlags |= VDIOCTX_FLAGS_ZERO_FREE_BLOCKS;
2561
[49944]2562 if ( pIoCtx->Req.Io.cbTransferLeft
2563 && !pIoCtx->cDataTransfersPending)
[45155]2564 rc = vdReadHelperAsync(pIoCtx);
2565
2566 if ( RT_SUCCESS(rc)
2567 && ( pIoCtx->Req.Io.cbTransferLeft
2568 || pIoCtx->cMetaTransfersPending))
2569 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2570 else
2571 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2572
2573 return rc;
2574}
2575
[57388]2576static DECLCALLBACK(int) vdWriteHelperStandardAsync(PVDIOCTX pIoCtx)
[45155]2577{
[66250]2578 PVDISK pDisk = pIoCtx->pDisk;
[45155]2579 uint64_t uOffset = pIoCtx->Type.Child.uOffsetSaved;
2580 size_t cbThisWrite = pIoCtx->Type.Child.cbTransferParent;
2581 size_t cbPreRead = pIoCtx->Type.Child.cbPreRead;
2582 size_t cbPostRead = pIoCtx->Type.Child.cbPostRead;
2583 size_t cbWrite = pIoCtx->Type.Child.cbWriteParent;
2584 size_t cbFill = 0;
2585 size_t cbWriteCopy = 0;
2586 size_t cbReadImage = 0;
2587
2588 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2589
2590 AssertPtr(pIoCtx->pIoCtxParent);
2591 Assert(!pIoCtx->pIoCtxParent->pIoCtxParent);
2592
2593 /* Calculate the amount of data to read that goes after the write to fill the block. */
2594 if (cbPostRead)
2595 {
2596 /* If we have data to be written, use that instead of reading
2597 * data from the image. */
2598 if (cbWrite > cbThisWrite)
2599 cbWriteCopy = RT_MIN(cbWrite - cbThisWrite, cbPostRead);
[49944]2600 else
2601 cbWriteCopy = 0;
[45155]2602
2603 /* Figure out how much we cannot read from the image, because
2604 * the last block to write might exceed the nominal size of the
2605 * image for technical reasons. */
2606 if (uOffset + cbThisWrite + cbPostRead > pDisk->cbSize)
2607 cbFill = uOffset + cbThisWrite + cbPostRead - pDisk->cbSize;
2608
2609 /* The rest must be read from the image. */
2610 cbReadImage = cbPostRead - cbWriteCopy - cbFill;
2611 }
2612
2613 pIoCtx->Type.Child.Write.Optimized.cbFill = cbFill;
2614 pIoCtx->Type.Child.Write.Optimized.cbWriteCopy = cbWriteCopy;
2615 pIoCtx->Type.Child.Write.Optimized.cbReadImage = cbReadImage;
2616
2617 /* Next step */
2618 if (cbPreRead)
2619 {
2620 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardPreReadAsync;
2621
2622 /* Read the data that goes before the write to fill the block. */
[48848]2623 pIoCtx->Req.Io.cbTransferLeft = (uint32_t)cbPreRead; Assert(cbPreRead == (uint32_t)cbPreRead);
[45155]2624 pIoCtx->Req.Io.cbTransfer = pIoCtx->Req.Io.cbTransferLeft;
2625 pIoCtx->Req.Io.uOffset -= cbPreRead;
2626 }
2627 else
2628 pIoCtx->pfnIoCtxTransferNext = vdWriteHelperStandardAssemble;
2629
2630 return VINF_SUCCESS;
2631}
2632
[28620]2633/**
[27977]2634 * internal: write buffer to the image, taking care of block boundaries and
2635 * write optimizations - async version.
2636 */
[57388]2637static DECLCALLBACK(int) vdWriteHelperAsync(PVDIOCTX pIoCtx)
[27977]2638{
2639 int rc;
[38876]2640 size_t cbWrite = pIoCtx->Req.Io.cbTransfer;
2641 uint64_t uOffset = pIoCtx->Req.Io.uOffset;
2642 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
[66250]2643 PVDISK pDisk = pIoCtx->pDisk;
[27977]2644 unsigned fWrite;
2645 size_t cbThisWrite;
2646 size_t cbPreRead, cbPostRead;
[6291]2647
[51750]2648 /* Apply write filter chain here if it was not done already. */
2649 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_WRITE_FILTER_APPLIED))
2650 {
2651 rc = vdFilterChainApplyWrite(pDisk, uOffset, cbWrite, pIoCtx);
2652 if (RT_FAILURE(rc))
2653 return rc;
2654 pIoCtx->fFlags |= VDIOCTX_FLAGS_WRITE_FILTER_APPLIED;
2655 }
2656
[46712]2657 if (!(pIoCtx->fFlags & VDIOCTX_FLAGS_DONT_SET_MODIFIED_FLAG))
2658 {
2659 rc = vdSetModifiedFlagAsync(pDisk, pIoCtx);
2660 if (RT_FAILURE(rc)) /* Includes I/O in progress. */
2661 return rc;
2662 }
[36132]2663
[38876]2664 rc = vdDiscardSetRangeAllocated(pDisk, uOffset, cbWrite);
2665 if (RT_FAILURE(rc))
2666 return rc;
2667
[27977]2668 /* Loop until all written. */
2669 do
2670 {
2671 /* Try to write the possibly partial block to the last opened image.
2672 * This works when the block is already allocated in this image or
2673 * if it is a full-block write (and allocation isn't suppressed below).
2674 * For image formats which don't support zero blocks, it's beneficial
2675 * to avoid unnecessarily allocating unchanged blocks. This prevents
2676 * unwanted expanding of images. VMDK is an example. */
2677 cbThisWrite = cbWrite;
[51623]2678
2679 /*
2680 * Check whether there is a full block write in progress which was not allocated.
2681 * Defer I/O if the range interferes.
2682 */
2683 if ( pDisk->pIoCtxLockOwner != NIL_VDIOCTX
2684 && uOffset >= pDisk->uOffsetStartLocked
2685 && uOffset < pDisk->uOffsetEndLocked)
2686 {
2687 Log(("Interferring write while allocating a new block => deferring write\n"));
2688 vdIoCtxDefer(pDisk, pIoCtx);
2689 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2690 break;
2691 }
2692
[27977]2693 fWrite = (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2694 ? 0 : VD_WRITE_NO_ALLOC;
[51623]2695 rc = pImage->Backend->pfnWrite(pImage->pBackendData, uOffset, cbThisWrite,
2696 pIoCtx, &cbThisWrite, &cbPreRead, &cbPostRead,
2697 fWrite);
[27977]2698 if (rc == VERR_VD_BLOCK_FREE)
2699 {
[33094]2700 /* Lock the disk .*/
2701 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2702 if (RT_SUCCESS(rc))
[28065]2703 {
[28620]2704 /*
2705 * Allocate segment and buffer in one go.
2706 * A bit hackish but avoids the need to allocate memory twice.
2707 */
[34217]2708 PRTSGBUF pTmp = (PRTSGBUF)RTMemAlloc(cbPreRead + cbThisWrite + cbPostRead + sizeof(RTSGSEG) + sizeof(RTSGBUF));
[54117]2709 AssertBreakStmt(pTmp, rc = VERR_NO_MEMORY);
[34217]2710 PRTSGSEG pSeg = (PRTSGSEG)(pTmp + 1);
[28065]2711
[34217]2712 pSeg->pvSeg = pSeg + 1;
2713 pSeg->cbSeg = cbPreRead + cbThisWrite + cbPostRead;
2714 RTSgBufInit(pTmp, pSeg, 1);
[27977]2715
[28620]2716 PVDIOCTX pIoCtxWrite = vdIoCtxChildAlloc(pDisk, VDIOCTXTXDIR_WRITE,
[35321]2717 uOffset, pSeg->cbSeg, pImage,
[34217]2718 pTmp,
[28620]2719 pIoCtx, cbThisWrite,
[30044]2720 cbWrite,
[29497]2721 pTmp,
[28620]2722 (pImage->uOpenFlags & VD_OPEN_FLAGS_HONOR_SAME)
2723 ? vdWriteHelperStandardAsync
2724 : vdWriteHelperOptimizedAsync);
[90802]2725 if (!pIoCtxWrite)
[28620]2726 {
2727 RTMemTmpFree(pTmp);
2728 rc = VERR_NO_MEMORY;
2729 break;
2730 }
2731
2732 LogFlowFunc(("Disk is growing because of pIoCtx=%#p pIoCtxWrite=%#p\n",
2733 pIoCtx, pIoCtxWrite));
2734
[51623]2735 /* Save the current range for the growing operation to check for intersecting requests later. */
2736 pDisk->uOffsetStartLocked = uOffset - cbPreRead;
2737 pDisk->uOffsetEndLocked = uOffset + cbThisWrite + cbPostRead;
2738
[28620]2739 pIoCtxWrite->Type.Child.cbPreRead = cbPreRead;
2740 pIoCtxWrite->Type.Child.cbPostRead = cbPostRead;
[46679]2741 pIoCtxWrite->Req.Io.pImageParentOverride = pIoCtx->Req.Io.pImageParentOverride;
[28620]2742
2743 /* Process the write request */
[44396]2744 rc = vdIoCtxProcessLocked(pIoCtxWrite);
[28620]2745
2746 if (RT_FAILURE(rc) && (rc != VERR_VD_ASYNC_IO_IN_PROGRESS))
2747 {
[53337]2748 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
[28620]2749 vdIoCtxFree(pDisk, pIoCtxWrite);
2750 break;
2751 }
2752 else if ( rc == VINF_VD_ASYNC_IO_FINISHED
2753 && ASMAtomicCmpXchgBool(&pIoCtxWrite->fComplete, true, false))
2754 {
2755 LogFlow(("Child write request completed\n"));
[38876]2756 Assert(pIoCtx->Req.Io.cbTransferLeft >= cbThisWrite);
[48848]2757 Assert(cbThisWrite == (uint32_t)cbThisWrite);
[49387]2758 rc = pIoCtxWrite->rcReq;
[48848]2759 ASMAtomicSubU32(&pIoCtx->Req.Io.cbTransferLeft, (uint32_t)cbThisWrite);
[36221]2760 vdIoCtxUnlockDisk(pDisk, pIoCtx, false /* fProcessDeferredReqs*/ );
[28620]2761 vdIoCtxFree(pDisk, pIoCtxWrite);
2762 }
2763 else
[30863]2764 {
[28620]2765 LogFlow(("Child write pending\n"));
[38876]2766 ASMAtomicIncU32(&pIoCtx->cDataTransfersPending);
[44242]2767 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[30863]2768 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2769 cbWrite -= cbThisWrite;
2770 uOffset += cbThisWrite;
2771 break;
2772 }
[28065]2773 }
[33094]2774 else
2775 {
2776 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
2777 break;
2778 }
[27977]2779 }
2780
[31586]2781 if (rc == VERR_VD_IOCTX_HALT)
2782 {
2783 cbWrite -= cbThisWrite;
2784 uOffset += cbThisWrite;
[44242]2785 pIoCtx->fFlags |= VDIOCTX_FLAGS_BLOCKED;
[30555]2786 break;
[31586]2787 }
2788 else if (rc == VERR_VD_NOT_ENOUGH_METADATA)
2789 break;
[30555]2790
[27977]2791 cbWrite -= cbThisWrite;
2792 uOffset += cbThisWrite;
[30555]2793 } while (cbWrite != 0 && (RT_SUCCESS(rc) || rc == VERR_VD_ASYNC_IO_IN_PROGRESS));
[27977]2794
[31586]2795 if ( rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2796 || rc == VERR_VD_NOT_ENOUGH_METADATA
2797 || rc == VERR_VD_IOCTX_HALT)
[28620]2798 {
2799 /*
2800 * Tell the caller that we don't need to go back here because all
2801 * writes are initiated.
2802 */
[43141]2803 if ( !cbWrite
2804 && rc != VERR_VD_IOCTX_HALT)
[28620]2805 rc = VINF_SUCCESS;
2806
[38876]2807 pIoCtx->Req.Io.uOffset = uOffset;
2808 pIoCtx->Req.Io.cbTransfer = cbWrite;
[28620]2809 }
2810
[27977]2811 return rc;
2812}
2813
[6291]2814/**
[28620]2815 * Flush helper async version.
2816 */
[57388]2817static DECLCALLBACK(int) vdFlushHelperAsync(PVDIOCTX pIoCtx)
[28620]2818{
2819 int rc = VINF_SUCCESS;
[66250]2820 PVDISK pDisk = pIoCtx->pDisk;
[38876]2821 PVDIMAGE pImage = pIoCtx->Req.Io.pImageCur;
[28620]2822
[33094]2823 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
2824 if (RT_SUCCESS(rc))
2825 {
[51623]2826 /* Mark the whole disk as locked. */
2827 pDisk->uOffsetStartLocked = 0;
2828 pDisk->uOffsetEndLocked = UINT64_C(0xffffffffffffffff);
2829
[33094]2830 vdResetModifiedFlag(pDisk);
[44252]2831 rc = pImage->Backend->pfnFlush(pImage->pBackendData, pIoCtx);
[44415]2832 if ( ( RT_SUCCESS(rc)
[47420]2833 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS
2834 || rc == VERR_VD_IOCTX_HALT)
[44415]2835 && pDisk->pCache)
2836 {
2837 rc = pDisk->pCache->Backend->pfnFlush(pDisk->pCache->pBackendData, pIoCtx);
2838 if ( RT_SUCCESS(rc)
[47420]2839 || ( rc != VERR_VD_ASYNC_IO_IN_PROGRESS
2840 && rc != VERR_VD_IOCTX_HALT))
[44415]2841 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
[47420]2842 else if (rc != VERR_VD_IOCTX_HALT)
[44430]2843 rc = VINF_SUCCESS;
[44415]2844 }
[44430]2845 else if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2846 rc = VINF_SUCCESS;
[47420]2847 else if (rc != VERR_VD_IOCTX_HALT)/* Some other error. */
[44415]2848 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessBlockedReqs */);
[33094]2849 }
[28620]2850
2851 return rc;
2852}
2853
2854/**
[38876]2855 * Async discard helper - discards a whole block which is recorded in the block
2856 * tree.
2857 *
2858 * @returns VBox status code.
2859 * @param pIoCtx The I/O context to operate on.
2860 */
[57388]2861static DECLCALLBACK(int) vdDiscardWholeBlockAsync(PVDIOCTX pIoCtx)
[38876]2862{
2863 int rc = VINF_SUCCESS;
[66250]2864 PVDISK pDisk = pIoCtx->pDisk;
[38876]2865 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2866 PVDDISCARDBLOCK pBlock = pIoCtx->Req.Discard.pBlock;
2867 size_t cbPreAllocated, cbPostAllocated, cbActuallyDiscarded;
2868
2869 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
2870
2871 AssertPtr(pBlock);
2872
[44252]2873 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
[38876]2874 pBlock->Core.Key, pBlock->cbDiscard,
2875 &cbPreAllocated, &cbPostAllocated,
2876 &cbActuallyDiscarded, NULL, 0);
2877 Assert(rc != VERR_VD_DISCARD_ALIGNMENT_NOT_MET);
2878 Assert(!cbPreAllocated);
2879 Assert(!cbPostAllocated);
2880 Assert(cbActuallyDiscarded == pBlock->cbDiscard || RT_FAILURE(rc));
2881
2882 /* Remove the block on success. */
2883 if ( RT_SUCCESS(rc)
2884 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2885 {
2886 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
[62738]2887 Assert(pBlockRemove == pBlock); RT_NOREF1(pBlockRemove);
[38876]2888
2889 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2890 RTListNodeRemove(&pBlock->NodeLru);
2891 RTMemFree(pBlock->pbmAllocated);
2892 RTMemFree(pBlock);
2893 pIoCtx->Req.Discard.pBlock = NULL;/* Safety precaution. */
2894 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
2895 rc = VINF_SUCCESS;
2896 }
2897
2898 LogFlowFunc(("returns rc=%Rrc\n", rc));
2899 return rc;
2900}
2901
2902/**
2903 * Removes the least recently used blocks from the waiting list until
2904 * the new value is reached - version for async I/O.
2905 *
2906 * @returns VBox status code.
2907 * @param pDisk VD disk container.
[64272]2908 * @param pIoCtx The I/O context associated with this discard operation.
[38876]2909 * @param cbDiscardingNew How many bytes should be waiting on success.
2910 * The number of bytes waiting can be less.
2911 */
[66250]2912static int vdDiscardRemoveBlocksAsync(PVDISK pDisk, PVDIOCTX pIoCtx, size_t cbDiscardingNew)
[38876]2913{
2914 int rc = VINF_SUCCESS;
2915 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
2916
2917 LogFlowFunc(("pDisk=%#p pDiscard=%#p cbDiscardingNew=%zu\n",
2918 pDisk, pDiscard, cbDiscardingNew));
2919
2920 while (pDiscard->cbDiscarding > cbDiscardingNew)
2921 {
2922 PVDDISCARDBLOCK pBlock = RTListGetLast(&pDiscard->ListLru, VDDISCARDBLOCK, NodeLru);
2923
2924 Assert(!RTListIsEmpty(&pDiscard->ListLru));
2925
2926 /* Go over the allocation bitmap and mark all discarded sectors as unused. */
2927 uint64_t offStart = pBlock->Core.Key;
2928 uint32_t idxStart = 0;
2929 size_t cbLeft = pBlock->cbDiscard;
2930 bool fAllocated = ASMBitTest(pBlock->pbmAllocated, idxStart);
[48848]2931 uint32_t cSectors = (uint32_t)(pBlock->cbDiscard / 512);
[38876]2932
2933 while (cbLeft > 0)
2934 {
2935 int32_t idxEnd;
2936 size_t cbThis = cbLeft;
2937
2938 if (fAllocated)
2939 {
2940 /* Check for the first unallocated bit. */
2941 idxEnd = ASMBitNextClear(pBlock->pbmAllocated, cSectors, idxStart);
2942 if (idxEnd != -1)
2943 {
2944 cbThis = (idxEnd - idxStart) * 512;
2945 fAllocated = false;
2946 }
2947 }
2948 else
2949 {
2950 /* Mark as unused and check for the first set bit. */
2951 idxEnd = ASMBitNextSet(pBlock->pbmAllocated, cSectors, idxStart);
2952 if (idxEnd != -1)
2953 cbThis = (idxEnd - idxStart) * 512;
2954
[44252]2955 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
[38876]2956 offStart, cbThis, NULL, NULL, &cbThis,
2957 NULL, VD_DISCARD_MARK_UNUSED);
2958 if ( RT_FAILURE(rc)
2959 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2960 break;
2961
2962 fAllocated = true;
2963 }
2964
2965 idxStart = idxEnd;
2966 offStart += cbThis;
2967 cbLeft -= cbThis;
2968 }
2969
2970 if ( RT_FAILURE(rc)
2971 && rc != VERR_VD_ASYNC_IO_IN_PROGRESS)
2972 break;
2973
2974 PVDDISCARDBLOCK pBlockRemove = (PVDDISCARDBLOCK)RTAvlrU64RangeRemove(pDiscard->pTreeBlocks, pBlock->Core.Key);
[62738]2975 Assert(pBlockRemove == pBlock); NOREF(pBlockRemove);
[38876]2976 RTListNodeRemove(&pBlock->NodeLru);
2977
2978 pDiscard->cbDiscarding -= pBlock->cbDiscard;
2979 RTMemFree(pBlock->pbmAllocated);
2980 RTMemFree(pBlock);
2981 }
2982
2983 if (rc == VERR_VD_ASYNC_IO_IN_PROGRESS)
2984 rc = VINF_SUCCESS;
2985
2986 Assert(RT_FAILURE(rc) || pDiscard->cbDiscarding <= cbDiscardingNew);
2987
2988 LogFlowFunc(("returns rc=%Rrc\n", rc));
2989 return rc;
2990}
2991
2992/**
2993 * Async discard helper - discards the current range if there is no matching
2994 * block in the tree.
2995 *
2996 * @returns VBox status code.
2997 * @param pIoCtx The I/O context to operate on.
2998 */
[57388]2999static DECLCALLBACK(int) vdDiscardCurrentRangeAsync(PVDIOCTX pIoCtx)
[38876]3000{
[66250]3001 PVDISK pDisk = pIoCtx->pDisk;
[38876]3002 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
3003 uint64_t offStart = pIoCtx->Req.Discard.offCur;
3004 size_t cbThisDiscard = pIoCtx->Req.Discard.cbThisDiscard;
3005 void *pbmAllocated = NULL;
3006 size_t cbPreAllocated, cbPostAllocated;
3007 int rc = VINF_SUCCESS;
3008
3009 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
3010
3011 /* No block found, try to discard using the backend first. */
[44252]3012 rc = pDisk->pLast->Backend->pfnDiscard(pDisk->pLast->pBackendData, pIoCtx,
[38876]3013 offStart, cbThisDiscard, &cbPreAllocated,
3014 &cbPostAllocated, &cbThisDiscard,
3015 &pbmAllocated, 0);
3016 if (rc == VERR_VD_DISCARD_ALIGNMENT_NOT_MET)
3017 {
3018 /* Create new discard block. */
3019 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTMemAllocZ(sizeof(VDDISCARDBLOCK));
3020 if (pBlock)
3021 {
3022 pBlock->Core.Key = offStart - cbPreAllocated;
3023 pBlock->Core.KeyLast = offStart + cbThisDiscard + cbPostAllocated - 1;
3024 pBlock->cbDiscard = cbPreAllocated + cbThisDiscard + cbPostAllocated;
3025 pBlock->pbmAllocated = pbmAllocated;
3026 bool fInserted = RTAvlrU64Insert(pDiscard->pTreeBlocks, &pBlock->Core);
[62738]3027 Assert(fInserted); NOREF(fInserted);
[38876]3028
3029 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3030 pDiscard->cbDiscarding += pBlock->cbDiscard;
3031
3032 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3033 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3034 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3035 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3036
3037 if (pDiscard->cbDiscarding > VD_DISCARD_REMOVE_THRESHOLD)
3038 rc = vdDiscardRemoveBlocksAsync(pDisk, pIoCtx, VD_DISCARD_REMOVE_THRESHOLD);
3039 else
3040 rc = VINF_SUCCESS;
3041
3042 if (RT_SUCCESS(rc))
3043 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync; /* Next part. */
3044 }
3045 else
3046 {
3047 RTMemFree(pbmAllocated);
3048 rc = VERR_NO_MEMORY;
3049 }
3050 }
3051 else if ( RT_SUCCESS(rc)
3052 || rc == VERR_VD_ASYNC_IO_IN_PROGRESS) /* Save state and andvance to next range. */
3053 {
3054 Assert(pIoCtx->Req.Discard.cbDiscardLeft >= cbThisDiscard);
3055 pIoCtx->Req.Discard.cbDiscardLeft -= cbThisDiscard;
3056 pIoCtx->Req.Discard.offCur += cbThisDiscard;
3057 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3058 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3059 rc = VINF_SUCCESS;
3060 }
3061
3062 LogFlowFunc(("returns rc=%Rrc\n", rc));
3063 return rc;
3064}
3065
3066/**
3067 * Async discard helper - entry point.
3068 *
3069 * @returns VBox status code.
3070 * @param pIoCtx The I/O context to operate on.
3071 */
[57388]3072static DECLCALLBACK(int) vdDiscardHelperAsync(PVDIOCTX pIoCtx)
[38876]3073{
3074 int rc = VINF_SUCCESS;
[66250]3075 PVDISK pDisk = pIoCtx->pDisk;
[38876]3076 PCRTRANGE paRanges = pIoCtx->Req.Discard.paRanges;
3077 unsigned cRanges = pIoCtx->Req.Discard.cRanges;
3078 PVDDISCARDSTATE pDiscard = pDisk->pDiscard;
3079
3080 LogFlowFunc(("pIoCtx=%#p\n", pIoCtx));
3081
3082 /* Check if the I/O context processed all ranges. */
3083 if ( pIoCtx->Req.Discard.idxRange == cRanges
3084 && !pIoCtx->Req.Discard.cbDiscardLeft)
3085 {
3086 LogFlowFunc(("All ranges discarded, completing\n"));
3087 vdIoCtxUnlockDisk(pDisk, pIoCtx, true /* fProcessDeferredReqs*/);
3088 return VINF_SUCCESS;
3089 }
3090
3091 if (pDisk->pIoCtxLockOwner != pIoCtx)
3092 rc = vdIoCtxLockDisk(pDisk, pIoCtx);
3093
3094 if (RT_SUCCESS(rc))
3095 {
3096 uint64_t offStart = pIoCtx->Req.Discard.offCur;
3097 size_t cbDiscardLeft = pIoCtx->Req.Discard.cbDiscardLeft;
3098 size_t cbThisDiscard;
3099
[51623]3100 pDisk->uOffsetStartLocked = offStart;
3101 pDisk->uOffsetEndLocked = offStart + cbDiscardLeft;
3102
[38876]3103 if (RT_UNLIKELY(!pDiscard))
3104 {
3105 pDiscard = vdDiscardStateCreate();
3106 if (!pDiscard)
3107 return VERR_NO_MEMORY;
3108
3109 pDisk->pDiscard = pDiscard;
3110 }
3111
3112 if (!pIoCtx->Req.Discard.cbDiscardLeft)
3113 {
3114 offStart = paRanges[pIoCtx->Req.Discard.idxRange].offStart;
3115 cbDiscardLeft = paRanges[pIoCtx->Req.Discard.idxRange].cbRange;
3116 LogFlowFunc(("New range descriptor loaded (%u) offStart=%llu cbDiscard=%zu\n",
3117 pIoCtx->Req.Discard.idxRange, offStart, cbDiscardLeft));
3118 pIoCtx->Req.Discard.idxRange++;
3119 }
3120
3121 /* Look for a matching block in the AVL tree first. */
3122 PVDDISCARDBLOCK pBlock = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, false);
3123 if (!pBlock || pBlock->Core.KeyLast < offStart)
3124 {
3125 PVDDISCARDBLOCK pBlockAbove = (PVDDISCARDBLOCK)RTAvlrU64GetBestFit(pDiscard->pTreeBlocks, offStart, true);
3126
3127 /* Clip range to remain in the current block. */
3128 if (pBlockAbove)
3129 cbThisDiscard = RT_MIN(cbDiscardLeft, pBlockAbove->Core.KeyLast - offStart + 1);
3130 else
3131 cbThisDiscard = cbDiscardLeft;
3132
3133 Assert(!(cbThisDiscard % 512));
3134 pIoCtx->Req.Discard.pBlock = NULL;
3135 pIoCtx->pfnIoCtxTransferNext = vdDiscardCurrentRangeAsync;
3136 }
3137 else
3138 {
3139 /* Range lies partly in the block, update allocation bitmap. */
3140 int32_t idxStart, idxEnd;
3141
3142 cbThisDiscard = RT_MIN(cbDiscardLeft, pBlock->Core.KeyLast - offStart + 1);
3143
3144 AssertPtr(pBlock);
3145
3146 Assert(!(cbThisDiscard % 512));
3147 Assert(!((offStart - pBlock->Core.Key) % 512));
3148
3149 idxStart = (offStart - pBlock->Core.Key) / 512;
[48848]3150 idxEnd = idxStart + (int32_t)(cbThisDiscard / 512);
[38876]3151
3152 ASMBitClearRange(pBlock->pbmAllocated, idxStart, idxEnd);
3153
3154 cbDiscardLeft -= cbThisDiscard;
3155 offStart += cbThisDiscard;
3156
3157 /* Call the backend to discard the block if it is completely unallocated now. */
[48848]3158 if (ASMBitFirstSet((volatile void *)pBlock->pbmAllocated, (uint32_t)(pBlock->cbDiscard / 512)) == -1)
[38876]3159 {
3160 pIoCtx->Req.Discard.pBlock = pBlock;
3161 pIoCtx->pfnIoCtxTransferNext = vdDiscardWholeBlockAsync;
3162 rc = VINF_SUCCESS;
3163 }
3164 else
3165 {
3166 RTListNodeRemove(&pBlock->NodeLru);
3167 RTListPrepend(&pDiscard->ListLru, &pBlock->NodeLru);
3168
3169 /* Start with next range. */
3170 pIoCtx->pfnIoCtxTransferNext = vdDiscardHelperAsync;
3171 rc = VINF_SUCCESS;
3172 }
3173 }
3174
3175 /* Save state in the context. */
3176 pIoCtx->Req.Discard.offCur = offStart;
3177 pIoCtx->Req.Discard.cbDiscardLeft = cbDiscardLeft;
3178 pIoCtx->Req.Discard.cbThisDiscard = cbThisDiscard;
3179 }
3180
3181 LogFlowFunc(("returns rc=%Rrc\n", rc));
3182 return rc;
3183}
3184
3185/**
[22966]3186 * VD async I/O interface open callback.
3187 */
[57388]3188static DECLCALLBACK(int) vdIOOpenFallback(void *pvUser, const char *pszLocation,
3189 uint32_t fOpen, PFNVDCOMPLETED pfnCompleted,
3190 void **ppStorage)