VirtualBox

source: vbox/trunk/src/VBox/Devices/Storage/DrvVD.cpp@ 33000

Last change on this file since 33000 was 32818, checked in by vboxsync, 14 years ago

IPRT: RTTcpClientCloseEx - don't be nice to storage servers, they don't always repay the kindness.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 87.3 KB
Line 
1/* $Id: DrvVD.cpp 32818 2010-09-29 15:28:35Z vboxsync $ */
2/** @file
3 * DrvVD - Generic VBox disk media driver.
4 */
5
6/*
7 * Copyright (C) 2006-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_DRV_VD
23#include <VBox/VBoxHDD.h>
24#include <VBox/pdmdrv.h>
25#include <VBox/pdmasynccompletion.h>
26#include <iprt/asm.h>
27#include <iprt/alloc.h>
28#include <iprt/assert.h>
29#include <iprt/uuid.h>
30#include <iprt/file.h>
31#include <iprt/string.h>
32#include <iprt/tcp.h>
33#include <iprt/semaphore.h>
34#include <iprt/sg.h>
35#include <iprt/poll.h>
36#include <iprt/pipe.h>
37#include <iprt/system.h>
38
39#ifdef VBOX_WITH_INIP
40/* All lwip header files are not C++ safe. So hack around this. */
41RT_C_DECLS_BEGIN
42#include <lwip/inet.h>
43#include <lwip/tcp.h>
44#include <lwip/sockets.h>
45RT_C_DECLS_END
46#endif /* VBOX_WITH_INIP */
47
48#include "Builtins.h"
49
50#ifdef VBOX_WITH_INIP
51/* Small hack to get at lwIP initialized status */
52extern bool DevINIPConfigured(void);
53#endif /* VBOX_WITH_INIP */
54
55
56/*******************************************************************************
57* Defined types, constants and macros *
58*******************************************************************************/
59
/**
 * Converts a pointer to VBOXDISK::IMedia to a PVBOXDISK.
 *
 * The argument is parenthesised so that an expression argument is cast as a
 * whole before the member offset is subtracted.
 */
#define PDMIMEDIA_2_VBOXDISK(pInterface) \
    ( (PVBOXDISK)((uintptr_t)(pInterface) - RT_OFFSETOF(VBOXDISK, IMedia)) )

/**
 * Converts a pointer to PDMDRVINS::IBase to a PPDMDRVINS.
 *
 * The argument is parenthesised so that an expression argument is cast as a
 * whole before the member offset is subtracted.
 */
#define PDMIBASE_2_DRVINS(pInterface) \
    ( (PPDMDRVINS)((uintptr_t)(pInterface) - RT_OFFSETOF(PDMDRVINS, IBase)) )

/** Converts a pointer to PDMDRVINS::IBase to a PVBOXDISK. */
#define PDMIBASE_2_VBOXDISK(pInterface) \
    ( PDMINS_2_DATA(PDMIBASE_2_DRVINS(pInterface), PVBOXDISK) )

/**
 * Converts a pointer to VBOXDISK::IMediaAsync to a PVBOXDISK.
 *
 * The argument is parenthesised so that an expression argument is cast as a
 * whole before the member offset is subtracted.
 */
#define PDMIMEDIAASYNC_2_VBOXDISK(pInterface) \
    ( (PVBOXDISK)((uintptr_t)(pInterface) - RT_OFFSETOF(VBOXDISK, IMediaAsync)) )
75
76/**
77 * VBox disk container, image information, private part.
78 */
79
80typedef struct VBOXIMAGE
81{
82 /** Pointer to next image. */
83 struct VBOXIMAGE *pNext;
84 /** Pointer to list of VD interfaces. Per-image. */
85 PVDINTERFACE pVDIfsImage;
86 /** Common structure for the configuration information interface. */
87 VDINTERFACE VDIConfig;
88 /** Common structure for the supported TCP network stack interface. */
89 VDINTERFACE VDITcpNet;
90 /** Common structure for the supported I/O interface. */
91 VDINTERFACE VDIIO;
92} VBOXIMAGE, *PVBOXIMAGE;
93
94/**
95 * Storage backend data.
96 */
97typedef struct DRVVDSTORAGEBACKEND
98{
99 /** PDM async completion end point. */
100 PPDMASYNCCOMPLETIONENDPOINT pEndpoint;
101 /** The template. */
102 PPDMASYNCCOMPLETIONTEMPLATE pTemplate;
103 /** Event semaphore for synchronous operations. */
104 RTSEMEVENT EventSem;
105 /** Flag whether a synchronous operation is currently pending. */
106 volatile bool fSyncIoPending;
107 /** Return code of the last completed request. */
108 int rcReqLast;
109 /** Callback routine */
110 PFNVDCOMPLETED pfnCompleted;
111} DRVVDSTORAGEBACKEND, *PDRVVDSTORAGEBACKEND;
112
113/**
114 * VBox disk container media main structure, private part.
115 *
116 * @implements PDMIMEDIA
117 * @implements PDMIMEDIAASYNC
118 * @implements VDINTERFACEERROR
119 * @implements VDINTERFACETCPNET
120 * @implements VDINTERFACEASYNCIO
121 * @implements VDINTERFACECONFIG
122 */
123typedef struct VBOXDISK
124{
125 /** The VBox disk container. */
126 PVBOXHDD pDisk;
127 /** The media interface. */
128 PDMIMEDIA IMedia;
129 /** Pointer to the driver instance. */
130 PPDMDRVINS pDrvIns;
131 /** Flag whether suspend has changed image open mode to read only. */
132 bool fTempReadOnly;
133 /** Flag whether to use the runtime (true) or startup error facility. */
134 bool fErrorUseRuntime;
135 /** Pointer to list of VD interfaces. Per-disk. */
136 PVDINTERFACE pVDIfsDisk;
137 /** Common structure for the supported error interface. */
138 VDINTERFACE VDIError;
139 /** Callback table for error interface. */
140 VDINTERFACEERROR VDIErrorCallbacks;
141 /** Common structure for the supported thread synchronization interface. */
142 VDINTERFACE VDIThreadSync;
143 /** Callback table for thread synchronization interface. */
144 VDINTERFACETHREADSYNC VDIThreadSyncCallbacks;
145
146 /** Callback table for the configuration information interface. */
147 VDINTERFACECONFIG VDIConfigCallbacks;
148 /** Callback table for TCP network stack interface. */
149 VDINTERFACETCPNET VDITcpNetCallbacks;
150 /** Callback table for I/O interface. */
151 VDINTERFACEIO VDIIOCallbacks;
152
153 /** Flag whether opened disk suppports async I/O operations. */
154 bool fAsyncIOSupported;
155 /** The async media interface. */
156 PDMIMEDIAASYNC IMediaAsync;
157 /** The async media port interface above. */
158 PPDMIMEDIAASYNCPORT pDrvMediaAsyncPort;
159 /** Pointer to the list of data we need to keep per image. */
160 PVBOXIMAGE pImages;
161 /** Flag whether the media should allow concurrent open for writing. */
162 bool fShareable;
163 /** Flag whether a merge operation has been set up. */
164 bool fMergePending;
165 /** Synchronization to prevent destruction before merge finishes. */
166 RTSEMFASTMUTEX MergeCompleteMutex;
167 /** Synchronization between merge and other image accesses. */
168 RTSEMRW MergeLock;
169 /** Source image index for merging. */
170 unsigned uMergeSource;
171 /** Target image index for merging. */
172 unsigned uMergeTarget;
173
174 /** Flag whether boot acceleration is enabled. */
175 bool fBootAccelEnabled;
176 /** Flag whether boot acceleration is currently active. */
177 bool fBootAccelActive;
178 /** Size of the disk, used for read truncation. */
179 size_t cbDisk;
180 /** Size of the configured buffer. */
181 size_t cbBootAccelBuffer;
182 /** Start offset for which the buffer holds data. */
183 uint64_t offDisk;
184 /** Number of valid bytes in the buffer. */
185 size_t cbDataValid;
186 /** The disk buffer. */
187 uint8_t *pbData;
188} VBOXDISK, *PVBOXDISK;
189
190
191/*******************************************************************************
192* Internal Functions *
193*******************************************************************************/
194
195/**
196 * Internal: allocate new image descriptor and put it in the list
197 */
198static PVBOXIMAGE drvvdNewImage(PVBOXDISK pThis)
199{
200 AssertPtr(pThis);
201 PVBOXIMAGE pImage = (PVBOXIMAGE)RTMemAllocZ(sizeof(VBOXIMAGE));
202 if (pImage)
203 {
204 pImage->pVDIfsImage = NULL;
205 PVBOXIMAGE *pp = &pThis->pImages;
206 while (*pp != NULL)
207 pp = &(*pp)->pNext;
208 *pp = pImage;
209 pImage->pNext = NULL;
210 }
211
212 return pImage;
213}
214
215/**
216 * Internal: free the list of images descriptors.
217 */
218static void drvvdFreeImages(PVBOXDISK pThis)
219{
220 while (pThis->pImages != NULL)
221 {
222 PVBOXIMAGE p = pThis->pImages;
223 pThis->pImages = pThis->pImages->pNext;
224 RTMemFree(p);
225 }
226}
227
228
229/**
230 * Make the image temporarily read-only.
231 *
232 * @returns VBox status code.
233 * @param pThis The driver instance data.
234 */
235static int drvvdSetReadonly(PVBOXDISK pThis)
236{
237 int rc = VINF_SUCCESS;
238 if (!VDIsReadOnly(pThis->pDisk))
239 {
240 unsigned uOpenFlags;
241 rc = VDGetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, &uOpenFlags);
242 AssertRC(rc);
243 uOpenFlags |= VD_OPEN_FLAGS_READONLY;
244 rc = VDSetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, uOpenFlags);
245 AssertRC(rc);
246 pThis->fTempReadOnly = true;
247 }
248 return rc;
249}
250
251
252/**
253 * Undo the temporary read-only status of the image.
254 *
255 * @returns VBox status code.
256 * @param pThis The driver instance data.
257 */
258static int drvvdSetWritable(PVBOXDISK pThis)
259{
260 int rc = VINF_SUCCESS;
261 if (pThis->fTempReadOnly)
262 {
263 unsigned uOpenFlags;
264 rc = VDGetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, &uOpenFlags);
265 AssertRC(rc);
266 uOpenFlags &= ~VD_OPEN_FLAGS_READONLY;
267 rc = VDSetOpenFlags(pThis->pDisk, VD_LAST_IMAGE, uOpenFlags);
268 if (RT_SUCCESS(rc))
269 pThis->fTempReadOnly = false;
270 else
271 AssertRC(rc);
272 }
273 return rc;
274}
275
276
277/*******************************************************************************
278* Error reporting callback *
279*******************************************************************************/
280
281static void drvvdErrorCallback(void *pvUser, int rc, RT_SRC_POS_DECL,
282 const char *pszFormat, va_list va)
283{
284 PPDMDRVINS pDrvIns = (PPDMDRVINS)pvUser;
285 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
286 if (pThis->fErrorUseRuntime)
287 /* We must not pass VMSETRTERR_FLAGS_FATAL as it could lead to a
288 * deadlock: We are probably executed in a thread context != EMT
289 * and the EM thread would wait until every thread is suspended
290 * but we would wait for the EM thread ... */
291
292 PDMDrvHlpVMSetRuntimeErrorV(pDrvIns, /* fFlags=*/ 0, "DrvVD", pszFormat, va);
293 else
294 PDMDrvHlpVMSetErrorV(pDrvIns, rc, RT_SRC_POS_ARGS, pszFormat, va);
295}
296
297/*******************************************************************************
298* VD Async I/O interface implementation *
299*******************************************************************************/
300
301#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION
302
303static DECLCALLBACK(void) drvvdAsyncTaskCompleted(PPDMDRVINS pDrvIns, void *pvTemplateUser, void *pvUser, int rcReq)
304{
305 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
306 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pvTemplateUser;
307
308 LogFlowFunc(("pDrvIns=%#p pvTemplateUser=%#p pvUser=%#p rcReq\n",
309 pDrvIns, pvTemplateUser, pvUser, rcReq));
310
311 if (pStorageBackend->fSyncIoPending)
312 {
313 Assert(!pvUser);
314 pStorageBackend->rcReqLast = rcReq;
315 pStorageBackend->fSyncIoPending = false;
316 RTSemEventSignal(pStorageBackend->EventSem);
317 }
318 else
319 {
320 int rc;
321
322 AssertPtr(pvUser);
323
324 AssertPtr(pStorageBackend->pfnCompleted);
325 rc = pStorageBackend->pfnCompleted(pvUser, rcReq);
326 AssertRC(rc);
327 }
328}
329
330static DECLCALLBACK(int) drvvdAsyncIOOpen(void *pvUser, const char *pszLocation,
331 uint32_t fOpen,
332 PFNVDCOMPLETED pfnCompleted,
333 void **ppStorage)
334{
335 PVBOXDISK pThis = (PVBOXDISK)pvUser;
336 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)RTMemAllocZ(sizeof(DRVVDSTORAGEBACKEND));
337 int rc = VINF_SUCCESS;
338
339 if (pStorageBackend)
340 {
341 pStorageBackend->fSyncIoPending = false;
342 pStorageBackend->rcReqLast = VINF_SUCCESS;
343 pStorageBackend->pfnCompleted = pfnCompleted;
344
345 rc = RTSemEventCreate(&pStorageBackend->EventSem);
346 if (RT_SUCCESS(rc))
347 {
348 rc = PDMDrvHlpAsyncCompletionTemplateCreate(pThis->pDrvIns, &pStorageBackend->pTemplate,
349 drvvdAsyncTaskCompleted, pStorageBackend, "AsyncTaskCompleted");
350 if (RT_SUCCESS(rc))
351 {
352 uint32_t fFlags = (fOpen & RTFILE_O_ACCESS_MASK) == RTFILE_O_READ
353 ? PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_CACHING
354 : 0;
355 if (pThis->fShareable)
356 {
357 Assert((fOpen & RTFILE_O_DENY_MASK) == RTFILE_O_DENY_NONE);
358
359 fFlags |= PDMACEP_FILE_FLAGS_DONT_LOCK;
360 }
361 else
362 fFlags |= PDMACEP_FILE_FLAGS_CACHING;
363 rc = PDMR3AsyncCompletionEpCreateForFile(&pStorageBackend->pEndpoint,
364 pszLocation, fFlags,
365 pStorageBackend->pTemplate);
366 if (RT_SUCCESS(rc))
367 {
368 *ppStorage = pStorageBackend;
369 return VINF_SUCCESS;
370 }
371
372 PDMR3AsyncCompletionTemplateDestroy(pStorageBackend->pTemplate);
373 }
374 RTSemEventDestroy(pStorageBackend->EventSem);
375 }
376 RTMemFree(pStorageBackend);
377 }
378 else
379 rc = VERR_NO_MEMORY;
380
381 return rc;
382}
383
384static DECLCALLBACK(int) drvvdAsyncIOClose(void *pvUser, void *pStorage)
385{
386 PVBOXDISK pThis = (PVBOXDISK)pvUser;
387 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
388
389 PDMR3AsyncCompletionEpClose(pStorageBackend->pEndpoint);
390 PDMR3AsyncCompletionTemplateDestroy(pStorageBackend->pTemplate);
391 RTSemEventDestroy(pStorageBackend->EventSem);
392 RTMemFree(pStorageBackend);
393
394 return VINF_SUCCESS;;
395}
396
397static DECLCALLBACK(int) drvvdAsyncIOReadSync(void *pvUser, void *pStorage, uint64_t uOffset,
398 void *pvBuf, size_t cbRead, size_t *pcbRead)
399{
400 PVBOXDISK pThis = (PVBOXDISK)pvUser;
401 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
402 RTSGSEG DataSeg;
403 PPDMASYNCCOMPLETIONTASK pTask;
404
405 Assert(!pStorageBackend->fSyncIoPending);
406 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
407 DataSeg.cbSeg = cbRead;
408 DataSeg.pvSeg = pvBuf;
409
410 int rc = PDMR3AsyncCompletionEpRead(pStorageBackend->pEndpoint, uOffset, &DataSeg, 1, cbRead, NULL, &pTask);
411 if (RT_FAILURE(rc))
412 return rc;
413
414 if (rc == VINF_AIO_TASK_PENDING)
415 {
416 /* Wait */
417 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
418 AssertRC(rc);
419 }
420 else
421 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
422
423 if (pcbRead)
424 *pcbRead = cbRead;
425
426 return pStorageBackend->rcReqLast;
427}
428
429static DECLCALLBACK(int) drvvdAsyncIOWriteSync(void *pvUser, void *pStorage, uint64_t uOffset,
430 const void *pvBuf, size_t cbWrite, size_t *pcbWritten)
431{
432 PVBOXDISK pThis = (PVBOXDISK)pvUser;
433 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
434 RTSGSEG DataSeg;
435 PPDMASYNCCOMPLETIONTASK pTask;
436
437 Assert(!pStorageBackend->fSyncIoPending);
438 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
439 DataSeg.cbSeg = cbWrite;
440 DataSeg.pvSeg = (void *)pvBuf;
441
442 int rc = PDMR3AsyncCompletionEpWrite(pStorageBackend->pEndpoint, uOffset, &DataSeg, 1, cbWrite, NULL, &pTask);
443 if (RT_FAILURE(rc))
444 return rc;
445
446 if (rc == VINF_AIO_TASK_PENDING)
447 {
448 /* Wait */
449 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
450 AssertRC(rc);
451 }
452 else
453 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
454
455 if (pcbWritten)
456 *pcbWritten = cbWrite;
457
458 return pStorageBackend->rcReqLast;
459}
460
461static DECLCALLBACK(int) drvvdAsyncIOFlushSync(void *pvUser, void *pStorage)
462{
463 PVBOXDISK pThis = (PVBOXDISK)pvUser;
464 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
465 PPDMASYNCCOMPLETIONTASK pTask;
466
467 LogFlowFunc(("pvUser=%#p pStorage=%#p\n", pvUser, pStorage));
468
469 Assert(!pStorageBackend->fSyncIoPending);
470 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, true);
471
472 int rc = PDMR3AsyncCompletionEpFlush(pStorageBackend->pEndpoint, NULL, &pTask);
473 if (RT_FAILURE(rc))
474 return rc;
475
476 if (rc == VINF_AIO_TASK_PENDING)
477 {
478 /* Wait */
479 LogFlowFunc(("Waiting for flush to complete\n"));
480 rc = RTSemEventWait(pStorageBackend->EventSem, RT_INDEFINITE_WAIT);
481 AssertRC(rc);
482 }
483 else
484 ASMAtomicXchgBool(&pStorageBackend->fSyncIoPending, false);
485
486 return pStorageBackend->rcReqLast;
487}
488
489static DECLCALLBACK(int) drvvdAsyncIOReadAsync(void *pvUser, void *pStorage, uint64_t uOffset,
490 PCRTSGSEG paSegments, size_t cSegments,
491 size_t cbRead, void *pvCompletion,
492 void **ppTask)
493{
494 PVBOXDISK pThis = (PVBOXDISK)pvUser;
495 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
496
497 int rc = PDMR3AsyncCompletionEpRead(pStorageBackend->pEndpoint, uOffset, paSegments, cSegments, cbRead,
498 pvCompletion, (PPPDMASYNCCOMPLETIONTASK)ppTask);
499 if (rc == VINF_AIO_TASK_PENDING)
500 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
501
502 return rc;
503}
504
505static DECLCALLBACK(int) drvvdAsyncIOWriteAsync(void *pvUser, void *pStorage, uint64_t uOffset,
506 PCRTSGSEG paSegments, size_t cSegments,
507 size_t cbWrite, void *pvCompletion,
508 void **ppTask)
509{
510 PVBOXDISK pThis = (PVBOXDISK)pvUser;
511 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
512
513 int rc = PDMR3AsyncCompletionEpWrite(pStorageBackend->pEndpoint, uOffset, paSegments, cSegments, cbWrite,
514 pvCompletion, (PPPDMASYNCCOMPLETIONTASK)ppTask);
515 if (rc == VINF_AIO_TASK_PENDING)
516 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
517
518 return rc;
519}
520
521static DECLCALLBACK(int) drvvdAsyncIOFlushAsync(void *pvUser, void *pStorage,
522 void *pvCompletion, void **ppTask)
523{
524 PVBOXDISK pThis = (PVBOXDISK)pvUser;
525 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
526
527 int rc = PDMR3AsyncCompletionEpFlush(pStorageBackend->pEndpoint, pvCompletion,
528 (PPPDMASYNCCOMPLETIONTASK)ppTask);
529 if (rc == VINF_AIO_TASK_PENDING)
530 rc = VERR_VD_ASYNC_IO_IN_PROGRESS;
531
532 return rc;
533}
534
535static DECLCALLBACK(int) drvvdAsyncIOGetSize(void *pvUser, void *pStorage, uint64_t *pcbSize)
536{
537 PVBOXDISK pDrvVD = (PVBOXDISK)pvUser;
538 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
539
540 return PDMR3AsyncCompletionEpGetSize(pStorageBackend->pEndpoint, pcbSize);
541}
542
543static DECLCALLBACK(int) drvvdAsyncIOSetSize(void *pvUser, void *pStorage, uint64_t cbSize)
544{
545 PVBOXDISK pDrvVD = (PVBOXDISK)pvUser;
546 PDRVVDSTORAGEBACKEND pStorageBackend = (PDRVVDSTORAGEBACKEND)pStorage;
547
548 int rc = drvvdAsyncIOFlushSync(pvUser, pStorage);
549 if (RT_SUCCESS(rc))
550 rc = PDMR3AsyncCompletionEpSetSize(pStorageBackend->pEndpoint, cbSize);
551
552 return rc;
553}
554
555#endif /* VBOX_WITH_PDM_ASYNC_COMPLETION */
556
557
558/*******************************************************************************
559* VD Thread Synchronization interface implementation *
560*******************************************************************************/
561
562static DECLCALLBACK(int) drvvdThreadStartRead(void *pvUser)
563{
564 PVBOXDISK pThis = (PVBOXDISK)pvUser;
565
566 return RTSemRWRequestRead(pThis->MergeLock, RT_INDEFINITE_WAIT);
567}
568
569static DECLCALLBACK(int) drvvdThreadFinishRead(void *pvUser)
570{
571 PVBOXDISK pThis = (PVBOXDISK)pvUser;
572
573 return RTSemRWReleaseRead(pThis->MergeLock);
574}
575
576static DECLCALLBACK(int) drvvdThreadStartWrite(void *pvUser)
577{
578 PVBOXDISK pThis = (PVBOXDISK)pvUser;
579
580 return RTSemRWRequestWrite(pThis->MergeLock, RT_INDEFINITE_WAIT);
581}
582
583static DECLCALLBACK(int) drvvdThreadFinishWrite(void *pvUser)
584{
585 PVBOXDISK pThis = (PVBOXDISK)pvUser;
586
587 return RTSemRWReleaseWrite(pThis->MergeLock);
588}
589
590
591/*******************************************************************************
592* VD Configuration interface implementation *
593*******************************************************************************/
594
595static bool drvvdCfgAreKeysValid(void *pvUser, const char *pszzValid)
596{
597 return CFGMR3AreValuesValid((PCFGMNODE)pvUser, pszzValid);
598}
599
600static int drvvdCfgQuerySize(void *pvUser, const char *pszName, size_t *pcb)
601{
602 return CFGMR3QuerySize((PCFGMNODE)pvUser, pszName, pcb);
603}
604
605static int drvvdCfgQuery(void *pvUser, const char *pszName, char *pszString, size_t cchString)
606{
607 return CFGMR3QueryString((PCFGMNODE)pvUser, pszName, pszString, cchString);
608}
609
610
611#ifdef VBOX_WITH_INIP
612/*******************************************************************************
613* VD TCP network stack interface implementation - INIP case *
614*******************************************************************************/
615
616typedef union INIPSOCKADDRUNION
617{
618 struct sockaddr Addr;
619 struct sockaddr_in Ipv4;
620} INIPSOCKADDRUNION;
621
622typedef struct INIPSOCKET
623{
624 int hSock;
625} INIPSOCKET, *PINIPSOCKET;
626
627static DECLCALLBACK(int) drvvdINIPFlush(VDSOCKET Sock);
628
629/** @copydoc VDINTERFACETCPNET::pfnSocketCreate */
630static DECLCALLBACK(int) drvvdINIPSocketCreate(uint32_t fFlags, PVDSOCKET pSock)
631{
632 PINIPSOCKET pSocketInt = NULL;
633
634 /*
635 * The extended select method is not supported because it is impossible to wakeup
636 * the thread.
637 */
638 if (fFlags & VD_INTERFACETCPNET_CONNECT_EXTENDED_SELECT)
639 return VERR_NOT_SUPPORTED;
640
641 pSocketInt = (PINIPSOCKET)RTMemAllocZ(sizeof(INIPSOCKET));
642 if (pSocketInt)
643 {
644 pSocketInt->hSock = INT32_MAX;
645 *pSock = (VDSOCKET)pSocketInt;
646 return VINF_SUCCESS;
647 }
648
649 return VERR_NO_MEMORY;
650}
651
652/** @copydoc VDINTERFACETCPNET::pfnSocketCreate */
653static DECLCALLBACK(int) drvvdINIPSocketDestroy(VDSOCKET Sock)
654{
655 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
656
657 RTMemFree(pSocketInt);
658 return VINF_SUCCESS;
659}
660
661/** @copydoc VDINTERFACETCPNET::pfnClientConnect */
662static DECLCALLBACK(int) drvvdINIPClientConnect(VDSOCKET Sock, const char *pszAddress, uint32_t uPort)
663{
664 int rc = VINF_SUCCESS;
665 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
666
667 /* Check whether lwIP is set up in this VM instance. */
668 if (!DevINIPConfigured())
669 {
670 LogRelFunc(("no IP stack\n"));
671 return VERR_NET_HOST_UNREACHABLE;
672 }
673 /* Resolve hostname. As there is no standard resolver for lwIP yet,
674 * just accept numeric IP addresses for now. */
675 struct in_addr ip;
676 if (!lwip_inet_aton(pszAddress, &ip))
677 {
678 LogRelFunc(("cannot resolve IP %s\n", pszAddress));
679 return VERR_NET_HOST_UNREACHABLE;
680 }
681 /* Create socket and connect. */
682 int iSock = lwip_socket(PF_INET, SOCK_STREAM, 0);
683 if (iSock != -1)
684 {
685 struct sockaddr_in InAddr = {0};
686 InAddr.sin_family = AF_INET;
687 InAddr.sin_port = htons(uPort);
688 InAddr.sin_addr = ip;
689 if (!lwip_connect(iSock, (struct sockaddr *)&InAddr, sizeof(InAddr)))
690 {
691 pSocketInt->hSock = iSock;
692 return VINF_SUCCESS;
693 }
694 rc = VERR_NET_CONNECTION_REFUSED; /* @todo real solution needed */
695 lwip_close(iSock);
696 }
697 else
698 rc = VERR_NET_CONNECTION_REFUSED; /* @todo real solution needed */
699 return rc;
700}
701
702/** @copydoc VDINTERFACETCPNET::pfnClientClose */
703static DECLCALLBACK(int) drvvdINIPClientClose(VDSOCKET Sock)
704{
705 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
706
707 lwip_close(pSocketInt->hSock);
708 pSocketInt->hSock = INT32_MAX;
709 return VINF_SUCCESS; /** @todo real solution needed */
710}
711
712/** @copydoc VDINTERFACETCPNET::pfnIsClientConnected */
713static DECLCALLBACK(bool) drvvdINIPIsClientConnected(VDSOCKET Sock)
714{
715 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
716
717 return pSocketInt->hSock != INT32_MAX;
718}
719
720/** @copydoc VDINTERFACETCPNET::pfnSelectOne */
721static DECLCALLBACK(int) drvvdINIPSelectOne(VDSOCKET Sock, RTMSINTERVAL cMillies)
722{
723 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
724 fd_set fdsetR;
725 FD_ZERO(&fdsetR);
726 FD_SET((uintptr_t)Sock, &fdsetR);
727 fd_set fdsetE = fdsetR;
728
729 int rc;
730 if (cMillies == RT_INDEFINITE_WAIT)
731 rc = lwip_select(pSocketInt->hSock + 1, &fdsetR, NULL, &fdsetE, NULL);
732 else
733 {
734 struct timeval timeout;
735 timeout.tv_sec = cMillies / 1000;
736 timeout.tv_usec = (cMillies % 1000) * 1000;
737 rc = lwip_select(pSocketInt->hSock + 1, &fdsetR, NULL, &fdsetE, &timeout);
738 }
739 if (rc > 0)
740 return VINF_SUCCESS;
741 if (rc == 0)
742 return VERR_TIMEOUT;
743 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
744}
745
746/** @copydoc VDINTERFACETCPNET::pfnRead */
747static DECLCALLBACK(int) drvvdINIPRead(VDSOCKET Sock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
748{
749 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
750
751 /* Do params checking */
752 if (!pvBuffer || !cbBuffer)
753 {
754 AssertMsgFailed(("Invalid params\n"));
755 return VERR_INVALID_PARAMETER;
756 }
757
758 /*
759 * Read loop.
760 * If pcbRead is NULL we have to fill the entire buffer!
761 */
762 size_t cbRead = 0;
763 size_t cbToRead = cbBuffer;
764 for (;;)
765 {
766 /** @todo this clipping here is just in case (the send function
767 * needed it, so I added it here, too). Didn't investigate if this
768 * really has issues. Better be safe than sorry. */
769 ssize_t cbBytesRead = lwip_recv(pSocketInt->hSock, (char *)pvBuffer + cbRead,
770 RT_MIN(cbToRead, 32768), 0);
771 if (cbBytesRead < 0)
772 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution */
773 if (cbBytesRead == 0 && errno) /** @todo r=bird: lwip_recv will not touch errno on Windows. This may apply to other hosts as well */
774 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution */
775 if (pcbRead)
776 {
777 /* return partial data */
778 *pcbRead = cbBytesRead;
779 break;
780 }
781
782 /* read more? */
783 cbRead += cbBytesRead;
784 if (cbRead == cbBuffer)
785 break;
786
787 /* next */
788 cbToRead = cbBuffer - cbRead;
789 }
790
791 return VINF_SUCCESS;
792}
793
794/** @copydoc VDINTERFACETCPNET::pfnWrite */
795static DECLCALLBACK(int) drvvdINIPWrite(VDSOCKET Sock, const void *pvBuffer, size_t cbBuffer)
796{
797 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
798
799 do
800 {
801 /** @todo lwip send only supports up to 65535 bytes in a single
802 * send (stupid limitation buried in the code), so make sure we
803 * don't get any wraparounds. This should be moved to DevINIP
804 * stack interface once that's implemented. */
805 ssize_t cbWritten = lwip_send(pSocketInt->hSock, (void *)pvBuffer,
806 RT_MIN(cbBuffer, 32768), 0);
807 if (cbWritten < 0)
808 return VERR_NET_CONNECTION_REFUSED; /** @todo real solution needed */
809 AssertMsg(cbBuffer >= (size_t)cbWritten, ("Wrote more than we requested!!! cbWritten=%d cbBuffer=%d\n",
810 cbWritten, cbBuffer));
811 cbBuffer -= cbWritten;
812 pvBuffer = (const char *)pvBuffer + cbWritten;
813 } while (cbBuffer);
814
815 return VINF_SUCCESS;
816}
817
818/** @copydoc VDINTERFACETCPNET::pfnSgWrite */
819static DECLCALLBACK(int) drvvdINIPSgWrite(VDSOCKET Sock, PCRTSGBUF pSgBuf)
820{
821 int rc = VINF_SUCCESS;
822
823 /* This is an extremely crude emulation, however it's good enough
824 * for our iSCSI code. INIP has no sendmsg(). */
825 for (unsigned i = 0; i < pSgBuf->cSegs; i++)
826 {
827 rc = drvvdINIPWrite(Sock, pSgBuf->paSegs[i].pvSeg,
828 pSgBuf->paSegs[i].cbSeg);
829 if (RT_FAILURE(rc))
830 break;
831 }
832 if (RT_SUCCESS(rc))
833 drvvdINIPFlush(Sock);
834
835 return rc;
836}
837
838/** @copydoc VDINTERFACETCPNET::pfnFlush */
839static DECLCALLBACK(int) drvvdINIPFlush(VDSOCKET Sock)
840{
841 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
842
843 int fFlag = 1;
844 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
845 (const char *)&fFlag, sizeof(fFlag));
846 fFlag = 0;
847 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
848 (const char *)&fFlag, sizeof(fFlag));
849 return VINF_SUCCESS;
850}
851
852/** @copydoc VDINTERFACETCPNET::pfnSetSendCoalescing */
853static DECLCALLBACK(int) drvvdINIPSetSendCoalescing(VDSOCKET Sock, bool fEnable)
854{
855 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
856
857 int fFlag = fEnable ? 0 : 1;
858 lwip_setsockopt(pSocketInt->hSock, IPPROTO_TCP, TCP_NODELAY,
859 (const char *)&fFlag, sizeof(fFlag));
860 return VINF_SUCCESS;
861}
862
863/** @copydoc VDINTERFACETCPNET::pfnGetLocalAddress */
864static DECLCALLBACK(int) drvvdINIPGetLocalAddress(VDSOCKET Sock, PRTNETADDR pAddr)
865{
866 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
867 INIPSOCKADDRUNION u;
868 socklen_t cbAddr = sizeof(u);
869 RT_ZERO(u);
870 if (!lwip_getsockname(pSocketInt->hSock, &u.Addr, &cbAddr))
871 {
872 /*
873 * Convert the address.
874 */
875 if ( cbAddr == sizeof(struct sockaddr_in)
876 && u.Addr.sa_family == AF_INET)
877 {
878 RT_ZERO(*pAddr);
879 pAddr->enmType = RTNETADDRTYPE_IPV4;
880 pAddr->uPort = RT_N2H_U16(u.Ipv4.sin_port);
881 pAddr->uAddr.IPv4.u = u.Ipv4.sin_addr.s_addr;
882 }
883 else
884 return VERR_NET_ADDRESS_FAMILY_NOT_SUPPORTED;
885 return VINF_SUCCESS;
886 }
887 return VERR_NET_OPERATION_NOT_SUPPORTED;
888}
889
890/** @copydoc VDINTERFACETCPNET::pfnGetPeerAddress */
891static DECLCALLBACK(int) drvvdINIPGetPeerAddress(VDSOCKET Sock, PRTNETADDR pAddr)
892{
893 PINIPSOCKET pSocketInt = (PINIPSOCKET)Sock;
894 INIPSOCKADDRUNION u;
895 socklen_t cbAddr = sizeof(u);
896 RT_ZERO(u);
897 if (!lwip_getpeername(pSocketInt->hSock, &u.Addr, &cbAddr))
898 {
899 /*
900 * Convert the address.
901 */
902 if ( cbAddr == sizeof(struct sockaddr_in)
903 && u.Addr.sa_family == AF_INET)
904 {
905 RT_ZERO(*pAddr);
906 pAddr->enmType = RTNETADDRTYPE_IPV4;
907 pAddr->uPort = RT_N2H_U16(u.Ipv4.sin_port);
908 pAddr->uAddr.IPv4.u = u.Ipv4.sin_addr.s_addr;
909 }
910 else
911 return VERR_NET_ADDRESS_FAMILY_NOT_SUPPORTED;
912 return VINF_SUCCESS;
913 }
914 return VERR_NET_OPERATION_NOT_SUPPORTED;
915}
916
917/** @copydoc VDINTERFACETCPNET::pfnSelectOneEx */
918static DECLCALLBACK(int) drvvdINIPSelectOneEx(VDSOCKET Sock, uint32_t fEvents, uint32_t *pfEvents, RTMSINTERVAL cMillies)
919{
920 AssertMsgFailed(("Not supported!\n"));
921 return VERR_NOT_SUPPORTED;
922}
923
924/** @copydoc VDINTERFACETCPNET::pfnPoke */
925static DECLCALLBACK(int) drvvdINIPPoke(VDSOCKET Sock)
926{
927 AssertMsgFailed(("Not supported!\n"));
928 return VERR_NOT_SUPPORTED;
929}
930
931#endif /* VBOX_WITH_INIP */
932
933
934/*******************************************************************************
935* VD TCP network stack interface implementation - Host TCP case *
936*******************************************************************************/
937
938/**
939 * Socket data.
940 */
941typedef struct VDSOCKETINT
942{
943 /** IPRT socket handle. */
944 RTSOCKET hSocket;
945 /** Pollset with the wakeup pipe and socket. */
946 RTPOLLSET hPollSet;
947 /** Pipe endpoint - read (in the pollset). */
948 RTPIPE hPipeR;
949 /** Pipe endpoint - write. */
950 RTPIPE hPipeW;
951 /** Flag whether the thread was woken up. */
952 volatile bool fWokenUp;
953 /** Flag whether the thread is waiting in the select call. */
954 volatile bool fWaiting;
955 /** Old event mask. */
956 uint32_t fEventsOld;
957} VDSOCKETINT, *PVDSOCKETINT;
958
959/** Pollset id of the socket. */
960#define VDSOCKET_POLL_ID_SOCKET 0
961/** Pollset id of the pipe. */
962#define VDSOCKET_POLL_ID_PIPE 1
963
964/** @copydoc VDINTERFACETCPNET::pfnSocketCreate */
965static DECLCALLBACK(int) drvvdTcpSocketCreate(uint32_t fFlags, PVDSOCKET pSock)
966{
967 int rc = VINF_SUCCESS;
968 int rc2 = VINF_SUCCESS;
969 PVDSOCKETINT pSockInt = NULL;
970
971 pSockInt = (PVDSOCKETINT)RTMemAllocZ(sizeof(VDSOCKETINT));
972 if (!pSockInt)
973 return VERR_NO_MEMORY;
974
975 pSockInt->hSocket = NIL_RTSOCKET;
976 pSockInt->hPollSet = NIL_RTPOLLSET;
977 pSockInt->hPipeR = NIL_RTPIPE;
978 pSockInt->hPipeW = NIL_RTPIPE;
979 pSockInt->fWokenUp = false;
980 pSockInt->fWaiting = false;
981
982 if (fFlags & VD_INTERFACETCPNET_CONNECT_EXTENDED_SELECT)
983 {
984 /* Init pipe and pollset. */
985 rc = RTPipeCreate(&pSockInt->hPipeR, &pSockInt->hPipeW, 0);
986 if (RT_SUCCESS(rc))
987 {
988 rc = RTPollSetCreate(&pSockInt->hPollSet);
989 if (RT_SUCCESS(rc))
990 {
991 rc = RTPollSetAddPipe(pSockInt->hPollSet, pSockInt->hPipeR,
992 RTPOLL_EVT_READ, VDSOCKET_POLL_ID_PIPE);
993 if (RT_SUCCESS(rc))
994 {
995 *pSock = pSockInt;
996 return VINF_SUCCESS;
997 }
998
999 RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_PIPE);
1000 rc2 = RTPollSetDestroy(pSockInt->hPollSet);
1001 AssertRC(rc2);
1002 }
1003
1004 rc2 = RTPipeClose(pSockInt->hPipeR);
1005 AssertRC(rc2);
1006 rc2 = RTPipeClose(pSockInt->hPipeW);
1007 AssertRC(rc2);
1008 }
1009 }
1010 else
1011 {
1012 *pSock = pSockInt;
1013 return VINF_SUCCESS;
1014 }
1015
1016 RTMemFree(pSockInt);
1017
1018 return rc;
1019}
1020
1021/** @copydoc VDINTERFACETCPNET::pfnSocketDestroy */
1022static DECLCALLBACK(int) drvvdTcpSocketDestroy(VDSOCKET Sock)
1023{
1024 int rc = VINF_SUCCESS;
1025 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1026
1027 /* Destroy the pipe and pollset if necessary. */
1028 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1029 {
1030 if (pSockInt->hSocket != NIL_RTSOCKET)
1031 {
1032 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1033 Assert(RT_SUCCESS(rc) || rc == VERR_POLL_HANDLE_ID_NOT_FOUND);
1034 }
1035 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_PIPE);
1036 AssertRC(rc);
1037 rc = RTPollSetDestroy(pSockInt->hPollSet);
1038 AssertRC(rc);
1039 rc = RTPipeClose(pSockInt->hPipeR);
1040 AssertRC(rc);
1041 rc = RTPipeClose(pSockInt->hPipeW);
1042 AssertRC(rc);
1043 }
1044
1045 if (pSockInt->hSocket != NIL_RTSOCKET)
1046 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1047
1048 RTMemFree(pSockInt);
1049
1050 return rc;
1051}
1052
1053/** @copydoc VDINTERFACETCPNET::pfnClientConnect */
1054static DECLCALLBACK(int) drvvdTcpClientConnect(VDSOCKET Sock, const char *pszAddress, uint32_t uPort)
1055{
1056 int rc = VINF_SUCCESS;
1057 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1058
1059 rc = RTTcpClientConnect(pszAddress, uPort, &pSockInt->hSocket);
1060 if (RT_SUCCESS(rc))
1061 {
1062 /* Add to the pollset if required. */
1063 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1064 {
1065 pSockInt->fEventsOld = RTPOLL_EVT_READ | RTPOLL_EVT_WRITE | RTPOLL_EVT_ERROR;
1066
1067 rc = RTPollSetAddSocket(pSockInt->hPollSet, pSockInt->hSocket,
1068 pSockInt->fEventsOld, VDSOCKET_POLL_ID_SOCKET);
1069 }
1070
1071 if (RT_SUCCESS(rc))
1072 return VINF_SUCCESS;
1073
1074 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1075 }
1076
1077 return rc;
1078}
1079
1080/** @copydoc VDINTERFACETCPNET::pfnClientClose */
1081static DECLCALLBACK(int) drvvdTcpClientClose(VDSOCKET Sock)
1082{
1083 int rc = VINF_SUCCESS;
1084 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1085
1086 if (pSockInt->hPollSet != NIL_RTPOLLSET)
1087 {
1088 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1089 AssertRC(rc);
1090 }
1091
1092 rc = RTTcpClientCloseEx(pSockInt->hSocket, false /*fGracefulShutdown*/);
1093 pSockInt->hSocket = NIL_RTSOCKET;
1094
1095 return rc;
1096}
1097
1098/** @copydoc VDINTERFACETCPNET::pfnIsClientConnected */
1099static DECLCALLBACK(bool) drvvdTcpIsClientConnected(VDSOCKET Sock)
1100{
1101 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1102
1103 return pSockInt->hSocket != NIL_RTSOCKET;
1104}
1105
1106/** @copydoc VDINTERFACETCPNET::pfnSelectOne */
1107static DECLCALLBACK(int) drvvdTcpSelectOne(VDSOCKET Sock, RTMSINTERVAL cMillies)
1108{
1109 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1110
1111 return RTTcpSelectOne(pSockInt->hSocket, cMillies);
1112}
1113
1114/** @copydoc VDINTERFACETCPNET::pfnRead */
1115static DECLCALLBACK(int) drvvdTcpRead(VDSOCKET Sock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
1116{
1117 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1118
1119 return RTTcpRead(pSockInt->hSocket, pvBuffer, cbBuffer, pcbRead);
1120}
1121
1122/** @copydoc VDINTERFACETCPNET::pfnWrite */
1123static DECLCALLBACK(int) drvvdTcpWrite(VDSOCKET Sock, const void *pvBuffer, size_t cbBuffer)
1124{
1125 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1126
1127 return RTTcpWrite(pSockInt->hSocket, pvBuffer, cbBuffer);
1128}
1129
1130/** @copydoc VDINTERFACETCPNET::pfnSgWrite */
1131static DECLCALLBACK(int) drvvdTcpSgWrite(VDSOCKET Sock, PCRTSGBUF pSgBuf)
1132{
1133 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1134
1135 return RTTcpSgWrite(pSockInt->hSocket, pSgBuf);
1136}
1137
1138/** @copydoc VDINTERFACETCPNET::pfnReadNB */
1139static DECLCALLBACK(int) drvvdTcpReadNB(VDSOCKET Sock, void *pvBuffer, size_t cbBuffer, size_t *pcbRead)
1140{
1141 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1142
1143 return RTTcpReadNB(pSockInt->hSocket, pvBuffer, cbBuffer, pcbRead);
1144}
1145
1146/** @copydoc VDINTERFACETCPNET::pfnWriteNB */
1147static DECLCALLBACK(int) drvvdTcpWriteNB(VDSOCKET Sock, const void *pvBuffer, size_t cbBuffer, size_t *pcbWritten)
1148{
1149 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1150
1151 return RTTcpWriteNB(pSockInt->hSocket, pvBuffer, cbBuffer, pcbWritten);
1152}
1153
1154/** @copydoc VDINTERFACETCPNET::pfnSgWriteNB */
1155static DECLCALLBACK(int) drvvdTcpSgWriteNB(VDSOCKET Sock, PRTSGBUF pSgBuf, size_t *pcbWritten)
1156{
1157 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1158
1159 return RTTcpSgWriteNB(pSockInt->hSocket, pSgBuf, pcbWritten);
1160}
1161
1162/** @copydoc VDINTERFACETCPNET::pfnFlush */
1163static DECLCALLBACK(int) drvvdTcpFlush(VDSOCKET Sock)
1164{
1165 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1166
1167 return RTTcpFlush(pSockInt->hSocket);
1168}
1169
1170/** @copydoc VDINTERFACETCPNET::pfnSetSendCoalescing */
1171static DECLCALLBACK(int) drvvdTcpSetSendCoalescing(VDSOCKET Sock, bool fEnable)
1172{
1173 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1174
1175 return RTTcpSetSendCoalescing(pSockInt->hSocket, fEnable);
1176}
1177
1178/** @copydoc VDINTERFACETCPNET::pfnGetLocalAddress */
1179static DECLCALLBACK(int) drvvdTcpGetLocalAddress(VDSOCKET Sock, PRTNETADDR pAddr)
1180{
1181 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1182
1183 return RTTcpGetLocalAddress(pSockInt->hSocket, pAddr);
1184}
1185
1186/** @copydoc VDINTERFACETCPNET::pfnGetPeerAddress */
1187static DECLCALLBACK(int) drvvdTcpGetPeerAddress(VDSOCKET Sock, PRTNETADDR pAddr)
1188{
1189 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1190
1191 return RTTcpGetPeerAddress(pSockInt->hSocket, pAddr);
1192}
1193
1194static int drvvdTcpSelectOneExPoll(VDSOCKET Sock, uint32_t fEvents,
1195 uint32_t *pfEvents, RTMSINTERVAL cMillies)
1196{
1197 int rc = VINF_SUCCESS;
1198 uint32_t id = 0;
1199 uint32_t fEventsRecv = 0;
1200 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1201
1202 *pfEvents = 0;
1203
1204 if ( pSockInt->fEventsOld != fEvents
1205 && pSockInt->hSocket != NIL_RTSOCKET)
1206 {
1207 uint32_t fPollEvents = 0;
1208
1209 if (fEvents & VD_INTERFACETCPNET_EVT_READ)
1210 fPollEvents |= RTPOLL_EVT_READ;
1211 if (fEvents & VD_INTERFACETCPNET_EVT_WRITE)
1212 fPollEvents |= RTPOLL_EVT_WRITE;
1213 if (fEvents & VD_INTERFACETCPNET_EVT_ERROR)
1214 fPollEvents |= RTPOLL_EVT_ERROR;
1215
1216 rc = RTPollSetEventsChange(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET, fPollEvents);
1217 if (RT_FAILURE(rc))
1218 return rc;
1219
1220 pSockInt->fEventsOld = fEvents;
1221 }
1222
1223 ASMAtomicXchgBool(&pSockInt->fWaiting, true);
1224 if (ASMAtomicXchgBool(&pSockInt->fWokenUp, false))
1225 {
1226 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1227 return VERR_INTERRUPTED;
1228 }
1229
1230 rc = RTPoll(pSockInt->hPollSet, cMillies, &fEventsRecv, &id);
1231 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1232
1233 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1234
1235 if (RT_SUCCESS(rc))
1236 {
1237 if (id == VDSOCKET_POLL_ID_SOCKET)
1238 {
1239 fEventsRecv &= RTPOLL_EVT_VALID_MASK;
1240
1241 if (fEventsRecv & RTPOLL_EVT_READ)
1242 *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
1243 if (fEventsRecv & RTPOLL_EVT_WRITE)
1244 *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
1245 if (fEventsRecv & RTPOLL_EVT_ERROR)
1246 *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
1247 }
1248 else
1249 {
1250 size_t cbRead = 0;
1251 uint8_t abBuf[10];
1252 Assert(id == VDSOCKET_POLL_ID_PIPE);
1253 Assert((fEventsRecv & RTPOLL_EVT_VALID_MASK) == RTPOLL_EVT_READ);
1254
1255 /* We got interrupted, drain the pipe. */
1256 rc = RTPipeRead(pSockInt->hPipeR, abBuf, sizeof(abBuf), &cbRead);
1257 AssertRC(rc);
1258
1259 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1260
1261 rc = VERR_INTERRUPTED;
1262 }
1263 }
1264
1265 return rc;
1266}
1267
1268/** @copydoc VDINTERFACETCPNET::pfnSelectOneEx */
1269static DECLCALLBACK(int) drvvdTcpSelectOneExNoPoll(VDSOCKET Sock, uint32_t fEvents,
1270 uint32_t *pfEvents, RTMSINTERVAL cMillies)
1271{
1272 int rc = VINF_SUCCESS;
1273 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1274
1275 *pfEvents = 0;
1276
1277 ASMAtomicXchgBool(&pSockInt->fWaiting, true);
1278 if (ASMAtomicXchgBool(&pSockInt->fWokenUp, false))
1279 {
1280 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1281 return VERR_INTERRUPTED;
1282 }
1283
1284 if ( pSockInt->hSocket == NIL_RTSOCKET
1285 || !fEvents)
1286 {
1287 /*
1288 * Only the pipe is configured or the caller doesn't wait for a socket event,
1289 * wait until there is something to read from the pipe.
1290 */
1291 size_t cbRead = 0;
1292 char ch = 0;
1293 rc = RTPipeReadBlocking(pSockInt->hPipeR, &ch, 1, &cbRead);
1294 if (RT_SUCCESS(rc))
1295 {
1296 Assert(cbRead == 1);
1297 rc = VERR_INTERRUPTED;
1298 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1299 }
1300 }
1301 else
1302 {
1303 uint32_t fSelectEvents = 0;
1304
1305 if (fEvents & VD_INTERFACETCPNET_EVT_READ)
1306 fSelectEvents |= RTSOCKET_EVT_READ;
1307 if (fEvents & VD_INTERFACETCPNET_EVT_WRITE)
1308 fSelectEvents |= RTSOCKET_EVT_WRITE;
1309 if (fEvents & VD_INTERFACETCPNET_EVT_ERROR)
1310 fSelectEvents |= RTSOCKET_EVT_ERROR;
1311
1312 if (fEvents & VD_INTERFACETCPNET_HINT_INTERRUPT)
1313 {
1314 uint32_t fEventsRecv = 0;
1315
1316 /* Make sure the socket is not in the pollset. */
1317 rc = RTPollSetRemove(pSockInt->hPollSet, VDSOCKET_POLL_ID_SOCKET);
1318 Assert(RT_SUCCESS(rc) || rc == VERR_POLL_HANDLE_ID_NOT_FOUND);
1319
1320 for (;;)
1321 {
1322 uint32_t id = 0;
1323 rc = RTPoll(pSockInt->hPollSet, 5, &fEvents, &id);
1324 if (rc == VERR_TIMEOUT)
1325 {
1326 /* Check the socket. */
1327 rc = RTTcpSelectOneEx(pSockInt->hSocket, fSelectEvents, &fEventsRecv, 0);
1328 if (RT_SUCCESS(rc))
1329 {
1330 if (fEventsRecv & RTSOCKET_EVT_READ)
1331 *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
1332 if (fEventsRecv & RTSOCKET_EVT_WRITE)
1333 *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
1334 if (fEventsRecv & RTSOCKET_EVT_ERROR)
1335 *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
1336 break; /* Quit */
1337 }
1338 else if (rc != VERR_TIMEOUT)
1339 break;
1340 }
1341 else if (RT_SUCCESS(rc))
1342 {
1343 size_t cbRead = 0;
1344 uint8_t abBuf[10];
1345 Assert(id == VDSOCKET_POLL_ID_PIPE);
1346 Assert((fEventsRecv & RTPOLL_EVT_VALID_MASK) == RTPOLL_EVT_READ);
1347
1348 /* We got interrupted, drain the pipe. */
1349 rc = RTPipeRead(pSockInt->hPipeR, abBuf, sizeof(abBuf), &cbRead);
1350 AssertRC(rc);
1351
1352 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1353
1354 rc = VERR_INTERRUPTED;
1355 break;
1356 }
1357 else
1358 break;
1359 }
1360 }
1361 else /* The caller waits for a socket event. */
1362 {
1363 uint32_t fEventsRecv = 0;
1364
1365 /* Loop until we got woken up or a socket event occurred. */
1366 for (;;)
1367 {
1368 /** @todo find an adaptive wait algorithm based on the
1369 * number of wakeups in the past. */
1370 rc = RTTcpSelectOneEx(pSockInt->hSocket, fSelectEvents, &fEventsRecv, 5);
1371 if (rc == VERR_TIMEOUT)
1372 {
1373 /* Check if there is an event pending. */
1374 size_t cbRead = 0;
1375 char ch = 0;
1376 rc = RTPipeRead(pSockInt->hPipeR, &ch, 1, &cbRead);
1377 if (RT_SUCCESS(rc) && rc != VINF_TRY_AGAIN)
1378 {
1379 Assert(cbRead == 1);
1380 rc = VERR_INTERRUPTED;
1381 ASMAtomicXchgBool(&pSockInt->fWokenUp, false);
1382 break; /* Quit */
1383 }
1384 else
1385 Assert(rc == VINF_TRY_AGAIN);
1386 }
1387 else if (RT_SUCCESS(rc))
1388 {
1389 if (fEventsRecv & RTSOCKET_EVT_READ)
1390 *pfEvents |= VD_INTERFACETCPNET_EVT_READ;
1391 if (fEventsRecv & RTSOCKET_EVT_WRITE)
1392 *pfEvents |= VD_INTERFACETCPNET_EVT_WRITE;
1393 if (fEventsRecv & RTSOCKET_EVT_ERROR)
1394 *pfEvents |= VD_INTERFACETCPNET_EVT_ERROR;
1395 break; /* Quit */
1396 }
1397 else
1398 break;
1399 }
1400 }
1401 }
1402
1403 ASMAtomicXchgBool(&pSockInt->fWaiting, false);
1404
1405 return rc;
1406}
1407
1408/** @copydoc VDINTERFACETCPNET::pfnPoke */
1409static DECLCALLBACK(int) drvvdTcpPoke(VDSOCKET Sock)
1410{
1411 int rc = VINF_SUCCESS;
1412 size_t cbWritten = 0;
1413 PVDSOCKETINT pSockInt = (PVDSOCKETINT)Sock;
1414
1415 ASMAtomicXchgBool(&pSockInt->fWokenUp, true);
1416
1417 if (ASMAtomicReadBool(&pSockInt->fWaiting))
1418 {
1419 rc = RTPipeWrite(pSockInt->hPipeW, "", 1, &cbWritten);
1420 Assert(RT_SUCCESS(rc) || cbWritten == 0);
1421 }
1422
1423 return VINF_SUCCESS;
1424}
1425
1426
1427/*******************************************************************************
1428* Media interface methods *
1429*******************************************************************************/
1430
1431/** @copydoc PDMIMEDIA::pfnRead */
1432static DECLCALLBACK(int) drvvdRead(PPDMIMEDIA pInterface,
1433 uint64_t off, void *pvBuf, size_t cbRead)
1434{
1435 int rc = VINF_SUCCESS;
1436
1437 LogFlowFunc(("off=%#llx pvBuf=%p cbRead=%d\n", off, pvBuf, cbRead));
1438 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1439
1440 if (!pThis->fBootAccelActive)
1441 rc = VDRead(pThis->pDisk, off, pvBuf, cbRead);
1442 else
1443 {
1444 /* Can we serve the request from the buffer? */
1445 if ( off >= pThis->offDisk
1446 && off - pThis->offDisk < pThis->cbDataValid)
1447 {
1448 size_t cbToCopy = RT_MIN(cbRead, pThis->offDisk + pThis->cbDataValid - off);
1449
1450 memcpy(pvBuf, pThis->pbData + (off - pThis->offDisk), cbToCopy);
1451 cbRead -= cbToCopy;
1452 off += cbToCopy;
1453 pvBuf = (char *)pvBuf + cbToCopy;
1454 }
1455
1456 if ( cbRead > 0
1457 && cbRead < pThis->cbBootAccelBuffer)
1458 {
1459 /* Increase request to the buffer size and read. */
1460 pThis->cbDataValid = RT_MIN(pThis->cbDisk - off, pThis->cbBootAccelBuffer);
1461 pThis->offDisk = off;
1462 rc = VDRead(pThis->pDisk, off, pThis->pbData, pThis->cbDataValid);
1463 if (RT_FAILURE(rc))
1464 pThis->cbDataValid = 0;
1465 else
1466 memcpy(pvBuf, pThis->pbData, cbRead);
1467 }
1468 else if (cbRead >= pThis->cbBootAccelBuffer)
1469 {
1470 pThis->fBootAccelActive = false; /* Deactiviate */
1471 }
1472 }
1473
1474 if (RT_SUCCESS(rc))
1475 Log2(("%s: off=%#llx pvBuf=%p cbRead=%d %.*Rhxd\n", __FUNCTION__,
1476 off, pvBuf, cbRead, cbRead, pvBuf));
1477 LogFlowFunc(("returns %Rrc\n", rc));
1478 return rc;
1479}
1480
1481/** @copydoc PDMIMEDIA::pfnWrite */
1482static DECLCALLBACK(int) drvvdWrite(PPDMIMEDIA pInterface,
1483 uint64_t off, const void *pvBuf,
1484 size_t cbWrite)
1485{
1486 LogFlowFunc(("off=%#llx pvBuf=%p cbWrite=%d\n", off, pvBuf, cbWrite));
1487 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1488 Log2(("%s: off=%#llx pvBuf=%p cbWrite=%d %.*Rhxd\n", __FUNCTION__,
1489 off, pvBuf, cbWrite, cbWrite, pvBuf));
1490
1491 /* Invalidate any buffer if boot acceleration is enabled. */
1492 if (pThis->fBootAccelActive)
1493 {
1494 pThis->cbDataValid = 0;
1495 pThis->offDisk = 0;
1496 }
1497
1498 int rc = VDWrite(pThis->pDisk, off, pvBuf, cbWrite);
1499 LogFlowFunc(("returns %Rrc\n", rc));
1500 return rc;
1501}
1502
1503/** @copydoc PDMIMEDIA::pfnFlush */
1504static DECLCALLBACK(int) drvvdFlush(PPDMIMEDIA pInterface)
1505{
1506 LogFlowFunc(("\n"));
1507 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1508 int rc = VDFlush(pThis->pDisk);
1509 LogFlowFunc(("returns %Rrc\n", rc));
1510 return rc;
1511}
1512
1513/** @copydoc PDMIMEDIA::pfnMerge */
1514static DECLCALLBACK(int) drvvdMerge(PPDMIMEDIA pInterface,
1515 PFNSIMPLEPROGRESS pfnProgress,
1516 void *pvUser)
1517{
1518 LogFlowFunc(("\n"));
1519 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1520 int rc = VINF_SUCCESS;
1521
1522 /* Note: There is an unavoidable race between destruction and another
1523 * thread invoking this function. This is handled safely and gracefully by
1524 * atomically invalidating the lock handle in drvvdDestruct. */
1525 int rc2 = RTSemFastMutexRequest(pThis->MergeCompleteMutex);
1526 AssertRC(rc2);
1527 if (RT_SUCCESS(rc2) && pThis->fMergePending)
1528 {
1529 /* Take shortcut: PFNSIMPLEPROGRESS is exactly the same type as
1530 * PFNVDPROGRESS, so there's no need for a conversion function. */
1531 /** @todo maybe introduce a conversion which limits update frequency. */
1532 PVDINTERFACE pVDIfsOperation = NULL;
1533 VDINTERFACE VDIProgress;
1534 VDINTERFACEPROGRESS VDIProgressCallbacks;
1535 VDIProgressCallbacks.cbSize = sizeof(VDINTERFACEPROGRESS);
1536 VDIProgressCallbacks.enmInterface = VDINTERFACETYPE_PROGRESS;
1537 VDIProgressCallbacks.pfnProgress = pfnProgress;
1538 rc2 = VDInterfaceAdd(&VDIProgress, "DrvVD_VDIProgress", VDINTERFACETYPE_PROGRESS,
1539 &VDIProgressCallbacks, pvUser, &pVDIfsOperation);
1540 AssertRC(rc2);
1541 pThis->fMergePending = false;
1542 rc = VDMerge(pThis->pDisk, pThis->uMergeSource,
1543 pThis->uMergeTarget, pVDIfsOperation);
1544 }
1545 rc2 = RTSemFastMutexRelease(pThis->MergeCompleteMutex);
1546 AssertRC(rc2);
1547 LogFlowFunc(("returns %Rrc\n", rc));
1548 return rc;
1549}
1550
1551/** @copydoc PDMIMEDIA::pfnGetSize */
1552static DECLCALLBACK(uint64_t) drvvdGetSize(PPDMIMEDIA pInterface)
1553{
1554 LogFlowFunc(("\n"));
1555 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1556 uint64_t cb = VDGetSize(pThis->pDisk, VD_LAST_IMAGE);
1557 LogFlowFunc(("returns %#llx (%llu)\n", cb, cb));
1558 return cb;
1559}
1560
1561/** @copydoc PDMIMEDIA::pfnIsReadOnly */
1562static DECLCALLBACK(bool) drvvdIsReadOnly(PPDMIMEDIA pInterface)
1563{
1564 LogFlowFunc(("\n"));
1565 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1566 bool f = VDIsReadOnly(pThis->pDisk);
1567 LogFlowFunc(("returns %d\n", f));
1568 return f;
1569}
1570
1571/** @copydoc PDMIMEDIA::pfnBiosGetPCHSGeometry */
1572static DECLCALLBACK(int) drvvdBiosGetPCHSGeometry(PPDMIMEDIA pInterface,
1573 PPDMMEDIAGEOMETRY pPCHSGeometry)
1574{
1575 LogFlowFunc(("\n"));
1576 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1577 VDGEOMETRY geo;
1578 int rc = VDGetPCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
1579 if (RT_SUCCESS(rc))
1580 {
1581 pPCHSGeometry->cCylinders = geo.cCylinders;
1582 pPCHSGeometry->cHeads = geo.cHeads;
1583 pPCHSGeometry->cSectors = geo.cSectors;
1584 }
1585 else
1586 {
1587 LogFunc(("geometry not available.\n"));
1588 rc = VERR_PDM_GEOMETRY_NOT_SET;
1589 }
1590 LogFlowFunc(("returns %Rrc (CHS=%d/%d/%d)\n",
1591 rc, pPCHSGeometry->cCylinders, pPCHSGeometry->cHeads, pPCHSGeometry->cSectors));
1592 return rc;
1593}
1594
1595/** @copydoc PDMIMEDIA::pfnBiosSetPCHSGeometry */
1596static DECLCALLBACK(int) drvvdBiosSetPCHSGeometry(PPDMIMEDIA pInterface,
1597 PCPDMMEDIAGEOMETRY pPCHSGeometry)
1598{
1599 LogFlowFunc(("CHS=%d/%d/%d\n",
1600 pPCHSGeometry->cCylinders, pPCHSGeometry->cHeads, pPCHSGeometry->cSectors));
1601 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1602 VDGEOMETRY geo;
1603 geo.cCylinders = pPCHSGeometry->cCylinders;
1604 geo.cHeads = pPCHSGeometry->cHeads;
1605 geo.cSectors = pPCHSGeometry->cSectors;
1606 int rc = VDSetPCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
1607 if (rc == VERR_VD_GEOMETRY_NOT_SET)
1608 rc = VERR_PDM_GEOMETRY_NOT_SET;
1609 LogFlowFunc(("returns %Rrc\n", rc));
1610 return rc;
1611}
1612
1613/** @copydoc PDMIMEDIA::pfnBiosGetLCHSGeometry */
1614static DECLCALLBACK(int) drvvdBiosGetLCHSGeometry(PPDMIMEDIA pInterface,
1615 PPDMMEDIAGEOMETRY pLCHSGeometry)
1616{
1617 LogFlowFunc(("\n"));
1618 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1619 VDGEOMETRY geo;
1620 int rc = VDGetLCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
1621 if (RT_SUCCESS(rc))
1622 {
1623 pLCHSGeometry->cCylinders = geo.cCylinders;
1624 pLCHSGeometry->cHeads = geo.cHeads;
1625 pLCHSGeometry->cSectors = geo.cSectors;
1626 }
1627 else
1628 {
1629 LogFunc(("geometry not available.\n"));
1630 rc = VERR_PDM_GEOMETRY_NOT_SET;
1631 }
1632 LogFlowFunc(("returns %Rrc (CHS=%d/%d/%d)\n",
1633 rc, pLCHSGeometry->cCylinders, pLCHSGeometry->cHeads, pLCHSGeometry->cSectors));
1634 return rc;
1635}
1636
1637/** @copydoc PDMIMEDIA::pfnBiosSetLCHSGeometry */
1638static DECLCALLBACK(int) drvvdBiosSetLCHSGeometry(PPDMIMEDIA pInterface,
1639 PCPDMMEDIAGEOMETRY pLCHSGeometry)
1640{
1641 LogFlowFunc(("CHS=%d/%d/%d\n",
1642 pLCHSGeometry->cCylinders, pLCHSGeometry->cHeads, pLCHSGeometry->cSectors));
1643 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1644 VDGEOMETRY geo;
1645 geo.cCylinders = pLCHSGeometry->cCylinders;
1646 geo.cHeads = pLCHSGeometry->cHeads;
1647 geo.cSectors = pLCHSGeometry->cSectors;
1648 int rc = VDSetLCHSGeometry(pThis->pDisk, VD_LAST_IMAGE, &geo);
1649 if (rc == VERR_VD_GEOMETRY_NOT_SET)
1650 rc = VERR_PDM_GEOMETRY_NOT_SET;
1651 LogFlowFunc(("returns %Rrc\n", rc));
1652 return rc;
1653}
1654
1655/** @copydoc PDMIMEDIA::pfnGetUuid */
1656static DECLCALLBACK(int) drvvdGetUuid(PPDMIMEDIA pInterface, PRTUUID pUuid)
1657{
1658 LogFlowFunc(("\n"));
1659 PVBOXDISK pThis = PDMIMEDIA_2_VBOXDISK(pInterface);
1660 int rc = VDGetUuid(pThis->pDisk, 0, pUuid);
1661 LogFlowFunc(("returns %Rrc ({%RTuuid})\n", rc, pUuid));
1662 return rc;
1663}
1664
1665/*******************************************************************************
1666* Async Media interface methods *
1667*******************************************************************************/
1668
1669static void drvvdAsyncReqComplete(void *pvUser1, void *pvUser2, int rcReq)
1670{
1671 PVBOXDISK pThis = (PVBOXDISK)pvUser1;
1672
1673 int rc = pThis->pDrvMediaAsyncPort->pfnTransferCompleteNotify(pThis->pDrvMediaAsyncPort,
1674 pvUser2, rcReq);
1675 AssertRC(rc);
1676}
1677
1678static DECLCALLBACK(int) drvvdStartRead(PPDMIMEDIAASYNC pInterface, uint64_t uOffset,
1679 PCRTSGSEG paSeg, unsigned cSeg,
1680 size_t cbRead, void *pvUser)
1681{
1682 LogFlowFunc(("uOffset=%#llx paSeg=%#p cSeg=%u cbRead=%d\n pvUser=%#p",
1683 uOffset, paSeg, cSeg, cbRead, pvUser));
1684 PVBOXDISK pThis = PDMIMEDIAASYNC_2_VBOXDISK(pInterface);
1685
1686 pThis->fBootAccelActive = false;
1687
1688 int rc = VDAsyncRead(pThis->pDisk, uOffset, cbRead, paSeg, cSeg,
1689 drvvdAsyncReqComplete, pThis, pvUser);
1690 LogFlowFunc(("returns %Rrc\n", rc));
1691 return rc;
1692}
1693
1694static DECLCALLBACK(int) drvvdStartWrite(PPDMIMEDIAASYNC pInterface, uint64_t uOffset,
1695 PCRTSGSEG paSeg, unsigned cSeg,
1696 size_t cbWrite, void *pvUser)
1697{
1698 LogFlowFunc(("uOffset=%#llx paSeg=%#p cSeg=%u cbWrite=%d pvUser=%#p\n",
1699 uOffset, paSeg, cSeg, cbWrite, pvUser));
1700 PVBOXDISK pThis = PDMIMEDIAASYNC_2_VBOXDISK(pInterface);
1701
1702 pThis->fBootAccelActive = false;
1703
1704 int rc = VDAsyncWrite(pThis->pDisk, uOffset, cbWrite, paSeg, cSeg,
1705 drvvdAsyncReqComplete, pThis, pvUser);
1706 LogFlowFunc(("returns %Rrc\n", rc));
1707 return rc;
1708}
1709
1710static DECLCALLBACK(int) drvvdStartFlush(PPDMIMEDIAASYNC pInterface, void *pvUser)
1711{
1712 LogFlowFunc(("pvUser=%#p\n", pvUser));
1713 PVBOXDISK pThis = PDMIMEDIAASYNC_2_VBOXDISK(pInterface);
1714 int rc = VDAsyncFlush(pThis->pDisk, drvvdAsyncReqComplete, pThis, pvUser);
1715 LogFlowFunc(("returns %Rrc\n", rc));
1716 return rc;
1717}
1718
1719
1720/*******************************************************************************
1721* Base interface methods *
1722*******************************************************************************/
1723
1724/**
1725 * @interface_method_impl{PDMIBASE,pfnQueryInterface}
1726 */
1727static DECLCALLBACK(void *) drvvdQueryInterface(PPDMIBASE pInterface, const char *pszIID)
1728{
1729 PPDMDRVINS pDrvIns = PDMIBASE_2_DRVINS(pInterface);
1730 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1731
1732 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIBASE, &pDrvIns->IBase);
1733 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIMEDIA, &pThis->IMedia);
1734 PDMIBASE_RETURN_INTERFACE(pszIID, PDMIMEDIAASYNC, pThis->fAsyncIOSupported ? &pThis->IMediaAsync : NULL);
1735 return NULL;
1736}
1737
1738
1739/*******************************************************************************
1740* Saved state notification methods *
1741*******************************************************************************/
1742
1743/**
1744 * Load done callback for re-opening the image writable during teleportation.
1745 *
1746 * This is called both for successful and failed load runs, we only care about
1747 * successfull ones.
1748 *
1749 * @returns VBox status code.
1750 * @param pDrvIns The driver instance.
1751 * @param pSSM The saved state handle.
1752 */
1753static DECLCALLBACK(int) drvvdLoadDone(PPDMDRVINS pDrvIns, PSSMHANDLE pSSM)
1754{
1755 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1756 Assert(!pThis->fErrorUseRuntime);
1757
1758 /* Drop out if we don't have any work to do or if it's a failed load. */
1759 if ( !pThis->fTempReadOnly
1760 || RT_FAILURE(SSMR3HandleGetStatus(pSSM)))
1761 return VINF_SUCCESS;
1762
1763 int rc = drvvdSetWritable(pThis);
1764 if (RT_FAILURE(rc)) /** @todo does the bugger set any errors? */
1765 return SSMR3SetLoadError(pSSM, rc, RT_SRC_POS,
1766 N_("Failed to write lock the images"));
1767 return VINF_SUCCESS;
1768}
1769
1770
1771/*******************************************************************************
1772* Driver methods *
1773*******************************************************************************/
1774
1775static DECLCALLBACK(void) drvvdPowerOff(PPDMDRVINS pDrvIns)
1776{
1777 LogFlowFunc(("\n"));
1778 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1779
1780 /*
1781 * We must close the disk here to ensure that
1782 * the backend closes all files before the
1783 * async transport driver is destructed.
1784 */
1785 int rc = VDCloseAll(pThis->pDisk);
1786 AssertRC(rc);
1787}
1788
1789/**
1790 * VM resume notification that we use to undo what the temporary read-only image
1791 * mode set by drvvdSuspend.
1792 *
1793 * Also switch to runtime error mode if we're resuming after a state load
1794 * without having been powered on first.
1795 *
1796 * @param pDrvIns The driver instance data.
1797 *
1798 * @todo The VMSetError vs VMSetRuntimeError mess must be fixed elsewhere,
1799 * we're making assumptions about Main behavior here!
1800 */
1801static DECLCALLBACK(void) drvvdResume(PPDMDRVINS pDrvIns)
1802{
1803 LogFlowFunc(("\n"));
1804 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1805 drvvdSetWritable(pThis);
1806 pThis->fErrorUseRuntime = true;
1807}
1808
1809/**
1810 * The VM is being suspended, temporarily change to read-only image mode.
1811 *
1812 * This is important for several reasons:
1813 * -# It makes sure that there are no pending writes to the image. Most
1814 * backends implements this by closing and reopening the image in read-only
1815 * mode.
1816 * -# It allows Main to read the images during snapshotting without having
1817 * to account for concurrent writes.
1818 * -# This is essential for making teleportation targets sharing images work
1819 * right. Both with regards to caching and with regards to file sharing
1820 * locks (RTFILE_O_DENY_*). (See also drvvdLoadDone.)
1821 *
1822 * @param pDrvIns The driver instance data.
1823 */
1824static DECLCALLBACK(void) drvvdSuspend(PPDMDRVINS pDrvIns)
1825{
1826 LogFlowFunc(("\n"));
1827 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1828 drvvdSetReadonly(pThis);
1829}
1830
1831/**
1832 * VM PowerOn notification for undoing the TempReadOnly config option and
1833 * changing to runtime error mode.
1834 *
1835 * @param pDrvIns The driver instance data.
1836 *
1837 * @todo The VMSetError vs VMSetRuntimeError mess must be fixed elsewhere,
1838 * we're making assumptions about Main behavior here!
1839 */
1840static DECLCALLBACK(void) drvvdPowerOn(PPDMDRVINS pDrvIns)
1841{
1842 LogFlowFunc(("\n"));
1843 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1844 drvvdSetWritable(pThis);
1845 pThis->fErrorUseRuntime = true;
1846}
1847
1848/**
1849 * @copydoc FNPDMDRVRESET
1850 */
1851static DECLCALLBACK(void) drvvdReset(PPDMDRVINS pDrvIns)
1852{
1853 LogFlowFunc(("\n"));
1854 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1855
1856 if (pThis->fBootAccelEnabled)
1857 {
1858 pThis->fBootAccelActive = true;
1859 pThis->cbDataValid = 0;
1860 pThis->offDisk = 0;
1861 }
1862}
1863
1864/**
1865 * @copydoc FNPDMDRVDESTRUCT
1866 */
1867static DECLCALLBACK(void) drvvdDestruct(PPDMDRVINS pDrvIns)
1868{
1869 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1870 LogFlowFunc(("\n"));
1871 PDMDRV_CHECK_VERSIONS_RETURN_VOID(pDrvIns);
1872
1873 RTSEMFASTMUTEX mutex;
1874 ASMAtomicXchgHandle(&pThis->MergeCompleteMutex, NIL_RTSEMFASTMUTEX, &mutex);
1875 if (mutex != NIL_RTSEMFASTMUTEX)
1876 {
1877 /* Request the semaphore to wait until a potentially running merge
1878 * operation has been finished. */
1879 int rc = RTSemFastMutexRequest(mutex);
1880 AssertRC(rc);
1881 pThis->fMergePending = false;
1882 rc = RTSemFastMutexRelease(mutex);
1883 AssertRC(rc);
1884 rc = RTSemFastMutexDestroy(mutex);
1885 AssertRC(rc);
1886 }
1887
1888 if (VALID_PTR(pThis->pDisk))
1889 {
1890 VDDestroy(pThis->pDisk);
1891 pThis->pDisk = NULL;
1892 }
1893 drvvdFreeImages(pThis);
1894
1895 if (pThis->MergeLock != NIL_RTSEMRW)
1896 {
1897 int rc = RTSemRWDestroy(pThis->MergeLock);
1898 AssertRC(rc);
1899 pThis->MergeLock = NIL_RTSEMRW;
1900 }
1901 if (pThis->pbData)
1902 RTMemFree(pThis->pbData);
1903}
1904
1905/**
1906 * Construct a VBox disk media driver instance.
1907 *
1908 * @copydoc FNPDMDRVCONSTRUCT
1909 */
1910static DECLCALLBACK(int) drvvdConstruct(PPDMDRVINS pDrvIns,
1911 PCFGMNODE pCfg,
1912 uint32_t fFlags)
1913{
1914 LogFlowFunc(("\n"));
1915 PVBOXDISK pThis = PDMINS_2_DATA(pDrvIns, PVBOXDISK);
1916 int rc = VINF_SUCCESS;
1917 char *pszName = NULL; /**< The path of the disk image file. */
1918 char *pszFormat = NULL; /**< The format backed to use for this image. */
1919 bool fReadOnly; /**< True if the media is read-only. */
1920 bool fMaybeReadOnly; /**< True if the media may or may not be read-only. */
1921 bool fHonorZeroWrites; /**< True if zero blocks should be written. */
1922 PDMDRV_CHECK_VERSIONS_RETURN(pDrvIns);
1923
1924 /*
1925 * Init the static parts.
1926 */
1927 pDrvIns->IBase.pfnQueryInterface = drvvdQueryInterface;
1928 pThis->pDrvIns = pDrvIns;
1929 pThis->fTempReadOnly = false;
1930 pThis->pDisk = NULL;
1931 pThis->fAsyncIOSupported = false;
1932 pThis->fShareable = false;
1933 pThis->fMergePending = false;
1934 pThis->MergeCompleteMutex = NIL_RTSEMFASTMUTEX;
1935 pThis->uMergeSource = VD_LAST_IMAGE;
1936 pThis->uMergeTarget = VD_LAST_IMAGE;
1937
1938 /* IMedia */
1939 pThis->IMedia.pfnRead = drvvdRead;
1940 pThis->IMedia.pfnWrite = drvvdWrite;
1941 pThis->IMedia.pfnFlush = drvvdFlush;
1942 pThis->IMedia.pfnMerge = drvvdMerge;
1943 pThis->IMedia.pfnGetSize = drvvdGetSize;
1944 pThis->IMedia.pfnIsReadOnly = drvvdIsReadOnly;
1945 pThis->IMedia.pfnBiosGetPCHSGeometry = drvvdBiosGetPCHSGeometry;
1946 pThis->IMedia.pfnBiosSetPCHSGeometry = drvvdBiosSetPCHSGeometry;
1947 pThis->IMedia.pfnBiosGetLCHSGeometry = drvvdBiosGetLCHSGeometry;
1948 pThis->IMedia.pfnBiosSetLCHSGeometry = drvvdBiosSetLCHSGeometry;
1949 pThis->IMedia.pfnGetUuid = drvvdGetUuid;
1950
1951 /* IMediaAsync */
1952 pThis->IMediaAsync.pfnStartRead = drvvdStartRead;
1953 pThis->IMediaAsync.pfnStartWrite = drvvdStartWrite;
1954 pThis->IMediaAsync.pfnStartFlush = drvvdStartFlush;
1955
1956 /* Initialize supported VD interfaces. */
1957 pThis->pVDIfsDisk = NULL;
1958
1959 pThis->VDIErrorCallbacks.cbSize = sizeof(VDINTERFACEERROR);
1960 pThis->VDIErrorCallbacks.enmInterface = VDINTERFACETYPE_ERROR;
1961 pThis->VDIErrorCallbacks.pfnError = drvvdErrorCallback;
1962 pThis->VDIErrorCallbacks.pfnMessage = NULL;
1963
1964 rc = VDInterfaceAdd(&pThis->VDIError, "DrvVD_VDIError", VDINTERFACETYPE_ERROR,
1965 &pThis->VDIErrorCallbacks, pDrvIns, &pThis->pVDIfsDisk);
1966 AssertRC(rc);
1967
1968 /* This is just prepared here, the actual interface is per-image, so it's
1969 * added later. No need to have separate callback tables. */
1970 pThis->VDIConfigCallbacks.cbSize = sizeof(VDINTERFACECONFIG);
1971 pThis->VDIConfigCallbacks.enmInterface = VDINTERFACETYPE_CONFIG;
1972 pThis->VDIConfigCallbacks.pfnAreKeysValid = drvvdCfgAreKeysValid;
1973 pThis->VDIConfigCallbacks.pfnQuerySize = drvvdCfgQuerySize;
1974 pThis->VDIConfigCallbacks.pfnQuery = drvvdCfgQuery;
1975
1976 /* List of images is empty now. */
1977 pThis->pImages = NULL;
1978
1979 /* Try to attach async media port interface above.*/
1980 pThis->pDrvMediaAsyncPort = PDMIBASE_QUERY_INTERFACE(pDrvIns->pUpBase, PDMIMEDIAASYNCPORT);
1981
1982 /*
1983 * Validate configuration and find all parent images.
1984 * It's sort of up side down from the image dependency tree.
1985 */
1986 bool fHostIP = false;
1987 bool fUseNewIo = false;
1988 unsigned iLevel = 0;
1989 PCFGMNODE pCurNode = pCfg;
1990
1991 for (;;)
1992 {
1993 bool fValid;
1994
1995 if (pCurNode == pCfg)
1996 {
1997 /* Toplevel configuration additionally contains the global image
1998 * open flags. Some might be converted to per-image flags later. */
1999 fValid = CFGMR3AreValuesValid(pCurNode,
2000 "Format\0Path\0"
2001 "ReadOnly\0MaybeReadOnly\0TempReadOnly\0Shareable\0HonorZeroWrites\0"
2002 "HostIPStack\0UseNewIo\0BootAcceleration\0BootAccelerationBuffer\0"
2003 "SetupMerge\0MergeSource\0MergeTarget\0");
2004 }
2005 else
2006 {
2007 /* All other image configurations only contain image name and
2008 * the format information. */
2009 fValid = CFGMR3AreValuesValid(pCurNode, "Format\0Path\0"
2010 "MergeSource\0MergeTarget\0");
2011 }
2012 if (!fValid)
2013 {
2014 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
2015 RT_SRC_POS, N_("DrvVD: Configuration error: keys incorrect at level %d"), iLevel);
2016 break;
2017 }
2018
2019 if (pCurNode == pCfg)
2020 {
2021 rc = CFGMR3QueryBoolDef(pCurNode, "HostIPStack", &fHostIP, true);
2022 if (RT_FAILURE(rc))
2023 {
2024 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2025 N_("DrvVD: Configuration error: Querying \"HostIPStack\" as boolean failed"));
2026 break;
2027 }
2028
2029 rc = CFGMR3QueryBoolDef(pCurNode, "HonorZeroWrites", &fHonorZeroWrites, false);
2030 if (RT_FAILURE(rc))
2031 {
2032 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2033 N_("DrvVD: Configuration error: Querying \"HonorZeroWrites\" as boolean failed"));
2034 break;
2035 }
2036
2037 rc = CFGMR3QueryBoolDef(pCurNode, "ReadOnly", &fReadOnly, false);
2038 if (RT_FAILURE(rc))
2039 {
2040 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2041 N_("DrvVD: Configuration error: Querying \"ReadOnly\" as boolean failed"));
2042 break;
2043 }
2044
2045 rc = CFGMR3QueryBoolDef(pCurNode, "MaybeReadOnly", &fMaybeReadOnly, false);
2046 if (RT_FAILURE(rc))
2047 {
2048 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2049 N_("DrvVD: Configuration error: Querying \"MaybeReadOnly\" as boolean failed"));
2050 break;
2051 }
2052
2053 rc = CFGMR3QueryBoolDef(pCurNode, "TempReadOnly", &pThis->fTempReadOnly, false);
2054 if (RT_FAILURE(rc))
2055 {
2056 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2057 N_("DrvVD: Configuration error: Querying \"TempReadOnly\" as boolean failed"));
2058 break;
2059 }
2060 if (fReadOnly && pThis->fTempReadOnly)
2061 {
2062 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
2063 N_("DrvVD: Configuration error: Both \"ReadOnly\" and \"TempReadOnly\" are set"));
2064 break;
2065 }
2066
2067 rc = CFGMR3QueryBoolDef(pCurNode, "Shareable", &pThis->fShareable, false);
2068 if (RT_FAILURE(rc))
2069 {
2070 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2071 N_("DrvVD: Configuration error: Querying \"Shareable\" as boolean failed"));
2072 break;
2073 }
2074
2075 rc = CFGMR3QueryBoolDef(pCurNode, "UseNewIo", &fUseNewIo, false);
2076 if (RT_FAILURE(rc))
2077 {
2078 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2079 N_("DrvVD: Configuration error: Querying \"UseNewIo\" as boolean failed"));
2080 break;
2081 }
2082 rc = CFGMR3QueryBoolDef(pCurNode, "SetupMerge", &pThis->fMergePending, false);
2083 if (RT_FAILURE(rc))
2084 {
2085 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2086 N_("DrvVD: Configuration error: Querying \"SetupMerge\" as boolean failed"));
2087 break;
2088 }
2089 if (fReadOnly && pThis->fMergePending)
2090 {
2091 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
2092 N_("DrvVD: Configuration error: Both \"ReadOnly\" and \"MergePending\" are set"));
2093 break;
2094 }
2095 rc = CFGMR3QueryBoolDef(pCurNode, "BootAcceleration", &pThis->fBootAccelEnabled, false);
2096 if (RT_FAILURE(rc))
2097 {
2098 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2099 N_("DrvVD: Configuration error: Querying \"BootAcceleration\" as boolean failed"));
2100 break;
2101 }
2102 rc = CFGMR3QueryU32Def(pCurNode, "BootAccelerationBuffer", (uint32_t *)&pThis->cbBootAccelBuffer, 16 * _1K);
2103 if (RT_FAILURE(rc))
2104 {
2105 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2106 N_("DrvVD: Configuration error: Querying \"BootAccelerationBuffer\" as integer failed"));
2107 break;
2108 }
2109 }
2110
2111 PCFGMNODE pParent = CFGMR3GetChild(pCurNode, "Parent");
2112 if (!pParent)
2113 break;
2114 pCurNode = pParent;
2115 iLevel++;
2116 }
2117
2118 /*
2119 * Create the image container and the necessary interfaces.
2120 */
2121 if (RT_SUCCESS(rc))
2122 {
2123 /* Construct TCPNET callback table depending on the config. This is
2124 * done unconditionally, as uninterested backends will ignore it. */
2125 if (fHostIP)
2126 {
2127 pThis->VDITcpNetCallbacks.cbSize = sizeof(VDINTERFACETCPNET);
2128 pThis->VDITcpNetCallbacks.enmInterface = VDINTERFACETYPE_TCPNET;
2129 pThis->VDITcpNetCallbacks.pfnSocketCreate = drvvdTcpSocketCreate;
2130 pThis->VDITcpNetCallbacks.pfnSocketDestroy = drvvdTcpSocketDestroy;
2131 pThis->VDITcpNetCallbacks.pfnClientConnect = drvvdTcpClientConnect;
2132 pThis->VDITcpNetCallbacks.pfnIsClientConnected = drvvdTcpIsClientConnected;
2133 pThis->VDITcpNetCallbacks.pfnClientClose = drvvdTcpClientClose;
2134 pThis->VDITcpNetCallbacks.pfnSelectOne = drvvdTcpSelectOne;
2135 pThis->VDITcpNetCallbacks.pfnRead = drvvdTcpRead;
2136 pThis->VDITcpNetCallbacks.pfnWrite = drvvdTcpWrite;
2137 pThis->VDITcpNetCallbacks.pfnSgWrite = drvvdTcpSgWrite;
2138 pThis->VDITcpNetCallbacks.pfnReadNB = drvvdTcpReadNB;
2139 pThis->VDITcpNetCallbacks.pfnWriteNB = drvvdTcpWriteNB;
2140 pThis->VDITcpNetCallbacks.pfnSgWriteNB = drvvdTcpSgWriteNB;
2141 pThis->VDITcpNetCallbacks.pfnFlush = drvvdTcpFlush;
2142 pThis->VDITcpNetCallbacks.pfnSetSendCoalescing = drvvdTcpSetSendCoalescing;
2143 pThis->VDITcpNetCallbacks.pfnGetLocalAddress = drvvdTcpGetLocalAddress;
2144 pThis->VDITcpNetCallbacks.pfnGetPeerAddress = drvvdTcpGetPeerAddress;
2145
2146 /*
2147 * There is a 15ms delay between receiving the data and marking the socket
2148 * as readable on Windows XP which hurts async I/O performance of
2149 * TCP backends badly. Provide a different select method without
2150 * using poll on XP.
2151 * This is only used on XP because it is not as efficient as the one using poll
2152 * and all other Windows versions are working fine.
2153 */
2154 char szOS[64];
2155 memset(szOS, 0, sizeof(szOS));
2156 rc = RTSystemQueryOSInfo(RTSYSOSINFO_PRODUCT, &szOS[0], sizeof(szOS));
2157
2158 if (RT_SUCCESS(rc) && !strncmp(szOS, "Windows XP", 10))
2159 {
2160 LogRel(("VD: Detected Windows XP, disabled poll based waiting for TCP\n"));
2161 pThis->VDITcpNetCallbacks.pfnSelectOneEx = drvvdTcpSelectOneExNoPoll;
2162 }
2163 else
2164 pThis->VDITcpNetCallbacks.pfnSelectOneEx = drvvdTcpSelectOneExPoll;
2165
2166 pThis->VDITcpNetCallbacks.pfnPoke = drvvdTcpPoke;
2167 }
2168 else
2169 {
2170#ifndef VBOX_WITH_INIP
2171 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
2172 RT_SRC_POS, N_("DrvVD: Configuration error: TCP over Internal Networking not compiled in"));
2173#else /* VBOX_WITH_INIP */
2174 pThis->VDITcpNetCallbacks.cbSize = sizeof(VDINTERFACETCPNET);
2175 pThis->VDITcpNetCallbacks.enmInterface = VDINTERFACETYPE_TCPNET;
2176 pThis->VDITcpNetCallbacks.pfnSocketCreate = drvvdINIPSocketCreate;
2177 pThis->VDITcpNetCallbacks.pfnSocketDestroy = drvvdINIPSocketDestroy;
2178 pThis->VDITcpNetCallbacks.pfnClientConnect = drvvdINIPClientConnect;
2179 pThis->VDITcpNetCallbacks.pfnClientClose = drvvdINIPClientClose;
2180 pThis->VDITcpNetCallbacks.pfnIsClientConnected = drvvdINIPIsClientConnected;
2181 pThis->VDITcpNetCallbacks.pfnSelectOne = drvvdINIPSelectOne;
2182 pThis->VDITcpNetCallbacks.pfnRead = drvvdINIPRead;
2183 pThis->VDITcpNetCallbacks.pfnWrite = drvvdINIPWrite;
2184 pThis->VDITcpNetCallbacks.pfnSgWrite = drvvdINIPSgWrite;
2185 pThis->VDITcpNetCallbacks.pfnFlush = drvvdINIPFlush;
2186 pThis->VDITcpNetCallbacks.pfnSetSendCoalescing = drvvdINIPSetSendCoalescing;
2187 pThis->VDITcpNetCallbacks.pfnGetLocalAddress = drvvdINIPGetLocalAddress;
2188 pThis->VDITcpNetCallbacks.pfnGetPeerAddress = drvvdINIPGetPeerAddress;
2189 pThis->VDITcpNetCallbacks.pfnSelectOneEx = drvvdINIPSelectOneEx;
2190 pThis->VDITcpNetCallbacks.pfnPoke = drvvdINIPPoke;
2191#endif /* VBOX_WITH_INIP */
2192 }
2193
2194 /** @todo quick hack to work around problems in the async I/O
2195 * implementation (rw semaphore thread ownership problem)
2196 * while a merge is running. Remove once this is fixed. */
2197 if (pThis->fMergePending)
2198 fUseNewIo = false;
2199
2200 if (RT_SUCCESS(rc) && fUseNewIo)
2201 {
2202#ifdef VBOX_WITH_PDM_ASYNC_COMPLETION
2203 pThis->VDIIOCallbacks.cbSize = sizeof(VDINTERFACEIO);
2204 pThis->VDIIOCallbacks.enmInterface = VDINTERFACETYPE_IO;
2205 pThis->VDIIOCallbacks.pfnOpen = drvvdAsyncIOOpen;
2206 pThis->VDIIOCallbacks.pfnClose = drvvdAsyncIOClose;
2207 pThis->VDIIOCallbacks.pfnGetSize = drvvdAsyncIOGetSize;
2208 pThis->VDIIOCallbacks.pfnSetSize = drvvdAsyncIOSetSize;
2209 pThis->VDIIOCallbacks.pfnReadSync = drvvdAsyncIOReadSync;
2210 pThis->VDIIOCallbacks.pfnWriteSync = drvvdAsyncIOWriteSync;
2211 pThis->VDIIOCallbacks.pfnFlushSync = drvvdAsyncIOFlushSync;
2212 pThis->VDIIOCallbacks.pfnReadAsync = drvvdAsyncIOReadAsync;
2213 pThis->VDIIOCallbacks.pfnWriteAsync = drvvdAsyncIOWriteAsync;
2214 pThis->VDIIOCallbacks.pfnFlushAsync = drvvdAsyncIOFlushAsync;
2215#else /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
2216 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_PDM_DRVINS_UNKNOWN_CFG_VALUES,
2217 RT_SRC_POS, N_("DrvVD: Configuration error: Async Completion Framework not compiled in"));
2218#endif /* !VBOX_WITH_PDM_ASYNC_COMPLETION */
2219 }
2220
2221 if (RT_SUCCESS(rc) && pThis->fMergePending)
2222 {
2223 rc = RTSemFastMutexCreate(&pThis->MergeCompleteMutex);
2224 if (RT_SUCCESS(rc))
2225 rc = RTSemRWCreate(&pThis->MergeLock);
2226 if (RT_SUCCESS(rc))
2227 {
2228 pThis->VDIThreadSyncCallbacks.cbSize = sizeof(VDINTERFACETHREADSYNC);
2229 pThis->VDIThreadSyncCallbacks.enmInterface = VDINTERFACETYPE_THREADSYNC;
2230 pThis->VDIThreadSyncCallbacks.pfnStartRead = drvvdThreadStartRead;
2231 pThis->VDIThreadSyncCallbacks.pfnFinishRead = drvvdThreadFinishRead;
2232 pThis->VDIThreadSyncCallbacks.pfnStartWrite = drvvdThreadStartWrite;
2233 pThis->VDIThreadSyncCallbacks.pfnFinishWrite = drvvdThreadFinishWrite;
2234
2235 rc = VDInterfaceAdd(&pThis->VDIThreadSync, "DrvVD_ThreadSync", VDINTERFACETYPE_THREADSYNC,
2236 &pThis->VDIThreadSyncCallbacks, pThis, &pThis->pVDIfsDisk);
2237 }
2238 else
2239 {
2240 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2241 N_("DrvVD: Failed to create semaphores for \"MergePending\""));
2242 }
2243 }
2244
2245 if (RT_SUCCESS(rc))
2246 {
2247 rc = VDCreate(pThis->pVDIfsDisk, &pThis->pDisk);
2248 /* Error message is already set correctly. */
2249 }
2250 }
2251
2252 if (pThis->pDrvMediaAsyncPort && fUseNewIo)
2253 pThis->fAsyncIOSupported = true;
2254
2255 unsigned iImageIdx = 0;
2256 while (pCurNode && RT_SUCCESS(rc))
2257 {
2258 /* Allocate per-image data. */
2259 PVBOXIMAGE pImage = drvvdNewImage(pThis);
2260 if (!pImage)
2261 {
2262 rc = VERR_NO_MEMORY;
2263 break;
2264 }
2265
2266 /*
2267 * Read the image configuration.
2268 */
2269 rc = CFGMR3QueryStringAlloc(pCurNode, "Path", &pszName);
2270 if (RT_FAILURE(rc))
2271 {
2272 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2273 N_("DrvVD: Configuration error: Querying \"Path\" as string failed"));
2274 break;
2275 }
2276
2277 rc = CFGMR3QueryStringAlloc(pCurNode, "Format", &pszFormat);
2278 if (RT_FAILURE(rc))
2279 {
2280 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2281 N_("DrvVD: Configuration error: Querying \"Format\" as string failed"));
2282 break;
2283 }
2284
2285 bool fMergeSource;
2286 rc = CFGMR3QueryBoolDef(pCurNode, "MergeSource", &fMergeSource, false);
2287 if (RT_FAILURE(rc))
2288 {
2289 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2290 N_("DrvVD: Configuration error: Querying \"MergeSource\" as boolean failed"));
2291 break;
2292 }
2293 if (fMergeSource)
2294 {
2295 if (pThis->uMergeSource == VD_LAST_IMAGE)
2296 pThis->uMergeSource = iImageIdx;
2297 else
2298 {
2299 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
2300 N_("DrvVD: Configuration error: Multiple \"MergeSource\" occurrences"));
2301 break;
2302 }
2303 }
2304
2305 bool fMergeTarget;
2306 rc = CFGMR3QueryBoolDef(pCurNode, "MergeTarget", &fMergeTarget, false);
2307 if (RT_FAILURE(rc))
2308 {
2309 rc = PDMDRV_SET_ERROR(pDrvIns, rc,
2310 N_("DrvVD: Configuration error: Querying \"MergeTarget\" as boolean failed"));
2311 break;
2312 }
2313 if (fMergeTarget)
2314 {
2315 if (pThis->uMergeTarget == VD_LAST_IMAGE)
2316 pThis->uMergeTarget = iImageIdx;
2317 else
2318 {
2319 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
2320 N_("DrvVD: Configuration error: Multiple \"MergeTarget\" occurrences"));
2321 break;
2322 }
2323 }
2324
2325 PCFGMNODE pCfgVDConfig = CFGMR3GetChild(pCurNode, "VDConfig");
2326 rc = VDInterfaceAdd(&pImage->VDIConfig, "DrvVD_Config", VDINTERFACETYPE_CONFIG,
2327 &pThis->VDIConfigCallbacks, pCfgVDConfig, &pImage->pVDIfsImage);
2328 AssertRC(rc);
2329
2330 /* Unconditionally insert the TCPNET interface, don't bother to check
2331 * if an image really needs it. Will be ignored. Since the TCPNET
2332 * interface is per image we could make this more flexible in the
2333 * future if we want to. */
2334 rc = VDInterfaceAdd(&pImage->VDITcpNet, "DrvVD_TCPNET",
2335 VDINTERFACETYPE_TCPNET, &pThis->VDITcpNetCallbacks,
2336 NULL, &pImage->pVDIfsImage);
2337 AssertRC(rc);
2338
2339 /* Insert the custom I/O interface only if we're told to use new IO.
2340 * Since the I/O interface is per image we could make this more
2341 * flexible in the future if we want to. */
2342 if (fUseNewIo)
2343 {
2344 rc = VDInterfaceAdd(&pImage->VDIIO, "DrvVD_IO", VDINTERFACETYPE_IO,
2345 &pThis->VDIIOCallbacks, pThis,
2346 &pImage->pVDIfsImage);
2347 AssertRC(rc);
2348 }
2349
2350 /*
2351 * Open the image.
2352 */
2353 unsigned uOpenFlags;
2354 if (fReadOnly || pThis->fTempReadOnly || iLevel != 0)
2355 uOpenFlags = VD_OPEN_FLAGS_READONLY;
2356 else
2357 uOpenFlags = VD_OPEN_FLAGS_NORMAL;
2358 if (fHonorZeroWrites)
2359 uOpenFlags |= VD_OPEN_FLAGS_HONOR_ZEROES;
2360 if (pThis->fAsyncIOSupported)
2361 uOpenFlags |= VD_OPEN_FLAGS_ASYNC_IO;
2362 if (pThis->fShareable)
2363 uOpenFlags |= VD_OPEN_FLAGS_SHAREABLE;
2364
2365 /* Try to open backend in async I/O mode first. */
2366 rc = VDOpen(pThis->pDisk, pszFormat, pszName, uOpenFlags, pImage->pVDIfsImage);
2367 if (rc == VERR_NOT_SUPPORTED)
2368 {
2369 pThis->fAsyncIOSupported = false;
2370 uOpenFlags &= ~VD_OPEN_FLAGS_ASYNC_IO;
2371 rc = VDOpen(pThis->pDisk, pszFormat, pszName, uOpenFlags, pImage->pVDIfsImage);
2372 }
2373
2374 if (RT_SUCCESS(rc))
2375 {
2376 LogFunc(("%d - Opened '%s' in %s mode\n",
2377 iLevel, pszName,
2378 VDIsReadOnly(pThis->pDisk) ? "read-only" : "read-write"));
2379 if ( VDIsReadOnly(pThis->pDisk)
2380 && !fReadOnly
2381 && !fMaybeReadOnly
2382 && !pThis->fTempReadOnly
2383 && iLevel == 0)
2384 {
2385 rc = PDMDrvHlpVMSetError(pDrvIns, VERR_VD_IMAGE_READ_ONLY, RT_SRC_POS,
2386 N_("Failed to open image '%s' for writing due to wrong permissions"),
2387 pszName);
2388 break;
2389 }
2390 }
2391 else
2392 {
2393 rc = PDMDrvHlpVMSetError(pDrvIns, rc, RT_SRC_POS,
2394 N_("Failed to open image '%s' in %s mode rc=%Rrc"), pszName,
2395 (uOpenFlags & VD_OPEN_FLAGS_READONLY) ? "read-only" : "read-write", rc);
2396 break;
2397 }
2398
2399
2400 MMR3HeapFree(pszName);
2401 pszName = NULL;
2402 MMR3HeapFree(pszFormat);
2403 pszFormat = NULL;
2404
2405 /* next */
2406 iLevel--;
2407 iImageIdx++;
2408 pCurNode = CFGMR3GetParent(pCurNode);
2409 }
2410
2411 if ( RT_SUCCESS(rc)
2412 && pThis->fMergePending
2413 && ( pThis->uMergeSource == VD_LAST_IMAGE
2414 || pThis->uMergeTarget == VD_LAST_IMAGE))
2415 {
2416 rc = PDMDRV_SET_ERROR(pDrvIns, VERR_PDM_DRIVER_INVALID_PROPERTIES,
2417 N_("DrvVD: Configuration error: Inconsistent image merge data"));
2418 }
2419
2420 /*
2421 * Register a load-done callback so we can undo TempReadOnly config before
2422 * we get to drvvdResume. Automatically deregistered upon destruction.
2423 */
2424 if (RT_SUCCESS(rc))
2425 rc = PDMDrvHlpSSMRegisterEx(pDrvIns, 0 /* version */, 0 /* cbGuess */,
2426 NULL /*pfnLivePrep*/, NULL /*pfnLiveExec*/, NULL /*pfnLiveVote*/,
2427 NULL /*pfnSavePrep*/, NULL /*pfnSaveExec*/, NULL /*pfnSaveDone*/,
2428 NULL /*pfnDonePrep*/, NULL /*pfnLoadExec*/, drvvdLoadDone);
2429
2430 /* Setup the boot acceleration stuff if enabled. */
2431 if (RT_SUCCESS(rc) && pThis->fBootAccelEnabled)
2432 {
2433 pThis->cbDisk = VDGetSize(pThis->pDisk, VD_LAST_IMAGE);
2434 Assert(pThis->cbDisk > 0);
2435 pThis->pbData = (uint8_t *)RTMemAllocZ(pThis->cbBootAccelBuffer);
2436 if (pThis->pbData)
2437 {
2438 pThis->fBootAccelActive = true;
2439 pThis->offDisk = 0;
2440 pThis->cbDataValid = 0;
2441 LogRel(("VD: Boot acceleration enabled\n"));
2442 }
2443 else
2444 LogRel(("VD: Boot acceleration, out of memory, disabled\n"));
2445 }
2446
2447 if (RT_FAILURE(rc))
2448 {
2449 if (VALID_PTR(pszName))
2450 MMR3HeapFree(pszName);
2451 if (VALID_PTR(pszFormat))
2452 MMR3HeapFree(pszFormat);
2453 /* drvvdDestruct does the rest. */
2454 }
2455
2456 LogFlowFunc(("returns %Rrc\n", rc));
2457 return rc;
2458}
2459
2460/**
2461 * VBox disk container media driver registration record.
2462 */
2463const PDMDRVREG g_DrvVD =
2464{
2465 /* u32Version */
2466 PDM_DRVREG_VERSION,
2467 /* szName */
2468 "VD",
2469 /* szRCMod */
2470 "",
2471 /* szR0Mod */
2472 "",
2473 /* pszDescription */
2474 "Generic VBox disk media driver.",
2475 /* fFlags */
2476 PDM_DRVREG_FLAGS_HOST_BITS_DEFAULT,
2477 /* fClass. */
2478 PDM_DRVREG_CLASS_MEDIA,
2479 /* cMaxInstances */
2480 ~0,
2481 /* cbInstance */
2482 sizeof(VBOXDISK),
2483 /* pfnConstruct */
2484 drvvdConstruct,
2485 /* pfnDestruct */
2486 drvvdDestruct,
2487 /* pfnRelocate */
2488 NULL,
2489 /* pfnIOCtl */
2490 NULL,
2491 /* pfnPowerOn */
2492 drvvdPowerOn,
2493 /* pfnReset */
2494 drvvdReset,
2495 /* pfnSuspend */
2496 drvvdSuspend,
2497 /* pfnResume */
2498 drvvdResume,
2499 /* pfnAttach */
2500 NULL,
2501 /* pfnDetach */
2502 NULL,
2503 /* pfnPowerOff */
2504 drvvdPowerOff,
2505 /* pfnSoftReset */
2506 NULL,
2507 /* u32EndVersion */
2508 PDM_DRVREG_VERSION
2509};
2510
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use