VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@ 84044

Last change on this file since 84044 was 82968, checked in by vboxsync, 4 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 66.8 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 82968 2020-02-04 10:35:17Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#include <iprt/types.h>
24#include <iprt/asm.h>
25#include <iprt/file.h>
26#include <iprt/mem.h>
27#include <iprt/string.h>
28#include <iprt/assert.h>
29#include <VBox/log.h>
30
31#include "PDMAsyncCompletionFileInternal.h"
32
33
34/*********************************************************************************************************************************
35* Defined Constants And Macros *
36*********************************************************************************************************************************/
37/** The update period for the I/O load statistics in ms. */
38#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
/** Initial limit of active requests for an I/O manager and the step by which pdmacFileAioMgrNormalGrow() raises that limit. */
40#define PDMACEPFILEMGR_REQS_STEP 64
41
42
43/*********************************************************************************************************************************
44* Internal functions *
45*********************************************************************************************************************************/
46static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
47 PPDMACEPFILEMGR pAioMgr,
48 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
49
50static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
51 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
52 PPDMACFILERANGELOCK pRangeLock);
53
54static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
55 int rc, size_t cbTransfered);
56
57
58int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
59{
60 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
61
62 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
63 if (rc == VERR_OUT_OF_RANGE)
64 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
65
66 if (RT_SUCCESS(rc))
67 {
68 /* Initialize request handle array. */
69 pAioMgr->iFreeEntry = 0;
70 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
71 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
72
73 if (pAioMgr->pahReqsFree)
74 {
75 /* Create the range lock memcache. */
76 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
77 0, UINT32_MAX, NULL, NULL, NULL, 0);
78 if (RT_SUCCESS(rc))
79 return VINF_SUCCESS;
80
81 RTMemFree(pAioMgr->pahReqsFree);
82 }
83 else
84 {
85 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
86 rc = VERR_NO_MEMORY;
87 }
88 }
89
90 return rc;
91}
92
93void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
94{
95 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
96
97 while (pAioMgr->iFreeEntry > 0)
98 {
99 pAioMgr->iFreeEntry--;
100 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
101 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
102 }
103
104 RTMemFree(pAioMgr->pahReqsFree);
105 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
106}
107
108#if 0 /* currently unused */
109/**
110 * Sorts the endpoint list with insertion sort.
111 */
112static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
113{
114 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
115
116 pEpPrev = pAioMgr->pEndpointsHead;
117 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
118
119 while (pEpCurr)
120 {
121 /* Remember the next element to sort because the list might change. */
122 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
123
124 /* Unlink the current element from the list. */
125 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
126 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
127
128 if (pPrev)
129 pPrev->AioMgr.pEndpointNext = pNext;
130 else
131 pAioMgr->pEndpointsHead = pNext;
132
133 if (pNext)
134 pNext->AioMgr.pEndpointPrev = pPrev;
135
136 /* Go back until we reached the place to insert the current endpoint into. */
137 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
138 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
139
140 /* Link the endpoint into the list. */
141 if (pEpPrev)
142 pNext = pEpPrev->AioMgr.pEndpointNext;
143 else
144 pNext = pAioMgr->pEndpointsHead;
145
146 pEpCurr->AioMgr.pEndpointNext = pNext;
147 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
148
149 if (pNext)
150 pNext->AioMgr.pEndpointPrev = pEpCurr;
151
152 if (pEpPrev)
153 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
154 else
155 pAioMgr->pEndpointsHead = pEpCurr;
156
157 pEpCurr = pEpNextToSort;
158 }
159
160#ifdef DEBUG
161 /* Validate sorting algorithm */
162 unsigned cEndpoints = 0;
163 pEpCurr = pAioMgr->pEndpointsHead;
164
165 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
166 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
167
168 while (pEpCurr)
169 {
170 cEndpoints++;
171
172 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
173 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
174
175 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
176 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
177
178 pEpCurr = pNext;
179 }
180
181 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
182
183#endif
184}
185#endif /* currently unused */
186
187/**
188 * Removes an endpoint from the currently assigned manager.
189 *
190 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
191 * FALSE otherwise.
192 * @param pEndpointRemove The endpoint to remove.
193 */
194static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
195{
196 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
197 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
198 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
199
200 pAioMgr->cEndpoints--;
201
202 if (pPrev)
203 pPrev->AioMgr.pEndpointNext = pNext;
204 else
205 pAioMgr->pEndpointsHead = pNext;
206
207 if (pNext)
208 pNext->AioMgr.pEndpointPrev = pPrev;
209
210 /* Make sure that there is no request pending on this manager for the endpoint. */
211 if (!pEndpointRemove->AioMgr.cRequestsActive)
212 {
213 Assert(!pEndpointRemove->pFlushReq);
214
215 /* Reopen the file so that the new endpoint can re-associate with the file */
216 RTFileClose(pEndpointRemove->hFile);
217 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
218 AssertRC(rc);
219 return false;
220 }
221
222 return true;
223}
224
225#if 0 /* currently unused */
226
227static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
228{
229 /* Balancing doesn't make sense with only one endpoint. */
230 if (pAioMgr->cEndpoints == 1)
231 return false;
232
233 /* Doesn't make sens to move endpoints if only one produces the whole load */
234 unsigned cEndpointsWithLoad = 0;
235
236 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
237
238 while (pCurr)
239 {
240 if (pCurr->AioMgr.cReqsPerSec)
241 cEndpointsWithLoad++;
242
243 pCurr = pCurr->AioMgr.pEndpointNext;
244 }
245
246 return (cEndpointsWithLoad > 1);
247}
248
249/**
250 * Creates a new I/O manager and spreads the I/O load of the endpoints
251 * between the given I/O manager and the new one.
252 *
253 * @returns nothing.
254 * @param pAioMgr The I/O manager with high I/O load.
255 */
256static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
257{
258 /*
259 * Check if balancing would improve the situation.
260 */
261 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
262 {
263 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
264 PPDMACEPFILEMGR pAioMgrNew = NULL;
265
266 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
267 if (RT_SUCCESS(rc))
268 {
269 /* We will sort the list by request count per second. */
270 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
271
272 /* Now move some endpoints to the new manager. */
273 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
274 unsigned cReqsOther = 0;
275 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
276
277 while (pCurr)
278 {
279 if (cReqsHere <= cReqsOther)
280 {
281 /*
282 * The other manager has more requests to handle now.
283 * We will keep the current endpoint.
284 */
285 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
286 cReqsHere += pCurr->AioMgr.cReqsPerSec;
287 pCurr = pCurr->AioMgr.pEndpointNext;
288 }
289 else
290 {
291 /* Move to other endpoint. */
292 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
293 cReqsOther += pCurr->AioMgr.cReqsPerSec;
294
295 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
296
297 pCurr = pCurr->AioMgr.pEndpointNext;
298
299 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
300
301 if (fReqsPending)
302 {
303 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
304 pMove->AioMgr.fMoving = true;
305 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
306 }
307 else
308 {
309 pMove->AioMgr.fMoving = false;
310 pMove->AioMgr.pAioMgrDst = NULL;
311 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
312 }
313 }
314 }
315 }
316 else
317 {
318 /* Don't process further but leave a log entry about reduced performance. */
319 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
320 }
321 }
322 else
323 Log(("AIOMgr: Load balancing would not improve anything\n"));
324}
325
326#endif /* unused */
327
328/**
329 * Increase the maximum number of active requests for the given I/O manager.
330 *
331 * @returns VBox status code.
332 * @param pAioMgr The I/O manager to grow.
333 */
334static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
335{
336 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
337
338 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
339 && !pAioMgr->cRequestsActive,
340 ("Invalid state of the I/O manager\n"));
341
342#ifdef RT_OS_WINDOWS
343 /*
344 * Reopen the files of all assigned endpoints first so we can assign them to the new
345 * I/O context.
346 */
347 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
348
349 while (pCurr)
350 {
351 RTFileClose(pCurr->hFile);
352 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
353
354 pCurr = pCurr->AioMgr.pEndpointNext;
355 }
356#endif
357
358 /* Create the new bigger context. */
359 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
360
361 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
362 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
363 if (rc == VERR_OUT_OF_RANGE)
364 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
365
366 if (RT_SUCCESS(rc))
367 {
368 /* Close the old context. */
369 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
370 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
371
372 pAioMgr->hAioCtx = hAioCtxNew;
373
374 /* Create a new I/O task handle array */
375 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
376 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
377
378 if (pahReqNew)
379 {
380 /* Copy the cached request handles. */
381 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
382 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
383
384 RTMemFree(pAioMgr->pahReqsFree);
385 pAioMgr->pahReqsFree = pahReqNew;
386 pAioMgr->cReqEntries = cReqEntriesNew;
387 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
388 pAioMgr->cRequestsActiveMax));
389 }
390 else
391 rc = VERR_NO_MEMORY;
392 }
393
394#ifdef RT_OS_WINDOWS
395 /* Assign the file to the new context. */
396 pCurr = pAioMgr->pEndpointsHead;
397 while (pCurr)
398 {
399 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
400 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
401
402 pCurr = pCurr->AioMgr.pEndpointNext;
403 }
404#endif
405
406 if (RT_FAILURE(rc))
407 {
408 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
409 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
410 }
411
412 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
413 LogFlowFunc(("returns rc=%Rrc\n", rc));
414
415 return rc;
416}
417
418/**
419 * Checks if a given status code is fatal.
420 * Non fatal errors can be fixed by migrating the endpoint to a
421 * failsafe manager.
422 *
423 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
424 * false If the error can be fixed by a migration. (image on NFS disk for example)
425 * @param rcReq The status code to check.
426 */
427DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
428{
429 return rcReq == VERR_DEV_IO_ERROR
430 || rcReq == VERR_FILE_IO_ERROR
431 || rcReq == VERR_DISK_IO_ERROR
432 || rcReq == VERR_DISK_FULL
433 || rcReq == VERR_FILE_TOO_BIG;
434}
435
436/**
437 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
438 *
439 * @returns VBox status code
440 * @param pAioMgr The I/O manager the error occurred on.
441 * @param rc The error code.
442 * @param SRC_POS The source location of the error (use RT_SRC_POS).
443 */
444static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
445{
446 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
447 pAioMgr, rc));
448 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
449 LogRel(("AIOMgr: Please contact the product vendor\n"));
450
451 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
452
453 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
454 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
455
456 AssertMsgFailed(("Implement\n"));
457 return VINF_SUCCESS;
458}
459
460/**
461 * Put a list of tasks in the pending request list of an endpoint.
462 */
463DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
464{
465 /* Add the rest of the tasks to the pending list */
466 if (!pEndpoint->AioMgr.pReqsPendingHead)
467 {
468 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
469 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
470 }
471 else
472 {
473 Assert(pEndpoint->AioMgr.pReqsPendingTail);
474 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
475 }
476
477 /* Update the tail. */
478 while (pTaskHead->pNext)
479 pTaskHead = pTaskHead->pNext;
480
481 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
482 pTaskHead->pNext = NULL;
483}
484
485/**
486 * Put one task in the pending request list of an endpoint.
487 */
488DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
489{
490 /* Add the rest of the tasks to the pending list */
491 if (!pEndpoint->AioMgr.pReqsPendingHead)
492 {
493 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
494 pEndpoint->AioMgr.pReqsPendingHead = pTask;
495 }
496 else
497 {
498 Assert(pEndpoint->AioMgr.pReqsPendingTail);
499 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
500 }
501
502 pEndpoint->AioMgr.pReqsPendingTail = pTask;
503 pTask->pNext = NULL;
504}
505
506/**
507 * Allocates a async I/O request.
508 *
509 * @returns Handle to the request.
510 * @param pAioMgr The I/O manager.
511 */
512static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
513{
514 /* Get a request handle. */
515 RTFILEAIOREQ hReq;
516 if (pAioMgr->iFreeEntry > 0)
517 {
518 pAioMgr->iFreeEntry--;
519 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
520 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
521 Assert(hReq != NIL_RTFILEAIOREQ);
522 }
523 else
524 {
525 int rc = RTFileAioReqCreate(&hReq);
526 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
527 }
528
529 return hReq;
530}
531
532/**
533 * Frees a async I/O request handle.
534 *
535 * @returns nothing.
536 * @param pAioMgr The I/O manager.
537 * @param hReq The I/O request handle to free.
538 */
539static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
540{
541 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
542 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
543
544 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
545 pAioMgr->iFreeEntry++;
546}
547
548/**
549 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
550 */
551static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
552 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
553 PRTFILEAIOREQ pahReqs, unsigned cReqs)
554{
555 pAioMgr->cRequestsActive += cReqs;
556 pEndpoint->AioMgr.cRequestsActive += cReqs;
557
558 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
559 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
560
561 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
562 if (RT_FAILURE(rc))
563 {
564 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
565 {
566 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
567
568 /* Append any not submitted task to the waiting list. */
569 for (size_t i = 0; i < cReqs; i++)
570 {
571 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
572
573 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
574 {
575 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
576
577 Assert(pTask->hReq == pahReqs[i]);
578 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
579 pAioMgr->cRequestsActive--;
580 pEndpoint->AioMgr.cRequestsActive--;
581
582 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
583 {
584 /* Clear the pending flush */
585 Assert(pEndpoint->pFlushReq == pTask);
586 pEndpoint->pFlushReq = NULL;
587 }
588 }
589 }
590
591 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
592
593 /* Print an entry in the release log */
594 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
595 {
596 pEpClass->fOutOfResourcesWarningPrinted = true;
597 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
598 pAioMgr->cRequestsActive));
599 }
600
601 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
602 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
603 rc = VINF_SUCCESS;
604 }
605 else /* Another kind of error happened (full disk, ...) */
606 {
607 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
608 for (size_t i = 0; i < cReqs; i++)
609 {
610 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
611
612 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
613 {
614 /* We call ourself again to do any error handling which might come up now. */
615 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
616 AssertRC(rc);
617 }
618 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
619 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
620 }
621
622
623 if ( pEndpoint->pFlushReq
624 && !pAioMgr->cRequestsActive
625 && !pEndpoint->fAsyncFlushSupported)
626 {
627 /*
628 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
629 * the async flush API.
630 * Happens only if this we just noticed that this is not supported
631 * and the only active request was a flush.
632 */
633 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
634 pEndpoint->pFlushReq = NULL;
635 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
636 pdmacFileTaskFree(pEndpoint, pFlush);
637 }
638 }
639 }
640
641 return VINF_SUCCESS;
642}
643
644static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
645 RTFOFF offStart, size_t cbRange,
646 PPDMACTASKFILE pTask, bool fAlignedReq)
647{
648 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
649 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
650 ("Invalid task type %d\n", pTask->enmTransferType));
651
652 /*
653 * If there is no unaligned request active and the current one is aligned
654 * just pass it through.
655 */
656 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
657 return false;
658
659 PPDMACFILERANGELOCK pRangeLock;
660 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
661 if (!pRangeLock)
662 {
663 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
664 /* Check if we intersect with the range. */
665 if ( !pRangeLock
666 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
667 && (pRangeLock->Core.KeyLast) >= offStart))
668 {
669 pRangeLock = NULL; /* False alarm */
670 }
671 }
672
673 /* Check whether we have one of the situations explained below */
674 if (pRangeLock)
675 {
676 /* Add to the list. */
677 pTask->pNext = NULL;
678
679 if (!pRangeLock->pWaitingTasksHead)
680 {
681 Assert(!pRangeLock->pWaitingTasksTail);
682 pRangeLock->pWaitingTasksHead = pTask;
683 pRangeLock->pWaitingTasksTail = pTask;
684 }
685 else
686 {
687 AssertPtr(pRangeLock->pWaitingTasksTail);
688 pRangeLock->pWaitingTasksTail->pNext = pTask;
689 pRangeLock->pWaitingTasksTail = pTask;
690 }
691 return true;
692 }
693
694 return false;
695}
696
697static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
698 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
699 RTFOFF offStart, size_t cbRange,
700 PPDMACTASKFILE pTask, bool fAlignedReq)
701{
702 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
703 pAioMgr, pEndpoint, offStart, cbRange, pTask));
704
705 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
706 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
707 offStart, cbRange));
708
709 /*
710 * If there is no unaligned request active and the current one is aligned
711 * just don't use the lock.
712 */
713 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
714 {
715 pTask->pRangeLock = NULL;
716 return VINF_SUCCESS;
717 }
718
719 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
720 if (!pRangeLock)
721 return VERR_NO_MEMORY;
722
723 /* Init the lock. */
724 pRangeLock->Core.Key = offStart;
725 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
726 pRangeLock->cRefs = 1;
727 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
728 pRangeLock->pWaitingTasksHead = NULL;
729 pRangeLock->pWaitingTasksTail = NULL;
730
731 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
732 AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted);
733
734 /* Let the task point to its lock. */
735 pTask->pRangeLock = pRangeLock;
736 pEndpoint->AioMgr.cLockedReqsActive++;
737
738 return VINF_SUCCESS;
739}
740
741static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
742 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
743 PPDMACFILERANGELOCK pRangeLock)
744{
745 PPDMACTASKFILE pTasksWaitingHead;
746
747 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
748 pAioMgr, pEndpoint, pRangeLock));
749
750 /* pRangeLock can be NULL if there was no lock assigned with the task. */
751 if (!pRangeLock)
752 return NULL;
753
754 Assert(pRangeLock->cRefs == 1);
755
756 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
757 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
758 pRangeLock->pWaitingTasksHead = NULL;
759 pRangeLock->pWaitingTasksTail = NULL;
760 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
761 pEndpoint->AioMgr.cLockedReqsActive--;
762
763 return pTasksWaitingHead;
764}
765
766static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
767 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
768 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
769{
770 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
771 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
772 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
773 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
774
775 pTask->fPrefetch = false;
776 pTask->cbBounceBuffer = 0;
777
778 /*
779 * Before we start to setup the request we have to check whether there is a task
780 * already active which range intersects with ours. We have to defer execution
781 * of this task in two cases:
782 * - The pending task is a write and the current is either read or write
783 * - The pending task is a read and the current task is a write task.
784 *
785 * To check whether a range is currently "locked" we use the AVL tree where every pending task
786 * is stored by its file offset range. The current task will be added to the active task
787 * and will be executed when the active one completes. (The method below
788 * which checks whether a range is already used will add the task)
789 *
790 * This is necessary because of the requirement to align all requests to a 512 boundary
791 * which is enforced by the host OS (Linux and Windows atm). It is possible that
792 * we have to process unaligned tasks and need to align them using bounce buffers.
793 * While the data is fetched from the file another request might arrive writing to
794 * the same range. This will result in data corruption if both are executed concurrently.
795 */
796 int rc = VINF_SUCCESS;
797 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
798 true /* fAlignedReq */);
799 if (!fLocked)
800 {
801 /* Get a request handle. */
802 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
803 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
804
805 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
806 {
807 /* Grow the file if needed. */
808 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
809 {
810 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
811 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
812 }
813
814 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
815 pTask->Off, pTask->DataSeg.pvSeg,
816 pTask->DataSeg.cbSeg, pTask);
817 }
818 else
819 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
820 pTask->Off, pTask->DataSeg.pvSeg,
821 pTask->DataSeg.cbSeg, pTask);
822 AssertRC(rc);
823
824 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
825 pTask->DataSeg.cbSeg,
826 pTask, true /* fAlignedReq */);
827
828 if (RT_SUCCESS(rc))
829 {
830 pTask->hReq = hReq;
831 *phReq = hReq;
832 }
833 }
834 else
835 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
836
837 return rc;
838}
839
840static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
841 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
842 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
843{
844 /*
845 * Check if the alignment requirements are met.
846 * Offset, transfer size and buffer address
847 * need to be on a 512 boundary.
848 */
849 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
850 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
851 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
852 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
853 && offStart == pTask->Off;
854
855 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
856 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
857 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
858 offStart, cbToTransfer, pEndpoint->cbFile));
859
860 pTask->fPrefetch = false;
861
862 /*
863 * Before we start to setup the request we have to check whether there is a task
864 * already active which range intersects with ours. We have to defer execution
865 * of this task in two cases:
866 * - The pending task is a write and the current is either read or write
867 * - The pending task is a read and the current task is a write task.
868 *
869 * To check whether a range is currently "locked" we use the AVL tree where every pending task
870 * is stored by its file offset range. The current task will be added to the active task
871 * and will be executed when the active one completes. (The method below
872 * which checks whether a range is already used will add the task)
873 *
874 * This is necessary because of the requirement to align all requests to a 512 boundary
875 * which is enforced by the host OS (Linux and Windows atm). It is possible that
876 * we have to process unaligned tasks and need to align them using bounce buffers.
877 * While the data is fetched from the file another request might arrive writing to
878 * the same range. This will result in data corruption if both are executed concurrently.
879 */
880 int rc = VINF_SUCCESS;
881 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
882 if (!fLocked)
883 {
884 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
885 void *pvBuf = pTask->DataSeg.pvSeg;
886
887 /* Get a request handle. */
888 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
889 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
890
891 if ( !fAlignedReq
892 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
893 {
894 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
895 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
896
897 /* Create bounce buffer. */
898 pTask->cbBounceBuffer = cbToTransfer;
899
900 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
901 pTask->Off, offStart));
902 pTask->offBounceBuffer = pTask->Off - offStart;
903
904 /** @todo I think we need something like a RTMemAllocAligned method here.
905 * Current assumption is that the maximum alignment is 4096byte
906 * (GPT disk on Windows)
907 * so we can use RTMemPageAlloc here.
908 */
909 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
910 if (RT_LIKELY(pTask->pvBounceBuffer))
911 {
912 pvBuf = pTask->pvBounceBuffer;
913
914 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
915 {
916 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
917 || RT_UNLIKELY(offStart != pTask->Off))
918 {
919 /* We have to fill the buffer first before we can update the data. */
920 LogFlow(("Prefetching data for task %#p\n", pTask));
921 pTask->fPrefetch = true;
922 enmTransferType = PDMACTASKFILETRANSFER_READ;
923 }
924 else
925 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
926 }
927 }
928 else
929 rc = VERR_NO_MEMORY;
930 }
931 else
932 pTask->cbBounceBuffer = 0;
933
934 if (RT_SUCCESS(rc))
935 {
936 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
937 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
938
939 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
940 {
941 /* Grow the file if needed. */
942 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
943 {
944 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
945 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
946 }
947
948 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
949 offStart, pvBuf, cbToTransfer, pTask);
950 }
951 else
952 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
953 offStart, pvBuf, cbToTransfer, pTask);
954 AssertRC(rc);
955
956 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
957 if (RT_SUCCESS(rc))
958 {
959 pTask->hReq = hReq;
960 *phReq = hReq;
961 }
962 else
963 {
964 /* Cleanup */
965 if (pTask->cbBounceBuffer)
966 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
967 }
968 }
969 }
970 else
971 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
972
973 return rc;
974}
975
976static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
977 PPDMACEPFILEMGR pAioMgr,
978 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
979{
980 RTFILEAIOREQ apReqs[20];
981 unsigned cRequests = 0;
982 int rc = VINF_SUCCESS;
983
984 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
985 ("Trying to process request lists of a non active endpoint!\n"));
986
987 /* Go through the list and queue the requests until we get a flush request */
988 while ( pTaskHead
989 && !pEndpoint->pFlushReq
990 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
991 && RT_SUCCESS(rc))
992 {
993 RTMSINTERVAL msWhenNext;
994 PPDMACTASKFILE pCurr = pTaskHead;
995
996 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
997 {
998 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
999 break;
1000 }
1001
1002 pTaskHead = pTaskHead->pNext;
1003
1004 pCurr->pNext = NULL;
1005
1006 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
1007 ("Endpoints do not match\n"));
1008
1009 switch (pCurr->enmTransferType)
1010 {
1011 case PDMACTASKFILETRANSFER_FLUSH:
1012 {
1013 /* If there is no data transfer request this flush request finished immediately. */
1014 if (pEndpoint->fAsyncFlushSupported)
1015 {
1016 /* Issue a flush to the host. */
1017 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1018 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1019
1020 LogFlow(("Flush request %#p\n", hReq));
1021
1022 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
1023 if (RT_FAILURE(rc))
1024 {
1025 if (rc == VERR_NOT_SUPPORTED)
1026 LogRel(("AIOMgr: Async flushes not supported\n"));
1027 else
1028 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
1029 pEndpoint->fAsyncFlushSupported = false;
1030 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1031 rc = VINF_SUCCESS; /* Fake success */
1032 }
1033 else
1034 {
1035 pCurr->hReq = hReq;
1036 apReqs[cRequests] = hReq;
1037 pEndpoint->AioMgr.cReqsProcessed++;
1038 cRequests++;
1039 }
1040 }
1041
1042 if ( !pEndpoint->AioMgr.cRequestsActive
1043 && !pEndpoint->fAsyncFlushSupported)
1044 {
1045 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1046 pdmacFileTaskFree(pEndpoint, pCurr);
1047 }
1048 else
1049 {
1050 Assert(!pEndpoint->pFlushReq);
1051 pEndpoint->pFlushReq = pCurr;
1052 }
1053 break;
1054 }
1055 case PDMACTASKFILETRANSFER_READ:
1056 case PDMACTASKFILETRANSFER_WRITE:
1057 {
1058 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1059
1060 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1061 {
1062 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1063 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1064 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1065 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1066 else
1067 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1068
1069 AssertRC(rc);
1070 }
1071 else
1072 {
1073 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1074 hReq = pCurr->hReq;
1075 }
1076
1077 LogFlow(("Read/Write request %#p\n", hReq));
1078
1079 if (hReq != NIL_RTFILEAIOREQ)
1080 {
1081 apReqs[cRequests] = hReq;
1082 cRequests++;
1083 }
1084 break;
1085 }
1086 default:
1087 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1088 } /* switch transfer type */
1089
1090 /* Queue the requests if the array is full. */
1091 if (cRequests == RT_ELEMENTS(apReqs))
1092 {
1093 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1094 cRequests = 0;
1095 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1096 ("Unexpected return code\n"));
1097 }
1098 }
1099
1100 if (cRequests)
1101 {
1102 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1103 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1104 ("Unexpected return code rc=%Rrc\n", rc));
1105 }
1106
1107 if (pTaskHead)
1108 {
1109 /* Add the rest of the tasks to the pending list */
1110 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1111
1112 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1113 && !pEndpoint->pFlushReq))
1114 {
1115#if 0
1116 /*
1117 * The I/O manager has no room left for more requests
1118 * but there are still requests to process.
1119 * Create a new I/O manager and let it handle some endpoints.
1120 */
1121 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1122#else
1123 /* Grow the I/O manager */
1124 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1125#endif
1126 }
1127 }
1128
1129 /* Insufficient resources are not fatal. */
1130 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1131 rc = VINF_SUCCESS;
1132
1133 return rc;
1134}
1135
1136/**
1137 * Adds all pending requests for the given endpoint
1138 * until a flush request is encountered or there is no
1139 * request anymore.
1140 *
1141 * @returns VBox status code.
1142 * @param pAioMgr The async I/O manager for the endpoint
1143 * @param pEndpoint The endpoint to get the requests from.
1144 */
1145static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1146 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1147{
1148 int rc = VINF_SUCCESS;
1149 PPDMACTASKFILE pTasksHead = NULL;
1150
1151 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1152 ("Trying to process request lists of a non active endpoint!\n"));
1153
1154 Assert(!pEndpoint->pFlushReq);
1155
1156 /* Check the pending list first */
1157 if (pEndpoint->AioMgr.pReqsPendingHead)
1158 {
1159 LogFlow(("Queuing pending requests first\n"));
1160
1161 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1162 /*
1163 * Clear the list as the processing routine will insert them into the list
1164 * again if it gets a flush request.
1165 */
1166 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1167 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1168 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1169 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1170 }
1171
1172 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1173 {
1174 /* Now the request queue. */
1175 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1176 if (pTasksHead)
1177 {
1178 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1179 AssertRC(rc);
1180 }
1181 }
1182
1183 return rc;
1184}
1185
1186static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1187{
1188 int rc = VINF_SUCCESS;
1189 bool fNotifyWaiter = false;
1190
1191 LogFlowFunc((": Enter\n"));
1192
1193 Assert(pAioMgr->fBlockingEventPending);
1194
1195 switch (pAioMgr->enmBlockingEvent)
1196 {
1197 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1198 {
1199 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1200 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1201
1202 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1203
1204 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1205 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1206 if (pAioMgr->pEndpointsHead)
1207 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1208 pAioMgr->pEndpointsHead = pEndpointNew;
1209
1210 /* Assign the completion point to this file. */
1211 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
1212 fNotifyWaiter = true;
1213 pAioMgr->cEndpoints++;
1214 break;
1215 }
1216 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1217 {
1218 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1219 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1220
1221 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1222 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1223 break;
1224 }
1225 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1226 {
1227 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1228 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1229
1230 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1231 {
1232 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1233
1234 /* Make sure all tasks finished. Process the queues a last time first. */
1235 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1236 AssertRC(rc);
1237
1238 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1239 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1240 }
1241 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1242 && (!pEndpointClose->AioMgr.cRequestsActive))
1243 fNotifyWaiter = true;
1244 break;
1245 }
1246 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1247 {
1248 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1249 if (!pAioMgr->cRequestsActive)
1250 fNotifyWaiter = true;
1251 break;
1252 }
1253 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1254 {
1255 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1256 break;
1257 }
1258 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1259 {
1260 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1261 fNotifyWaiter = true;
1262 break;
1263 }
1264 default:
1265 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1266 }
1267
1268 if (fNotifyWaiter)
1269 {
1270 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1271 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1272
1273 /* Release the waiting thread. */
1274 LogFlow(("Signalling waiter\n"));
1275 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1276 AssertRC(rc);
1277 }
1278
1279 LogFlowFunc((": Leave\n"));
1280 return rc;
1281}
1282
1283/**
1284 * Checks all endpoints for pending events or new requests.
1285 *
1286 * @returns VBox status code.
1287 * @param pAioMgr The I/O manager handle.
1288 */
1289static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1290{
1291 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1292 int rc = VINF_SUCCESS;
1293 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1294
1295 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
1296
1297 while (pEndpoint)
1298 {
1299 if (!pEndpoint->pFlushReq
1300 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1301 && !pEndpoint->AioMgr.fMoving)
1302 {
1303 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1304 if (RT_FAILURE(rc))
1305 return rc;
1306 }
1307 else if ( !pEndpoint->AioMgr.cRequestsActive
1308 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1309 {
1310 /* Reopen the file so that the new endpoint can re-associate with the file */
1311 RTFileClose(pEndpoint->hFile);
1312 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1313 AssertRC(rc);
1314
1315 if (pEndpoint->AioMgr.fMoving)
1316 {
1317 pEndpoint->AioMgr.fMoving = false;
1318 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1319 }
1320 else
1321 {
1322 Assert(pAioMgr->fBlockingEventPending);
1323 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1324
1325 /* Release the waiting thread. */
1326 LogFlow(("Signalling waiter\n"));
1327 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1328 AssertRC(rc);
1329 }
1330 }
1331
1332 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1333 }
1334
1335 return rc;
1336}
1337
1338/**
1339 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1340 */
1341static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1342{
1343 size_t cbTransfered = 0;
1344 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1345
1346 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1347}
1348
1349static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1350 int rcReq, size_t cbTransfered)
1351{
1352 int rc = VINF_SUCCESS;
1353 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1354 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1355 PPDMACTASKFILE pTasksWaiting;
1356
1357 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1358
1359 pEndpoint = pTask->pEndpoint;
1360
1361 pTask->hReq = NIL_RTFILEAIOREQ;
1362
1363 pAioMgr->cRequestsActive--;
1364 pEndpoint->AioMgr.cRequestsActive--;
1365 pEndpoint->AioMgr.cReqsProcessed++;
1366
1367 /*
1368 * It is possible that the request failed on Linux with kernels < 2.6.23
1369 * if the passed buffer was allocated with remap_pfn_range or if the file
1370 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1371 * The endpoint will be migrated to a failsafe manager in case a request fails.
1372 */
1373 if (RT_FAILURE(rcReq))
1374 {
1375 /* Free bounce buffers and the IPRT request. */
1376 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1377
1378 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1379 {
1380 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
1381 pEndpoint->fAsyncFlushSupported = false;
1382 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1383 /* The other method will take over now. */
1384
1385 pEndpoint->pFlushReq = NULL;
1386 /* Call completion callback */
1387 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1388 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1389 pdmacFileTaskFree(pEndpoint, pTask);
1390 }
1391 else
1392 {
1393 /* Free the lock and process pending tasks if necessary */
1394 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1395 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1396 AssertRC(rc);
1397
1398 if (pTask->cbBounceBuffer)
1399 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1400
1401 /*
1402 * Fatal errors are reported to the guest and non-fatal errors
1403 * will cause a migration to the failsafe manager in the hope
1404 * that the error disappears.
1405 */
1406 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1407 {
1408 /* Queue the request on the pending list. */
1409 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1410 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1411
1412 /* Create a new failsafe manager if necessary. */
1413 if (!pEndpoint->AioMgr.fMoving)
1414 {
1415 PPDMACEPFILEMGR pAioMgrFailsafe;
1416
1417 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1418 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1419
1420 pEndpoint->AioMgr.fMoving = true;
1421
1422 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1423 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1424 AssertRC(rc);
1425
1426 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1427
1428 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1429 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1430 }
1431
1432 /* If this was the last request for the endpoint migrate it to the new manager. */
1433 if (!pEndpoint->AioMgr.cRequestsActive)
1434 {
1435 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1436 Assert(!fReqsPending); NOREF(fReqsPending);
1437
1438 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1439 AssertRC(rc);
1440 }
1441 }
1442 else
1443 {
1444 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1445 pdmacFileTaskFree(pEndpoint, pTask);
1446 }
1447 }
1448 }
1449 else
1450 {
1451 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1452 {
1453 /* Clear pending flush */
1454 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1455 pEndpoint->pFlushReq = NULL;
1456 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1457
1458 /* Call completion callback */
1459 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1460 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1461 pdmacFileTaskFree(pEndpoint, pTask);
1462 }
1463 else
1464 {
1465 /*
1466 * Restart an incomplete transfer.
1467 * This usually means that the request will return an error now
1468 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1469 * the transfer needs to be continued.
1470 */
1471 pTask->cbTransfered += cbTransfered;
1472
1473 if (RT_UNLIKELY( pTask->cbTransfered < pTask->DataSeg.cbSeg
1474 || ( pTask->cbBounceBuffer
1475 && pTask->cbTransfered < pTask->cbBounceBuffer)))
1476 {
1477 RTFOFF offStart;
1478 size_t cbToTransfer;
1479 uint8_t *pbBuf = NULL;
1480
1481 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
1482 pTask, cbTransfered));
1483 Assert(cbTransfered % 512 == 0);
1484
1485 if (pTask->cbBounceBuffer)
1486 {
1487 AssertPtr(pTask->pvBounceBuffer);
1488 offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
1489 cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
1490 pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
1491 }
1492 else
1493 {
1494 Assert(!pTask->pvBounceBuffer);
1495 offStart = pTask->Off + pTask->cbTransfered;
1496 cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
1497 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
1498 }
1499
1500 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1501 {
1502 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
1503 pbBuf, cbToTransfer, pTask);
1504 }
1505 else
1506 {
1507 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1508 ("Invalid transfer type\n"));
1509 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
1510 pbBuf, cbToTransfer, pTask);
1511 }
1512 AssertRC(rc);
1513
1514 pTask->hReq = hReq;
1515 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1516 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1517 ("Unexpected return code rc=%Rrc\n", rc));
1518 }
1519 else if (pTask->fPrefetch)
1520 {
1521 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1522 Assert(pTask->cbBounceBuffer);
1523
1524 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1525 pTask->DataSeg.pvSeg,
1526 pTask->DataSeg.cbSeg);
1527
1528 /* Write it now. */
1529 pTask->fPrefetch = false;
1530 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1531 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
1532
1533 pTask->cbTransfered = 0;
1534
1535 /* Grow the file if needed. */
1536 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1537 {
1538 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1539 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
1540 }
1541
1542 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
1543 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1544 AssertRC(rc);
1545 pTask->hReq = hReq;
1546 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1547 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1548 ("Unexpected return code rc=%Rrc\n", rc));
1549 }
1550 else
1551 {
1552 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1553 {
1554 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1555 memcpy(pTask->DataSeg.pvSeg,
1556 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1557 pTask->DataSeg.cbSeg);
1558
1559 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1560 }
1561
1562 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1563
1564 /* Free the lock and process pending tasks if necessary */
1565 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1566 if (pTasksWaiting)
1567 {
1568 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1569 AssertRC(rc);
1570 }
1571
1572 /* Call completion callback */
1573 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1574 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1575 pdmacFileTaskFree(pEndpoint, pTask);
1576
1577 /*
1578 * If there is no request left on the endpoint but a flush request is set
1579 * it completed now and we notify the owner.
1580 * Furthermore we look for new requests and continue.
1581 */
1582 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1583 {
1584 /* Call completion callback */
1585 pTask = pEndpoint->pFlushReq;
1586 pEndpoint->pFlushReq = NULL;
1587
1588 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1589
1590 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1591 pdmacFileTaskFree(pEndpoint, pTask);
1592 }
1593 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1594 {
1595 /* If the endpoint is about to be migrated do it now. */
1596 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1597 Assert(!fReqsPending); NOREF(fReqsPending);
1598
1599 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1600 AssertRC(rc);
1601 }
1602 }
1603 } /* Not a flush request */
1604 } /* request completed successfully */
1605}
1606
/**
 * Helper macro for checking for error codes.
 *
 * If @a rc indicates failure the error handler is invoked and its status is
 * returned from the calling function.  The body is wrapped in do/while(0) so
 * that the macro expands to exactly one statement and can be used safely in
 * unbraced if/else constructs.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1614
1615/**
1616 * The normal I/O manager using the RTFileAio* API
1617 *
1618 * @returns VBox status code.
1619 * @param hThreadSelf Handle of the thread.
1620 * @param pvUser Opaque user data.
1621 */
1622DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
1623{
1624 int rc = VINF_SUCCESS;
1625 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1626 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1627 NOREF(hThreadSelf);
1628
1629 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1630 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1631 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1632 {
1633 if (!pAioMgr->cRequestsActive)
1634 {
1635 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1636 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1637 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
1638 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1639 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
1640
1641 LogFlow(("Got woken up\n"));
1642 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1643 }
1644
1645 /* Check for an external blocking event first. */
1646 if (pAioMgr->fBlockingEventPending)
1647 {
1648 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1649 CHECK_RC(pAioMgr, rc);
1650 }
1651
1652 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1653 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1654 {
1655 /* We got woken up because an endpoint issued new requests. Queue them. */
1656 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1657 CHECK_RC(pAioMgr, rc);
1658
1659 while (pAioMgr->cRequestsActive)
1660 {
1661 RTFILEAIOREQ apReqs[20];
1662 uint32_t cReqsCompleted = 0;
1663 size_t cReqsWait;
1664
1665 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1666 cReqsWait = RT_ELEMENTS(apReqs);
1667 else
1668 cReqsWait = pAioMgr->cRequestsActive;
1669
1670 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1671
1672 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1673 1,
1674 RT_INDEFINITE_WAIT, apReqs,
1675 cReqsWait, &cReqsCompleted);
1676 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1677 CHECK_RC(pAioMgr, rc);
1678
1679 LogFlow(("%d tasks completed\n", cReqsCompleted));
1680
1681 for (uint32_t i = 0; i < cReqsCompleted; i++)
1682 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1683
1684 /* Check for an external blocking event before we go to sleep again. */
1685 if (pAioMgr->fBlockingEventPending)
1686 {
1687 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1688 CHECK_RC(pAioMgr, rc);
1689 }
1690
1691 /* Update load statistics. */
1692 uint64_t uMillisCurr = RTTimeMilliTS();
1693 if (uMillisCurr > uMillisEnd)
1694 {
1695 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1696
1697 /* Calculate timespan. */
1698 uMillisCurr -= uMillisEnd;
1699
1700 while (pEndpointCurr)
1701 {
1702 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1703 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1704 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1705 }
1706
1707 /* Set new update interval */
1708 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1709 }
1710
1711 /* Check endpoints for new requests. */
1712 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1713 {
1714 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1715 CHECK_RC(pAioMgr, rc);
1716 }
1717 } /* while requests are active. */
1718
1719 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1720 {
1721 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1722 AssertRC(rc);
1723 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1724
1725 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1726 CHECK_RC(pAioMgr, rc);
1727 }
1728 } /* if still running */
1729 } /* while running */
1730
1731 LogFlowFunc(("rc=%Rrc\n", rc));
1732 return rc;
1733}
1734
1735#undef CHECK_RC
1736
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use