VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@ 96407

Last change on this file since 96407 was 96407, checked in by vboxsync, 22 months ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 67.1 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 96407 2022-08-22 17:43:14Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Async File I/O manager.
4 */
5
6/*
7 * Copyright (C) 2006-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
33#include <iprt/types.h>
34#include <iprt/asm.h>
35#include <iprt/file.h>
36#include <iprt/mem.h>
37#include <iprt/string.h>
38#include <iprt/assert.h>
39#include <VBox/log.h>
40
41#include "PDMAsyncCompletionFileInternal.h"
42
43
44/*********************************************************************************************************************************
45* Defined Constants And Macros *
46*********************************************************************************************************************************/
47/** The update period for the I/O load statistics in ms. */
48#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
/** Request limit grow step: used as the initial maximum number of active requests and as the increment when a manager is grown. */
50#define PDMACEPFILEMGR_REQS_STEP 64
51
52
53/*********************************************************************************************************************************
54* Internal functions *
55*********************************************************************************************************************************/
56static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
57 PPDMACEPFILEMGR pAioMgr,
58 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
59
60static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
61 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
62 PPDMACFILERANGELOCK pRangeLock);
63
64static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
65 int rc, size_t cbTransfered);
66
67
68int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
69{
70 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
71
72 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
73 if (rc == VERR_OUT_OF_RANGE)
74 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
75
76 if (RT_SUCCESS(rc))
77 {
78 /* Initialize request handle array. */
79 pAioMgr->iFreeEntry = 0;
80 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
81 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
82
83 if (pAioMgr->pahReqsFree)
84 {
85 /* Create the range lock memcache. */
86 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
87 0, UINT32_MAX, NULL, NULL, NULL, 0);
88 if (RT_SUCCESS(rc))
89 return VINF_SUCCESS;
90
91 RTMemFree(pAioMgr->pahReqsFree);
92 }
93 else
94 {
95 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
96 rc = VERR_NO_MEMORY;
97 }
98 }
99
100 return rc;
101}
102
103void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
104{
105 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
106
107 while (pAioMgr->iFreeEntry > 0)
108 {
109 pAioMgr->iFreeEntry--;
110 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
111 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
112 }
113
114 RTMemFree(pAioMgr->pahReqsFree);
115 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
116}
117
#if 0 /* currently unused */
/**
 * Sorts the endpoint list with insertion sort.
 *
 * After the call the list is ordered by descending AioMgr.cReqsPerSec
 * (highest load first), as checked by the DEBUG validation pass below.
 * Assumes the list is non-empty (the head is dereferenced unconditionally).
 */
static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
{
    PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;

    /* Start with the second element; a single-element prefix is trivially sorted. */
    pEpPrev = pAioMgr->pEndpointsHead;
    pEpCurr = pEpPrev->AioMgr.pEndpointNext;

    while (pEpCurr)
    {
        /* Remember the next element to sort because the list might change. */
        pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;

        /* Unlink the current element from the list. */
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;

        if (pPrev)
            pPrev->AioMgr.pEndpointNext = pNext;
        else
            pAioMgr->pEndpointsHead = pNext;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pPrev;

        /* Go back until we reached the place to insert the current endpoint into. */
        /* NOTE(review): pEpPrev is not re-anchored at the start of each outer iteration;
         * confirm this still yields a fully sorted list for all inputs (code is #if 0'd). */
        while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
            pEpPrev = pEpPrev->AioMgr.pEndpointPrev;

        /* Link the endpoint into the list. */
        if (pEpPrev)
            pNext = pEpPrev->AioMgr.pEndpointNext;
        else
            pNext = pAioMgr->pEndpointsHead;

        pEpCurr->AioMgr.pEndpointNext = pNext;
        pEpCurr->AioMgr.pEndpointPrev = pEpPrev;

        if (pNext)
            pNext->AioMgr.pEndpointPrev = pEpCurr;

        if (pEpPrev)
            pEpPrev->AioMgr.pEndpointNext = pEpCurr;
        else
            pAioMgr->pEndpointsHead = pEpCurr;

        pEpCurr = pEpNextToSort;
    }

#ifdef DEBUG
    /* Validate sorting algorithm: walk the list checking descending order and element count. */
    unsigned cEndpoints = 0;
    pEpCurr = pAioMgr->pEndpointsHead;

    AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
    AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));

    while (pEpCurr)
    {
        cEndpoints++;

        PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
        PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;

        Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
        Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);

        pEpCurr = pNext;
    }

    AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));

#endif
}
#endif /* currently unused */
196
197/**
198 * Removes an endpoint from the currently assigned manager.
199 *
200 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
201 * FALSE otherwise.
202 * @param pEndpointRemove The endpoint to remove.
203 */
204static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
205{
206 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
207 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
208 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
209
210 pAioMgr->cEndpoints--;
211
212 if (pPrev)
213 pPrev->AioMgr.pEndpointNext = pNext;
214 else
215 pAioMgr->pEndpointsHead = pNext;
216
217 if (pNext)
218 pNext->AioMgr.pEndpointPrev = pPrev;
219
220 /* Make sure that there is no request pending on this manager for the endpoint. */
221 if (!pEndpointRemove->AioMgr.cRequestsActive)
222 {
223 Assert(!pEndpointRemove->pFlushReq);
224
225 /* Reopen the file so that the new endpoint can re-associate with the file */
226 RTFileClose(pEndpointRemove->hFile);
227 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
228 AssertRC(rc);
229 return false;
230 }
231
232 return true;
233}
234
235#if 0 /* currently unused */
236
237static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
238{
239 /* Balancing doesn't make sense with only one endpoint. */
240 if (pAioMgr->cEndpoints == 1)
241 return false;
242
243 /* Doesn't make sens to move endpoints if only one produces the whole load */
244 unsigned cEndpointsWithLoad = 0;
245
246 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
247
248 while (pCurr)
249 {
250 if (pCurr->AioMgr.cReqsPerSec)
251 cEndpointsWithLoad++;
252
253 pCurr = pCurr->AioMgr.pEndpointNext;
254 }
255
256 return (cEndpointsWithLoad > 1);
257}
258
259/**
260 * Creates a new I/O manager and spreads the I/O load of the endpoints
261 * between the given I/O manager and the new one.
262 *
263 * @returns nothing.
264 * @param pAioMgr The I/O manager with high I/O load.
265 */
266static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
267{
268 /*
269 * Check if balancing would improve the situation.
270 */
271 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
272 {
273 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
274 PPDMACEPFILEMGR pAioMgrNew = NULL;
275
276 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
277 if (RT_SUCCESS(rc))
278 {
279 /* We will sort the list by request count per second. */
280 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
281
282 /* Now move some endpoints to the new manager. */
283 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
284 unsigned cReqsOther = 0;
285 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
286
287 while (pCurr)
288 {
289 if (cReqsHere <= cReqsOther)
290 {
291 /*
292 * The other manager has more requests to handle now.
293 * We will keep the current endpoint.
294 */
295 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
296 cReqsHere += pCurr->AioMgr.cReqsPerSec;
297 pCurr = pCurr->AioMgr.pEndpointNext;
298 }
299 else
300 {
301 /* Move to other endpoint. */
302 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
303 cReqsOther += pCurr->AioMgr.cReqsPerSec;
304
305 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
306
307 pCurr = pCurr->AioMgr.pEndpointNext;
308
309 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
310
311 if (fReqsPending)
312 {
313 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
314 pMove->AioMgr.fMoving = true;
315 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
316 }
317 else
318 {
319 pMove->AioMgr.fMoving = false;
320 pMove->AioMgr.pAioMgrDst = NULL;
321 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
322 }
323 }
324 }
325 }
326 else
327 {
328 /* Don't process further but leave a log entry about reduced performance. */
329 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
330 }
331 }
332 else
333 Log(("AIOMgr: Load balancing would not improve anything\n"));
334}
335
336#endif /* unused */
337
/**
 * Increase the maximum number of active requests for the given I/O manager.
 *
 * Grows the limit by PDMACEPFILEMGR_REQS_STEP, replacing the host AIO
 * context and the free request handle array with bigger ones.  Caller must
 * ensure the manager is in the GROWING state with no active requests
 * (asserted below).
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager to grow.
 */
static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
{
    LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));

    AssertMsg(    pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
              && !pAioMgr->cRequestsActive,
              ("Invalid state of the I/O manager\n"));

#ifdef RT_OS_WINDOWS
    /*
     * Reopen the files of all assigned endpoints first so we can assign them to the new
     * I/O context.
     */
    PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;

    while (pCurr)
    {
        RTFileClose(pCurr->hFile);
        int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    /* Create the new bigger context. */
    pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;

    /* Try unlimited first; fall back to the explicit limit if the host caps it. */
    RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
    int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
    if (rc == VERR_OUT_OF_RANGE)
        rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);

    if (RT_SUCCESS(rc))
    {
        /* Close the old context. */
        rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
        AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */

        pAioMgr->hAioCtx = hAioCtxNew;

        /* Create a new I/O task handle array */
        /* NOTE(review): the +1 differs from the init path which sizes the array
         * exactly to cRequestsActiveMax — presumably intentional slack; confirm. */
        uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
        RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));

        if (pahReqNew)
        {
            /* Copy the cached request handles. */
            for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
                pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];

            RTMemFree(pAioMgr->pahReqsFree);
            pAioMgr->pahReqsFree = pahReqNew;
            pAioMgr->cReqEntries = cReqEntriesNew;
            LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
                         pAioMgr->cRequestsActiveMax));
        }
        else
            rc = VERR_NO_MEMORY;
    }

#ifdef RT_OS_WINDOWS
    /* Assign the file to the new context. */
    pCurr = pAioMgr->pEndpointsHead;
    while (pCurr)
    {
        rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
        AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */

        pCurr = pCurr->AioMgr.pEndpointNext;
    }
#endif

    if (RT_FAILURE(rc))
    {
        LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
        /* Roll back the limit so the bookkeeping matches the (old) context capacity. */
        pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
    }

    pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
    LogFlowFunc(("returns rc=%Rrc\n", rc));

    return rc;
}
427
428/**
429 * Checks if a given status code is fatal.
430 * Non fatal errors can be fixed by migrating the endpoint to a
431 * failsafe manager.
432 *
433 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
434 * false If the error can be fixed by a migration. (image on NFS disk for example)
435 * @param rcReq The status code to check.
436 */
437DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
438{
439 return rcReq == VERR_DEV_IO_ERROR
440 || rcReq == VERR_FILE_IO_ERROR
441 || rcReq == VERR_DISK_IO_ERROR
442 || rcReq == VERR_DISK_FULL
443 || rcReq == VERR_FILE_TOO_BIG;
444}
445
/**
 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
 *
 * @returns VBox status code
 * @param   pAioMgr    The I/O manager the error occurred on.
 * @param   rc         The error code.
 * @param   SRC_POS    The source location of the error (use RT_SRC_POS).
 */
static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
{
    LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
            pAioMgr, rc));
    LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
    LogRel(("AIOMgr: Please contact the product vendor\n"));

    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;

    /* Mark this manager as faulted and force the SIMPLE (failsafe) manager type
     * class-wide; presumably this affects how endpoints are (re)assigned — confirm with callers. */
    pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
    ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);

    /* The actual migration of endpoints to failsafe managers is not implemented yet. */
    AssertMsgFailed(("Implement\n"));
    return VINF_SUCCESS;
}
469
470/**
471 * Put a list of tasks in the pending request list of an endpoint.
472 */
473DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
474{
475 /* Add the rest of the tasks to the pending list */
476 if (!pEndpoint->AioMgr.pReqsPendingHead)
477 {
478 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
479 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
480 }
481 else
482 {
483 Assert(pEndpoint->AioMgr.pReqsPendingTail);
484 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
485 }
486
487 /* Update the tail. */
488 while (pTaskHead->pNext)
489 pTaskHead = pTaskHead->pNext;
490
491 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
492 pTaskHead->pNext = NULL;
493}
494
495/**
496 * Put one task in the pending request list of an endpoint.
497 */
498DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
499{
500 /* Add the rest of the tasks to the pending list */
501 if (!pEndpoint->AioMgr.pReqsPendingHead)
502 {
503 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
504 pEndpoint->AioMgr.pReqsPendingHead = pTask;
505 }
506 else
507 {
508 Assert(pEndpoint->AioMgr.pReqsPendingTail);
509 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
510 }
511
512 pEndpoint->AioMgr.pReqsPendingTail = pTask;
513 pTask->pNext = NULL;
514}
515
516/**
517 * Allocates a async I/O request.
518 *
519 * @returns Handle to the request.
520 * @param pAioMgr The I/O manager.
521 */
522static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
523{
524 /* Get a request handle. */
525 RTFILEAIOREQ hReq;
526 if (pAioMgr->iFreeEntry > 0)
527 {
528 pAioMgr->iFreeEntry--;
529 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
530 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
531 Assert(hReq != NIL_RTFILEAIOREQ);
532 }
533 else
534 {
535 int rc = RTFileAioReqCreate(&hReq);
536 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
537 }
538
539 return hReq;
540}
541
542/**
543 * Frees a async I/O request handle.
544 *
545 * @returns nothing.
546 * @param pAioMgr The I/O manager.
547 * @param hReq The I/O request handle to free.
548 */
549static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
550{
551 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
552 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
553
554 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
555 pAioMgr->iFreeEntry++;
556}
557
/**
 * Wrapper around RTFileAioCtxSubmit() which is also doing error handling.
 *
 * On VERR_FILE_AIO_INSUFFICIENT_RESSOURCES the unsubmitted tasks are queued
 * back on the endpoint and the manager's maximum is clamped to what the host
 * accepted.  Other submission errors complete the failing requests and
 * resubmit the rest one by one.
 *
 * @returns Always VINF_SUCCESS; submission errors are handled internally.
 * @param   pAioMgr      The I/O manager.
 * @param   pEndpoint    The endpoint the requests belong to.
 * @param   pahReqs      Array of prepared request handles to submit.
 * @param   cReqs        Number of requests in the array.
 */
static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
                                            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                            PRTFILEAIOREQ pahReqs, unsigned cReqs)
{
    /* Account the requests as active up front; failed ones are deducted below. */
    pAioMgr->cRequestsActive += cReqs;
    pEndpoint->AioMgr.cRequestsActive += cReqs;

    LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
    LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));

    int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
    if (RT_FAILURE(rc))
    {
        if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        {
            PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;

            /* Append any not submitted task to the waiting list. */
            for (size_t i = 0; i < cReqs; i++)
            {
                int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);

                if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
                {
                    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);

                    Assert(pTask->hReq == pahReqs[i]);
                    pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
                    pAioMgr->cRequestsActive--;
                    pEndpoint->AioMgr.cRequestsActive--;

                    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
                    {
                        /* Clear the pending flush */
                        Assert(pEndpoint->pFlushReq == pTask);
                        pEndpoint->pFlushReq = NULL;
                    }
                }
            }

            /* Clamp the maximum to what the host actually accepted. */
            pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;

            /* Print an entry in the release log */
            if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
            {
                pEpClass->fOutOfResourcesWarningPrinted = true;
                LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
                        pAioMgr->cRequestsActive));
            }

            LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
            LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
            rc = VINF_SUCCESS;
        }
        else /* Another kind of error happened (full disk, ...) */
        {
            /* An error happened. Find out which one caused the error and resubmit all other tasks. */
            for (size_t i = 0; i < cReqs; i++)
            {
                int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);

                if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
                {
                    /* We call ourself again to do any error handling which might come up now. */
                    rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
                    AssertRC(rc);
                }
                else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
                    pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
            }


            if (   pEndpoint->pFlushReq
                && !pAioMgr->cRequestsActive
                && !pEndpoint->fAsyncFlushSupported)
            {
                /*
                 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
                 * the async flush API.
                 * Happens only if we just noticed that this is not supported
                 * and the only active request was a flush.
                 */
                PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
                pEndpoint->pFlushReq = NULL;
                pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
                pdmacFileTaskFree(pEndpoint, pFlush);
            }
        }
    }

    return VINF_SUCCESS;
}
653
/**
 * Checks whether the given file range is locked by an active task and, if so,
 * queues the new task on that lock.
 *
 * @returns true if the range is locked and the task was queued on the lock
 *          (it will be resumed when the lock is released),
 *          false if the task may proceed immediately.
 * @param   pEndpoint     The endpoint to check.
 * @param   offStart      Start offset of the range.
 * @param   cbRange       Size of the range in bytes.
 * @param   pTask         The read/write task wanting to access the range.
 * @param   fAlignedReq   Whether the request is fully 512-byte aligned.
 */
static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                               RTFOFF offStart, size_t cbRange,
                                               PPDMACTASKFILE pTask, bool fAlignedReq)
{
    AssertMsg(   pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
              || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
              ("Invalid task type %d\n", pTask->enmTransferType));

    /*
     * If there is no unaligned request active and the current one is aligned
     * just pass it through.
     */
    if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
        return false;

    /* Exact-offset lookup first, then check the nearest range for overlap. */
    PPDMACFILERANGELOCK pRangeLock;
    pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
    if (!pRangeLock)
    {
        pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
        /* Check if we intersect with the range. */
        if (   !pRangeLock
            || !(   (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
                 && (pRangeLock->Core.KeyLast) >= offStart))
        {
            pRangeLock = NULL; /* False alarm */
        }
    }

    /* A lock overlapping our range exists: queue the task on it. */
    if (pRangeLock)
    {
        /* Add to the list. */
        pTask->pNext = NULL;

        if (!pRangeLock->pWaitingTasksHead)
        {
            Assert(!pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksHead = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        else
        {
            AssertPtr(pRangeLock->pWaitingTasksTail);
            pRangeLock->pWaitingTasksTail->pNext = pTask;
            pRangeLock->pWaitingTasksTail = pTask;
        }
        return true;
    }

    return false;
}
706
/**
 * Takes a file range lock for the given task.
 *
 * Inserts a new range lock record into the endpoint's AVL tree so that
 * overlapping tasks get deferred until this one completes.  Aligned
 * requests skip locking entirely while no unaligned request is active.
 *
 * @returns VBox status code (VERR_NO_MEMORY if the lock record allocation failed).
 * @param   pAioMgr       The I/O manager (owner of the lock memcache).
 * @param   pEndpoint     The endpoint the range belongs to.
 * @param   offStart      Start offset of the range to lock.
 * @param   cbRange       Size of the range in bytes.
 * @param   pTask         The task taking the lock.
 * @param   fAlignedReq   Whether the request is fully 512-byte aligned.
 */
static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
                                          PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
                                          RTFOFF offStart, size_t cbRange,
                                          PPDMACTASKFILE pTask, bool fAlignedReq)
{
    LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
                 pAioMgr, pEndpoint, offStart, cbRange, pTask));

    /* The caller must have checked for a conflicting lock already. */
    AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
              ("Range is already locked offStart=%RTfoff cbRange=%u\n",
               offStart, cbRange));

    /*
     * If there is no unaligned request active and the current one is aligned
     * just don't use the lock.
     */
    if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
    {
        pTask->pRangeLock = NULL;
        return VINF_SUCCESS;
    }

    PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
    if (!pRangeLock)
        return VERR_NO_MEMORY;

    /* Init the lock. */
    pRangeLock->Core.Key = offStart;
    pRangeLock->Core.KeyLast = offStart + cbRange - 1;
    pRangeLock->cRefs = 1;
    pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
    pRangeLock->pWaitingTasksHead = NULL;
    pRangeLock->pWaitingTasksTail = NULL;

    bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
    AssertMsg(fInserted, ("Range lock was not inserted!\n")); NOREF(fInserted);

    /* Let the task point to its lock. */
    pTask->pRangeLock = pRangeLock;
    pEndpoint->AioMgr.cLockedReqsActive++;

    return VINF_SUCCESS;
}
750
751static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
752 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
753 PPDMACFILERANGELOCK pRangeLock)
754{
755 PPDMACTASKFILE pTasksWaitingHead;
756
757 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
758 pAioMgr, pEndpoint, pRangeLock));
759
760 /* pRangeLock can be NULL if there was no lock assigned with the task. */
761 if (!pRangeLock)
762 return NULL;
763
764 Assert(pRangeLock->cRefs == 1);
765
766 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
767 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
768 pRangeLock->pWaitingTasksHead = NULL;
769 pRangeLock->pWaitingTasksTail = NULL;
770 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
771 pEndpoint->AioMgr.cLockedReqsActive--;
772
773 return pTasksWaitingHead;
774}
775
776static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
777 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
778 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
779{
780 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
781 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
782 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
783 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
784
785 pTask->fPrefetch = false;
786 pTask->cbBounceBuffer = 0;
787
788 /*
789 * Before we start to setup the request we have to check whether there is a task
790 * already active which range intersects with ours. We have to defer execution
791 * of this task in two cases:
792 * - The pending task is a write and the current is either read or write
793 * - The pending task is a read and the current task is a write task.
794 *
795 * To check whether a range is currently "locked" we use the AVL tree where every pending task
796 * is stored by its file offset range. The current task will be added to the active task
797 * and will be executed when the active one completes. (The method below
798 * which checks whether a range is already used will add the task)
799 *
800 * This is necessary because of the requirement to align all requests to a 512 boundary
801 * which is enforced by the host OS (Linux and Windows atm). It is possible that
802 * we have to process unaligned tasks and need to align them using bounce buffers.
803 * While the data is fetched from the file another request might arrive writing to
804 * the same range. This will result in data corruption if both are executed concurrently.
805 */
806 int rc = VINF_SUCCESS;
807 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
808 true /* fAlignedReq */);
809 if (!fLocked)
810 {
811 /* Get a request handle. */
812 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
813 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
814
815 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
816 {
817 /* Grow the file if needed. */
818 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
819 {
820 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
821 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
822 }
823
824 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
825 pTask->Off, pTask->DataSeg.pvSeg,
826 pTask->DataSeg.cbSeg, pTask);
827 }
828 else
829 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
830 pTask->Off, pTask->DataSeg.pvSeg,
831 pTask->DataSeg.cbSeg, pTask);
832 AssertRC(rc);
833
834 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
835 pTask->DataSeg.cbSeg,
836 pTask, true /* fAlignedReq */);
837
838 if (RT_SUCCESS(rc))
839 {
840 pTask->hReq = hReq;
841 *phReq = hReq;
842 }
843 }
844 else
845 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
846
847 return rc;
848}
849
850static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
851 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
852 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
853{
854 /*
855 * Check if the alignment requirements are met.
856 * Offset, transfer size and buffer address
857 * need to be on a 512 boundary.
858 */
859 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
860 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
861 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
862 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
863 && offStart == pTask->Off;
864
865 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
866 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
867 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
868 offStart, cbToTransfer, pEndpoint->cbFile));
869
870 pTask->fPrefetch = false;
871
872 /*
873 * Before we start to setup the request we have to check whether there is a task
874 * already active which range intersects with ours. We have to defer execution
875 * of this task in two cases:
876 * - The pending task is a write and the current is either read or write
877 * - The pending task is a read and the current task is a write task.
878 *
879 * To check whether a range is currently "locked" we use the AVL tree where every pending task
880 * is stored by its file offset range. The current task will be added to the active task
881 * and will be executed when the active one completes. (The method below
882 * which checks whether a range is already used will add the task)
883 *
884 * This is necessary because of the requirement to align all requests to a 512 boundary
885 * which is enforced by the host OS (Linux and Windows atm). It is possible that
886 * we have to process unaligned tasks and need to align them using bounce buffers.
887 * While the data is fetched from the file another request might arrive writing to
888 * the same range. This will result in data corruption if both are executed concurrently.
889 */
890 int rc = VINF_SUCCESS;
891 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
892 if (!fLocked)
893 {
894 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
895 void *pvBuf = pTask->DataSeg.pvSeg;
896
897 /* Get a request handle. */
898 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
899 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
900
901 if ( !fAlignedReq
902 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
903 {
904 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
905 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
906
907 /* Create bounce buffer. */
908 pTask->cbBounceBuffer = cbToTransfer;
909
910 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
911 pTask->Off, offStart));
912 pTask->offBounceBuffer = pTask->Off - offStart;
913
914 /** @todo I think we need something like a RTMemAllocAligned method here.
915 * Current assumption is that the maximum alignment is 4096byte
916 * (GPT disk on Windows)
917 * so we can use RTMemPageAlloc here.
918 */
919 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
920 if (RT_LIKELY(pTask->pvBounceBuffer))
921 {
922 pvBuf = pTask->pvBounceBuffer;
923
924 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
925 {
926 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
927 || RT_UNLIKELY(offStart != pTask->Off))
928 {
929 /* We have to fill the buffer first before we can update the data. */
930 LogFlow(("Prefetching data for task %#p\n", pTask));
931 pTask->fPrefetch = true;
932 enmTransferType = PDMACTASKFILETRANSFER_READ;
933 }
934 else
935 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
936 }
937 }
938 else
939 rc = VERR_NO_MEMORY;
940 }
941 else
942 pTask->cbBounceBuffer = 0;
943
944 if (RT_SUCCESS(rc))
945 {
946 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
947 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
948
949 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
950 {
951 /* Grow the file if needed. */
952 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
953 {
954 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
955 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
956 }
957
958 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
959 offStart, pvBuf, cbToTransfer, pTask);
960 }
961 else
962 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
963 offStart, pvBuf, cbToTransfer, pTask);
964 AssertRC(rc);
965
966 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
967 if (RT_SUCCESS(rc))
968 {
969 pTask->hReq = hReq;
970 *phReq = hReq;
971 }
972 else
973 {
974 /* Cleanup */
975 if (pTask->cbBounceBuffer)
976 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
977 }
978 }
979 }
980 else
981 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
982
983 return rc;
984}
985
/**
 * Processes a linked list of tasks for the given endpoint, preparing host AIO
 * requests for them and submitting the requests in batches.
 *
 * Processing stops early when a flush request is queued on the endpoint, when
 * the manager's limit of active requests is reached, when the endpoint's
 * bandwidth limit kicks in, or on a failure status; any unprocessed tasks are
 * put back on the endpoint's pending list.
 *
 * @returns VBox status code (VERR_FILE_AIO_INSUFFICIENT_RESSOURCES is
 *          converted to VINF_SUCCESS since it is not fatal).
 * @param   pTaskHead   Head of the singly linked list of tasks to process.
 * @param   pAioMgr     The I/O manager instance the endpoint is assigned to.
 * @param   pEndpoint   The endpoint the tasks belong to; must be in the
 *                      ACTIVE state.
 */
static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
                                                PPDMACEPFILEMGR pAioMgr,
                                                PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
{
    RTFILEAIOREQ apReqs[20];          /* Submission batch buffer; flushed whenever it fills up. */
    unsigned cRequests = 0;           /* Number of prepared requests currently in apReqs. */
    int rc = VINF_SUCCESS;

    AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
              ("Trying to process request lists of a non active endpoint!\n"));

    /* Go through the list and queue the requests until we get a flush request */
    while (   pTaskHead
           && !pEndpoint->pFlushReq
           && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
           && RT_SUCCESS(rc))
    {
        RTMSINTERVAL msWhenNext;
        PPDMACTASKFILE pCurr = pTaskHead;

        /* Bandwidth limited? Stop here and remember the earliest retry time. */
        if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
        {
            pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
            break;
        }

        pTaskHead = pTaskHead->pNext;

        pCurr->pNext = NULL;

        AssertMsg(RT_VALID_PTR(pCurr->pEndpoint) && pCurr->pEndpoint == pEndpoint,
                  ("Endpoints do not match\n"));

        switch (pCurr->enmTransferType)
        {
            case PDMACTASKFILETRANSFER_FLUSH:
            {
                /* If there is no data transfer request this flush request finished immediately. */
                if (pEndpoint->fAsyncFlushSupported)
                {
                    /* Issue a flush to the host. */
                    RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
                    AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));

                    LogFlow(("Flush request %#p\n", hReq));

                    rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
                    if (RT_FAILURE(rc))
                    {
                        /* Flushes aren't supported by the host: fall back to the
                         * synchronous completion path below and never try again. */
                        if (rc == VERR_NOT_SUPPORTED)
                            LogRel(("AIOMgr: Async flushes not supported\n"));
                        else
                            LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
                        pEndpoint->fAsyncFlushSupported = false;
                        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
                        rc = VINF_SUCCESS; /* Fake success */
                    }
                    else
                    {
                        pCurr->hReq = hReq;
                        apReqs[cRequests] = hReq;
                        pEndpoint->AioMgr.cReqsProcessed++;
                        cRequests++;
                    }
                }

                /* Without async flush support and no outstanding requests the
                 * flush completes right away; otherwise park it on the endpoint
                 * so it completes when the last active request finishes. */
                if (   !pEndpoint->AioMgr.cRequestsActive
                    && !pEndpoint->fAsyncFlushSupported)
                {
                    pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pCurr);
                }
                else
                {
                    Assert(!pEndpoint->pFlushReq);
                    pEndpoint->pFlushReq = pCurr;
                }
                break;
            }
            case PDMACTASKFILETRANSFER_READ:
            case PDMACTASKFILETRANSFER_WRITE:
            {
                RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;

                if (pCurr->hReq == NIL_RTFILEAIOREQ)
                {
                    if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
                        rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
                    else
                        AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));

                    AssertRC(rc);
                }
                else
                {
                    /* Request was already prepared earlier (e.g. a deferred/restarted task). */
                    LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
                    hReq = pCurr->hReq;
                }

                LogFlow(("Read/Write request %#p\n", hReq));

                /* hReq stays NIL when the prepare routine deferred the task
                 * (e.g. range lock conflict); nothing to submit then. */
                if (hReq != NIL_RTFILEAIOREQ)
                {
                    apReqs[cRequests] = hReq;
                    cRequests++;
                }
                break;
            }
            default:
                AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
        } /* switch transfer type */

        /* Queue the requests if the array is full. */
        if (cRequests == RT_ELEMENTS(apReqs))
        {
            rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
            cRequests = 0;
            AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                      ("Unexpected return code\n"));
        }
    }

    /* Submit the remaining partial batch. */
    if (cRequests)
    {
        rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
        AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                  ("Unexpected return code rc=%Rrc\n", rc));
    }

    if (pTaskHead)
    {
        /* Add the rest of the tasks to the pending list */
        pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);

        if (RT_UNLIKELY(   pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
                        && !pEndpoint->pFlushReq))
        {
#if 0
            /*
             * The I/O manager has no room left for more requests
             * but there are still requests to process.
             * Create a new I/O manager and let it handle some endpoints.
             */
            pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
#else
            /* Grow the I/O manager */
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
#endif
        }
    }

    /* Insufficient resources are not fatal. */
    if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
        rc = VINF_SUCCESS;

    return rc;
}
1145
1146/**
1147 * Adds all pending requests for the given endpoint
1148 * until a flush request is encountered or there is no
1149 * request anymore.
1150 *
1151 * @returns VBox status code.
1152 * @param pAioMgr The async I/O manager for the endpoint
1153 * @param pEndpoint The endpoint to get the requests from.
1154 */
1155static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1156 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1157{
1158 int rc = VINF_SUCCESS;
1159 PPDMACTASKFILE pTasksHead = NULL;
1160
1161 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1162 ("Trying to process request lists of a non active endpoint!\n"));
1163
1164 Assert(!pEndpoint->pFlushReq);
1165
1166 /* Check the pending list first */
1167 if (pEndpoint->AioMgr.pReqsPendingHead)
1168 {
1169 LogFlow(("Queuing pending requests first\n"));
1170
1171 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1172 /*
1173 * Clear the list as the processing routine will insert them into the list
1174 * again if it gets a flush request.
1175 */
1176 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1177 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1178 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1179 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
1180 }
1181
1182 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1183 {
1184 /* Now the request queue. */
1185 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1186 if (pTasksHead)
1187 {
1188 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1189 AssertRC(rc);
1190 }
1191 }
1192
1193 return rc;
1194}
1195
/**
 * Processes the blocking event currently pending on the given I/O manager
 * (add/remove/close endpoint, shutdown, suspend or resume) and releases the
 * posting thread once the event has been fully handled.
 *
 * @returns VBox status code.
 * @param   pAioMgr     The I/O manager instance; fBlockingEventPending must
 *                      be set by the poster before calling this.
 */
static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
{
    int rc = VINF_SUCCESS;
    /* Set when the event is fully processed so the waiting thread can be signalled. */
    bool fNotifyWaiter = false;

    LogFlowFunc((": Enter\n"));

    Assert(pAioMgr->fBlockingEventPending);

    switch (pAioMgr->enmBlockingEvent)
    {
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(RT_VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));

            pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;

            /* Link the new endpoint at the head of the doubly linked endpoint list. */
            pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
            pEndpointNew->AioMgr.pEndpointPrev = NULL;
            if (pAioMgr->pEndpointsHead)
                pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
            pAioMgr->pEndpointsHead = pEndpointNew;

            /* Assign the completion point to this file. */
            rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
            fNotifyWaiter = true;
            pAioMgr->cEndpoints++;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(RT_VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));

            pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
            /* Only notify now if nothing is left in flight; otherwise the waiter is
             * released later when the last outstanding request completes. */
            fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
        {
            PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
            AssertMsg(RT_VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));

            if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
            {
                LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));

                /* Make sure all tasks finished. Process the queues a last time first. */
                rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
                AssertRC(rc);

                pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
                fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
            }
            else if (   (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
                     && (!pEndpointClose->AioMgr.cRequestsActive))
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
            /* Defer the notification until all outstanding requests completed. */
            if (!pAioMgr->cRequestsActive)
                fNotifyWaiter = true;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
            break;
        }
        case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
        {
            pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
            fNotifyWaiter = true;
            break;
        }
        default:
            AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
    }

    if (fNotifyWaiter)
    {
        /* Clear the pending flag before signalling so the poster sees a consistent state. */
        ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
        pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;

        /* Release the waiting thread. */
        LogFlow(("Signalling waiter\n"));
        rc = RTSemEventSignal(pAioMgr->EventSemBlock);
        AssertRC(rc);
    }

    LogFlowFunc((": Leave\n"));
    return rc;
}
1292
/**
 * Checks all endpoints for pending events or new requests.
 *
 * Active endpoints without an outstanding flush get their queued requests
 * submitted; endpoints that left the ACTIVE state and have no more requests
 * in flight are finalized (file reopened, then either migrated to their
 * destination manager or the blocked poster is released).
 *
 * @returns VBox status code.
 * @param   pAioMgr    The I/O manager handle.
 */
static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
{
    /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;

    /* Reset the bandwidth-limit wakeup; queueing below recomputes the minimum. */
    pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;

    while (pEndpoint)
    {
        if (!pEndpoint->pFlushReq
            && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
            && !pEndpoint->AioMgr.fMoving)
        {
            rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
            if (RT_FAILURE(rc))
                return rc;
        }
        else if (   !pEndpoint->AioMgr.cRequestsActive
                 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
        {
            /* Reopen the file so that the new endpoint can re-associate with the file */
            RTFileClose(pEndpoint->hFile);
            rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
            AssertRC(rc); /* NOTE(review): open failure is only asserted, not propagated. */

            if (pEndpoint->AioMgr.fMoving)
            {
                /* Hand the endpoint over to its destination manager. */
                pEndpoint->AioMgr.fMoving = false;
                pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
            }
            else
            {
                /* The endpoint was being removed/closed on behalf of a blocked poster. */
                Assert(pAioMgr->fBlockingEventPending);
                ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);

                /* Release the waiting thread. */
                LogFlow(("Signalling waiter\n"));
                rc = RTSemEventSignal(pAioMgr->EventSemBlock);
                AssertRC(rc);
            }
        }

        pEndpoint = pEndpoint->AioMgr.pEndpointNext;
    }

    return rc;
}
1347
1348/**
1349 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1350 */
1351static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1352{
1353 size_t cbTransfered = 0;
1354 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1355
1356 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1357}
1358
/**
 * Worker for completing a single host AIO request.
 *
 * Handles flush and data-transfer completions: on failure it either reports
 * fatal errors to the task owner or migrates the endpoint to a failsafe
 * (simple) manager; on success it restarts partially completed transfers,
 * turns a finished prefetch read into the actual bounce-buffered write, or
 * finally copies back bounce data, releases the range lock and invokes the
 * task's completion callback.
 *
 * @param   pAioMgr       The I/O manager the request was submitted on.
 * @param   hReq          The completed IPRT AIO request handle.
 * @param   rcReq         Status code the request completed with.
 * @param   cbTransfered  Number of bytes transferred by this (partial) request.
 */
static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
                                               int rcReq, size_t cbTransfered)
{
    int rc = VINF_SUCCESS;
    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
    PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
    PPDMACTASKFILE pTasksWaiting;

    LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));

    pEndpoint = pTask->pEndpoint;

    pTask->hReq = NIL_RTFILEAIOREQ;

    /* Bookkeeping: one less request in flight, one more processed. */
    pAioMgr->cRequestsActive--;
    pEndpoint->AioMgr.cRequestsActive--;
    pEndpoint->AioMgr.cReqsProcessed++;

    /*
     * It is possible that the request failed on Linux with kernels < 2.6.23
     * if the passed buffer was allocated with remap_pfn_range or if the file
     * is on an NFS endpoint which does not support async and direct I/O at the same time.
     * The endpoint will be migrated to a failsafe manager in case a request fails.
     */
    if (RT_FAILURE(rcReq))
    {
        /* Free bounce buffers and the IPRT request. */
        pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
            pEndpoint->fAsyncFlushSupported = false;
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
            /* The other method will take over now. */

            pEndpoint->pFlushReq = NULL;
            /* Call completion callback */
            LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
            pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
            pdmacFileTaskFree(pEndpoint, pTask);
        }
        else
        {
            /* Free the lock and process pending tasks if necessary */
            pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
            rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
            AssertRC(rc);

            if (pTask->cbBounceBuffer)
                RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);

            /*
             * Fatal errors are reported to the guest and non-fatal errors
             * will cause a migration to the failsafe manager in the hope
             * that the error disappears.
             */
            if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
            {
                /* Queue the request on the pending list. */
                pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
                pEndpoint->AioMgr.pReqsPendingHead = pTask;

                /* Create a new failsafe manager if necessary. */
                if (!pEndpoint->AioMgr.fMoving)
                {
                    PPDMACEPFILEMGR pAioMgrFailsafe;

                    LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
                            RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));

                    pEndpoint->AioMgr.fMoving = true;

                    rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
                                               &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
                    AssertRC(rc);

                    pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;

                    /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
                    pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
                }

                /* If this was the last request for the endpoint migrate it to the new manager. */
                if (!pEndpoint->AioMgr.cRequestsActive)
                {
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending); NOREF(fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
            else
            {
                /* Fatal error: report it straight to the task owner. */
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);
            }
        }
    }
    else
    {
        if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
        {
            /* Clear pending flush */
            AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
            pEndpoint->pFlushReq = NULL;
            pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

            /* Call completion callback */
            LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
            pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
            pdmacFileTaskFree(pEndpoint, pTask);
        }
        else
        {
            /*
             * Restart an incomplete transfer.
             * This usually means that the request will return an error now
             * but to get the cause of the error (disk full, file too big, I/O error, ...)
             * the transfer needs to be continued.
             */
            pTask->cbTransfered += cbTransfered;

            if (RT_UNLIKELY(   pTask->cbTransfered < pTask->DataSeg.cbSeg
                            || (   pTask->cbBounceBuffer
                                && pTask->cbTransfered < pTask->cbBounceBuffer)))
            {
                RTFOFF offStart;
                size_t cbToTransfer;
                uint8_t *pbBuf = NULL;

                LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
                         pTask, cbTransfered));
                Assert(cbTransfered % 512 == 0);

                /* Continue right after the already transferred part, either in the
                 * bounce buffer or directly in the caller's data segment. */
                if (pTask->cbBounceBuffer)
                {
                    AssertPtr(pTask->pvBounceBuffer);
                    offStart = (pTask->Off & ~((RTFOFF)512-1)) + pTask->cbTransfered;
                    cbToTransfer = pTask->cbBounceBuffer - pTask->cbTransfered;
                    pbBuf = (uint8_t *)pTask->pvBounceBuffer + pTask->cbTransfered;
                }
                else
                {
                    Assert(!pTask->pvBounceBuffer);
                    offStart = pTask->Off + pTask->cbTransfered;
                    cbToTransfer = pTask->DataSeg.cbSeg - pTask->cbTransfered;
                    pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + pTask->cbTransfered;
                }

                if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                {
                    rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
                                                 pbBuf, cbToTransfer, pTask);
                }
                else
                {
                    AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
                              ("Invalid transfer type\n"));
                    rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
                                                  pbBuf, cbToTransfer, pTask);
                }
                AssertRC(rc);

                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else if (pTask->fPrefetch)
            {
                /* The prefetch read filled the bounce buffer; merge in the caller's
                 * data and issue the actual (aligned) write. */
                Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
                Assert(pTask->cbBounceBuffer);

                memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                       pTask->DataSeg.pvSeg,
                       pTask->DataSeg.cbSeg);

                /* Write it now. */
                pTask->fPrefetch = false;
                RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
                size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);

                pTask->cbTransfered = 0;

                /* Grow the file if needed. */
                if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
                {
                    ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
                    RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
                }

                rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
                                              offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
                AssertRC(rc);
                pTask->hReq = hReq;
                rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
                AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
                          ("Unexpected return code rc=%Rrc\n", rc));
            }
            else
            {
                /* Transfer fully done: copy bounce data back for reads and free it. */
                if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
                {
                    if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
                        memcpy(pTask->DataSeg.pvSeg,
                               ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
                               pTask->DataSeg.cbSeg);

                    RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
                }

                pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);

                /* Free the lock and process pending tasks if necessary */
                pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
                if (pTasksWaiting)
                {
                    rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
                    AssertRC(rc);
                }

                /* Call completion callback */
                LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
                pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
                pdmacFileTaskFree(pEndpoint, pTask);

                /*
                 * If there is no request left on the endpoint but a flush request is set
                 * it completed now and we notify the owner.
                 * Furthermore we look for new requests and continue.
                 */
                if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
                {
                    /* Call completion callback */
                    pTask = pEndpoint->pFlushReq;
                    pEndpoint->pFlushReq = NULL;

                    AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));

                    pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
                    pdmacFileTaskFree(pEndpoint, pTask);
                }
                else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
                {
                    /* If the endpoint is about to be migrated do it now. */
                    bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
                    Assert(!fReqsPending); NOREF(fReqsPending);

                    rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
                    AssertRC(rc);
                }
            }
        } /* Not a flush request */
    } /* request completed successfully */
}
1616
/** Helper macro for checking for error codes.
 *
 * On failure runs the error handler and returns its status from the calling
 * function.  Wrapped in do/while(0) so the expansion behaves like a single
 * statement: it is safe in unbraced if/else bodies and requires the trailing
 * semicolon at the call site (avoids the classic swallowed-semicolon /
 * dangling-else macro hazards).
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1624
/**
 * The normal I/O manager using the RTFileAio* API
 *
 * Thread main loop: sleeps while idle, processes blocking events posted by
 * other threads, queues new endpoint requests, waits for and completes host
 * AIO requests, periodically updates per-endpoint load statistics and grows
 * the manager when it runs out of request slots.
 *
 * @returns VBox status code.
 * @param   hThreadSelf    Handle of the thread.
 * @param   pvUser         Opaque user data.
 */
DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
{
    int rc = VINF_SUCCESS;
    PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
    uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
    NOREF(hThreadSelf);

    while (   pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
           || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
           || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
    {
        if (!pAioMgr->cRequestsActive)
        {
            /* Nothing in flight: sleep until woken up or the bandwidth-limit
             * timeout expires; fWaitingEventSem/fWokenUp close the race with
             * concurrent wakers. */
            ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
            if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
                rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
            ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
            Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);

            LogFlow(("Got woken up\n"));
            ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
        }

        /* Check for an external blocking event first. */
        if (pAioMgr->fBlockingEventPending)
        {
            rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
            CHECK_RC(pAioMgr, rc);
        }

        if (RT_LIKELY(   pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
                      || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
        {
            /* We got woken up because an endpoint issued new requests. Queue them. */
            rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
            CHECK_RC(pAioMgr, rc);

            while (pAioMgr->cRequestsActive)
            {
                RTFILEAIOREQ apReqs[20];
                uint32_t cReqsCompleted = 0;
                size_t cReqsWait;

                /* Cap the number of completions we collect at the batch buffer size. */
                if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
                    cReqsWait = RT_ELEMENTS(apReqs);
                else
                    cReqsWait = pAioMgr->cRequestsActive;

                LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));

                /* Wait for at least one completion; collect up to cReqsWait. */
                rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
                                      1,
                                      RT_INDEFINITE_WAIT, apReqs,
                                      cReqsWait, &cReqsCompleted);
                if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
                    CHECK_RC(pAioMgr, rc);

                LogFlow(("%d tasks completed\n", cReqsCompleted));

                for (uint32_t i = 0; i < cReqsCompleted; i++)
                    pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);

                /* Check for an external blocking event before we go to sleep again. */
                if (pAioMgr->fBlockingEventPending)
                {
                    rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
                    CHECK_RC(pAioMgr, rc);
                }

                /* Update load statistics. */
                uint64_t uMillisCurr = RTTimeMilliTS();
                if (uMillisCurr > uMillisEnd)
                {
                    PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;

                    /* Calculate timespan. */
                    uMillisCurr -= uMillisEnd;

                    while (pEndpointCurr)
                    {
                        pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
                        pEndpointCurr->AioMgr.cReqsProcessed = 0;
                        pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
                    }

                    /* Set new update interval */
                    uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
                }

                /* Check endpoints for new requests. */
                if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
                {
                    rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
                    CHECK_RC(pAioMgr, rc);
                }
            } /* while requests are active. */

            if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
            {
                /* Drained all requests; now it is safe to grow the AIO context. */
                rc = pdmacFileAioMgrNormalGrow(pAioMgr);
                AssertRC(rc);
                Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);

                rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
                CHECK_RC(pAioMgr, rc);
            }
        } /* if still running */
    } /* while running */

    LogFlowFunc(("rc=%Rrc\n", rc));
    return rc;
}
1744
1745#undef CHECK_RC
1746
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use