VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 32427

Last change on this file since 32427 was 32427, checked in by vboxsync, 15 years ago

AsyncCompletion: Make submitting requests to the host error-proof. Fail the responsible request in case of an error (VERR_DISK_FULL) instead of aborting I/O processing.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 64.5 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 32427 2010-09-11 14:42:42Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Oracle Corporation
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 */
18#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
19#include <iprt/types.h>
20#include <iprt/asm.h>
21#include <iprt/file.h>
22#include <iprt/mem.h>
23#include <iprt/string.h>
24#include <iprt/assert.h>
25#include <VBox/log.h>
26
27#include "PDMAsyncCompletionFileInternal.h"
28
29/** The update period for the I/O load statistics in ms. */
30#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
31/** Maximum number of requests a manager will handle. */
32#define PDMACEPFILEMGR_REQS_STEP 512
33
34/*******************************************************************************
35* Internal functions *
36*******************************************************************************/
37static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
38 PPDMACEPFILEMGR pAioMgr,
39 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
40
41static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
42 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
43 PPDMACFILERANGELOCK pRangeLock);
44
45static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
46 int rc, size_t cbTransfered);
47
48int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
49{
50 int rc = VINF_SUCCESS;
51
52 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
53
54 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
55 if (rc == VERR_OUT_OF_RANGE)
56 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
57
58 if (RT_SUCCESS(rc))
59 {
60 /* Initialize request handle array. */
61 pAioMgr->iFreeEntry = 0;
62 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
63 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
64
65 if (pAioMgr->pahReqsFree)
66 {
67 /* Create the range lock memcache. */
68 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
69 0, UINT32_MAX, NULL, NULL, NULL, 0);
70 if (RT_SUCCESS(rc))
71 return VINF_SUCCESS;
72
73 RTMemFree(pAioMgr->pahReqsFree);
74 }
75 else
76 {
77 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
78 rc = VERR_NO_MEMORY;
79 }
80 }
81
82 return rc;
83}
84
85void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
86{
87 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
88
89 while (pAioMgr->iFreeEntry > 0)
90 {
91 pAioMgr->iFreeEntry--;
92 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
93 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
94 }
95
96 RTMemFree(pAioMgr->pahReqsFree);
97 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
98}
99
100/**
101 * Sorts the endpoint list with insertion sort.
102 */
103static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
104{
105 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
106
107 pEpPrev = pAioMgr->pEndpointsHead;
108 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
109
110 while (pEpCurr)
111 {
112 /* Remember the next element to sort because the list might change. */
113 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
114
115 /* Unlink the current element from the list. */
116 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
117 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
118
119 if (pPrev)
120 pPrev->AioMgr.pEndpointNext = pNext;
121 else
122 pAioMgr->pEndpointsHead = pNext;
123
124 if (pNext)
125 pNext->AioMgr.pEndpointPrev = pPrev;
126
127 /* Go back until we reached the place to insert the current endpoint into. */
128 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
129 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
130
131 /* Link the endpoint into the list. */
132 if (pEpPrev)
133 pNext = pEpPrev->AioMgr.pEndpointNext;
134 else
135 pNext = pAioMgr->pEndpointsHead;
136
137 pEpCurr->AioMgr.pEndpointNext = pNext;
138 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
139
140 if (pNext)
141 pNext->AioMgr.pEndpointPrev = pEpCurr;
142
143 if (pEpPrev)
144 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
145 else
146 pAioMgr->pEndpointsHead = pEpCurr;
147
148 pEpCurr = pEpNextToSort;
149 }
150
151#ifdef DEBUG
152 /* Validate sorting alogrithm */
153 unsigned cEndpoints = 0;
154 pEpCurr = pAioMgr->pEndpointsHead;
155
156 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
157 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
158
159 while (pEpCurr)
160 {
161 cEndpoints++;
162
163 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
164 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
165
166 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
167 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
168
169 pEpCurr = pNext;
170 }
171
172 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
173
174#endif
175}
176
177/**
178 * Removes an endpoint from the currently assigned manager.
179 *
180 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
181 * FALSE otherwise.
182 * @param pEndpointRemove The endpoint to remove.
183 */
184static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
185{
186 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
187 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
188 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
189
190 pAioMgr->cEndpoints--;
191
192 if (pPrev)
193 pPrev->AioMgr.pEndpointNext = pNext;
194 else
195 pAioMgr->pEndpointsHead = pNext;
196
197 if (pNext)
198 pNext->AioMgr.pEndpointPrev = pPrev;
199
200 /* Make sure that there is no request pending on this manager for the endpoint. */
201 if (!pEndpointRemove->AioMgr.cRequestsActive)
202 {
203 Assert(!pEndpointRemove->pFlushReq);
204
205 /* Reopen the file so that the new endpoint can reassociate with the file */
206 RTFileClose(pEndpointRemove->File);
207 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
208 AssertRC(rc);
209 return false;
210 }
211
212 return true;
213}
214
215static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
216{
217 /* Balancing doesn't make sense with only one endpoint. */
218 if (pAioMgr->cEndpoints == 1)
219 return false;
220
221 /* Doesn't make sens to move endpoints if only one produces the whole load */
222 unsigned cEndpointsWithLoad = 0;
223
224 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
225
226 while (pCurr)
227 {
228 if (pCurr->AioMgr.cReqsPerSec)
229 cEndpointsWithLoad++;
230
231 pCurr = pCurr->AioMgr.pEndpointNext;
232 }
233
234 return (cEndpointsWithLoad > 1);
235}
236
237/**
238 * Creates a new I/O manager and spreads the I/O load of the endpoints
239 * between the given I/O manager and the new one.
240 *
241 * @returns nothing.
242 * @param pAioMgr The I/O manager with high I/O load.
243 */
244static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
245{
246 PPDMACEPFILEMGR pAioMgrNew = NULL;
247 int rc = VINF_SUCCESS;
248
249 /*
250 * Check if balancing would improve the situation.
251 */
252 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
253 {
254 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
255
256 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
257 if (RT_SUCCESS(rc))
258 {
259 /* We will sort the list by request count per second. */
260 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
261
262 /* Now move some endpoints to the new manager. */
263 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
264 unsigned cReqsOther = 0;
265 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
266
267 while (pCurr)
268 {
269 if (cReqsHere <= cReqsOther)
270 {
271 /*
272 * The other manager has more requests to handle now.
273 * We will keep the current endpoint.
274 */
275 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
276 cReqsHere += pCurr->AioMgr.cReqsPerSec;
277 pCurr = pCurr->AioMgr.pEndpointNext;
278 }
279 else
280 {
281 /* Move to other endpoint. */
282 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
283 cReqsOther += pCurr->AioMgr.cReqsPerSec;
284
285 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
286
287 pCurr = pCurr->AioMgr.pEndpointNext;
288
289 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
290
291 if (fReqsPending)
292 {
293 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
294 pMove->AioMgr.fMoving = true;
295 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
296 }
297 else
298 {
299 pMove->AioMgr.fMoving = false;
300 pMove->AioMgr.pAioMgrDst = NULL;
301 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
302 }
303 }
304 }
305 }
306 else
307 {
308 /* Don't process further but leave a log entry about reduced performance. */
309 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
310 }
311 }
312 else
313 Log(("AIOMgr: Load balancing would not improve anything\n"));
314}
315
316/**
317 * Increase the maximum number of active requests for the given I/O manager.
318 *
319 * @returns VBox status code.
320 * @param pAioMgr The I/O manager to grow.
321 */
322static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
323{
324 int rc = VINF_SUCCESS;
325 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
326
327 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
328
329 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
330 && !pAioMgr->cRequestsActive,
331 ("Invalid state of the I/O manager\n"));
332
333#ifdef RT_OS_WINDOWS
334 /*
335 * Reopen the files of all assigned endpoints first so we can assign them to the new
336 * I/O context.
337 */
338 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
339
340 while (pCurr)
341 {
342 RTFileClose(pCurr->File);
343 rc = RTFileOpen(&pCurr->File, pCurr->Core.pszUri, pCurr->fFlags);
344 AssertRC(rc);
345
346 pCurr = pCurr->AioMgr.pEndpointNext;
347 }
348#endif
349
350 /* Create the new bigger context. */
351 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
352
353 rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
354 if (rc == VERR_OUT_OF_RANGE)
355 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);
356
357 if (RT_SUCCESS(rc))
358 {
359 /* Close the old context. */
360 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
361 AssertRC(rc);
362
363 pAioMgr->hAioCtx = hAioCtxNew;
364
365 /* Create a new I/O task handle array */
366 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
367 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
368
369 if (pahReqNew)
370 {
371 /* Copy the cached request handles. */
372 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
373 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
374
375 RTMemFree(pAioMgr->pahReqsFree);
376 pAioMgr->pahReqsFree = pahReqNew;
377 pAioMgr->cReqEntries = cReqEntriesNew;
378 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
379 pAioMgr->cRequestsActiveMax));
380 }
381 else
382 rc = VERR_NO_MEMORY;
383 }
384
385#ifdef RT_OS_WINDOWS
386 /* Assign the file to the new context. */
387 pCurr = pAioMgr->pEndpointsHead;
388
389 while (pCurr)
390 {
391 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->File);
392 AssertRC(rc);
393
394 pCurr = pCurr->AioMgr.pEndpointNext;
395 }
396#endif
397
398 if (RT_FAILURE(rc))
399 {
400 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
401 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
402 }
403
404 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
405 LogFlowFunc(("returns rc=%Rrc\n", rc));
406
407 return rc;
408}
409
410/**
411 * Checks if a given status code is fatal.
412 * Non fatal errors can be fixed by migrating the endpoint to a
413 * failsafe manager.
414 *
415 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
416 * false If the error can be fixed by a migration. (image on NFS disk for example)
417 * @param rcReq The status code to check.
418 */
419DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
420{
421 return rcReq == VERR_DEV_IO_ERROR
422 || rcReq == VERR_FILE_IO_ERROR
423 || rcReq == VERR_DISK_IO_ERROR
424 || rcReq == VERR_DISK_FULL
425 || rcReq == VERR_FILE_TOO_BIG;
426}
427
428/**
429 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
430 *
431 * @returns VBox status code
432 * @param pAioMgr The I/O manager the error ocurred on.
433 * @param rc The error code.
434 */
435static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
436{
437 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
438 pAioMgr, rc));
439 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
440 LogRel(("AIOMgr: Please contact the product vendor\n"));
441
442 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
443
444 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
445 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
446
447 AssertMsgFailed(("Implement\n"));
448 return VINF_SUCCESS;
449}
450
451/**
452 * Put a list of tasks in the pending request list of an endpoint.
453 */
454DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
455{
456 /* Add the rest of the tasks to the pending list */
457 if (!pEndpoint->AioMgr.pReqsPendingHead)
458 {
459 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
460 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
461 }
462 else
463 {
464 Assert(pEndpoint->AioMgr.pReqsPendingTail);
465 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
466 }
467
468 /* Update the tail. */
469 while (pTaskHead->pNext)
470 pTaskHead = pTaskHead->pNext;
471
472 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
473 pTaskHead->pNext = NULL;
474}
475
476/**
477 * Put one task in the pending request list of an endpoint.
478 */
479DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
480{
481 /* Add the rest of the tasks to the pending list */
482 if (!pEndpoint->AioMgr.pReqsPendingHead)
483 {
484 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
485 pEndpoint->AioMgr.pReqsPendingHead = pTask;
486 }
487 else
488 {
489 Assert(pEndpoint->AioMgr.pReqsPendingTail);
490 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
491 }
492
493 pEndpoint->AioMgr.pReqsPendingTail = pTask;
494 pTask->pNext = NULL;
495}
496
497/**
498 * Allocates a async I/O request.
499 *
500 * @returns Handle to the request.
501 * @param pAioMgr The I/O manager.
502 */
503static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
504{
505 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
506
507 /* Get a request handle. */
508 if (pAioMgr->iFreeEntry > 0)
509 {
510 pAioMgr->iFreeEntry--;
511 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
512 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
513 Assert(hReq != NIL_RTFILEAIOREQ);
514 }
515 else
516 {
517 int rc = RTFileAioReqCreate(&hReq);
518 AssertRC(rc);
519 }
520
521 return hReq;
522}
523
524/**
525 * Frees a async I/O request handle.
526 *
527 * @returns nothing.
528 * @param pAioMgr The I/O manager.
529 * @param hReq The I/O request handle to free.
530 */
531static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
532{
533 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
534 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
535
536 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
537 pAioMgr->iFreeEntry++;
538}
539
540/**
541 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
542 */
543static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
544 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
545 PRTFILEAIOREQ pahReqs, unsigned cReqs)
546{
547 int rc;
548
549 pAioMgr->cRequestsActive += cReqs;
550 pEndpoint->AioMgr.cRequestsActive += cReqs;
551
552 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
553 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
554
555 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
556 if (RT_FAILURE(rc))
557 {
558 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
559 {
560 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
561
562 /* Append any not submitted task to the waiting list. */
563 for (size_t i = 0; i < cReqs; i++)
564 {
565 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
566
567 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
568 {
569 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
570
571 Assert(pTask->hReq == pahReqs[i]);
572 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
573 pAioMgr->cRequestsActive--;
574 pEndpoint->AioMgr.cRequestsActive--;
575
576 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
577 {
578 /* Clear the pending flush */
579 Assert(pEndpoint->pFlushReq == pTask);
580 pEndpoint->pFlushReq = NULL;
581 }
582 }
583 }
584
585 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
586
587 /* Print an entry in the release log */
588 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
589 {
590 pEpClass->fOutOfResourcesWarningPrinted = true;
591 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
592 pAioMgr->cRequestsActive));
593 }
594
595 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
596 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
597 rc = VINF_SUCCESS;
598 }
599 else /* Another kind of error happened (full disk, ...) */
600 {
601 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
602 for (size_t i = 0; i < cReqs; i++)
603 {
604 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
605
606 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
607 {
608 /* We call ourself again to do any error handling which might come up now. */
609 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
610 AssertRC(rc);
611 }
612 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
613 {
614 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
615
616 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
617 }
618 }
619
620
621 if ( pEndpoint->pFlushReq
622 && !pAioMgr->cRequestsActive
623 && !pEndpoint->fAsyncFlushSupported)
624 {
625 /*
626 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
627 * the async flush API.
628 * Happens only if this we just noticed that this is not supported
629 * and the only active request was a flush.
630 */
631 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
632 pEndpoint->pFlushReq = NULL;
633 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
634 pdmacFileTaskFree(pEndpoint, pFlush);
635 }
636 }
637 }
638
639 return VINF_SUCCESS;
640}
641
642static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
643 RTFOFF offStart, size_t cbRange,
644 PPDMACTASKFILE pTask)
645{
646 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
647
648 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
649 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
650 ("Invalid task type %d\n", pTask->enmTransferType));
651
652 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
653 if (!pRangeLock)
654 {
655 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
656 /* Check if we intersect with the range. */
657 if ( !pRangeLock
658 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
659 && (pRangeLock->Core.KeyLast) >= offStart))
660 {
661 pRangeLock = NULL; /* False alarm */
662 }
663 }
664
665 /* Check whether we have one of the situations explained below */
666 if ( pRangeLock
667#if 0 /** @todo: later. For now we will just block all requests if they interfere */
668 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
669 || (!pRangeLock->fReadLock)
670#endif
671 )
672 {
673 /* Add to the list. */
674 pTask->pNext = NULL;
675
676 if (!pRangeLock->pWaitingTasksHead)
677 {
678 Assert(!pRangeLock->pWaitingTasksTail);
679 pRangeLock->pWaitingTasksHead = pTask;
680 pRangeLock->pWaitingTasksTail = pTask;
681 }
682 else
683 {
684 AssertPtr(pRangeLock->pWaitingTasksTail);
685 pRangeLock->pWaitingTasksTail->pNext = pTask;
686 pRangeLock->pWaitingTasksTail = pTask;
687 }
688 return true;
689 }
690
691 return false;
692}
693
694static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
695 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
696 RTFOFF offStart, size_t cbRange,
697 PPDMACTASKFILE pTask)
698{
699 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
700 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
701 offStart, cbRange));
702
703 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
704 if (!pRangeLock)
705 return VERR_NO_MEMORY;
706
707 /* Init the lock. */
708 pRangeLock->Core.Key = offStart;
709 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
710 pRangeLock->cRefs = 1;
711 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
712 pRangeLock->pWaitingTasksHead = NULL;
713 pRangeLock->pWaitingTasksTail = NULL;
714
715 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
716 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
717
718 /* Let the task point to its lock. */
719 pTask->pRangeLock = pRangeLock;
720
721 return VINF_SUCCESS;
722}
723
724static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
725 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
726 PPDMACFILERANGELOCK pRangeLock)
727{
728 PPDMACTASKFILE pTasksWaitingHead;
729
730 AssertPtr(pRangeLock);
731 Assert(pRangeLock->cRefs == 1);
732
733 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
734 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
735 pRangeLock->pWaitingTasksHead = NULL;
736 pRangeLock->pWaitingTasksTail = NULL;
737 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
738
739 return pTasksWaitingHead;
740}
741
742static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
743 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
744 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
745{
746 int rc = VINF_SUCCESS;
747 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
748 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
749 void *pvBuf = pTask->DataSeg.pvSeg;
750
751 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
752 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
753 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
754 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
755
756 pTask->fPrefetch = false;
757 pTask->cbBounceBuffer = 0;
758
759 /*
760 * Before we start to setup the request we have to check whether there is a task
761 * already active which range intersects with ours. We have to defer execution
762 * of this task in two cases:
763 * - The pending task is a write and the current is either read or write
764 * - The pending task is a read and the current task is a write task.
765 *
766 * To check whether a range is currently "locked" we use the AVL tree where every pending task
767 * is stored by its file offset range. The current task will be added to the active task
768 * and will be executed when the active one completes. (The method below
769 * which checks whether a range is already used will add the task)
770 *
771 * This is neccessary because of the requirement to align all requests to a 512 boundary
772 * which is enforced by the host OS (Linux and Windows atm). It is possible that
773 * we have to process unaligned tasks and need to align them using bounce buffers.
774 * While the data is fetched from the file another request might arrive writing to
775 * the same range. This will result in data corruption if both are executed concurrently.
776 */
777 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
778
779 if (!fLocked)
780 {
781 /* Get a request handle. */
782 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
783 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
784
785 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
786 {
787 /* Grow the file if needed. */
788 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
789 {
790 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
791 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
792 }
793
794 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
795 pTask->Off, pTask->DataSeg.pvSeg,
796 pTask->DataSeg.cbSeg, pTask);
797 }
798 else
799 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
800 pTask->Off, pTask->DataSeg.pvSeg,
801 pTask->DataSeg.cbSeg, pTask);
802 AssertRC(rc);
803
804 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
805 pTask->DataSeg.cbSeg,
806 pTask);
807
808 if (RT_SUCCESS(rc))
809 *phReq = hReq;
810 }
811 else
812 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
813
814 return rc;
815}
816
817static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
818 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
819 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
820{
821 int rc = VINF_SUCCESS;
822 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
823 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
824 void *pvBuf = pTask->DataSeg.pvSeg;
825
826 /*
827 * Check if the alignment requirements are met.
828 * Offset, transfer size and buffer address
829 * need to be on a 512 boundary.
830 */
831 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
832 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
833 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
834
835 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
836 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
837 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
838 offStart, cbToTransfer, pEndpoint->cbFile));
839
840 pTask->fPrefetch = false;
841
842 /*
843 * Before we start to setup the request we have to check whether there is a task
844 * already active which range intersects with ours. We have to defer execution
845 * of this task in two cases:
846 * - The pending task is a write and the current is either read or write
847 * - The pending task is a read and the current task is a write task.
848 *
849 * To check whether a range is currently "locked" we use the AVL tree where every pending task
850 * is stored by its file offset range. The current task will be added to the active task
851 * and will be executed when the active one completes. (The method below
852 * which checks whether a range is already used will add the task)
853 *
854 * This is neccessary because of the requirement to align all requests to a 512 boundary
855 * which is enforced by the host OS (Linux and Windows atm). It is possible that
856 * we have to process unaligned tasks and need to align them using bounce buffers.
857 * While the data is fetched from the file another request might arrive writing to
858 * the same range. This will result in data corruption if both are executed concurrently.
859 */
860 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
861
862 if (!fLocked)
863 {
864 /* Get a request handle. */
865 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
866 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
867
868 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
869 || RT_UNLIKELY(offStart != pTask->Off)
870 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
871 {
872 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
873 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
874
875 /* Create bounce buffer. */
876 pTask->cbBounceBuffer = cbToTransfer;
877
878 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
879 pTask->Off, offStart));
880 pTask->offBounceBuffer = pTask->Off - offStart;
881
882 /** @todo: I think we need something like a RTMemAllocAligned method here.
883 * Current assumption is that the maximum alignment is 4096byte
884 * (GPT disk on Windows)
885 * so we can use RTMemPageAlloc here.
886 */
887 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
888 if (RT_LIKELY(pTask->pvBounceBuffer))
889 {
890 pvBuf = pTask->pvBounceBuffer;
891
892 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
893 {
894 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
895 || RT_UNLIKELY(offStart != pTask->Off))
896 {
897 /* We have to fill the buffer first before we can update the data. */
898 LogFlow(("Prefetching data for task %#p\n", pTask));
899 pTask->fPrefetch = true;
900 enmTransferType = PDMACTASKFILETRANSFER_READ;
901 }
902 else
903 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
904 }
905 }
906 else
907 rc = VERR_NO_MEMORY;
908 }
909 else
910 pTask->cbBounceBuffer = 0;
911
912 if (RT_SUCCESS(rc))
913 {
914 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
915 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
916
917 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
918 {
919 /* Grow the file if needed. */
920 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
921 {
922 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
923 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
924 }
925
926 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
927 offStart, pvBuf, cbToTransfer, pTask);
928 }
929 else
930 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
931 offStart, pvBuf, cbToTransfer, pTask);
932 AssertRC(rc);
933
934 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
935
936 if (RT_SUCCESS(rc))
937 *phReq = hReq;
938 else
939 {
940 /* Cleanup */
941 if (pTask->cbBounceBuffer)
942 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
943 }
944 }
945 }
946 else
947 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
948
949 return rc;
950}
951
952static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
953 PPDMACEPFILEMGR pAioMgr,
954 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
955{
956 RTFILEAIOREQ apReqs[20];
957 unsigned cRequests = 0;
958 unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
959 int rc = VINF_SUCCESS;
960
961 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
962 ("Trying to process request lists of a non active endpoint!\n"));
963
964 /* Go through the list and queue the requests until we get a flush request */
965 while ( pTaskHead
966 && !pEndpoint->pFlushReq
967 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
968 && RT_SUCCESS(rc))
969 {
970 PPDMACTASKFILE pCurr = pTaskHead;
971
972 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, (uint32_t)pCurr->DataSeg.cbSeg))
973 {
974 pAioMgr->fBwLimitReached = true;
975 break;
976 }
977
978 pTaskHead = pTaskHead->pNext;
979
980 pCurr->pNext = NULL;
981
982 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
983 ("Endpoints do not match\n"));
984
985 switch (pCurr->enmTransferType)
986 {
987 case PDMACTASKFILETRANSFER_FLUSH:
988 {
989 /* If there is no data transfer request this flush request finished immediately. */
990 if (pEndpoint->fAsyncFlushSupported)
991 {
992 /* Issue a flush to the host. */
993 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
994 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
995
996 LogFlow(("Flush request %#p\n", hReq));
997
998 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->File, pCurr);
999 if (RT_FAILURE(rc))
1000 {
1001 pEndpoint->fAsyncFlushSupported = false;
1002 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1003 rc = VINF_SUCCESS; /* Fake success */
1004 }
1005 else
1006 {
1007 apReqs[cRequests] = hReq;
1008 pEndpoint->AioMgr.cReqsProcessed++;
1009 cRequests++;
1010 }
1011 }
1012
1013 if ( !pEndpoint->AioMgr.cRequestsActive
1014 && !pEndpoint->fAsyncFlushSupported)
1015 {
1016 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1017 pdmacFileTaskFree(pEndpoint, pCurr);
1018 }
1019 else
1020 {
1021 Assert(!pEndpoint->pFlushReq);
1022 pEndpoint->pFlushReq = pCurr;
1023 }
1024 break;
1025 }
1026 case PDMACTASKFILETRANSFER_READ:
1027 case PDMACTASKFILETRANSFER_WRITE:
1028 {
1029 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1030
1031 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1032 {
1033 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1034 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1035 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1036 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1037 else
1038 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1039
1040 AssertRC(rc);
1041 }
1042 else
1043 {
1044 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1045 hReq = pCurr->hReq;
1046 }
1047
1048 LogFlow(("Read/Write request %#p\n", hReq));
1049
1050 if (hReq != NIL_RTFILEAIOREQ)
1051 {
1052 apReqs[cRequests] = hReq;
1053 cRequests++;
1054 }
1055 break;
1056 }
1057 default:
1058 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1059 } /* switch transfer type */
1060
1061 /* Queue the requests if the array is full. */
1062 if (cRequests == RT_ELEMENTS(apReqs))
1063 {
1064 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1065 cRequests = 0;
1066 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1067 ("Unexpected return code\n"));
1068 }
1069 }
1070
1071 if (cRequests)
1072 {
1073 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1074 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1075 ("Unexpected return code rc=%Rrc\n", rc));
1076 }
1077
1078 if (pTaskHead)
1079 {
1080 /* Add the rest of the tasks to the pending list */
1081 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1082
1083 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1084 && !pEndpoint->pFlushReq
1085 && !pAioMgr->fBwLimitReached))
1086 {
1087#if 0
1088 /*
1089 * The I/O manager has no room left for more requests
1090 * but there are still requests to process.
1091 * Create a new I/O manager and let it handle some endpoints.
1092 */
1093 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1094#else
1095 /* Grow the I/O manager */
1096 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1097#endif
1098 }
1099 }
1100
1101 /* Insufficient resources are not fatal. */
1102 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1103 rc = VINF_SUCCESS;
1104
1105 return rc;
1106}
1107
1108/**
1109 * Adds all pending requests for the given endpoint
1110 * until a flush request is encountered or there is no
1111 * request anymore.
1112 *
1113 * @returns VBox status code.
1114 * @param pAioMgr The async I/O manager for the endpoint
1115 * @param pEndpoint The endpoint to get the requests from.
1116 */
1117static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1118 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1119{
1120 int rc = VINF_SUCCESS;
1121 PPDMACTASKFILE pTasksHead = NULL;
1122
1123 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1124 ("Trying to process request lists of a non active endpoint!\n"));
1125
1126 Assert(!pEndpoint->pFlushReq);
1127
1128 /* Check the pending list first */
1129 if (pEndpoint->AioMgr.pReqsPendingHead)
1130 {
1131 LogFlow(("Queuing pending requests first\n"));
1132
1133 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1134 /*
1135 * Clear the list as the processing routine will insert them into the list
1136 * again if it gets a flush request.
1137 */
1138 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1139 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1140 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1141 AssertRC(rc);
1142 }
1143
1144 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1145 {
1146 /* Now the request queue. */
1147 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1148 if (pTasksHead)
1149 {
1150 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1151 AssertRC(rc);
1152 }
1153 }
1154
1155 return rc;
1156}
1157
1158static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1159{
1160 int rc = VINF_SUCCESS;
1161 bool fNotifyWaiter = false;
1162
1163 LogFlowFunc((": Enter\n"));
1164
1165 Assert(pAioMgr->fBlockingEventPending);
1166
1167 switch (pAioMgr->enmBlockingEvent)
1168 {
1169 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1170 {
1171 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1172 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1173
1174 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1175
1176 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1177 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1178 if (pAioMgr->pEndpointsHead)
1179 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1180 pAioMgr->pEndpointsHead = pEndpointNew;
1181
1182 /* Assign the completion point to this file. */
1183 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
1184 fNotifyWaiter = true;
1185 pAioMgr->cEndpoints++;
1186 break;
1187 }
1188 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1189 {
1190 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1191 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1192
1193 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1194 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1195 break;
1196 }
1197 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1198 {
1199 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1200 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1201
1202 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1203 {
1204 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1205
1206 /* Make sure all tasks finished. Process the queues a last time first. */
1207 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1208 AssertRC(rc);
1209
1210 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1211 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1212 }
1213 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1214 && (!pEndpointClose->AioMgr.cRequestsActive))
1215 fNotifyWaiter = true;
1216 break;
1217 }
1218 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1219 {
1220 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1221 if (!pAioMgr->cRequestsActive)
1222 fNotifyWaiter = true;
1223 break;
1224 }
1225 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1226 {
1227 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1228 break;
1229 }
1230 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1231 {
1232 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1233 fNotifyWaiter = true;
1234 break;
1235 }
1236 default:
1237 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1238 }
1239
1240 if (fNotifyWaiter)
1241 {
1242 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1243 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1244
1245 /* Release the waiting thread. */
1246 LogFlow(("Signalling waiter\n"));
1247 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1248 AssertRC(rc);
1249 }
1250
1251 LogFlowFunc((": Leave\n"));
1252 return rc;
1253}
1254
1255/**
1256 * Checks all endpoints for pending events or new requests.
1257 *
1258 * @returns VBox status code.
1259 * @param pAioMgr The I/O manager handle.
1260 */
1261static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1262{
1263 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1264 int rc = VINF_SUCCESS;
1265 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1266
1267 pAioMgr->fBwLimitReached = false;
1268
1269 while (pEndpoint)
1270 {
1271 if (!pEndpoint->pFlushReq
1272 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1273 && !pEndpoint->AioMgr.fMoving)
1274 {
1275 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1276 if (RT_FAILURE(rc))
1277 return rc;
1278 }
1279 else if ( !pEndpoint->AioMgr.cRequestsActive
1280 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1281 {
1282 /* Reopen the file so that the new endpoint can reassociate with the file */
1283 RTFileClose(pEndpoint->File);
1284 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1285 AssertRC(rc);
1286
1287 if (pEndpoint->AioMgr.fMoving)
1288 {
1289 pEndpoint->AioMgr.fMoving = false;
1290 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1291 }
1292 else
1293 {
1294 Assert(pAioMgr->fBlockingEventPending);
1295 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1296
1297 /* Release the waiting thread. */
1298 LogFlow(("Signalling waiter\n"));
1299 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1300 AssertRC(rc);
1301 }
1302 }
1303
1304 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1305 }
1306
1307 return rc;
1308}
1309
1310/**
1311 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1312 */
1313static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1314{
1315 size_t cbTransfered = 0;
1316 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1317
1318 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1319}
1320
1321static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1322 int rcReq, size_t cbTransfered)
1323{
1324 int rc = VINF_SUCCESS;
1325 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1326 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1327 PPDMACTASKFILE pTasksWaiting;
1328
1329 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1330
1331 pEndpoint = pTask->pEndpoint;
1332
1333 pTask->hReq = NIL_RTFILEAIOREQ;
1334
1335 pAioMgr->cRequestsActive--;
1336 pEndpoint->AioMgr.cRequestsActive--;
1337 pEndpoint->AioMgr.cReqsProcessed++;
1338
1339 /*
1340 * It is possible that the request failed on Linux with kernels < 2.6.23
1341 * if the passed buffer was allocated with remap_pfn_range or if the file
1342 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1343 * The endpoint will be migrated to a failsafe manager in case a request fails.
1344 */
1345 if (RT_FAILURE(rcReq))
1346 {
1347 /* Free bounce buffers and the IPRT request. */
1348 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1349
1350 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1351 {
1352 LogFlow(("Async flushes are not supported for this endpoint, disabling\n"));
1353 pEndpoint->fAsyncFlushSupported = false;
1354 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1355 /* The other method will take over now. */
1356 }
1357 else
1358 {
1359 /* Free the lock and process pending tasks if neccessary */
1360 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1361 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1362 AssertRC(rc);
1363
1364 if (pTask->cbBounceBuffer)
1365 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1366
1367 /*
1368 * Fatal errors are reported to the guest and non-fatal errors
1369 * will cause a migration to the failsafe manager in the hope
1370 * that the error disappears.
1371 */
1372 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1373 {
1374 /* Queue the request on the pending list. */
1375 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1376 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1377
1378 /* Create a new failsafe manager if neccessary. */
1379 if (!pEndpoint->AioMgr.fMoving)
1380 {
1381 PPDMACEPFILEMGR pAioMgrFailsafe;
1382
1383 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1384 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1385
1386 pEndpoint->AioMgr.fMoving = true;
1387
1388 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1389 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1390 AssertRC(rc);
1391
1392 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1393
1394 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1395 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1396 }
1397
1398 /* If this was the last request for the endpoint migrate it to the new manager. */
1399 if (!pEndpoint->AioMgr.cRequestsActive)
1400 {
1401 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1402 Assert(!fReqsPending);
1403
1404 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1405 AssertRC(rc);
1406 }
1407 }
1408 else
1409 {
1410 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1411 pdmacFileTaskFree(pEndpoint, pTask);
1412 }
1413 }
1414 }
1415 else
1416 {
1417 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1418 {
1419 /* Clear pending flush */
1420 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1421 pEndpoint->pFlushReq = NULL;
1422 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1423
1424 /* Call completion callback */
1425 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1426 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1427 pdmacFileTaskFree(pEndpoint, pTask);
1428 }
1429 else
1430 {
1431 /*
1432 * Restart an incomplete transfer.
1433 * This usually means that the request will return an error now
1434 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1435 * the transfer needs to be continued.
1436 */
1437 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1438 || ( pTask->cbBounceBuffer
1439 && cbTransfered < pTask->cbBounceBuffer)))
1440 {
1441 RTFOFF offStart;
1442 size_t cbToTransfer;
1443 uint8_t *pbBuf = NULL;
1444
1445 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transfered)\n",
1446 pTask, cbTransfered));
1447 Assert(cbTransfered % 512 == 0);
1448
1449 if (pTask->cbBounceBuffer)
1450 {
1451 AssertPtr(pTask->pvBounceBuffer);
1452 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1453 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1454 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1455 }
1456 else
1457 {
1458 Assert(!pTask->pvBounceBuffer);
1459 offStart = pTask->Off + cbTransfered;
1460 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1461 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1462 }
1463
1464 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1465 {
1466 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File, offStart,
1467 pbBuf, cbToTransfer, pTask);
1468 }
1469 else
1470 {
1471 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1472 ("Invalid transfer type\n"));
1473 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File, offStart,
1474 pbBuf, cbToTransfer, pTask);
1475 }
1476
1477 AssertRC(rc);
1478 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1479 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1480 ("Unexpected return code rc=%Rrc\n", rc));
1481 }
1482 else if (pTask->fPrefetch)
1483 {
1484 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1485 Assert(pTask->cbBounceBuffer);
1486
1487 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1488 pTask->DataSeg.pvSeg,
1489 pTask->DataSeg.cbSeg);
1490
1491 /* Write it now. */
1492 pTask->fPrefetch = false;
1493 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1494 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1495
1496 /* Grow the file if needed. */
1497 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1498 {
1499 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1500 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1501 }
1502
1503 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1504 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1505 AssertRC(rc);
1506 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1507 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1508 ("Unexpected return code rc=%Rrc\n", rc));
1509 }
1510 else
1511 {
1512 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1513 {
1514 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1515 memcpy(pTask->DataSeg.pvSeg,
1516 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1517 pTask->DataSeg.cbSeg);
1518
1519 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1520 }
1521
1522 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1523
1524 /* Free the lock and process pending tasks if neccessary */
1525 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1526 if (pTasksWaiting)
1527 {
1528 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1529 AssertRC(rc);
1530 }
1531
1532 /* Call completion callback */
1533 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1534 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1535 pdmacFileTaskFree(pEndpoint, pTask);
1536
1537 /*
1538 * If there is no request left on the endpoint but a flush request is set
1539 * it completed now and we notify the owner.
1540 * Furthermore we look for new requests and continue.
1541 */
1542 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1543 {
1544 /* Call completion callback */
1545 pTask = pEndpoint->pFlushReq;
1546 pEndpoint->pFlushReq = NULL;
1547
1548 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1549
1550 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1551 pdmacFileTaskFree(pEndpoint, pTask);
1552 }
1553 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1554 {
1555 /* If the endpoint is about to be migrated do it now. */
1556 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1557 Assert(!fReqsPending);
1558
1559 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1560 AssertRC(rc);
1561 }
1562 }
1563 } /* Not a flush request */
1564 } /* request completed successfully */
1565}
1566
/**
 * Helper macro for checking for error codes.
 *
 * On a failure status the error handler is invoked and the status it
 * returns is returned from the calling function, which therefore has to
 * return an int. The body is wrapped in do { } while (0) so that the
 * macro behaves like a single statement, also in front of an else.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1574
1575/**
1576 * The normal I/O manager using the RTFileAio* API
1577 *
1578 * @returns VBox status code.
1579 * @param ThreadSelf Handle of the thread.
1580 * @param pvUser Opaque user data.
1581 */
1582int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1583{
1584 int rc = VINF_SUCCESS;
1585 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1586 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1587
1588 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1589 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING)
1590 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1591 {
1592 if (!pAioMgr->cRequestsActive)
1593 {
1594 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1595 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1596 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1597 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1598 AssertRC(rc);
1599
1600 LogFlow(("Got woken up\n"));
1601 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1602 }
1603
1604 /* Check for an external blocking event first. */
1605 if (pAioMgr->fBlockingEventPending)
1606 {
1607 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1608 CHECK_RC(pAioMgr, rc);
1609 }
1610
1611 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1612 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1613 {
1614 /* We got woken up because an endpoint issued new requests. Queue them. */
1615 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1616 CHECK_RC(pAioMgr, rc);
1617
1618 while ( pAioMgr->cRequestsActive
1619 || pAioMgr->fBwLimitReached)
1620 {
1621 if (pAioMgr->cRequestsActive)
1622 {
1623 RTFILEAIOREQ apReqs[20];
1624 uint32_t cReqsCompleted = 0;
1625 size_t cReqsWait;
1626
1627 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1628 cReqsWait = RT_ELEMENTS(apReqs);
1629 else
1630 cReqsWait = pAioMgr->cRequestsActive;
1631
1632 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1633
1634 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1635 1,
1636 RT_INDEFINITE_WAIT, apReqs,
1637 cReqsWait, &cReqsCompleted);
1638 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1639 CHECK_RC(pAioMgr, rc);
1640
1641 LogFlow(("%d tasks completed\n", cReqsCompleted));
1642
1643 for (uint32_t i = 0; i < cReqsCompleted; i++)
1644 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1645
1646 /* Check for an external blocking event before we go to sleep again. */
1647 if (pAioMgr->fBlockingEventPending)
1648 {
1649 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1650 CHECK_RC(pAioMgr, rc);
1651 }
1652
1653 /* Update load statistics. */
1654 uint64_t uMillisCurr = RTTimeMilliTS();
1655 if (uMillisCurr > uMillisEnd)
1656 {
1657 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1658
1659 /* Calculate timespan. */
1660 uMillisCurr -= uMillisEnd;
1661
1662 while (pEndpointCurr)
1663 {
1664 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1665 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1666 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1667 }
1668
1669 /* Set new update interval */
1670 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1671 }
1672 }
1673 else
1674 {
1675 /*
1676 * Bandwidth limit reached for all endpoints.
1677 * Yield and wait until we have enough resources again.
1678 */
1679 RTThreadYield();
1680 }
1681
1682 /* Check endpoints for new requests. */
1683 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1684 {
1685 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1686 CHECK_RC(pAioMgr, rc);
1687 }
1688 } /* while requests are active. */
1689
1690 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1691 {
1692 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1693 AssertRC(rc);
1694 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1695
1696 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1697 CHECK_RC(pAioMgr, rc);
1698 }
1699 } /* if still running */
1700 } /* while running */
1701
1702 LogFlowFunc(("rc=%Rrc\n", rc));
1703 return rc;
1704}
1705
1706#undef CHECK_RC
1707
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette