VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 33000

Last change on this file since 33000 was 32467, checked in by vboxsync, 14 years ago

AsyncCompletion: Another one

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 64.7 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 32467 2010-09-14 09:08:17Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Oracle Corporation
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 */
18#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
19#include <iprt/types.h>
20#include <iprt/asm.h>
21#include <iprt/file.h>
22#include <iprt/mem.h>
23#include <iprt/string.h>
24#include <iprt/assert.h>
25#include <VBox/log.h>
26
27#include "PDMAsyncCompletionFileInternal.h"
28
29/** The update period for the I/O load statistics in ms. */
30#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
31/** Initial maximum number of active requests of a manager and the step by which that maximum is increased when the manager grows. */
32#define PDMACEPFILEMGR_REQS_STEP 512
33
34/*******************************************************************************
35* Internal functions *
36*******************************************************************************/
37static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
38 PPDMACEPFILEMGR pAioMgr,
39 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
40
41static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
42 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
43 PPDMACFILERANGELOCK pRangeLock);
44
45static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
46 int rc, size_t cbTransfered);
47
48int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
49{
50 int rc = VINF_SUCCESS;
51
52 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
53
54 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
55 if (rc == VERR_OUT_OF_RANGE)
56 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
57
58 if (RT_SUCCESS(rc))
59 {
60 /* Initialize request handle array. */
61 pAioMgr->iFreeEntry = 0;
62 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
63 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
64
65 if (pAioMgr->pahReqsFree)
66 {
67 /* Create the range lock memcache. */
68 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
69 0, UINT32_MAX, NULL, NULL, NULL, 0);
70 if (RT_SUCCESS(rc))
71 return VINF_SUCCESS;
72
73 RTMemFree(pAioMgr->pahReqsFree);
74 }
75 else
76 {
77 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
78 rc = VERR_NO_MEMORY;
79 }
80 }
81
82 return rc;
83}
84
85void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
86{
87 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
88
89 while (pAioMgr->iFreeEntry > 0)
90 {
91 pAioMgr->iFreeEntry--;
92 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
93 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
94 }
95
96 RTMemFree(pAioMgr->pahReqsFree);
97 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
98}
99
100/**
101 * Sorts the endpoint list with insertion sort.
102 */
103static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
104{
105 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
106
107 pEpPrev = pAioMgr->pEndpointsHead;
108 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
109
110 while (pEpCurr)
111 {
112 /* Remember the next element to sort because the list might change. */
113 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
114
115 /* Unlink the current element from the list. */
116 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
117 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
118
119 if (pPrev)
120 pPrev->AioMgr.pEndpointNext = pNext;
121 else
122 pAioMgr->pEndpointsHead = pNext;
123
124 if (pNext)
125 pNext->AioMgr.pEndpointPrev = pPrev;
126
127 /* Go back until we reached the place to insert the current endpoint into. */
128 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
129 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
130
131 /* Link the endpoint into the list. */
132 if (pEpPrev)
133 pNext = pEpPrev->AioMgr.pEndpointNext;
134 else
135 pNext = pAioMgr->pEndpointsHead;
136
137 pEpCurr->AioMgr.pEndpointNext = pNext;
138 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
139
140 if (pNext)
141 pNext->AioMgr.pEndpointPrev = pEpCurr;
142
143 if (pEpPrev)
144 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
145 else
146 pAioMgr->pEndpointsHead = pEpCurr;
147
148 pEpCurr = pEpNextToSort;
149 }
150
151#ifdef DEBUG
152 /* Validate sorting alogrithm */
153 unsigned cEndpoints = 0;
154 pEpCurr = pAioMgr->pEndpointsHead;
155
156 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
157 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
158
159 while (pEpCurr)
160 {
161 cEndpoints++;
162
163 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
164 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
165
166 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
167 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
168
169 pEpCurr = pNext;
170 }
171
172 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
173
174#endif
175}
176
177/**
178 * Removes an endpoint from the currently assigned manager.
179 *
180 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
181 * FALSE otherwise.
182 * @param pEndpointRemove The endpoint to remove.
183 */
184static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
185{
186 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
187 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
188 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
189
190 pAioMgr->cEndpoints--;
191
192 if (pPrev)
193 pPrev->AioMgr.pEndpointNext = pNext;
194 else
195 pAioMgr->pEndpointsHead = pNext;
196
197 if (pNext)
198 pNext->AioMgr.pEndpointPrev = pPrev;
199
200 /* Make sure that there is no request pending on this manager for the endpoint. */
201 if (!pEndpointRemove->AioMgr.cRequestsActive)
202 {
203 Assert(!pEndpointRemove->pFlushReq);
204
205 /* Reopen the file so that the new endpoint can reassociate with the file */
206 RTFileClose(pEndpointRemove->File);
207 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
208 AssertRC(rc);
209 return false;
210 }
211
212 return true;
213}
214
215static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
216{
217 /* Balancing doesn't make sense with only one endpoint. */
218 if (pAioMgr->cEndpoints == 1)
219 return false;
220
221 /* Doesn't make sens to move endpoints if only one produces the whole load */
222 unsigned cEndpointsWithLoad = 0;
223
224 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
225
226 while (pCurr)
227 {
228 if (pCurr->AioMgr.cReqsPerSec)
229 cEndpointsWithLoad++;
230
231 pCurr = pCurr->AioMgr.pEndpointNext;
232 }
233
234 return (cEndpointsWithLoad > 1);
235}
236
237/**
238 * Creates a new I/O manager and spreads the I/O load of the endpoints
239 * between the given I/O manager and the new one.
240 *
241 * @returns nothing.
242 * @param pAioMgr The I/O manager with high I/O load.
243 */
244static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
245{
246 PPDMACEPFILEMGR pAioMgrNew = NULL;
247 int rc = VINF_SUCCESS;
248
249 /*
250 * Check if balancing would improve the situation.
251 */
252 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
253 {
254 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
255
256 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
257 if (RT_SUCCESS(rc))
258 {
259 /* We will sort the list by request count per second. */
260 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
261
262 /* Now move some endpoints to the new manager. */
263 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
264 unsigned cReqsOther = 0;
265 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
266
267 while (pCurr)
268 {
269 if (cReqsHere <= cReqsOther)
270 {
271 /*
272 * The other manager has more requests to handle now.
273 * We will keep the current endpoint.
274 */
275 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
276 cReqsHere += pCurr->AioMgr.cReqsPerSec;
277 pCurr = pCurr->AioMgr.pEndpointNext;
278 }
279 else
280 {
281 /* Move to other endpoint. */
282 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
283 cReqsOther += pCurr->AioMgr.cReqsPerSec;
284
285 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
286
287 pCurr = pCurr->AioMgr.pEndpointNext;
288
289 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
290
291 if (fReqsPending)
292 {
293 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
294 pMove->AioMgr.fMoving = true;
295 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
296 }
297 else
298 {
299 pMove->AioMgr.fMoving = false;
300 pMove->AioMgr.pAioMgrDst = NULL;
301 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
302 }
303 }
304 }
305 }
306 else
307 {
308 /* Don't process further but leave a log entry about reduced performance. */
309 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
310 }
311 }
312 else
313 Log(("AIOMgr: Load balancing would not improve anything\n"));
314}
315
316/**
317 * Increase the maximum number of active requests for the given I/O manager.
318 *
319 * @returns VBox status code.
320 * @param pAioMgr The I/O manager to grow.
321 */
322static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
323{
324 int rc = VINF_SUCCESS;
325 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
326
327 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
328
329 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
330 && !pAioMgr->cRequestsActive,
331 ("Invalid state of the I/O manager\n"));
332
333#ifdef RT_OS_WINDOWS
334 /*
335 * Reopen the files of all assigned endpoints first so we can assign them to the new
336 * I/O context.
337 */
338 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
339
340 while (pCurr)
341 {
342 RTFileClose(pCurr->File);
343 rc = RTFileOpen(&pCurr->File, pCurr->Core.pszUri, pCurr->fFlags);
344 AssertRC(rc);
345
346 pCurr = pCurr->AioMgr.pEndpointNext;
347 }
348#endif
349
350 /* Create the new bigger context. */
351 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
352
353 rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
354 if (rc == VERR_OUT_OF_RANGE)
355 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);
356
357 if (RT_SUCCESS(rc))
358 {
359 /* Close the old context. */
360 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
361 AssertRC(rc);
362
363 pAioMgr->hAioCtx = hAioCtxNew;
364
365 /* Create a new I/O task handle array */
366 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
367 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
368
369 if (pahReqNew)
370 {
371 /* Copy the cached request handles. */
372 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
373 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
374
375 RTMemFree(pAioMgr->pahReqsFree);
376 pAioMgr->pahReqsFree = pahReqNew;
377 pAioMgr->cReqEntries = cReqEntriesNew;
378 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
379 pAioMgr->cRequestsActiveMax));
380 }
381 else
382 rc = VERR_NO_MEMORY;
383 }
384
385#ifdef RT_OS_WINDOWS
386 /* Assign the file to the new context. */
387 pCurr = pAioMgr->pEndpointsHead;
388
389 while (pCurr)
390 {
391 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->File);
392 AssertRC(rc);
393
394 pCurr = pCurr->AioMgr.pEndpointNext;
395 }
396#endif
397
398 if (RT_FAILURE(rc))
399 {
400 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
401 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
402 }
403
404 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
405 LogFlowFunc(("returns rc=%Rrc\n", rc));
406
407 return rc;
408}
409
410/**
411 * Checks if a given status code is fatal.
412 * Non fatal errors can be fixed by migrating the endpoint to a
413 * failsafe manager.
414 *
415 * @returns true If the error is fatal and migrating to a failsafe manager doesn't help
416 * false If the error can be fixed by a migration. (image on NFS disk for example)
417 * @param rcReq The status code to check.
418 */
419DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
420{
421 return rcReq == VERR_DEV_IO_ERROR
422 || rcReq == VERR_FILE_IO_ERROR
423 || rcReq == VERR_DISK_IO_ERROR
424 || rcReq == VERR_DISK_FULL
425 || rcReq == VERR_FILE_TOO_BIG;
426}
427
428/**
429 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
430 *
431 * @returns VBox status code
432 * @param pAioMgr The I/O manager the error ocurred on.
433 * @param rc The error code.
434 */
435static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
436{
437 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
438 pAioMgr, rc));
439 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
440 LogRel(("AIOMgr: Please contact the product vendor\n"));
441
442 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
443
444 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
445 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
446
447 AssertMsgFailed(("Implement\n"));
448 return VINF_SUCCESS;
449}
450
451/**
452 * Put a list of tasks in the pending request list of an endpoint.
453 */
454DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
455{
456 /* Add the rest of the tasks to the pending list */
457 if (!pEndpoint->AioMgr.pReqsPendingHead)
458 {
459 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
460 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
461 }
462 else
463 {
464 Assert(pEndpoint->AioMgr.pReqsPendingTail);
465 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
466 }
467
468 /* Update the tail. */
469 while (pTaskHead->pNext)
470 pTaskHead = pTaskHead->pNext;
471
472 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
473 pTaskHead->pNext = NULL;
474}
475
476/**
477 * Put one task in the pending request list of an endpoint.
478 */
479DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
480{
481 /* Add the rest of the tasks to the pending list */
482 if (!pEndpoint->AioMgr.pReqsPendingHead)
483 {
484 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
485 pEndpoint->AioMgr.pReqsPendingHead = pTask;
486 }
487 else
488 {
489 Assert(pEndpoint->AioMgr.pReqsPendingTail);
490 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
491 }
492
493 pEndpoint->AioMgr.pReqsPendingTail = pTask;
494 pTask->pNext = NULL;
495}
496
497/**
498 * Allocates a async I/O request.
499 *
500 * @returns Handle to the request.
501 * @param pAioMgr The I/O manager.
502 */
503static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
504{
505 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
506
507 /* Get a request handle. */
508 if (pAioMgr->iFreeEntry > 0)
509 {
510 pAioMgr->iFreeEntry--;
511 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
512 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
513 Assert(hReq != NIL_RTFILEAIOREQ);
514 }
515 else
516 {
517 int rc = RTFileAioReqCreate(&hReq);
518 AssertRC(rc);
519 }
520
521 return hReq;
522}
523
524/**
525 * Frees a async I/O request handle.
526 *
527 * @returns nothing.
528 * @param pAioMgr The I/O manager.
529 * @param hReq The I/O request handle to free.
530 */
531static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
532{
533 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
534 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
535
536 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
537 pAioMgr->iFreeEntry++;
538}
539
540/**
541 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
542 */
543static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
544 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
545 PRTFILEAIOREQ pahReqs, unsigned cReqs)
546{
547 int rc;
548
549 pAioMgr->cRequestsActive += cReqs;
550 pEndpoint->AioMgr.cRequestsActive += cReqs;
551
552 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
553 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
554
555 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
556 if (RT_FAILURE(rc))
557 {
558 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
559 {
560 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
561
562 /* Append any not submitted task to the waiting list. */
563 for (size_t i = 0; i < cReqs; i++)
564 {
565 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
566
567 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
568 {
569 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
570
571 Assert(pTask->hReq == pahReqs[i]);
572 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
573 pAioMgr->cRequestsActive--;
574 pEndpoint->AioMgr.cRequestsActive--;
575
576 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
577 {
578 /* Clear the pending flush */
579 Assert(pEndpoint->pFlushReq == pTask);
580 pEndpoint->pFlushReq = NULL;
581 }
582 }
583 }
584
585 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
586
587 /* Print an entry in the release log */
588 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
589 {
590 pEpClass->fOutOfResourcesWarningPrinted = true;
591 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
592 pAioMgr->cRequestsActive));
593 }
594
595 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
596 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
597 rc = VINF_SUCCESS;
598 }
599 else /* Another kind of error happened (full disk, ...) */
600 {
601 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
602 for (size_t i = 0; i < cReqs; i++)
603 {
604 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
605
606 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
607 {
608 /* We call ourself again to do any error handling which might come up now. */
609 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
610 AssertRC(rc);
611 }
612 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
613 {
614 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
615
616 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
617 }
618 }
619
620
621 if ( pEndpoint->pFlushReq
622 && !pAioMgr->cRequestsActive
623 && !pEndpoint->fAsyncFlushSupported)
624 {
625 /*
626 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
627 * the async flush API.
628 * Happens only if this we just noticed that this is not supported
629 * and the only active request was a flush.
630 */
631 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
632 pEndpoint->pFlushReq = NULL;
633 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
634 pdmacFileTaskFree(pEndpoint, pFlush);
635 }
636 }
637 }
638
639 return VINF_SUCCESS;
640}
641
642static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
643 RTFOFF offStart, size_t cbRange,
644 PPDMACTASKFILE pTask)
645{
646 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
647
648 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
649 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
650 ("Invalid task type %d\n", pTask->enmTransferType));
651
652 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
653 if (!pRangeLock)
654 {
655 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
656 /* Check if we intersect with the range. */
657 if ( !pRangeLock
658 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
659 && (pRangeLock->Core.KeyLast) >= offStart))
660 {
661 pRangeLock = NULL; /* False alarm */
662 }
663 }
664
665 /* Check whether we have one of the situations explained below */
666 if ( pRangeLock
667#if 0 /** @todo: later. For now we will just block all requests if they interfere */
668 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
669 || (!pRangeLock->fReadLock)
670#endif
671 )
672 {
673 /* Add to the list. */
674 pTask->pNext = NULL;
675
676 if (!pRangeLock->pWaitingTasksHead)
677 {
678 Assert(!pRangeLock->pWaitingTasksTail);
679 pRangeLock->pWaitingTasksHead = pTask;
680 pRangeLock->pWaitingTasksTail = pTask;
681 }
682 else
683 {
684 AssertPtr(pRangeLock->pWaitingTasksTail);
685 pRangeLock->pWaitingTasksTail->pNext = pTask;
686 pRangeLock->pWaitingTasksTail = pTask;
687 }
688 return true;
689 }
690
691 return false;
692}
693
694static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
695 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
696 RTFOFF offStart, size_t cbRange,
697 PPDMACTASKFILE pTask)
698{
699 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
700 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
701 offStart, cbRange));
702
703 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
704 if (!pRangeLock)
705 return VERR_NO_MEMORY;
706
707 /* Init the lock. */
708 pRangeLock->Core.Key = offStart;
709 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
710 pRangeLock->cRefs = 1;
711 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
712 pRangeLock->pWaitingTasksHead = NULL;
713 pRangeLock->pWaitingTasksTail = NULL;
714
715 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
716 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
717
718 /* Let the task point to its lock. */
719 pTask->pRangeLock = pRangeLock;
720
721 return VINF_SUCCESS;
722}
723
724static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
725 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
726 PPDMACFILERANGELOCK pRangeLock)
727{
728 PPDMACTASKFILE pTasksWaitingHead;
729
730 AssertPtr(pRangeLock);
731 Assert(pRangeLock->cRefs == 1);
732
733 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
734 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
735 pRangeLock->pWaitingTasksHead = NULL;
736 pRangeLock->pWaitingTasksTail = NULL;
737 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
738
739 return pTasksWaitingHead;
740}
741
742static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
743 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
744 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
745{
746 int rc = VINF_SUCCESS;
747 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
748 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
749 void *pvBuf = pTask->DataSeg.pvSeg;
750
751 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
752 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
753 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
754 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
755
756 pTask->fPrefetch = false;
757 pTask->cbBounceBuffer = 0;
758
759 /*
760 * Before we start to setup the request we have to check whether there is a task
761 * already active which range intersects with ours. We have to defer execution
762 * of this task in two cases:
763 * - The pending task is a write and the current is either read or write
764 * - The pending task is a read and the current task is a write task.
765 *
766 * To check whether a range is currently "locked" we use the AVL tree where every pending task
767 * is stored by its file offset range. The current task will be added to the active task
768 * and will be executed when the active one completes. (The method below
769 * which checks whether a range is already used will add the task)
770 *
771 * This is neccessary because of the requirement to align all requests to a 512 boundary
772 * which is enforced by the host OS (Linux and Windows atm). It is possible that
773 * we have to process unaligned tasks and need to align them using bounce buffers.
774 * While the data is fetched from the file another request might arrive writing to
775 * the same range. This will result in data corruption if both are executed concurrently.
776 */
777 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
778
779 if (!fLocked)
780 {
781 /* Get a request handle. */
782 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
783 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
784
785 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
786 {
787 /* Grow the file if needed. */
788 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
789 {
790 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
791 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
792 }
793
794 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
795 pTask->Off, pTask->DataSeg.pvSeg,
796 pTask->DataSeg.cbSeg, pTask);
797 }
798 else
799 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
800 pTask->Off, pTask->DataSeg.pvSeg,
801 pTask->DataSeg.cbSeg, pTask);
802 AssertRC(rc);
803
804 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
805 pTask->DataSeg.cbSeg,
806 pTask);
807
808 if (RT_SUCCESS(rc))
809 {
810 pTask->hReq = hReq;
811 *phReq = hReq;
812 }
813 }
814 else
815 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
816
817 return rc;
818}
819
820static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
821 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
822 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
823{
824 int rc = VINF_SUCCESS;
825 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
826 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
827 void *pvBuf = pTask->DataSeg.pvSeg;
828
829 /*
830 * Check if the alignment requirements are met.
831 * Offset, transfer size and buffer address
832 * need to be on a 512 boundary.
833 */
834 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
835 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
836 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
837
838 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
839 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
840 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
841 offStart, cbToTransfer, pEndpoint->cbFile));
842
843 pTask->fPrefetch = false;
844
845 /*
846 * Before we start to setup the request we have to check whether there is a task
847 * already active which range intersects with ours. We have to defer execution
848 * of this task in two cases:
849 * - The pending task is a write and the current is either read or write
850 * - The pending task is a read and the current task is a write task.
851 *
852 * To check whether a range is currently "locked" we use the AVL tree where every pending task
853 * is stored by its file offset range. The current task will be added to the active task
854 * and will be executed when the active one completes. (The method below
855 * which checks whether a range is already used will add the task)
856 *
857 * This is neccessary because of the requirement to align all requests to a 512 boundary
858 * which is enforced by the host OS (Linux and Windows atm). It is possible that
859 * we have to process unaligned tasks and need to align them using bounce buffers.
860 * While the data is fetched from the file another request might arrive writing to
861 * the same range. This will result in data corruption if both are executed concurrently.
862 */
863 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
864
865 if (!fLocked)
866 {
867 /* Get a request handle. */
868 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
869 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
870
871 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
872 || RT_UNLIKELY(offStart != pTask->Off)
873 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
874 {
875 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
876 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
877
878 /* Create bounce buffer. */
879 pTask->cbBounceBuffer = cbToTransfer;
880
881 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
882 pTask->Off, offStart));
883 pTask->offBounceBuffer = pTask->Off - offStart;
884
885 /** @todo: I think we need something like a RTMemAllocAligned method here.
886 * Current assumption is that the maximum alignment is 4096byte
887 * (GPT disk on Windows)
888 * so we can use RTMemPageAlloc here.
889 */
890 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
891 if (RT_LIKELY(pTask->pvBounceBuffer))
892 {
893 pvBuf = pTask->pvBounceBuffer;
894
895 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
896 {
897 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
898 || RT_UNLIKELY(offStart != pTask->Off))
899 {
900 /* We have to fill the buffer first before we can update the data. */
901 LogFlow(("Prefetching data for task %#p\n", pTask));
902 pTask->fPrefetch = true;
903 enmTransferType = PDMACTASKFILETRANSFER_READ;
904 }
905 else
906 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
907 }
908 }
909 else
910 rc = VERR_NO_MEMORY;
911 }
912 else
913 pTask->cbBounceBuffer = 0;
914
915 if (RT_SUCCESS(rc))
916 {
917 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
918 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
919
920 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
921 {
922 /* Grow the file if needed. */
923 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
924 {
925 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
926 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
927 }
928
929 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
930 offStart, pvBuf, cbToTransfer, pTask);
931 }
932 else
933 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
934 offStart, pvBuf, cbToTransfer, pTask);
935 AssertRC(rc);
936
937 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
938
939 if (RT_SUCCESS(rc))
940 {
941 pTask->hReq = hReq;
942 *phReq = hReq;
943 }
944 else
945 {
946 /* Cleanup */
947 if (pTask->cbBounceBuffer)
948 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
949 }
950 }
951 }
952 else
953 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
954
955 return rc;
956}
957
958static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
959 PPDMACEPFILEMGR pAioMgr,
960 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
961{
962 RTFILEAIOREQ apReqs[20];
963 unsigned cRequests = 0;
964 unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
965 int rc = VINF_SUCCESS;
966
967 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
968 ("Trying to process request lists of a non active endpoint!\n"));
969
970 /* Go through the list and queue the requests until we get a flush request */
971 while ( pTaskHead
972 && !pEndpoint->pFlushReq
973 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
974 && RT_SUCCESS(rc))
975 {
976 PPDMACTASKFILE pCurr = pTaskHead;
977
978 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, (uint32_t)pCurr->DataSeg.cbSeg))
979 {
980 pAioMgr->fBwLimitReached = true;
981 break;
982 }
983
984 pTaskHead = pTaskHead->pNext;
985
986 pCurr->pNext = NULL;
987
988 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
989 ("Endpoints do not match\n"));
990
991 switch (pCurr->enmTransferType)
992 {
993 case PDMACTASKFILETRANSFER_FLUSH:
994 {
995 /* If there is no data transfer request this flush request finished immediately. */
996 if (pEndpoint->fAsyncFlushSupported)
997 {
998 /* Issue a flush to the host. */
999 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1000 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1001
1002 LogFlow(("Flush request %#p\n", hReq));
1003
1004 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->File, pCurr);
1005 if (RT_FAILURE(rc))
1006 {
1007 pEndpoint->fAsyncFlushSupported = false;
1008 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1009 rc = VINF_SUCCESS; /* Fake success */
1010 }
1011 else
1012 {
1013 pCurr->hReq = hReq;
1014 apReqs[cRequests] = hReq;
1015 pEndpoint->AioMgr.cReqsProcessed++;
1016 cRequests++;
1017 }
1018 }
1019
1020 if ( !pEndpoint->AioMgr.cRequestsActive
1021 && !pEndpoint->fAsyncFlushSupported)
1022 {
1023 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
1024 pdmacFileTaskFree(pEndpoint, pCurr);
1025 }
1026 else
1027 {
1028 Assert(!pEndpoint->pFlushReq);
1029 pEndpoint->pFlushReq = pCurr;
1030 }
1031 break;
1032 }
1033 case PDMACTASKFILETRANSFER_READ:
1034 case PDMACTASKFILETRANSFER_WRITE:
1035 {
1036 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
1037
1038 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1039 {
1040 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1041 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1042 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1043 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1044 else
1045 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1046
1047 AssertRC(rc);
1048 }
1049 else
1050 {
1051 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1052 hReq = pCurr->hReq;
1053 }
1054
1055 LogFlow(("Read/Write request %#p\n", hReq));
1056
1057 if (hReq != NIL_RTFILEAIOREQ)
1058 {
1059 apReqs[cRequests] = hReq;
1060 cRequests++;
1061 }
1062 break;
1063 }
1064 default:
1065 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
1066 } /* switch transfer type */
1067
1068 /* Queue the requests if the array is full. */
1069 if (cRequests == RT_ELEMENTS(apReqs))
1070 {
1071 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1072 cRequests = 0;
1073 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1074 ("Unexpected return code\n"));
1075 }
1076 }
1077
1078 if (cRequests)
1079 {
1080 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1081 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1082 ("Unexpected return code rc=%Rrc\n", rc));
1083 }
1084
1085 if (pTaskHead)
1086 {
1087 /* Add the rest of the tasks to the pending list */
1088 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
1089
1090 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
1091 && !pEndpoint->pFlushReq
1092 && !pAioMgr->fBwLimitReached))
1093 {
1094#if 0
1095 /*
1096 * The I/O manager has no room left for more requests
1097 * but there are still requests to process.
1098 * Create a new I/O manager and let it handle some endpoints.
1099 */
1100 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1101#else
1102 /* Grow the I/O manager */
1103 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1104#endif
1105 }
1106 }
1107
1108 /* Insufficient resources are not fatal. */
1109 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1110 rc = VINF_SUCCESS;
1111
1112 return rc;
1113}
1114
1115/**
1116 * Adds all pending requests for the given endpoint
1117 * until a flush request is encountered or there is no
1118 * request anymore.
1119 *
1120 * @returns VBox status code.
1121 * @param pAioMgr The async I/O manager for the endpoint
1122 * @param pEndpoint The endpoint to get the requests from.
1123 */
1124static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1125 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1126{
1127 int rc = VINF_SUCCESS;
1128 PPDMACTASKFILE pTasksHead = NULL;
1129
1130 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1131 ("Trying to process request lists of a non active endpoint!\n"));
1132
1133 Assert(!pEndpoint->pFlushReq);
1134
1135 /* Check the pending list first */
1136 if (pEndpoint->AioMgr.pReqsPendingHead)
1137 {
1138 LogFlow(("Queuing pending requests first\n"));
1139
1140 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1141 /*
1142 * Clear the list as the processing routine will insert them into the list
1143 * again if it gets a flush request.
1144 */
1145 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1146 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1147 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1148 AssertRC(rc);
1149 }
1150
1151 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1152 {
1153 /* Now the request queue. */
1154 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1155 if (pTasksHead)
1156 {
1157 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1158 AssertRC(rc);
1159 }
1160 }
1161
1162 return rc;
1163}
1164
1165static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1166{
1167 int rc = VINF_SUCCESS;
1168 bool fNotifyWaiter = false;
1169
1170 LogFlowFunc((": Enter\n"));
1171
1172 Assert(pAioMgr->fBlockingEventPending);
1173
1174 switch (pAioMgr->enmBlockingEvent)
1175 {
1176 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1177 {
1178 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1179 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1180
1181 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1182
1183 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1184 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1185 if (pAioMgr->pEndpointsHead)
1186 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1187 pAioMgr->pEndpointsHead = pEndpointNew;
1188
1189 /* Assign the completion point to this file. */
1190 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
1191 fNotifyWaiter = true;
1192 pAioMgr->cEndpoints++;
1193 break;
1194 }
1195 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1196 {
1197 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1198 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1199
1200 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1201 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1202 break;
1203 }
1204 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1205 {
1206 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
1207 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1208
1209 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1210 {
1211 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1212
1213 /* Make sure all tasks finished. Process the queues a last time first. */
1214 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1215 AssertRC(rc);
1216
1217 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1218 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1219 }
1220 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1221 && (!pEndpointClose->AioMgr.cRequestsActive))
1222 fNotifyWaiter = true;
1223 break;
1224 }
1225 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1226 {
1227 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1228 if (!pAioMgr->cRequestsActive)
1229 fNotifyWaiter = true;
1230 break;
1231 }
1232 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1233 {
1234 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1235 break;
1236 }
1237 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1238 {
1239 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1240 fNotifyWaiter = true;
1241 break;
1242 }
1243 default:
1244 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1245 }
1246
1247 if (fNotifyWaiter)
1248 {
1249 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1250 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1251
1252 /* Release the waiting thread. */
1253 LogFlow(("Signalling waiter\n"));
1254 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1255 AssertRC(rc);
1256 }
1257
1258 LogFlowFunc((": Leave\n"));
1259 return rc;
1260}
1261
1262/**
1263 * Checks all endpoints for pending events or new requests.
1264 *
1265 * @returns VBox status code.
1266 * @param pAioMgr The I/O manager handle.
1267 */
1268static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1269{
1270 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1271 int rc = VINF_SUCCESS;
1272 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1273
1274 pAioMgr->fBwLimitReached = false;
1275
1276 while (pEndpoint)
1277 {
1278 if (!pEndpoint->pFlushReq
1279 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1280 && !pEndpoint->AioMgr.fMoving)
1281 {
1282 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1283 if (RT_FAILURE(rc))
1284 return rc;
1285 }
1286 else if ( !pEndpoint->AioMgr.cRequestsActive
1287 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1288 {
1289 /* Reopen the file so that the new endpoint can reassociate with the file */
1290 RTFileClose(pEndpoint->File);
1291 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1292 AssertRC(rc);
1293
1294 if (pEndpoint->AioMgr.fMoving)
1295 {
1296 pEndpoint->AioMgr.fMoving = false;
1297 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1298 }
1299 else
1300 {
1301 Assert(pAioMgr->fBlockingEventPending);
1302 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1303
1304 /* Release the waiting thread. */
1305 LogFlow(("Signalling waiter\n"));
1306 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1307 AssertRC(rc);
1308 }
1309 }
1310
1311 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1312 }
1313
1314 return rc;
1315}
1316
1317/**
1318 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1319 */
1320static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1321{
1322 size_t cbTransfered = 0;
1323 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1324
1325 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1326}
1327
1328static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1329 int rcReq, size_t cbTransfered)
1330{
1331 int rc = VINF_SUCCESS;
1332 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1333 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1334 PPDMACTASKFILE pTasksWaiting;
1335
1336 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1337
1338 pEndpoint = pTask->pEndpoint;
1339
1340 pTask->hReq = NIL_RTFILEAIOREQ;
1341
1342 pAioMgr->cRequestsActive--;
1343 pEndpoint->AioMgr.cRequestsActive--;
1344 pEndpoint->AioMgr.cReqsProcessed++;
1345
1346 /*
1347 * It is possible that the request failed on Linux with kernels < 2.6.23
1348 * if the passed buffer was allocated with remap_pfn_range or if the file
1349 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1350 * The endpoint will be migrated to a failsafe manager in case a request fails.
1351 */
1352 if (RT_FAILURE(rcReq))
1353 {
1354 /* Free bounce buffers and the IPRT request. */
1355 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1356
1357 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1358 {
1359 LogFlow(("Async flushes are not supported for this endpoint, disabling\n"));
1360 pEndpoint->fAsyncFlushSupported = false;
1361 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1362 /* The other method will take over now. */
1363 }
1364 else
1365 {
1366 /* Free the lock and process pending tasks if neccessary */
1367 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1368 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1369 AssertRC(rc);
1370
1371 if (pTask->cbBounceBuffer)
1372 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1373
1374 /*
1375 * Fatal errors are reported to the guest and non-fatal errors
1376 * will cause a migration to the failsafe manager in the hope
1377 * that the error disappears.
1378 */
1379 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
1380 {
1381 /* Queue the request on the pending list. */
1382 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1383 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1384
1385 /* Create a new failsafe manager if neccessary. */
1386 if (!pEndpoint->AioMgr.fMoving)
1387 {
1388 PPDMACEPFILEMGR pAioMgrFailsafe;
1389
1390 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1391 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1392
1393 pEndpoint->AioMgr.fMoving = true;
1394
1395 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1396 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1397 AssertRC(rc);
1398
1399 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1400
1401 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1402 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1403 }
1404
1405 /* If this was the last request for the endpoint migrate it to the new manager. */
1406 if (!pEndpoint->AioMgr.cRequestsActive)
1407 {
1408 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1409 Assert(!fReqsPending);
1410
1411 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1412 AssertRC(rc);
1413 }
1414 }
1415 else
1416 {
1417 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1418 pdmacFileTaskFree(pEndpoint, pTask);
1419 }
1420 }
1421 }
1422 else
1423 {
1424 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1425 {
1426 /* Clear pending flush */
1427 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1428 pEndpoint->pFlushReq = NULL;
1429 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1430
1431 /* Call completion callback */
1432 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1433 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1434 pdmacFileTaskFree(pEndpoint, pTask);
1435 }
1436 else
1437 {
1438 /*
1439 * Restart an incomplete transfer.
1440 * This usually means that the request will return an error now
1441 * but to get the cause of the error (disk full, file too big, I/O error, ...)
1442 * the transfer needs to be continued.
1443 */
1444 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1445 || ( pTask->cbBounceBuffer
1446 && cbTransfered < pTask->cbBounceBuffer)))
1447 {
1448 RTFOFF offStart;
1449 size_t cbToTransfer;
1450 uint8_t *pbBuf = NULL;
1451
1452 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transfered)\n",
1453 pTask, cbTransfered));
1454 Assert(cbTransfered % 512 == 0);
1455
1456 if (pTask->cbBounceBuffer)
1457 {
1458 AssertPtr(pTask->pvBounceBuffer);
1459 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1460 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1461 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1462 }
1463 else
1464 {
1465 Assert(!pTask->pvBounceBuffer);
1466 offStart = pTask->Off + cbTransfered;
1467 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1468 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1469 }
1470
1471 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1472 {
1473 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File, offStart,
1474 pbBuf, cbToTransfer, pTask);
1475 }
1476 else
1477 {
1478 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1479 ("Invalid transfer type\n"));
1480 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File, offStart,
1481 pbBuf, cbToTransfer, pTask);
1482 }
1483
1484 AssertRC(rc);
1485 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1486 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1487 ("Unexpected return code rc=%Rrc\n", rc));
1488 }
1489 else if (pTask->fPrefetch)
1490 {
1491 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1492 Assert(pTask->cbBounceBuffer);
1493
1494 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1495 pTask->DataSeg.pvSeg,
1496 pTask->DataSeg.cbSeg);
1497
1498 /* Write it now. */
1499 pTask->fPrefetch = false;
1500 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1501 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1502
1503 /* Grow the file if needed. */
1504 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1505 {
1506 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1507 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1508 }
1509
1510 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1511 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1512 AssertRC(rc);
1513 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1514 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1515 ("Unexpected return code rc=%Rrc\n", rc));
1516 }
1517 else
1518 {
1519 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1520 {
1521 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1522 memcpy(pTask->DataSeg.pvSeg,
1523 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1524 pTask->DataSeg.cbSeg);
1525
1526 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1527 }
1528
1529 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1530
1531 /* Free the lock and process pending tasks if neccessary */
1532 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1533 if (pTasksWaiting)
1534 {
1535 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1536 AssertRC(rc);
1537 }
1538
1539 /* Call completion callback */
1540 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1541 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1542 pdmacFileTaskFree(pEndpoint, pTask);
1543
1544 /*
1545 * If there is no request left on the endpoint but a flush request is set
1546 * it completed now and we notify the owner.
1547 * Furthermore we look for new requests and continue.
1548 */
1549 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1550 {
1551 /* Call completion callback */
1552 pTask = pEndpoint->pFlushReq;
1553 pEndpoint->pFlushReq = NULL;
1554
1555 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1556
1557 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1558 pdmacFileTaskFree(pEndpoint, pTask);
1559 }
1560 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1561 {
1562 /* If the endpoint is about to be migrated do it now. */
1563 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1564 Assert(!fReqsPending);
1565
1566 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1567 AssertRC(rc);
1568 }
1569 }
1570 } /* Not a flush request */
1571 } /* request completed successfully */
1572}
1573
/**
 * Helper macro for checking for error codes.
 *
 * If the status code indicates a failure it is passed to
 * pdmacFileAioMgrNormalErrorHandler() and the enclosing function returns the
 * status code of the handler.
 *
 * The body is wrapped into do { } while (0) to make the macro a single
 * statement which can be used safely in unbraced if/else constructs.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1581
1582/**
1583 * The normal I/O manager using the RTFileAio* API
1584 *
1585 * @returns VBox status code.
1586 * @param ThreadSelf Handle of the thread.
1587 * @param pvUser Opaque user data.
1588 */
1589int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1590{
1591 int rc = VINF_SUCCESS;
1592 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1593 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1594
1595 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1596 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING)
1597 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1598 {
1599 if (!pAioMgr->cRequestsActive)
1600 {
1601 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1602 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1603 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1604 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1605 AssertRC(rc);
1606
1607 LogFlow(("Got woken up\n"));
1608 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1609 }
1610
1611 /* Check for an external blocking event first. */
1612 if (pAioMgr->fBlockingEventPending)
1613 {
1614 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1615 CHECK_RC(pAioMgr, rc);
1616 }
1617
1618 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1619 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
1620 {
1621 /* We got woken up because an endpoint issued new requests. Queue them. */
1622 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1623 CHECK_RC(pAioMgr, rc);
1624
1625 while ( pAioMgr->cRequestsActive
1626 || pAioMgr->fBwLimitReached)
1627 {
1628 if (pAioMgr->cRequestsActive)
1629 {
1630 RTFILEAIOREQ apReqs[20];
1631 uint32_t cReqsCompleted = 0;
1632 size_t cReqsWait;
1633
1634 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1635 cReqsWait = RT_ELEMENTS(apReqs);
1636 else
1637 cReqsWait = pAioMgr->cRequestsActive;
1638
1639 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
1640
1641 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1642 1,
1643 RT_INDEFINITE_WAIT, apReqs,
1644 cReqsWait, &cReqsCompleted);
1645 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1646 CHECK_RC(pAioMgr, rc);
1647
1648 LogFlow(("%d tasks completed\n", cReqsCompleted));
1649
1650 for (uint32_t i = 0; i < cReqsCompleted; i++)
1651 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1652
1653 /* Check for an external blocking event before we go to sleep again. */
1654 if (pAioMgr->fBlockingEventPending)
1655 {
1656 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1657 CHECK_RC(pAioMgr, rc);
1658 }
1659
1660 /* Update load statistics. */
1661 uint64_t uMillisCurr = RTTimeMilliTS();
1662 if (uMillisCurr > uMillisEnd)
1663 {
1664 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1665
1666 /* Calculate timespan. */
1667 uMillisCurr -= uMillisEnd;
1668
1669 while (pEndpointCurr)
1670 {
1671 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1672 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1673 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1674 }
1675
1676 /* Set new update interval */
1677 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1678 }
1679 }
1680 else
1681 {
1682 /*
1683 * Bandwidth limit reached for all endpoints.
1684 * Yield and wait until we have enough resources again.
1685 */
1686 RTThreadYield();
1687 }
1688
1689 /* Check endpoints for new requests. */
1690 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1691 {
1692 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1693 CHECK_RC(pAioMgr, rc);
1694 }
1695 } /* while requests are active. */
1696
1697 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1698 {
1699 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1700 AssertRC(rc);
1701 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1702
1703 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1704 CHECK_RC(pAioMgr, rc);
1705 }
1706 } /* if still running */
1707 } /* while running */
1708
1709 LogFlowFunc(("rc=%Rrc\n", rc));
1710 return rc;
1711}
1712
1713#undef CHECK_RC
1714
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact · Privacy policy · Terms of Use