VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/PDMAsyncCompletionFileNormal.cpp@50653

Last change on this file since 50653 was 45678, checked in by vboxsync, 11 years ago

Runtime/aio: Add flags parameter to RTFileAioCtxCreate

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 65.7 KB
[21496]1/* $Id: PDMAsyncCompletionFileNormal.cpp 45678 2013-04-23 11:28:41Z vboxsync $ */
2/** @file
[39034]3 * PDM Async I/O - Async File I/O manager.
[21496]4 */
5
6/*
[44528]7 * Copyright (C) 2006-2013 Oracle Corporation
[21496]8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
[39034]17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
[21496]21#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
22#include <iprt/types.h>
[22757]23#include <iprt/asm.h>
[21496]24#include <iprt/file.h>
25#include <iprt/mem.h>
26#include <iprt/string.h>
[23956]27#include <iprt/assert.h>
[21496]28#include <VBox/log.h>
29
30#include "PDMAsyncCompletionFileInternal.h"
31
[22309]32/** The update period for the I/O load statistics in ms. */
[39034]33#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
[22309]34/** Maximum number of requests a manager will handle. */
[39034]35#define PDMACEPFILEMGR_REQS_STEP 512
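 /* Note: an I/O manager starts with PDMACEPFILEMGR_REQS_STEP request slots and
 * pdmacFileAioMgrNormalGrow() below grows it by the same step whenever it
 * runs out of room. */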
[22309]36
[39034]37
[26338]38/*******************************************************************************
39* Internal functions *
40*******************************************************************************/
41static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
42 PPDMACEPFILEMGR pAioMgr,
43 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
44
[28224]45static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
46 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
47 PPDMACFILERANGELOCK pRangeLock);
[26338]48
[32427]49static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
50 int rc, size_t cbTransfered);
51
[39034]52
[21496]53int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
54{
[28719]55 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
[28224]56
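 /* Ask for an effectively unlimited context first; a host which enforces a
 * per-context request limit fails this with VERR_OUT_OF_RANGE, in which
 * case we retry with our own maximum. */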
[45678]57 int rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
[21496]58 if (rc == VERR_OUT_OF_RANGE)
[45678]59 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
[21496]60
[22309]61 if (RT_SUCCESS(rc))
62 {
63 /* Initialize request handle array. */
[35205]64 pAioMgr->iFreeEntry = 0;
65 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
66 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
[22309]67
68 if (pAioMgr->pahReqsFree)
69 {
[27280]70 /* Create the range lock memcache. */
71 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
72 0, UINT32_MAX, NULL, NULL, NULL, 0);
73 if (RT_SUCCESS(rc))
74 return VINF_SUCCESS;
75
76 RTMemFree(pAioMgr->pahReqsFree);
[22309]77 }
78 else
79 {
80 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
81 rc = VERR_NO_MEMORY;
82 }
83 }
84
[21496]85 return rc;
86}
87
88void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
89{
90 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
[22309]91
[28719]92 while (pAioMgr->iFreeEntry > 0)
[22309]93 {
[28719]94 pAioMgr->iFreeEntry--;
95 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
96 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
[22309]97 }
98
99 RTMemFree(pAioMgr->pahReqsFree);
[27280]100 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
[21496]101}
102
[39070]103#if 0 /* currently unused */
[21496]104/**
[22757]105 * Sorts the endpoint list in descending order of requests per second using insertion sort.
106 */
107static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
108{
109 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
110
111 pEpPrev = pAioMgr->pEndpointsHead;
112 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
113
114 while (pEpCurr)
115 {
116 /* Remember the next element to sort because the list might change. */
117 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
118
119 /* Unlink the current element from the list. */
120 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
121 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
122
123 if (pPrev)
124 pPrev->AioMgr.pEndpointNext = pNext;
125 else
126 pAioMgr->pEndpointsHead = pNext;
127
128 if (pNext)
129 pNext->AioMgr.pEndpointPrev = pPrev;
130
131 /* Walk back until we reach the position where the current endpoint belongs. */
132 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
133 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
134
135 /* Link the endpoint into the list. */
136 if (pEpPrev)
137 pNext = pEpPrev->AioMgr.pEndpointNext;
138 else
139 pNext = pAioMgr->pEndpointsHead;
140
141 pEpCurr->AioMgr.pEndpointNext = pNext;
142 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
[24355]143
144 if (pNext)
145 pNext->AioMgr.pEndpointPrev = pEpCurr;
146
[22757]147 if (pEpPrev)
148 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
149 else
150 pAioMgr->pEndpointsHead = pEpCurr;
151
152 pEpCurr = pEpNextToSort;
153 }
154
155#ifdef DEBUG
[33540]156 /* Validate sorting algorithm */
[22757]157 unsigned cEndpoints = 0;
158 pEpCurr = pAioMgr->pEndpointsHead;
159
160 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
161 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
162
163 while (pEpCurr)
164 {
165 cEndpoints++;
166
167 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
168 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
169
170 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
171 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
172
173 pEpCurr = pNext;
174 }
175
176 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
177
178#endif
179}
[39070]180#endif /* currently unused */
[22757]181
182/**
183 * Removes an endpoint from the currently assigned manager.
184 *
185 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
186 * FALSE otherwise.
187 * @param pEndpointRemove The endpoint to remove.
188 */
189static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
190{
191 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
192 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
193 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
194
195 pAioMgr->cEndpoints--;
196
197 if (pPrev)
198 pPrev->AioMgr.pEndpointNext = pNext;
199 else
200 pAioMgr->pEndpointsHead = pNext;
201
202 if (pNext)
203 pNext->AioMgr.pEndpointPrev = pPrev;
204
205 /* Make sure that there is no request pending on this manager for the endpoint. */
206 if (!pEndpointRemove->AioMgr.cRequestsActive)
207 {
208 Assert(!pEndpointRemove->pFlushReq);
209
[33540]210 /* Reopen the file so that it can be re-associated with a different I/O manager's context. */
[37596]211 RTFileClose(pEndpointRemove->hFile);
212 int rc = RTFileOpen(&pEndpointRemove->hFile, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
[22757]213 AssertRC(rc);
214 return false;
215 }
216
217 return true;
218}
219
[39070]220#if 0 /* currently unused */
221
[26479]222static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
223{
224 /* Balancing doesn't make sense with only one endpoint. */
225 if (pAioMgr->cEndpoints == 1)
226 return false;
227
228 /* It doesn't make sense to move endpoints if only one produces the whole load. */
229 unsigned cEndpointsWithLoad = 0;
230
231 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
232
233 while (pCurr)
234 {
235 if (pCurr->AioMgr.cReqsPerSec)
236 cEndpointsWithLoad++;
237
238 pCurr = pCurr->AioMgr.pEndpointNext;
239 }
240
241 return (cEndpointsWithLoad > 1);
242}
243
[22757]244/**
245 * Creates a new I/O manager and spreads the I/O load of the endpoints
246 * between the given I/O manager and the new one.
247 *
248 * @returns nothing.
249 * @param pAioMgr The I/O manager with high I/O load.
250 */
251static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
252{
[26479]253 /*
254 * Check if balancing would improve the situation.
255 */
256 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
[22757]257 {
[39034]258 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
259 PPDMACEPFILEMGR pAioMgrNew = NULL;
[27299]260
[39034]261 int rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
[22757]262 if (RT_SUCCESS(rc))
263 {
264 /* We will sort the list by request count per second. */
265 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
266
267 /* Now move some endpoints to the new manager. */
268 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
269 unsigned cReqsOther = 0;
270 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
271
272 while (pCurr)
273 {
274 if (cReqsHere <= cReqsOther)
275 {
276 /*
277 * The other manager has more requests to handle now.
278 * We will keep the current endpoint.
279 */
280 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
281 cReqsHere += pCurr->AioMgr.cReqsPerSec;
282 pCurr = pCurr->AioMgr.pEndpointNext;
283 }
284 else
285 {
286 /* Move the endpoint to the new manager. */
[23603]287 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
[22757]288 cReqsOther += pCurr->AioMgr.cReqsPerSec;
289
290 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
291
292 pCurr = pCurr->AioMgr.pEndpointNext;
293
294 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
295
296 if (fReqsPending)
297 {
298 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
299 pMove->AioMgr.fMoving = true;
300 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
301 }
302 else
303 {
304 pMove->AioMgr.fMoving = false;
305 pMove->AioMgr.pAioMgrDst = NULL;
306 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
307 }
308 }
309 }
310 }
311 else
312 {
313 /* Don't process further but leave a log entry about reduced performance. */
314 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
315 }
316 }
[26479]317 else
318 Log(("AIOMgr: Load balancing would not improve anything\n"));
[22757]319}
320
[39070]321#endif /* unused */
322
[22757]323/**
[28719]324 * Increase the maximum number of active requests for the given I/O manager.
325 *
326 * @returns VBox status code.
327 * @param pAioMgr The I/O manager to grow.
328 */
329static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
330{
331 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
332
333 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
334 && !pAioMgr->cRequestsActive,
335 ("Invalid state of the I/O manager\n"));
336
[29029]337#ifdef RT_OS_WINDOWS
[28719]338 /*
339 * Reopen the files of all assigned endpoints first so we can assign them to the new
340 * I/O context.
341 */
342 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
343
344 while (pCurr)
345 {
[37596]346 RTFileClose(pCurr->hFile);
[39035]347 int rc2 = RTFileOpen(&pCurr->hFile, pCurr->Core.pszUri, pCurr->fFlags); AssertRC(rc2);
[28719]348
349 pCurr = pCurr->AioMgr.pEndpointNext;
350 }
[29029]351#endif
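 /* The reopen above is assumed to be needed because a Win32 file handle can
 * only ever be associated with one I/O completion port; a fresh handle is
 * required before switching to the new context. */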
[28719]352
353 /* Create the new bigger context. */
354 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
355
[39034]356 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
[45678]357 int rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS, 0 /* fFlags */);
[28719]358 if (rc == VERR_OUT_OF_RANGE)
[45678]359 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax, 0 /* fFlags */);
[28719]360
361 if (RT_SUCCESS(rc))
362 {
363 /* Close the old context. */
364 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
[39034]365 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
[28719]366
367 pAioMgr->hAioCtx = hAioCtxNew;
368
369 /* Create a new I/O task handle array */
370 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
371 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
372
373 if (pahReqNew)
374 {
375 /* Copy the cached request handles. */
376 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
377 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
378
379 RTMemFree(pAioMgr->pahReqsFree);
380 pAioMgr->pahReqsFree = pahReqNew;
381 pAioMgr->cReqEntries = cReqEntriesNew;
382 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
383 pAioMgr->cRequestsActiveMax));
384 }
385 else
386 rc = VERR_NO_MEMORY;
387 }
388
[29029]389#ifdef RT_OS_WINDOWS
[28719]390 /* Assign the file to the new context. */
391 pCurr = pAioMgr->pEndpointsHead;
392 while (pCurr)
393 {
[37596]394 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->hFile);
[39034]395 AssertRC(rc); /** @todo r=bird: Ignoring error code, will propagate. */
[28719]396
397 pCurr = pCurr->AioMgr.pEndpointNext;
398 }
[29029]399#endif
[28719]400
401 if (RT_FAILURE(rc))
402 {
403 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
404 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
405 }
406
407 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
408 LogFlowFunc(("returns rc=%Rrc\n", rc));
409
410 return rc;
411}
412
413/**
[29154]414 * Checks if a given status code is fatal.
415 * Non-fatal errors can be fixed by migrating the endpoint to a
416 * failsafe manager.
417 *
418 * @returns true if the error is fatal and migrating to a failsafe manager doesn't help.
419 * false if the error can be fixed by a migration (e.g. an image on an NFS disk).
420 * @param rcReq The status code to check.
421 */
422DECLINLINE(bool) pdmacFileAioMgrNormalRcIsFatal(int rcReq)
423{
[39034]424 return rcReq == VERR_DEV_IO_ERROR
425 || rcReq == VERR_FILE_IO_ERROR
426 || rcReq == VERR_DISK_IO_ERROR
427 || rcReq == VERR_DISK_FULL
428 || rcReq == VERR_FILE_TOO_BIG;
[29154]429}
430
431/**
[21496]432 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
433 *
434 * @returns VBox status code
[33540]435 * @param pAioMgr The I/O manager the error occurred on.
[21496]436 * @param rc The error code.
437 */
[22309]438static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
[21496]439{
[22309]440 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
441 pAioMgr, rc));
442 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
443 LogRel(("AIOMgr: Please contact the product vendor\n"));
444
445 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
446
447 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
[27299]448 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
[22309]449
[21496]450 AssertMsgFailed(("Implement\n"));
451 return VINF_SUCCESS;
452}
453
[23404]454/**
455 * Put a list of tasks in the pending request list of an endpoint.
456 */
457DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
458{
459 /* Add the rest of the tasks to the pending list */
460 if (!pEndpoint->AioMgr.pReqsPendingHead)
461 {
462 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
463 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
464 }
465 else
466 {
467 Assert(pEndpoint->AioMgr.pReqsPendingTail);
468 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
469 }
470
471 /* Update the tail. */
472 while (pTaskHead->pNext)
473 pTaskHead = pTaskHead->pNext;
474
475 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
[28947]476 pTaskHead->pNext = NULL;
[23404]477}
478
479/**
480 * Put one task in the pending request list of an endpoint.
481 */
482DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
483{
484 /* Add the rest of the tasks to the pending list */
485 if (!pEndpoint->AioMgr.pReqsPendingHead)
486 {
487 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
488 pEndpoint->AioMgr.pReqsPendingHead = pTask;
489 }
490 else
491 {
492 Assert(pEndpoint->AioMgr.pReqsPendingTail);
493 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
494 }
495
496 pEndpoint->AioMgr.pReqsPendingTail = pTask;
[28947]497 pTask->pNext = NULL;
[23404]498}
499
500/**
[28719]501 * Allocates an async I/O request.
502 *
503 * @returns Handle to the request.
504 * @param pAioMgr The I/O manager.
505 */
506static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
507{
508 /* Get a request handle. */
[39034]509 RTFILEAIOREQ hReq;
[28719]510 if (pAioMgr->iFreeEntry > 0)
511 {
512 pAioMgr->iFreeEntry--;
513 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
514 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
515 Assert(hReq != NIL_RTFILEAIOREQ);
516 }
517 else
518 {
519 int rc = RTFileAioReqCreate(&hReq);
[39034]520 AssertRCReturn(rc, NIL_RTFILEAIOREQ);
[28719]521 }
522
523 return hReq;
524}
525
526/**
527 * Frees an async I/O request handle.
528 *
529 * @returns nothing.
530 * @param pAioMgr The I/O manager.
531 * @param hReq The I/O request handle to free.
532 */
533static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
534{
535 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
536 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
537
538 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
539 pAioMgr->iFreeEntry++;
540}
541
542/**
[23404]543 * Wrapper around RTFileAioCtxSubmit() which also does the error handling.
544 */
545static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
546 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
[26526]547 PRTFILEAIOREQ pahReqs, unsigned cReqs)
[23404]548{
549 pAioMgr->cRequestsActive += cReqs;
550 pEndpoint->AioMgr.cRequestsActive += cReqs;
551
[23603]552 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
553 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
554
[39034]555 int rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
[23404]556 if (RT_FAILURE(rc))
557 {
[32427]558 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
[23404]559 {
[32427]560 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
[23404]561
[32427]562 /* Append any tasks which were not submitted to the waiting list. */
563 for (size_t i = 0; i < cReqs; i++)
[23404]564 {
[32427]565 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
[23404]566
[32427]567 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
[23404]568 {
[32427]569 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
570
571 Assert(pTask->hReq == pahReqs[i]);
572 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
573 pAioMgr->cRequestsActive--;
574 pEndpoint->AioMgr.cRequestsActive--;
575
576 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
[29121]577 {
578 /* Clear the pending flush */
579 Assert(pEndpoint->pFlushReq == pTask);
580 pEndpoint->pFlushReq = NULL;
581 }
582 }
[32427]583 }
[23404]584
[32427]585 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
[28224]586
[32427]587 /* Print an entry in the release log */
588 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
589 {
590 pEpClass->fOutOfResourcesWarningPrinted = true;
591 LogRel(("AIOMgr: Host limits number of active IO requests to %u. Expect a performance impact.\n",
592 pAioMgr->cRequestsActive));
593 }
[29121]594
[32427]595 LogFlow(("Removed requests. I/O manager has a total of %u active requests now\n", pAioMgr->cRequestsActive));
596 LogFlow(("Endpoint has a total of %u active requests now\n", pEndpoint->AioMgr.cRequestsActive));
597 rc = VINF_SUCCESS;
598 }
599 else /* Another kind of error happened (full disk, ...) */
600 {
601 /* An error happened. Find out which one caused the error and resubmit all other tasks. */
602 for (size_t i = 0; i < cReqs; i++)
603 {
604 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
605
606 if (rcReq == VERR_FILE_AIO_NOT_SUBMITTED)
[29121]607 {
[32427]608 /* We call ourselves again to do any error handling which might come up now. */
609 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &pahReqs[i], 1);
610 AssertRC(rc);
[23404]611 }
[32427]612 else if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
613 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, pahReqs[i], rcReq, 0);
[29121]614 }
[28224]615
[32427]616
617 if ( pEndpoint->pFlushReq
618 && !pAioMgr->cRequestsActive
619 && !pEndpoint->fAsyncFlushSupported)
[29121]620 {
[29124]621 /*
622 * Complete a pending flush if we don't have requests enqueued and the host doesn't support
623 * the async flush API.
624 * This happens only when we have just noticed that async flushes are not supported
625 * and the only active request was a flush.
626 */
627 PPDMACTASKFILE pFlush = pEndpoint->pFlushReq;
628 pEndpoint->pFlushReq = NULL;
629 pFlush->pfnCompleted(pFlush, pFlush->pvUser, VINF_SUCCESS);
630 pdmacFileTaskFree(pEndpoint, pFlush);
631 }
[29121]632 }
[23404]633 }
634
[32427]635 return VINF_SUCCESS;
[23404]636}
637
[26338]638static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
639 RTFOFF offStart, size_t cbRange,
[44397]640 PPDMACTASKFILE pTask, bool fAlignedReq)
[26338]641{
642 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
643 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
644 ("Invalid task type %d\n", pTask->enmTransferType));
645
[44397]646 /*
647 * If there is no unaligned request active and the current one is aligned
648 * just pass it through.
649 */
650 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
651 return false;
652
[39034]653 PPDMACFILERANGELOCK pRangeLock;
[28045]654 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
[26338]655 if (!pRangeLock)
656 {
657 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
658 /* Check if we intersect with the range. */
659 if ( !pRangeLock
660 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
661 && (pRangeLock->Core.KeyLast) >= offStart))
662 {
663 pRangeLock = NULL; /* False alarm */
664 }
665 }
666
667 /* Check whether we have one of the situations explained below */
[44397]668 if (pRangeLock)
[26338]669 {
670 /* Add to the list. */
671 pTask->pNext = NULL;
672
673 if (!pRangeLock->pWaitingTasksHead)
674 {
675 Assert(!pRangeLock->pWaitingTasksTail);
676 pRangeLock->pWaitingTasksHead = pTask;
677 pRangeLock->pWaitingTasksTail = pTask;
678 }
679 else
680 {
681 AssertPtr(pRangeLock->pWaitingTasksTail);
682 pRangeLock->pWaitingTasksTail->pNext = pTask;
683 pRangeLock->pWaitingTasksTail = pTask;
684 }
685 return true;
686 }
687
688 return false;
689}
690
[27280]691static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
692 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
[26338]693 RTFOFF offStart, size_t cbRange,
[44397]694 PPDMACTASKFILE pTask, bool fAlignedReq)
[26338]695{
[43725]696 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p offStart=%RTfoff cbRange=%zu pTask=%#p\n",
697 pAioMgr, pEndpoint, offStart, cbRange, pTask));
698
[44397]699 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask, fAlignedReq),
[26338]700 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
701 offStart, cbRange));
702
[44397]703 /*
704 * If there is no unaligned request active and the current one is aligned
705 * just don't use the lock.
706 */
707 if (!pEndpoint->AioMgr.cLockedReqsActive && fAlignedReq)
708 {
709 pTask->pRangeLock = NULL;
710 return VINF_SUCCESS;
711 }
712
[27280]713 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
[26338]714 if (!pRangeLock)
715 return VERR_NO_MEMORY;
716
717 /* Init the lock. */
[27336]718 pRangeLock->Core.Key = offStart;
719 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
720 pRangeLock->cRefs = 1;
721 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
722 pRangeLock->pWaitingTasksHead = NULL;
723 pRangeLock->pWaitingTasksTail = NULL;
[26338]724
725 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
726 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
727
728 /* Let the task point to its lock. */
729 pTask->pRangeLock = pRangeLock;
[44397]730 pEndpoint->AioMgr.cLockedReqsActive++;
[26338]731
732 return VINF_SUCCESS;
733}
734
[28224]735static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
736 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
737 PPDMACFILERANGELOCK pRangeLock)
[26338]738{
739 PPDMACTASKFILE pTasksWaitingHead;
740
[43725]741 LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p pRangeLock=%#p\n",
742 pAioMgr, pEndpoint, pRangeLock));
743
[44397]744 /* pRangeLock can be NULL if there was no lock associated with the task. */
745 if (!pRangeLock)
746 return NULL;
747
[26338]748 Assert(pRangeLock->cRefs == 1);
749
750 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
751 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
[27336]752 pRangeLock->pWaitingTasksHead = NULL;
753 pRangeLock->pWaitingTasksTail = NULL;
[27280]754 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
[44397]755 pEndpoint->AioMgr.cLockedReqsActive--;
[26338]756
[28224]757 return pTasksWaitingHead;
[26338]758}
759
[27299]760static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
761 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
762 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
[26338]763{
[27299]764 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
[39034]765 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
766 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
767 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
[26338]768
[27299]769 pTask->fPrefetch = false;
[28317]770 pTask->cbBounceBuffer = 0;
[27299]771
[26338]772 /*
[27299]773 * Before we start to set up the request we have to check whether there is a task
774 * already active whose range intersects with ours. We have to defer execution
775 * of this task in two cases:
776 * - The pending task is a write and the current is either read or write
777 * - The pending task is a read and the current task is a write task.
778 *
779 * To check whether a range is currently "locked" we use the AVL tree where every pending task
780 * is stored by its file offset range. The current task will be queued behind the
781 * active one and executed when it completes. (The method below which checks
782 * whether a range is already in use appends the task to the lock's waiting list.)
783 *
[33540]784 * This is necessary because of the requirement to align all requests to a 512 byte boundary
[27299]785 * which is enforced by the host OS (Linux and Windows atm). It is possible that
786 * we have to process unaligned tasks and need to align them using bounce buffers.
787 * While the data is fetched from the file another request might arrive writing to
788 * the same range. This will result in data corruption if both are executed concurrently.
789 */
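 /* Buffered endpoints always pass fAlignedReq = true here, so a range lock is
 * only taken while other locked requests are in flight; for a purely buffered
 * endpoint that appears to mean no lock is ever created. */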
[39034]790 int rc = VINF_SUCCESS;
[44397]791 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask,
792 true /* fAlignedReq */);
[27299]793 if (!fLocked)
794 {
795 /* Get a request handle. */
[39034]796 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
[27299]797 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
798
799 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
800 {
801 /* Grow the file if needed. */
802 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
803 {
804 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
[37596]805 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
[27299]806 }
807
[37596]808 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
[27299]809 pTask->Off, pTask->DataSeg.pvSeg,
810 pTask->DataSeg.cbSeg, pTask);
811 }
812 else
[37596]813 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
[27299]814 pTask->Off, pTask->DataSeg.pvSeg,
815 pTask->DataSeg.cbSeg, pTask);
816 AssertRC(rc);
817
818 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
819 pTask->DataSeg.cbSeg,
[44397]820 pTask, true /* fAlignedReq */);
[27299]821
822 if (RT_SUCCESS(rc))
[32466]823 {
824 pTask->hReq = hReq;
[27299]825 *phReq = hReq;
[32466]826 }
[27299]827 }
828 else
829 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
830
831 return rc;
832}
833
834static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
835 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
836 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
837{
838 /*
[26338]839 * Check if the alignment requirements are met.
840 * Offset, transfer size and buffer address
841 * need to be on a 512 byte boundary.
842 */
843 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
844 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
845 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
[44397]846 bool fAlignedReq = cbToTransfer == pTask->DataSeg.cbSeg
847 && offStart == pTask->Off;
[26338]848
849 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
[27299]850 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
851 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
852 offStart, cbToTransfer, pEndpoint->cbFile));
[26338]853
854 pTask->fPrefetch = false;
855
856 /*
857 * Before we start to set up the request we have to check whether there is a task
858 * already active whose range intersects with ours. We have to defer execution
859 * of this task in two cases:
860 * - The pending task is a write and the current is either read or write
861 * - The pending task is a read and the current task is a write task.
862 *
863 * To check whether a range is currently "locked" we use the AVL tree where every pending task
864 * is stored by its file offset range. The current task will be queued behind the
865 * active one and executed when it completes. (The method below which checks
866 * whether a range is already in use appends the task to the lock's waiting list.)
867 *
[33540]868 * This is necessary because of the requirement to align all requests to a 512 byte boundary
[26338]869 * which is enforced by the host OS (Linux and Windows atm). It is possible that
870 * we have to process unaligned tasks and need to align them using bounce buffers.
[27280]871 * While the data is fetched from the file another request might arrive writing to
[26338]872 * the same range. This will result in data corruption if both are executed concurrently.
873 */
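 /* Example with made-up numbers: a 100 byte write at offset 700 is expanded to
 * the aligned range [512..1023] and handled read-modify-write via a bounce
 * buffer; a second task touching that range meanwhile is parked on the range
 * lock and resubmitted by pdmacFileAioMgrNormalRangeLockFree(). */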
[39034]874 int rc = VINF_SUCCESS;
[44397]875 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
[26338]876 if (!fLocked)
877 {
[39034]878 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
879 void *pvBuf = pTask->DataSeg.pvSeg;
880
[27299]881 /* Get a request handle. */
[39034]882 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
[27299]883 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
884
[44397]885 if ( !fAlignedReq
[26338]886 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
887 {
888 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
889 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
890
891 /* Create bounce buffer. */
[28317]892 pTask->cbBounceBuffer = cbToTransfer;
[26338]893
894 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
895 pTask->Off, offStart));
[28317]896 pTask->offBounceBuffer = pTask->Off - offStart;
[26338]897
898 /** @todo: I think we need something like a RTMemAllocAligned method here.
899 * Current assumption is that the maximum alignment is 4096 bytes
900 * (GPT disk on Windows)
901 * so we can use RTMemPageAlloc here.
902 */
903 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
904 if (RT_LIKELY(pTask->pvBounceBuffer))
905 {
906 pvBuf = pTask->pvBounceBuffer;
907
908 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
909 {
910 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
911 || RT_UNLIKELY(offStart != pTask->Off))
912 {
913 /* We have to fill the buffer first before we can update the data. */
914 LogFlow(("Prefetching data for task %#p\n", pTask));
915 pTask->fPrefetch = true;
916 enmTransferType = PDMACTASKFILETRANSFER_READ;
917 }
918 else
919 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
920 }
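 /* The prefetch flag turns this write into a read-modify-write: the aligned
 * range is read into the bounce buffer first and the completion path
 * (pdmacFileAioMgrNormalReqCompleteRc) copies the caller's data in before
 * issuing the real write. */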
921 }
922 else
923 rc = VERR_NO_MEMORY;
924 }
925 else
[28317]926 pTask->cbBounceBuffer = 0;
[26338]927
928 if (RT_SUCCESS(rc))
929 {
930 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
931 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
932
933 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
934 {
935 /* Grow the file if needed. */
936 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
937 {
938 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
[37596]939 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
[26338]940 }
941
[37596]942 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
[26338]943 offStart, pvBuf, cbToTransfer, pTask);
944 }
945 else
[37596]946 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile,
[26338]947 offStart, pvBuf, cbToTransfer, pTask);
948 AssertRC(rc);
949
[44397]950 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask, fAlignedReq);
[26338]951 if (RT_SUCCESS(rc))
[32466]952 {
953 pTask->hReq = hReq;
[26338]954 *phReq = hReq;
[32466]955 }
[26338]956 else
957 {
958 /* Cleanup */
[28317]959 if (pTask->cbBounceBuffer)
960 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
[26338]961 }
962 }
963 }
964 else
965 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
966
967 return rc;
968}
969
[22309]970static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
[21496]971 PPDMACEPFILEMGR pAioMgr,
972 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
973{
974 RTFILEAIOREQ apReqs[20];
975 unsigned cRequests = 0;
[39034]976 int rc = VINF_SUCCESS;
[21496]977
[22309]978 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
979 ("Trying to process request lists of a non active endpoint!\n"));
980
[21496]981 /* Go through the list and queue the requests until we get a flush request */
[23603]982 while ( pTaskHead
983 && !pEndpoint->pFlushReq
[28224]984 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
[23603]985 && RT_SUCCESS(rc))
[21496]986 {
[33218]987 RTMSINTERVAL msWhenNext;
[22309]988 PPDMACTASKFILE pCurr = pTaskHead;
[21496]989
[33218]990 if (!pdmacEpIsTransferAllowed(&pEndpoint->Core, (uint32_t)pCurr->DataSeg.cbSeg, &msWhenNext))
[26671]991 {
[35205]992 pAioMgr->msBwLimitExpired = RT_MIN(pAioMgr->msBwLimitExpired, msWhenNext);
[26671]993 break;
994 }
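 /* msBwLimitExpired is used as the event semaphore timeout in
 * pdmacFileAioMgrNormal(), so the manager wakes up again once the
 * bandwidth limit is replenished. */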
995
[21496]996 pTaskHead = pTaskHead->pNext;
997
[23404]998 pCurr->pNext = NULL;
999
[22309]1000 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
1001 ("Endpoints do not match\n"));
1002
1003 switch (pCurr->enmTransferType)
[21496]1004 {
1005 case PDMACTASKFILETRANSFER_FLUSH:
1006 {
1007 /* If there is no data transfer request pending, this flush request finishes immediately. */
[29121]1008 if (pEndpoint->fAsyncFlushSupported)
[21496]1009 {
[29121]1010 /* Issue a flush to the host. */
1011 RTFILEAIOREQ hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
1012 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
1013
[29450]1014 LogFlow(("Flush request %#p\n", hReq));
1015
[37596]1016 rc = RTFileAioReqPrepareFlush(hReq, pEndpoint->hFile, pCurr);
[29121]1017 if (RT_FAILURE(rc))
1018 {
[43858]1019 LogRel(("AIOMgr: Preparing flush failed with %Rrc, disabling async flushes\n", rc));
[29121]1020 pEndpoint->fAsyncFlushSupported = false;
1021 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
[29228]1022 rc = VINF_SUCCESS; /* Fake success */
[29121]1023 }
1024 else
1025 {
[32467]1026 pCurr->hReq = hReq;
[29121]1027 apReqs[cRequests] = hReq;
1028 pEndpoint->AioMgr.cReqsProcessed++;
1029 cRequests++;
1030 }
1031 }
1032
1033 if ( !pEndpoint->AioMgr.cRequestsActive
1034 && !pEndpoint->fAsyncFlushSupported)
1035 {
[27920]1036 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
[22309]1037 pdmacFileTaskFree(pEndpoint, pCurr);
[21496]1038 }
1039 else
1040 {
[26338]1041 Assert(!pEndpoint->pFlushReq);
[22309]1042 pEndpoint->pFlushReq = pCurr;
[21496]1043 }
1044 break;
1045 }
1046 case PDMACTASKFILETRANSFER_READ:
1047 case PDMACTASKFILETRANSFER_WRITE:
1048 {
[22309]1049 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
[21496]1050
[29466]1051 if (pCurr->hReq == NIL_RTFILEAIOREQ)
1052 {
1053 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
1054 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1055 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
1056 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
1057 else
1058 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
1059
1060 AssertRC(rc);
1061 }
[27299]1062 else
[29466]1063 {
1064 LogFlow(("Task %#p has I/O request %#p already\n", pCurr, pCurr->hReq));
1065 hReq = pCurr->hReq;
1066 }
[27299]1067
[29450]1068 LogFlow(("Read/Write request %#p\n", hReq));
1069
[26338]1070 if (hReq != NIL_RTFILEAIOREQ)
[22309]1071 {
[26338]1072 apReqs[cRequests] = hReq;
1073 cRequests++;
[22309]1074 }
[21496]1075 break;
1076 }
1077 default:
[22309]1078 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
[29121]1079 } /* switch transfer type */
1080
1081 /* Queue the requests if the array is full. */
1082 if (cRequests == RT_ELEMENTS(apReqs))
1083 {
1084 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1085 cRequests = 0;
1086 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1087 ("Unexpected return code\n"));
[21496]1088 }
1089 }
1090
1091 if (cRequests)
1092 {
[23404]1093 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
1094 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1095 ("Unexpected return code rc=%Rrc\n", rc));
[22757]1096 }
1097
[23404]1098 if (pTaskHead)
[22757]1099 {
[23404]1100 /* Add the rest of the tasks to the pending list */
1101 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
[22757]1102
[28224]1103 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
[35205]1104 && !pEndpoint->pFlushReq))
[21496]1105 {
[28719]1106#if 0
[23404]1107 /*
1108 * The I/O manager has no room left for more requests
1109 * but there are still requests to process.
1110 * Create a new I/O manager and let it handle some endpoints.
1111 */
1112 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
[28719]1113#else
1114 /* Grow the I/O manager */
1115 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1116#endif
[21496]1117 }
[23404]1118 }
[22757]1119
[23404]1120 /* Insufficient resources are not fatal. */
1121 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1122 rc = VINF_SUCCESS;
[22757]1123
[21496]1124 return rc;
1125}
1126
1127/**
1128 * Adds all pending requests for the given endpoint
1129 * until a flush request is encountered or there is no
1130 * request anymore.
1131 *
1132 * @returns VBox status code.
1133 * @param pAioMgr The async I/O manager for the endpoint
1134 * @param pEndpoint The endpoint to get the requests from.
1135 */
1136static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1137 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1138{
1139 int rc = VINF_SUCCESS;
[22309]1140 PPDMACTASKFILE pTasksHead = NULL;
[21496]1141
[22309]1142 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1143 ("Trying to process request lists of a non active endpoint!\n"));
1144
[21496]1145 Assert(!pEndpoint->pFlushReq);
1146
1147 /* Check the pending list first */
1148 if (pEndpoint->AioMgr.pReqsPendingHead)
1149 {
[23603]1150 LogFlow(("Queuing pending requests first\n"));
1151
[21496]1152 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1153 /*
1154 * Clear the list as the processing routine will insert them into the list
[22309]1155 * again if it gets a flush request.
[21496]1156 */
1157 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1158 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1159 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
[39034]1160 AssertRC(rc); /** @todo r=bird: status code potentially overwritten. */
[21496]1161 }
1162
[23404]1163 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
[21496]1164 {
1165 /* Now the request queue. */
1166 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1167 if (pTasksHead)
1168 {
1169 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1170 AssertRC(rc);
1171 }
1172 }
1173
1174 return rc;
1175}
1176
1177static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1178{
1179 int rc = VINF_SUCCESS;
[22309]1180 bool fNotifyWaiter = false;
[21496]1181
[23603]1182 LogFlowFunc((": Enter\n"));
1183
[21496]1184 Assert(pAioMgr->fBlockingEventPending);
1185
1186 switch (pAioMgr->enmBlockingEvent)
1187 {
1188 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1189 {
[30111]1190 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
[21496]1191 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without an endpoint to add\n"));
1192
[22309]1193 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1194
[21496]1195 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1196 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1197 if (pAioMgr->pEndpointsHead)
1198 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1199 pAioMgr->pEndpointsHead = pEndpointNew;
1200
1201 /* Assign the completion point to this file. */
[37596]1202 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->hFile);
[22309]1203 fNotifyWaiter = true;
[22757]1204 pAioMgr->cEndpoints++;
[21496]1205 break;
1206 }
1207 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1208 {
[30111]1209 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
[21496]1210 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without an endpoint to remove\n"));
1211
[22309]1212 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
[22757]1213 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
[21496]1214 break;
1215 }
1216 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1217 {
[30111]1218 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = ASMAtomicReadPtrT(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, PPDMASYNCCOMPLETIONENDPOINTFILE);
[21496]1219 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without an endpoint to close\n"));
1220
[24530]1221 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1222 {
1223 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
[23603]1224
[24530]1225 /* Make sure all tasks finished. Process the queues a last time first. */
1226 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1227 AssertRC(rc);
[21496]1228
[24530]1229 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1230 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1231 }
1232 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1233 && (!pEndpointClose->AioMgr.cRequestsActive))
1234 fNotifyWaiter = true;
[21496]1235 break;
1236 }
1237 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
[22309]1238 {
1239 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1240 if (!pAioMgr->cRequestsActive)
1241 fNotifyWaiter = true;
[21496]1242 break;
[22309]1243 }
[21496]1244 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
[22309]1245 {
1246 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
[21496]1247 break;
[22309]1248 }
1249 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1250 {
1251 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1252 fNotifyWaiter = true;
1253 break;
1254 }
[21496]1255 default:
1256 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1257 }
1258
1259 if (fNotifyWaiter)
1260 {
1261 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
[22309]1262 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
[21496]1263
1264 /* Release the waiting thread. */
[22967]1265 LogFlow(("Signalling waiter\n"));
[21496]1266 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1267 AssertRC(rc);
1268 }
1269
[23603]1270 LogFlowFunc((": Leave\n"));
[21496]1271 return rc;
1272}
1273
[23603]1274/**
1275 * Checks all endpoints for pending events or new requests.
1276 *
1277 * @returns VBox status code.
1278 * @param pAioMgr The I/O manager handle.
1279 */
1280static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1281{
1282 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1283 int rc = VINF_SUCCESS;
1284 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1285
[35205]1286 pAioMgr->msBwLimitExpired = RT_INDEFINITE_WAIT;
[26671]1287
[23603]1288 while (pEndpoint)
1289 {
[24358]1290 if (!pEndpoint->pFlushReq
1291 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1292 && !pEndpoint->AioMgr.fMoving)
[23603]1293 {
1294 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1295 if (RT_FAILURE(rc))
1296 return rc;
1297 }
[29124]1298 else if ( !pEndpoint->AioMgr.cRequestsActive
1299 && pEndpoint->enmState != PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
[23603]1300 {
[33540]1301 /* Reopen the file so that it can be re-associated with a different I/O manager's context. */
[37596]1302 RTFileClose(pEndpoint->hFile);
1303 rc = RTFileOpen(&pEndpoint->hFile, pEndpoint->Core.pszUri, pEndpoint->fFlags);
[23603]1304 AssertRC(rc);
1305
1306 if (pEndpoint->AioMgr.fMoving)
1307 {
1308 pEndpoint->AioMgr.fMoving = false;
1309 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1310 }
1311 else
1312 {
1313 Assert(pAioMgr->fBlockingEventPending);
1314 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1315
1316 /* Release the waiting thread. */
1317 LogFlow(("Signalling waiter\n"));
1318 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
[24530]1319 AssertRC(rc);
[23603]1320 }
1321 }
1322
1323 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1324 }
1325
1326 return rc;
1327}
1328
[32427]1329/**
1330 * Wrapper around pdmacFileAioMgrNormalReqCompleteRc().
1331 */
[26671]1332static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1333{
[32427]1334 size_t cbTransfered = 0;
1335 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1336
1337 pdmacFileAioMgrNormalReqCompleteRc(pAioMgr, hReq, rcReq, cbTransfered);
1338}
1339
1340static void pdmacFileAioMgrNormalReqCompleteRc(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq,
1341 int rcReq, size_t cbTransfered)
1342{
[26671]1343 int rc = VINF_SUCCESS;
1344 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1345 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
[28224]1346 PPDMACTASKFILE pTasksWaiting;
[26671]1347
[29466]1348 LogFlowFunc(("pAioMgr=%#p hReq=%#p\n", pAioMgr, hReq));
1349
[26671]1350 pEndpoint = pTask->pEndpoint;
1351
[29466]1352 pTask->hReq = NIL_RTFILEAIOREQ;
1353
1354 pAioMgr->cRequestsActive--;
1355 pEndpoint->AioMgr.cRequestsActive--;
1356 pEndpoint->AioMgr.cReqsProcessed++;
1357
[26671]1358 /*
1359 * It is possible that the request failed on Linux with kernels < 2.6.23
1360 * if the passed buffer was allocated with remap_pfn_range or if the file
1361 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1362 * The endpoint will be migrated to a failsafe manager in case a request fails.
1363 */
1364 if (RT_FAILURE(rcReq))
1365 {
1366 /* Free bounce buffers and the IPRT request. */
[28719]1367 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
[26671]1368
[29121]1369 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
1370 {
[44235]1371 LogRel(("AIOMgr: Flush failed with %Rrc, disabling async flushes\n", rcReq));
[29121]1372 pEndpoint->fAsyncFlushSupported = false;
1373 AssertMsg(pEndpoint->pFlushReq == pTask, ("Failed flush request doesn't match active one\n"));
1374 /* The other method will take over now. */
[43725]1375
1376 pEndpoint->pFlushReq = NULL;
1377 /* Call completion callback */
1378 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, VINF_SUCCESS));
1379 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1380 pdmacFileTaskFree(pEndpoint, pTask);
[29121]1381 }
1382 else
1383 {
[33540]1384 /* Free the lock and process pending tasks if necessary */
[29121]1385 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1386 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1387 AssertRC(rc);
[26671]1388
[29121]1389 if (pTask->cbBounceBuffer)
1390 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
[26671]1391
[29154]1392 /*
1393 * Fatal errors are reported to the guest and non-fatal errors
1394 * will cause a migration to the failsafe manager in the hope
1395 * that the error disappears.
1396 */
1397 if (!pdmacFileAioMgrNormalRcIsFatal(rcReq))
[29121]1398 {
[29154]1399 /* Queue the request on the pending list. */
1400 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1401 pEndpoint->AioMgr.pReqsPendingHead = pTask;
[26671]1402
[33540]1403 /* Create a new failsafe manager if necessary. */
[29154]1404 if (!pEndpoint->AioMgr.fMoving)
1405 {
1406 PPDMACEPFILEMGR pAioMgrFailsafe;
[26671]1407
[29154]1408 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1409 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
[26671]1410
[29154]1411 pEndpoint->AioMgr.fMoving = true;
[26671]1412
[29154]1413 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1414 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1415 AssertRC(rc);
[29121]1416
[29154]1417 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
[26671]1418
[29154]1419 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1420 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1421 }
[26671]1422
[29154]1423 /* If this was the last request for the endpoint migrate it to the new manager. */
1424 if (!pEndpoint->AioMgr.cRequestsActive)
1425 {
1426 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1427 Assert(!fReqsPending);
1428
1429 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1430 AssertRC(rc);
1431 }
1432 }
1433 else
1434 {
1435 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1436 pdmacFileTaskFree(pEndpoint, pTask);
1437 }
[26671]1438 }
1439 }
1440 else
1441 {
[29121]1442 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
[28884]1443 {
[29121]1444 /* Clear pending flush */
1445 AssertMsg(pEndpoint->pFlushReq == pTask, ("Completed flush request doesn't match active one\n"));
1446 pEndpoint->pFlushReq = NULL;
1447 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
[26671]1448
[29121]1449 /* Call completion callback */
1450 LogFlow(("Flush task=%#p completed with %Rrc\n", pTask, rcReq));
1451 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1452 pdmacFileTaskFree(pEndpoint, pTask);
1453 }
1454 else
1455 {
1456 /*
1457 * Restart an incomplete transfer.
1458 * This usually means that the request will fail with an error, but the
1459 * transfer has to be continued to learn the cause (disk full, file too
1460 * big, I/O error, ...).
1461 */
1462 if (RT_UNLIKELY( cbTransfered < pTask->DataSeg.cbSeg
1463 || ( pTask->cbBounceBuffer
1464 && cbTransfered < pTask->cbBounceBuffer)))
[28884]1465 {
[29121]1466 RTFOFF offStart;
1467 size_t cbToTransfer;
1468 uint8_t *pbBuf = NULL;
[28884]1469
[33595]1470 LogFlow(("Restarting incomplete transfer %#p (%zu bytes transferred)\n",
[29121]1471 pTask, cbTransfered));
1472 Assert(cbTransfered % 512 == 0);
[28884]1473
[29121]1474 if (pTask->cbBounceBuffer)
1475 {
1476 AssertPtr(pTask->pvBounceBuffer);
1477 offStart = (pTask->Off & ~((RTFOFF)512-1)) + cbTransfered;
1478 cbToTransfer = pTask->cbBounceBuffer - cbTransfered;
1479 pbBuf = (uint8_t *)pTask->pvBounceBuffer + cbTransfered;
1480 }
1481 else
1482 {
1483 Assert(!pTask->pvBounceBuffer);
1484 offStart = pTask->Off + cbTransfered;
1485 cbToTransfer = pTask->DataSeg.cbSeg - cbTransfered;
1486 pbBuf = (uint8_t *)pTask->DataSeg.pvSeg + cbTransfered;
1487 }
1488
1489 if (pTask->fPrefetch || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1490 {
[37596]1491 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->hFile, offStart,
[29121]1492 pbBuf, cbToTransfer, pTask);
1493 }
1494 else
1495 {
1496 AssertMsg(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE,
1497 ("Invalid transfer type\n"));
[37596]1498 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile, offStart,
[29121]1499 pbBuf, cbToTransfer, pTask);
1500 }
[34929]1501 AssertRC(rc);
[29121]1502
[34929]1503 pTask->hReq = hReq;
[29466]1504 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1505 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1506 ("Unexpected return code rc=%Rrc\n", rc));
1507 }
1508 else if (pTask->fPrefetch)
1509 {
[29121]1510 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1511 Assert(pTask->cbBounceBuffer);
[26671]1512
[29121]1513 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1514 pTask->DataSeg.pvSeg,
1515 pTask->DataSeg.cbSeg);
[26671]1516
[29121]1517 /* Write it now. */
1518 pTask->fPrefetch = false;
1519 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
[43858]1520 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
[26671]1521
[29121]1522 /* Grow the file if needed. */
1523 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1524 {
1525 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
[37596]1526 RTFileSetSize(pEndpoint->hFile, pTask->Off + pTask->DataSeg.cbSeg);
[29121]1527 }
1528
[37596]1529 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->hFile,
[29121]1530 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1531 AssertRC(rc);
[34929]1532 pTask->hReq = hReq;
[29466]1533 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, &hReq, 1);
1534 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
1535 ("Unexpected return code rc=%Rrc\n", rc));
[26671]1536 }
[29121]1537 else
[26671]1538 {
[29121]1539 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1540 {
1541 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1542 memcpy(pTask->DataSeg.pvSeg,
1543 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1544 pTask->DataSeg.cbSeg);
[26671]1545
[29121]1546 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1547 }
[26671]1548
[29121]1549 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
[26671]1550
[33540]1551 /* Free the lock and process pending tasks if necessary */
[29121]1552 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
[29474]1553 if (pTasksWaiting)
1554 {
1555 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1556 AssertRC(rc);
1557 }
[26671]1558
1559 /* Call completion callback */
[29121]1560 LogFlow(("Task=%#p completed with %Rrc\n", pTask, rcReq));
1561 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1562 pdmacFileTaskFree(pEndpoint, pTask);
[26671]1563
[29121]1564 /*
1565 * If there is no request left on the endpoint but a flush request is set
1566 * it completed now and we notify the owner.
1567 * Furthermore we look for new requests and continue.
1568 */
1569 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1570 {
1571 /* Call completion callback */
1572 pTask = pEndpoint->pFlushReq;
1573 pEndpoint->pFlushReq = NULL;
[26671]1574
[29121]1575 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
[26671]1576
[29121]1577 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1578 pdmacFileTaskFree(pEndpoint, pTask);
1579 }
1580 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1581 {
1582 /* If the endpoint is about to be migrated do it now. */
1583 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1584 Assert(!fReqsPending);
1585
1586 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1587 AssertRC(rc);
1588 }
[26671]1589 }
[29121]1590 } /* Not a flush request */
[26671]1591 } /* request completed successfully */
1592}
1593
[21496]1594 /** Helper macro for checking error codes: invokes the error handler and returns on failure. */
1595#define CHECK_RC(pAioMgr, rc) \
1596 if (RT_FAILURE(rc)) \
1597 {\
[22309]1598 int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS);\
[21496]1599 return rc2;\
1600 }
1601
1602/**
1603 * The normal I/O manager using the RTFileAio* API
1604 *
1605 * @returns VBox status code.
[39078]1606 * @param hThreadSelf Handle of the thread.
[21496]1607 * @param pvUser Opaque user data.
1608 */
[39078]1609DECLCALLBACK(int) pdmacFileAioMgrNormal(RTTHREAD hThreadSelf, void *pvUser)
[21496]1610{
[39034]1611 int rc = VINF_SUCCESS;
1612 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1613 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
[39078]1614 NOREF(hThreadSelf);
[21496]1615
[39034]1616 while ( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1617 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING
1618 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
[21496]1619 {
[28947]1620 if (!pAioMgr->cRequestsActive)
1621 {
1622 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1623 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
[35205]1624 rc = RTSemEventWait(pAioMgr->EventSem, pAioMgr->msBwLimitExpired);
[28947]1625 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
[35205]1626 Assert(RT_SUCCESS(rc) || rc == VERR_TIMEOUT);
[21496]1627
[28947]1628 LogFlow(("Got woken up\n"));
1629 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1630 }
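 /* fWaitingEventSem and fWokenUp form a lock-free handshake with the waking
 * side: a wakeup which raced in before fWaitingEventSem was set skips the
 * wait above, so no notification is lost. */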
[21496]1631
1632 /* Check for an external blocking event first. */
1633 if (pAioMgr->fBlockingEventPending)
1634 {
1635 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1636 CHECK_RC(pAioMgr, rc);
1637 }
1638
[28947]1639 if (RT_LIKELY( pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING
1640 || pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING))
[22309]1641 {
[23603]1642 /* We got woken up because an endpoint issued new requests. Queue them. */
1643 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1644 CHECK_RC(pAioMgr, rc);
[21496]1645
[35205]1646 while (pAioMgr->cRequestsActive)
[22309]1647 {
[35205]1648 RTFILEAIOREQ apReqs[20];
1649 uint32_t cReqsCompleted = 0;
1650 size_t cReqsWait;
[21496]1651
[35205]1652 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1653 cReqsWait = RT_ELEMENTS(apReqs);
1654 else
1655 cReqsWait = pAioMgr->cRequestsActive;
[22757]1656
[35205]1657 LogFlow(("Waiting for %d of %d tasks to complete\n", 1, cReqsWait));
[23603]1658
[35205]1659 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1660 1,
1661 RT_INDEFINITE_WAIT, apReqs,
1662 cReqsWait, &cReqsCompleted);
1663 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1664 CHECK_RC(pAioMgr, rc);
[21496]1665
[35205]1666 LogFlow(("%d tasks completed\n", cReqsCompleted));
[23603]1667
[35205]1668 for (uint32_t i = 0; i < cReqsCompleted; i++)
1669 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
[21496]1670
[35205]1671 /* Check for an external blocking event before we go to sleep again. */
1672 if (pAioMgr->fBlockingEventPending)
1673 {
1674 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1675 CHECK_RC(pAioMgr, rc);
1676 }
[21496]1677
[35205]1678 /* Update load statistics. */
1679 uint64_t uMillisCurr = RTTimeMilliTS();
1680 if (uMillisCurr > uMillisEnd)
1681 {
1682 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
[21496]1683
[35205]1684 /* Calculate timespan. */
1685 uMillisCurr -= uMillisEnd;
[22851]1686
[35205]1687 while (pEndpointCurr)
1688 {
1689 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1690 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1691 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1692 }
[22851]1693
[35205]1694 /* Set new update interval */
1695 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
[21496]1696 }
1697
[23603]1698 /* Check endpoints for new requests. */
[28719]1699 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1700 {
1701 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1702 CHECK_RC(pAioMgr, rc);
1703 }
[23603]1704 } /* while requests are active. */
[28719]1705
1706 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1707 {
1708 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1709 AssertRC(rc);
1710 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
[28947]1711
1712 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1713 CHECK_RC(pAioMgr, rc);
[28719]1714 }
[23603]1715 } /* if still running */
1716 } /* while running */
1717
[28947]1718 LogFlowFunc(("rc=%Rrc\n", rc));
[21496]1719 return rc;
1720}
1721
1722#undef CHECK_RC
1723