VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 28800

Last change on this file since 28800 was 28800, checked in by vboxsync, 14 years ago

Automated rebranding to Oracle copyright/license strings via filemuncher

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 56.3 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 28800 2010-04-27 08:22:32Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Oracle Corporation
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 */
18#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
19#include <iprt/types.h>
20#include <iprt/asm.h>
21#include <iprt/file.h>
22#include <iprt/mem.h>
23#include <iprt/string.h>
24#include <iprt/assert.h>
25#include <VBox/log.h>
26
27#include "PDMAsyncCompletionFileInternal.h"
28
29/** The update period for the I/O load statistics in ms. */
30#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
31/** Maximum number of requests a manager will handle. */
32#define PDMACEPFILEMGR_REQS_STEP 512
33
34/*******************************************************************************
35* Internal functions *
36*******************************************************************************/
37static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
38 PPDMACEPFILEMGR pAioMgr,
39 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint);
40
41static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
42 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
43 PPDMACFILERANGELOCK pRangeLock);
44
45int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
46{
47 int rc = VINF_SUCCESS;
48
49 pAioMgr->cRequestsActiveMax = PDMACEPFILEMGR_REQS_STEP;
50
51 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
52 if (rc == VERR_OUT_OF_RANGE)
53 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, pAioMgr->cRequestsActiveMax);
54
55 if (RT_SUCCESS(rc))
56 {
57 /* Initialize request handle array. */
58 pAioMgr->iFreeEntry = 0;
59 pAioMgr->cReqEntries = pAioMgr->cRequestsActiveMax;
60 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
61
62 if (pAioMgr->pahReqsFree)
63 {
64 /* Create the range lock memcache. */
65 rc = RTMemCacheCreate(&pAioMgr->hMemCacheRangeLocks, sizeof(PDMACFILERANGELOCK),
66 0, UINT32_MAX, NULL, NULL, NULL, 0);
67 if (RT_SUCCESS(rc))
68 return VINF_SUCCESS;
69
70 RTMemFree(pAioMgr->pahReqsFree);
71 }
72 else
73 {
74 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
75 rc = VERR_NO_MEMORY;
76 }
77 }
78
79 return rc;
80}
81
82void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
83{
84 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
85
86 while (pAioMgr->iFreeEntry > 0)
87 {
88 pAioMgr->iFreeEntry--;
89 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] != NIL_RTFILEAIOREQ);
90 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry]);
91 }
92
93 RTMemFree(pAioMgr->pahReqsFree);
94 RTMemCacheDestroy(pAioMgr->hMemCacheRangeLocks);
95}
96
97/**
98 * Sorts the endpoint list with insertion sort.
99 */
100static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
101{
102 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
103
104 pEpPrev = pAioMgr->pEndpointsHead;
105 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
106
107 while (pEpCurr)
108 {
109 /* Remember the next element to sort because the list might change. */
110 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
111
112 /* Unlink the current element from the list. */
113 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
114 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
115
116 if (pPrev)
117 pPrev->AioMgr.pEndpointNext = pNext;
118 else
119 pAioMgr->pEndpointsHead = pNext;
120
121 if (pNext)
122 pNext->AioMgr.pEndpointPrev = pPrev;
123
124 /* Go back until we reached the place to insert the current endpoint into. */
125 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
126 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
127
128 /* Link the endpoint into the list. */
129 if (pEpPrev)
130 pNext = pEpPrev->AioMgr.pEndpointNext;
131 else
132 pNext = pAioMgr->pEndpointsHead;
133
134 pEpCurr->AioMgr.pEndpointNext = pNext;
135 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
136
137 if (pNext)
138 pNext->AioMgr.pEndpointPrev = pEpCurr;
139
140 if (pEpPrev)
141 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
142 else
143 pAioMgr->pEndpointsHead = pEpCurr;
144
145 pEpCurr = pEpNextToSort;
146 }
147
148#ifdef DEBUG
149 /* Validate sorting alogrithm */
150 unsigned cEndpoints = 0;
151 pEpCurr = pAioMgr->pEndpointsHead;
152
153 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
154 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
155
156 while (pEpCurr)
157 {
158 cEndpoints++;
159
160 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
161 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
162
163 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
164 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
165
166 pEpCurr = pNext;
167 }
168
169 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
170
171#endif
172}
173
174/**
175 * Removes an endpoint from the currently assigned manager.
176 *
177 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
178 * FALSE otherwise.
179 * @param pEndpointRemove The endpoint to remove.
180 */
181static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
182{
183 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
184 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
185 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
186
187 pAioMgr->cEndpoints--;
188
189 if (pPrev)
190 pPrev->AioMgr.pEndpointNext = pNext;
191 else
192 pAioMgr->pEndpointsHead = pNext;
193
194 if (pNext)
195 pNext->AioMgr.pEndpointPrev = pPrev;
196
197 /* Make sure that there is no request pending on this manager for the endpoint. */
198 if (!pEndpointRemove->AioMgr.cRequestsActive)
199 {
200 Assert(!pEndpointRemove->pFlushReq);
201
202 /* Reopen the file so that the new endpoint can reassociate with the file */
203 RTFileClose(pEndpointRemove->File);
204 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
205 AssertRC(rc);
206 return false;
207 }
208
209 return true;
210}
211
212static bool pdmacFileAioMgrNormalIsBalancePossible(PPDMACEPFILEMGR pAioMgr)
213{
214 /* Balancing doesn't make sense with only one endpoint. */
215 if (pAioMgr->cEndpoints == 1)
216 return false;
217
218 /* Doesn't make sens to move endpoints if only one produces the whole load */
219 unsigned cEndpointsWithLoad = 0;
220
221 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
222
223 while (pCurr)
224 {
225 if (pCurr->AioMgr.cReqsPerSec)
226 cEndpointsWithLoad++;
227
228 pCurr = pCurr->AioMgr.pEndpointNext;
229 }
230
231 return (cEndpointsWithLoad > 1);
232}
233
234/**
235 * Creates a new I/O manager and spreads the I/O load of the endpoints
236 * between the given I/O manager and the new one.
237 *
238 * @returns nothing.
239 * @param pAioMgr The I/O manager with high I/O load.
240 */
241static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
242{
243 PPDMACEPFILEMGR pAioMgrNew = NULL;
244 int rc = VINF_SUCCESS;
245
246 /*
247 * Check if balancing would improve the situation.
248 */
249 if (pdmacFileAioMgrNormalIsBalancePossible(pAioMgr))
250 {
251 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
252
253 rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgrNew, PDMACEPFILEMGRTYPE_ASYNC);
254 if (RT_SUCCESS(rc))
255 {
256 /* We will sort the list by request count per second. */
257 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
258
259 /* Now move some endpoints to the new manager. */
260 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
261 unsigned cReqsOther = 0;
262 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
263
264 while (pCurr)
265 {
266 if (cReqsHere <= cReqsOther)
267 {
268 /*
269 * The other manager has more requests to handle now.
270 * We will keep the current endpoint.
271 */
272 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
273 cReqsHere += pCurr->AioMgr.cReqsPerSec;
274 pCurr = pCurr->AioMgr.pEndpointNext;
275 }
276 else
277 {
278 /* Move to other endpoint. */
279 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
280 cReqsOther += pCurr->AioMgr.cReqsPerSec;
281
282 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
283
284 pCurr = pCurr->AioMgr.pEndpointNext;
285
286 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
287
288 if (fReqsPending)
289 {
290 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
291 pMove->AioMgr.fMoving = true;
292 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
293 }
294 else
295 {
296 pMove->AioMgr.fMoving = false;
297 pMove->AioMgr.pAioMgrDst = NULL;
298 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
299 }
300 }
301 }
302 }
303 else
304 {
305 /* Don't process further but leave a log entry about reduced performance. */
306 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
307 }
308 }
309 else
310 Log(("AIOMgr: Load balancing would not improve anything\n"));
311}
312
313/**
314 * Increase the maximum number of active requests for the given I/O manager.
315 *
316 * @returns VBox status code.
317 * @param pAioMgr The I/O manager to grow.
318 */
319static int pdmacFileAioMgrNormalGrow(PPDMACEPFILEMGR pAioMgr)
320{
321 int rc = VINF_SUCCESS;
322 RTFILEAIOCTX hAioCtxNew = NIL_RTFILEAIOCTX;
323
324 LogFlowFunc(("pAioMgr=%#p\n", pAioMgr));
325
326 AssertMsg( pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING
327 && !pAioMgr->cRequestsActive,
328 ("Invalid state of the I/O manager\n"));
329
330 /*
331 * Reopen the files of all assigned endpoints first so we can assign them to the new
332 * I/O context.
333 */
334 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead;
335
336 while (pCurr)
337 {
338 RTFileClose(pCurr->File);
339 rc = RTFileOpen(&pCurr->File, pCurr->Core.pszUri, pCurr->fFlags);
340 AssertRC(rc);
341
342 pCurr = pCurr->AioMgr.pEndpointNext;
343 }
344
345 /* Create the new bigger context. */
346 pAioMgr->cRequestsActiveMax += PDMACEPFILEMGR_REQS_STEP;
347
348 rc = RTFileAioCtxCreate(&hAioCtxNew, RTFILEAIO_UNLIMITED_REQS);
349 if (rc == VERR_OUT_OF_RANGE)
350 rc = RTFileAioCtxCreate(&hAioCtxNew, pAioMgr->cRequestsActiveMax);
351
352 if (RT_SUCCESS(rc))
353 {
354 /* Close the old context. */
355 rc = RTFileAioCtxDestroy(pAioMgr->hAioCtx);
356 AssertRC(rc);
357
358 pAioMgr->hAioCtx = hAioCtxNew;
359
360 /* Create a new I/O task handle array */
361 uint32_t cReqEntriesNew = pAioMgr->cRequestsActiveMax + 1;
362 RTFILEAIOREQ *pahReqNew = (RTFILEAIOREQ *)RTMemAllocZ(cReqEntriesNew * sizeof(RTFILEAIOREQ));
363
364 if (pahReqNew)
365 {
366 /* Copy the cached request handles. */
367 for (uint32_t iReq = 0; iReq < pAioMgr->cReqEntries; iReq++)
368 pahReqNew[iReq] = pAioMgr->pahReqsFree[iReq];
369
370 RTMemFree(pAioMgr->pahReqsFree);
371 pAioMgr->pahReqsFree = pahReqNew;
372 pAioMgr->cReqEntries = cReqEntriesNew;
373 LogFlowFunc(("I/O manager increased to handle a maximum of %u requests\n",
374 pAioMgr->cRequestsActiveMax));
375 }
376 else
377 rc = VERR_NO_MEMORY;
378 }
379
380 /* Assign the file to the new context. */
381 pCurr = pAioMgr->pEndpointsHead;
382
383 while (pCurr)
384 {
385 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pCurr->File);
386 AssertRC(rc);
387
388 pCurr = pCurr->AioMgr.pEndpointNext;
389 }
390
391 if (RT_FAILURE(rc))
392 {
393 LogFlow(("Increasing size of the I/O manager failed with rc=%Rrc\n", rc));
394 pAioMgr->cRequestsActiveMax -= PDMACEPFILEMGR_REQS_STEP;
395 }
396
397 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
398 LogFlowFunc(("returns rc=%Rrc\n", rc));
399
400 return rc;
401}
402
403/**
404 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
405 *
406 * @returns VBox status code
407 * @param pAioMgr The I/O manager the error ocurred on.
408 * @param rc The error code.
409 */
410static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
411{
412 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
413 pAioMgr, rc));
414 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
415 LogRel(("AIOMgr: Please contact the product vendor\n"));
416
417 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
418
419 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
420 ASMAtomicWriteU32((volatile uint32_t *)&pEpClassFile->enmMgrTypeOverride, PDMACEPFILEMGRTYPE_SIMPLE);
421
422 AssertMsgFailed(("Implement\n"));
423 return VINF_SUCCESS;
424}
425
426/**
427 * Put a list of tasks in the pending request list of an endpoint.
428 */
429DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
430{
431 /* Add the rest of the tasks to the pending list */
432 if (!pEndpoint->AioMgr.pReqsPendingHead)
433 {
434 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
435 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
436 }
437 else
438 {
439 Assert(pEndpoint->AioMgr.pReqsPendingTail);
440 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
441 }
442
443 /* Update the tail. */
444 while (pTaskHead->pNext)
445 pTaskHead = pTaskHead->pNext;
446
447 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
448}
449
450/**
451 * Put one task in the pending request list of an endpoint.
452 */
453DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
454{
455 /* Add the rest of the tasks to the pending list */
456 if (!pEndpoint->AioMgr.pReqsPendingHead)
457 {
458 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
459 pEndpoint->AioMgr.pReqsPendingHead = pTask;
460 }
461 else
462 {
463 Assert(pEndpoint->AioMgr.pReqsPendingTail);
464 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
465 }
466
467 pEndpoint->AioMgr.pReqsPendingTail = pTask;
468}
469
470/**
471 * Allocates a async I/O request.
472 *
473 * @returns Handle to the request.
474 * @param pAioMgr The I/O manager.
475 */
476static RTFILEAIOREQ pdmacFileAioMgrNormalRequestAlloc(PPDMACEPFILEMGR pAioMgr)
477{
478 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
479
480 /* Get a request handle. */
481 if (pAioMgr->iFreeEntry > 0)
482 {
483 pAioMgr->iFreeEntry--;
484 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeEntry];
485 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = NIL_RTFILEAIOREQ;
486 Assert(hReq != NIL_RTFILEAIOREQ);
487 }
488 else
489 {
490 int rc = RTFileAioReqCreate(&hReq);
491 AssertRC(rc);
492 }
493
494 return hReq;
495}
496
497/**
498 * Frees a async I/O request handle.
499 *
500 * @returns nothing.
501 * @param pAioMgr The I/O manager.
502 * @param hReq The I/O request handle to free.
503 */
504static void pdmacFileAioMgrNormalRequestFree(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
505{
506 Assert(pAioMgr->iFreeEntry < pAioMgr->cReqEntries);
507 Assert(pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] == NIL_RTFILEAIOREQ);
508
509 pAioMgr->pahReqsFree[pAioMgr->iFreeEntry] = hReq;
510 pAioMgr->iFreeEntry++;
511}
512
513/**
514 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
515 */
516static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
517 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
518 PRTFILEAIOREQ pahReqs, unsigned cReqs)
519{
520 int rc;
521
522 pAioMgr->cRequestsActive += cReqs;
523 pEndpoint->AioMgr.cRequestsActive += cReqs;
524
525 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
526 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
527
528 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
529 if (RT_FAILURE(rc))
530 {
531 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
532 {
533 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
534
535 /*
536 * We run out of resources.
537 * Need to check which requests got queued
538 * and put the rest on the pending list again.
539 */
540 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
541 {
542 pEpClass->fOutOfResourcesWarningPrinted = true;
543 LogRel(("AIOMgr: The operating system doesn't have enough resources "
544 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
545 }
546
547 for (size_t i = 0; i < cReqs; i++)
548 {
549 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
550
551 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
552 {
553 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
554 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
555
556 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
557 PPDMACTASKFILE pTasksWaiting;
558
559 pdmacFileAioMgrNormalRequestFree(pAioMgr, pahReqs[i]);
560
561 if (pTask->cbBounceBuffer)
562 RTMemFree(pTask->pvBounceBuffer);
563
564 pTask->fPrefetch = false;
565
566 /* Free the lock and process pending tasks if neccessary */
567 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
568
569 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
570 if (pTasksWaiting)
571 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTasksWaiting);
572
573 pAioMgr->cRequestsActive--;
574 pEndpoint->AioMgr.cRequestsActive--;
575 }
576
577 pAioMgr->cRequestsActiveMax = pAioMgr->cRequestsActive;
578 }
579
580 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
581 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
582 }
583 else
584 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
585 }
586
587 return rc;
588}
589
590static bool pdmacFileAioMgrNormalIsRangeLocked(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
591 RTFOFF offStart, size_t cbRange,
592 PPDMACTASKFILE pTask)
593{
594 PPDMACFILERANGELOCK pRangeLock = NULL; /** < Range lock */
595
596 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
597 || pTask->enmTransferType == PDMACTASKFILETRANSFER_READ,
598 ("Invalid task type %d\n", pTask->enmTransferType));
599
600 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetRangeGet(pEndpoint->AioMgr.pTreeRangesLocked, offStart);
601 if (!pRangeLock)
602 {
603 pRangeLock = (PPDMACFILERANGELOCK)RTAvlrFileOffsetGetBestFit(pEndpoint->AioMgr.pTreeRangesLocked, offStart, true);
604 /* Check if we intersect with the range. */
605 if ( !pRangeLock
606 || !( (pRangeLock->Core.Key) <= (offStart + (RTFOFF)cbRange - 1)
607 && (pRangeLock->Core.KeyLast) >= offStart))
608 {
609 pRangeLock = NULL; /* False alarm */
610 }
611 }
612
613 /* Check whether we have one of the situations explained below */
614 if ( pRangeLock
615#if 0 /** @todo: later. For now we will just block all requests if they interfere */
616 && ( (pRangeLock->fReadLock && pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
617 || (!pRangeLock->fReadLock)
618#endif
619 )
620 {
621 /* Add to the list. */
622 pTask->pNext = NULL;
623
624 if (!pRangeLock->pWaitingTasksHead)
625 {
626 Assert(!pRangeLock->pWaitingTasksTail);
627 pRangeLock->pWaitingTasksHead = pTask;
628 pRangeLock->pWaitingTasksTail = pTask;
629 }
630 else
631 {
632 AssertPtr(pRangeLock->pWaitingTasksTail);
633 pRangeLock->pWaitingTasksTail->pNext = pTask;
634 pRangeLock->pWaitingTasksTail = pTask;
635 }
636 return true;
637 }
638
639 return false;
640}
641
642static int pdmacFileAioMgrNormalRangeLock(PPDMACEPFILEMGR pAioMgr,
643 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
644 RTFOFF offStart, size_t cbRange,
645 PPDMACTASKFILE pTask)
646{
647 AssertMsg(!pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbRange, pTask),
648 ("Range is already locked offStart=%RTfoff cbRange=%u\n",
649 offStart, cbRange));
650
651 PPDMACFILERANGELOCK pRangeLock = (PPDMACFILERANGELOCK)RTMemCacheAlloc(pAioMgr->hMemCacheRangeLocks);
652 if (!pRangeLock)
653 return VERR_NO_MEMORY;
654
655 /* Init the lock. */
656 pRangeLock->Core.Key = offStart;
657 pRangeLock->Core.KeyLast = offStart + cbRange - 1;
658 pRangeLock->cRefs = 1;
659 pRangeLock->fReadLock = pTask->enmTransferType == PDMACTASKFILETRANSFER_READ;
660 pRangeLock->pWaitingTasksHead = NULL;
661 pRangeLock->pWaitingTasksTail = NULL;
662
663 bool fInserted = RTAvlrFileOffsetInsert(pEndpoint->AioMgr.pTreeRangesLocked, &pRangeLock->Core);
664 AssertMsg(fInserted, ("Range lock was not inserted!\n"));
665
666 /* Let the task point to its lock. */
667 pTask->pRangeLock = pRangeLock;
668
669 return VINF_SUCCESS;
670}
671
672static PPDMACTASKFILE pdmacFileAioMgrNormalRangeLockFree(PPDMACEPFILEMGR pAioMgr,
673 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
674 PPDMACFILERANGELOCK pRangeLock)
675{
676 PPDMACTASKFILE pTasksWaitingHead;
677
678 AssertPtr(pRangeLock);
679 Assert(pRangeLock->cRefs == 1);
680
681 RTAvlrFileOffsetRemove(pEndpoint->AioMgr.pTreeRangesLocked, pRangeLock->Core.Key);
682 pTasksWaitingHead = pRangeLock->pWaitingTasksHead;
683 pRangeLock->pWaitingTasksHead = NULL;
684 pRangeLock->pWaitingTasksTail = NULL;
685 RTMemCacheFree(pAioMgr->hMemCacheRangeLocks, pRangeLock);
686
687 return pTasksWaitingHead;
688}
689
690static int pdmacFileAioMgrNormalTaskPrepareBuffered(PPDMACEPFILEMGR pAioMgr,
691 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
692 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
693{
694 int rc = VINF_SUCCESS;
695 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
696 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
697 void *pvBuf = pTask->DataSeg.pvSeg;
698
699 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
700 || (uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) <= pEndpoint->cbFile,
701 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
702 pTask->Off, pTask->DataSeg.cbSeg, pEndpoint->cbFile));
703
704 pTask->fPrefetch = false;
705 pTask->cbBounceBuffer = 0;
706
707 /*
708 * Before we start to setup the request we have to check whether there is a task
709 * already active which range intersects with ours. We have to defer execution
710 * of this task in two cases:
711 * - The pending task is a write and the current is either read or write
712 * - The pending task is a read and the current task is a write task.
713 *
714 * To check whether a range is currently "locked" we use the AVL tree where every pending task
715 * is stored by its file offset range. The current task will be added to the active task
716 * and will be executed when the active one completes. (The method below
717 * which checks whether a range is already used will add the task)
718 *
719 * This is neccessary because of the requirement to align all requests to a 512 boundary
720 * which is enforced by the host OS (Linux and Windows atm). It is possible that
721 * we have to process unaligned tasks and need to align them using bounce buffers.
722 * While the data is fetched from the file another request might arrive writing to
723 * the same range. This will result in data corruption if both are executed concurrently.
724 */
725 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, pTask->Off, pTask->DataSeg.cbSeg, pTask);
726
727 if (!fLocked)
728 {
729 /* Get a request handle. */
730 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
731 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
732
733 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
734 {
735 /* Grow the file if needed. */
736 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
737 {
738 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
739 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
740 }
741
742 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
743 pTask->Off, pTask->DataSeg.pvSeg,
744 pTask->DataSeg.cbSeg, pTask);
745 }
746 else
747 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
748 pTask->Off, pTask->DataSeg.pvSeg,
749 pTask->DataSeg.cbSeg, pTask);
750 AssertRC(rc);
751
752 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, pTask->Off,
753 pTask->DataSeg.cbSeg,
754 pTask);
755
756 if (RT_SUCCESS(rc))
757 *phReq = hReq;
758 }
759 else
760 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
761
762 return rc;
763}
764
765static int pdmacFileAioMgrNormalTaskPrepareNonBuffered(PPDMACEPFILEMGR pAioMgr,
766 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
767 PPDMACTASKFILE pTask, PRTFILEAIOREQ phReq)
768{
769 int rc = VINF_SUCCESS;
770 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
771 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
772 void *pvBuf = pTask->DataSeg.pvSeg;
773
774 /*
775 * Check if the alignment requirements are met.
776 * Offset, transfer size and buffer address
777 * need to be on a 512 boundary.
778 */
779 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
780 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg + (pTask->Off - offStart), 512);
781 PDMACTASKFILETRANSFER enmTransferType = pTask->enmTransferType;
782
783 AssertMsg( pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE
784 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
785 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
786 offStart, cbToTransfer, pEndpoint->cbFile));
787
788 pTask->fPrefetch = false;
789
790 /*
791 * Before we start to setup the request we have to check whether there is a task
792 * already active which range intersects with ours. We have to defer execution
793 * of this task in two cases:
794 * - The pending task is a write and the current is either read or write
795 * - The pending task is a read and the current task is a write task.
796 *
797 * To check whether a range is currently "locked" we use the AVL tree where every pending task
798 * is stored by its file offset range. The current task will be added to the active task
799 * and will be executed when the active one completes. (The method below
800 * which checks whether a range is already used will add the task)
801 *
802 * This is neccessary because of the requirement to align all requests to a 512 boundary
803 * which is enforced by the host OS (Linux and Windows atm). It is possible that
804 * we have to process unaligned tasks and need to align them using bounce buffers.
805 * While the data is fetched from the file another request might arrive writing to
806 * the same range. This will result in data corruption if both are executed concurrently.
807 */
808 bool fLocked = pdmacFileAioMgrNormalIsRangeLocked(pEndpoint, offStart, cbToTransfer, pTask);
809
810 if (!fLocked)
811 {
812 /* Get a request handle. */
813 hReq = pdmacFileAioMgrNormalRequestAlloc(pAioMgr);
814 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
815
816 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
817 || RT_UNLIKELY(offStart != pTask->Off)
818 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
819 {
820 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
821 pTask, cbToTransfer, pTask->DataSeg.cbSeg, offStart, pTask->Off));
822
823 /* Create bounce buffer. */
824 pTask->cbBounceBuffer = cbToTransfer;
825
826 AssertMsg(pTask->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
827 pTask->Off, offStart));
828 pTask->offBounceBuffer = pTask->Off - offStart;
829
830 /** @todo: I think we need something like a RTMemAllocAligned method here.
831 * Current assumption is that the maximum alignment is 4096byte
832 * (GPT disk on Windows)
833 * so we can use RTMemPageAlloc here.
834 */
835 pTask->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
836 if (RT_LIKELY(pTask->pvBounceBuffer))
837 {
838 pvBuf = pTask->pvBounceBuffer;
839
840 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
841 {
842 if ( RT_UNLIKELY(cbToTransfer != pTask->DataSeg.cbSeg)
843 || RT_UNLIKELY(offStart != pTask->Off))
844 {
845 /* We have to fill the buffer first before we can update the data. */
846 LogFlow(("Prefetching data for task %#p\n", pTask));
847 pTask->fPrefetch = true;
848 enmTransferType = PDMACTASKFILETRANSFER_READ;
849 }
850 else
851 memcpy(pvBuf, pTask->DataSeg.pvSeg, pTask->DataSeg.cbSeg);
852 }
853 }
854 else
855 rc = VERR_NO_MEMORY;
856 }
857 else
858 pTask->cbBounceBuffer = 0;
859
860 if (RT_SUCCESS(rc))
861 {
862 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
863 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
864
865 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
866 {
867 /* Grow the file if needed. */
868 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
869 {
870 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
871 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
872 }
873
874 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
875 offStart, pvBuf, cbToTransfer, pTask);
876 }
877 else
878 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
879 offStart, pvBuf, cbToTransfer, pTask);
880 AssertRC(rc);
881
882 rc = pdmacFileAioMgrNormalRangeLock(pAioMgr, pEndpoint, offStart, cbToTransfer, pTask);
883
884 if (RT_SUCCESS(rc))
885 *phReq = hReq;
886 else
887 {
888 /* Cleanup */
889 if (pTask->cbBounceBuffer)
890 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
891 }
892 }
893 }
894 else
895 LogFlow(("Task %#p was deferred because the access range is locked\n", pTask));
896
897 return rc;
898}
899
900static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
901 PPDMACEPFILEMGR pAioMgr,
902 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
903{
904 RTFILEAIOREQ apReqs[20];
905 unsigned cRequests = 0;
906 unsigned cMaxRequests = pAioMgr->cRequestsActiveMax - pAioMgr->cRequestsActive;
907 int rc = VINF_SUCCESS;
908
909 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
910 ("Trying to process request lists of a non active endpoint!\n"));
911
912 /* Go through the list and queue the requests until we get a flush request */
913 while ( pTaskHead
914 && !pEndpoint->pFlushReq
915 && (pAioMgr->cRequestsActive + cRequests < pAioMgr->cRequestsActiveMax)
916 && RT_SUCCESS(rc))
917 {
918 PPDMACTASKFILE pCurr = pTaskHead;
919
920 if (!pdmacFileBwMgrIsTransferAllowed(pEndpoint->pBwMgr, (uint32_t)pCurr->DataSeg.cbSeg))
921 {
922 pAioMgr->fBwLimitReached = true;
923 break;
924 }
925
926 pTaskHead = pTaskHead->pNext;
927
928 pCurr->pNext = NULL;
929
930 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
931 ("Endpoints do not match\n"));
932
933 switch (pCurr->enmTransferType)
934 {
935 case PDMACTASKFILETRANSFER_FLUSH:
936 {
937 /* If there is no data transfer request this flush request finished immediately. */
938 if (!pEndpoint->AioMgr.cRequestsActive)
939 {
940 pCurr->pfnCompleted(pCurr, pCurr->pvUser, VINF_SUCCESS);
941 pdmacFileTaskFree(pEndpoint, pCurr);
942 }
943 else
944 {
945 Assert(!pEndpoint->pFlushReq);
946 pEndpoint->pFlushReq = pCurr;
947 }
948 break;
949 }
950 case PDMACTASKFILETRANSFER_READ:
951 case PDMACTASKFILETRANSFER_WRITE:
952 {
953 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
954
955 if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
956 rc = pdmacFileAioMgrNormalTaskPrepareBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
957 else if (pEndpoint->enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
958 rc = pdmacFileAioMgrNormalTaskPrepareNonBuffered(pAioMgr, pEndpoint, pCurr, &hReq);
959 else
960 AssertMsgFailed(("Invalid backend type %d\n", pEndpoint->enmBackendType));
961
962 AssertRC(rc);
963
964 if (hReq != NIL_RTFILEAIOREQ)
965 {
966 apReqs[cRequests] = hReq;
967 pEndpoint->AioMgr.cReqsProcessed++;
968 cRequests++;
969 if (cRequests == RT_ELEMENTS(apReqs))
970 {
971 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
972 cRequests = 0;
973 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
974 ("Unexpected return code\n"));
975 }
976 }
977 break;
978 }
979 default:
980 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
981 }
982 }
983
984 if (cRequests)
985 {
986 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
987 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
988 ("Unexpected return code rc=%Rrc\n", rc));
989 }
990
991 if (pTaskHead)
992 {
993 /* Add the rest of the tasks to the pending list */
994 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
995
996 if (RT_UNLIKELY( pAioMgr->cRequestsActiveMax == pAioMgr->cRequestsActive
997 && !pEndpoint->pFlushReq
998 && !pAioMgr->fBwLimitReached))
999 {
1000#if 0
1001 /*
1002 * The I/O manager has no room left for more requests
1003 * but there are still requests to process.
1004 * Create a new I/O manager and let it handle some endpoints.
1005 */
1006 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
1007#else
1008 /* Grow the I/O manager */
1009 pAioMgr->enmState = PDMACEPFILEMGRSTATE_GROWING;
1010#endif
1011 }
1012 }
1013
1014 /* Insufficient resources are not fatal. */
1015 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
1016 rc = VINF_SUCCESS;
1017
1018 return rc;
1019}
1020
1021/**
1022 * Adds all pending requests for the given endpoint
1023 * until a flush request is encountered or there is no
1024 * request anymore.
1025 *
1026 * @returns VBox status code.
1027 * @param pAioMgr The async I/O manager for the endpoint
1028 * @param pEndpoint The endpoint to get the requests from.
1029 */
1030static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
1031 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
1032{
1033 int rc = VINF_SUCCESS;
1034 PPDMACTASKFILE pTasksHead = NULL;
1035
1036 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
1037 ("Trying to process request lists of a non active endpoint!\n"));
1038
1039 Assert(!pEndpoint->pFlushReq);
1040
1041 /* Check the pending list first */
1042 if (pEndpoint->AioMgr.pReqsPendingHead)
1043 {
1044 LogFlow(("Queuing pending requests first\n"));
1045
1046 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
1047 /*
1048 * Clear the list as the processing routine will insert them into the list
1049 * again if it gets a flush request.
1050 */
1051 pEndpoint->AioMgr.pReqsPendingHead = NULL;
1052 pEndpoint->AioMgr.pReqsPendingTail = NULL;
1053 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1054 AssertRC(rc);
1055 }
1056
1057 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
1058 {
1059 /* Now the request queue. */
1060 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
1061 if (pTasksHead)
1062 {
1063 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
1064 AssertRC(rc);
1065 }
1066 }
1067
1068 return rc;
1069}
1070
1071static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
1072{
1073 int rc = VINF_SUCCESS;
1074 bool fNotifyWaiter = false;
1075
1076 LogFlowFunc((": Enter\n"));
1077
1078 Assert(pAioMgr->fBlockingEventPending);
1079
1080 switch (pAioMgr->enmBlockingEvent)
1081 {
1082 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
1083 {
1084 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
1085 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
1086
1087 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1088
1089 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
1090 pEndpointNew->AioMgr.pEndpointPrev = NULL;
1091 if (pAioMgr->pEndpointsHead)
1092 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
1093 pAioMgr->pEndpointsHead = pEndpointNew;
1094
1095 /* Assign the completion point to this file. */
1096 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
1097 fNotifyWaiter = true;
1098 pAioMgr->cEndpoints++;
1099 break;
1100 }
1101 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
1102 {
1103 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
1104 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
1105
1106 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
1107 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
1108 break;
1109 }
1110 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
1111 {
1112 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
1113 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
1114
1115 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1116 {
1117 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
1118
1119 /* Make sure all tasks finished. Process the queues a last time first. */
1120 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
1121 AssertRC(rc);
1122
1123 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
1124 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
1125 }
1126 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
1127 && (!pEndpointClose->AioMgr.cRequestsActive))
1128 fNotifyWaiter = true;
1129 break;
1130 }
1131 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
1132 {
1133 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
1134 if (!pAioMgr->cRequestsActive)
1135 fNotifyWaiter = true;
1136 break;
1137 }
1138 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
1139 {
1140 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
1141 break;
1142 }
1143 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
1144 {
1145 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
1146 fNotifyWaiter = true;
1147 break;
1148 }
1149 default:
1150 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
1151 }
1152
1153 if (fNotifyWaiter)
1154 {
1155 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1156 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
1157
1158 /* Release the waiting thread. */
1159 LogFlow(("Signalling waiter\n"));
1160 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1161 AssertRC(rc);
1162 }
1163
1164 LogFlowFunc((": Leave\n"));
1165 return rc;
1166}
1167
1168/**
1169 * Checks all endpoints for pending events or new requests.
1170 *
1171 * @returns VBox status code.
1172 * @param pAioMgr The I/O manager handle.
1173 */
1174static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
1175{
1176 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
1177 int rc = VINF_SUCCESS;
1178 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
1179
1180 pAioMgr->fBwLimitReached = false;
1181
1182 while (pEndpoint)
1183 {
1184 if (!pEndpoint->pFlushReq
1185 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
1186 && !pEndpoint->AioMgr.fMoving)
1187 {
1188 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
1189 if (RT_FAILURE(rc))
1190 return rc;
1191 }
1192 else if (!pEndpoint->AioMgr.cRequestsActive)
1193 {
1194 /* Reopen the file so that the new endpoint can reassociate with the file */
1195 RTFileClose(pEndpoint->File);
1196 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
1197 AssertRC(rc);
1198
1199 if (pEndpoint->AioMgr.fMoving)
1200 {
1201 pEndpoint->AioMgr.fMoving = false;
1202 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1203 }
1204 else
1205 {
1206 Assert(pAioMgr->fBlockingEventPending);
1207 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
1208
1209 /* Release the waiting thread. */
1210 LogFlow(("Signalling waiter\n"));
1211 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
1212 AssertRC(rc);
1213 }
1214 }
1215
1216 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
1217 }
1218
1219 return rc;
1220}
1221
1222static void pdmacFileAioMgrNormalReqComplete(PPDMACEPFILEMGR pAioMgr, RTFILEAIOREQ hReq)
1223{
1224 int rc = VINF_SUCCESS;
1225 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
1226 size_t cbTransfered = 0;
1227 int rcReq = RTFileAioReqGetRC(hReq, &cbTransfered);
1228 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(hReq);
1229 PPDMACTASKFILE pTasksWaiting;
1230
1231 pEndpoint = pTask->pEndpoint;
1232
1233 /*
1234 * It is possible that the request failed on Linux with kernels < 2.6.23
1235 * if the passed buffer was allocated with remap_pfn_range or if the file
1236 * is on an NFS endpoint which does not support async and direct I/O at the same time.
1237 * The endpoint will be migrated to a failsafe manager in case a request fails.
1238 */
1239 if (RT_FAILURE(rcReq))
1240 {
1241 /* Free bounce buffers and the IPRT request. */
1242 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1243
1244 /* Free the lock and process pending tasks if neccessary */
1245 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1246 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1247 AssertRC(rc);
1248
1249 pAioMgr->cRequestsActive--;
1250 pEndpoint->AioMgr.cRequestsActive--;
1251 pEndpoint->AioMgr.cReqsProcessed++;
1252
1253 if (pTask->cbBounceBuffer)
1254 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1255
1256 /* Queue the request on the pending list. */
1257 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
1258 pEndpoint->AioMgr.pReqsPendingHead = pTask;
1259
1260 /* Create a new failsafe manager if neccessary. */
1261 if (!pEndpoint->AioMgr.fMoving)
1262 {
1263 PPDMACEPFILEMGR pAioMgrFailsafe;
1264
1265 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
1266 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
1267
1268 pEndpoint->AioMgr.fMoving = true;
1269
1270 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
1271 &pAioMgrFailsafe, PDMACEPFILEMGRTYPE_SIMPLE);
1272 AssertRC(rc);
1273
1274 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
1275
1276 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
1277 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
1278 }
1279
1280 /* If this was the last request for the endpoint migrate it to the new manager. */
1281 if (!pEndpoint->AioMgr.cRequestsActive)
1282 {
1283 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1284 Assert(!fReqsPending);
1285
1286 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1287 AssertRC(rc);
1288 }
1289 }
1290 else
1291 {
1292 AssertMsg( RT_FAILURE(rcReq)
1293 || ( (cbTransfered == pTask->DataSeg.cbSeg)
1294 || (pTask->cbBounceBuffer && cbTransfered >= pTask->DataSeg.cbSeg)),
1295 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
1296
1297 if (pTask->fPrefetch)
1298 {
1299 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
1300 Assert(pTask->cbBounceBuffer);
1301
1302 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1303 pTask->DataSeg.pvSeg,
1304 pTask->DataSeg.cbSeg);
1305
1306 /* Write it now. */
1307 pTask->fPrefetch = false;
1308 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
1309 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
1310
1311 /* Grow the file if needed. */
1312 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
1313 {
1314 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
1315 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
1316 }
1317
1318 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
1319 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
1320 AssertRC(rc);
1321 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &hReq, 1);
1322 AssertRC(rc);
1323 }
1324 else
1325 {
1326 if (RT_SUCCESS(rc) && pTask->cbBounceBuffer)
1327 {
1328 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
1329 memcpy(pTask->DataSeg.pvSeg,
1330 ((uint8_t *)pTask->pvBounceBuffer) + pTask->offBounceBuffer,
1331 pTask->DataSeg.cbSeg);
1332
1333 RTMemPageFree(pTask->pvBounceBuffer, pTask->cbBounceBuffer);
1334 }
1335
1336 pdmacFileAioMgrNormalRequestFree(pAioMgr, hReq);
1337
1338 pAioMgr->cRequestsActive--;
1339 pEndpoint->AioMgr.cRequestsActive--;
1340 pEndpoint->AioMgr.cReqsProcessed++;
1341
1342 /* Free the lock and process pending tasks if neccessary */
1343 pTasksWaiting = pdmacFileAioMgrNormalRangeLockFree(pAioMgr, pEndpoint, pTask->pRangeLock);
1344 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksWaiting, pAioMgr, pEndpoint);
1345 AssertRC(rc);
1346
1347 /* Call completion callback */
1348 pTask->pfnCompleted(pTask, pTask->pvUser, rcReq);
1349 pdmacFileTaskFree(pEndpoint, pTask);
1350
1351 /*
1352 * If there is no request left on the endpoint but a flush request is set
1353 * it completed now and we notify the owner.
1354 * Furthermore we look for new requests and continue.
1355 */
1356 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
1357 {
1358 /* Call completion callback */
1359 pTask = pEndpoint->pFlushReq;
1360 pEndpoint->pFlushReq = NULL;
1361
1362 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
1363
1364 pTask->pfnCompleted(pTask, pTask->pvUser, VINF_SUCCESS);
1365 pdmacFileTaskFree(pEndpoint, pTask);
1366 }
1367 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
1368 {
1369 /* If the endpoint is about to be migrated do it now. */
1370 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
1371 Assert(!fReqsPending);
1372
1373 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
1374 AssertRC(rc);
1375 }
1376 }
1377 } /* request completed successfully */
1378}
1379
/**
 * Helper macro for checking for error codes.
 *
 * If the status code indicates a failure the error handler of the manager
 * is called and its status code is returned from the enclosing function.
 * Wrapped in do { } while (0) so that it acts as a single statement, also
 * in front of an else branch.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
1387
1388/**
1389 * The normal I/O manager using the RTFileAio* API
1390 *
1391 * @returns VBox status code.
1392 * @param ThreadSelf Handle of the thread.
1393 * @param pvUser Opaque user data.
1394 */
1395int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
1396{
1397 int rc = VINF_SUCCESS;
1398 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
1399 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1400
1401 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
1402 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
1403 {
1404 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
1405 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
1406 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
1407 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
1408 AssertRC(rc);
1409
1410 LogFlow(("Got woken up\n"));
1411 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
1412
1413 /* Check for an external blocking event first. */
1414 if (pAioMgr->fBlockingEventPending)
1415 {
1416 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1417 CHECK_RC(pAioMgr, rc);
1418 }
1419
1420 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
1421 {
1422 /* We got woken up because an endpoint issued new requests. Queue them. */
1423 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1424 CHECK_RC(pAioMgr, rc);
1425
1426 while ( pAioMgr->cRequestsActive
1427 || pAioMgr->fBwLimitReached)
1428 {
1429 if (pAioMgr->cRequestsActive)
1430 {
1431 RTFILEAIOREQ apReqs[20];
1432 uint32_t cReqsCompleted = 0;
1433 size_t cReqsWait;
1434
1435 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
1436 cReqsWait = RT_ELEMENTS(apReqs);
1437 else
1438 cReqsWait = pAioMgr->cRequestsActive;
1439
1440 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
1441
1442 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
1443 cReqsWait,
1444 RT_INDEFINITE_WAIT, apReqs,
1445 RT_ELEMENTS(apReqs), &cReqsCompleted);
1446 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
1447 CHECK_RC(pAioMgr, rc);
1448
1449 LogFlow(("%d tasks completed\n", cReqsCompleted));
1450
1451 for (uint32_t i = 0; i < cReqsCompleted; i++)
1452 pdmacFileAioMgrNormalReqComplete(pAioMgr, apReqs[i]);
1453
1454 /* Check for an external blocking event before we go to sleep again. */
1455 if (pAioMgr->fBlockingEventPending)
1456 {
1457 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1458 CHECK_RC(pAioMgr, rc);
1459 }
1460
1461 /* Update load statistics. */
1462 uint64_t uMillisCurr = RTTimeMilliTS();
1463 if (uMillisCurr > uMillisEnd)
1464 {
1465 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1466
1467 /* Calculate timespan. */
1468 uMillisCurr -= uMillisEnd;
1469
1470 while (pEndpointCurr)
1471 {
1472 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1473 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1474 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1475 }
1476
1477 /* Set new update interval */
1478 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1479 }
1480 }
1481 else
1482 {
1483 /*
1484 * Bandwidth limit reached for all endpoints.
1485 * Yield and wait until we have enough resources again.
1486 */
1487 RTThreadYield();
1488 }
1489
1490 /* Check endpoints for new requests. */
1491 if (pAioMgr->enmState != PDMACEPFILEMGRSTATE_GROWING)
1492 {
1493 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1494 CHECK_RC(pAioMgr, rc);
1495 }
1496 } /* while requests are active. */
1497
1498 if (pAioMgr->enmState == PDMACEPFILEMGRSTATE_GROWING)
1499 {
1500 rc = pdmacFileAioMgrNormalGrow(pAioMgr);
1501 AssertRC(rc);
1502 Assert(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING);
1503 }
1504 } /* if still running */
1505 } /* while running */
1506
1507 return rc;
1508}
1509
1510#undef CHECK_RC
1511
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use