VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 24355

Last change on this file since 24355 was 24355, checked in by vboxsync, 16 years ago

AsyncCompletion: Fix crash when doing load balancing

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 40.6 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 24355 2009-11-04 20:30:10Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#define RT_STRICT
24#include <iprt/types.h>
25#include <iprt/asm.h>
26#include <iprt/file.h>
27#include <iprt/mem.h>
28#include <iprt/string.h>
29#include <iprt/assert.h>
30#include <VBox/log.h>
31
32#include "PDMAsyncCompletionFileInternal.h"
33
34/** The update period for the I/O load statistics in ms.
 * Added to the current millisecond timestamp to compute when the per-endpoint
 * load figures are recalculated next. */
35#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
36/** Maximum number of requests a manager will handle.
 * Used as the fallback size of the AIO context, as the size basis of the free
 * request handle array and as the request budget when queueing tasks. */
37#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo Find a better solution wrt. the request number. */
38
39int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
40{
41 int rc = VINF_SUCCESS;
42
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
44 if (rc == VERR_OUT_OF_RANGE)
45 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
46
47 if (RT_SUCCESS(rc))
48 {
49 /* Initialize request handle array. */
50 pAioMgr->iFreeEntryNext = 0;
51 pAioMgr->iFreeReqNext = 0;
52 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
53 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
54
55 if (pAioMgr->pahReqsFree)
56 {
57 return VINF_SUCCESS;
58 }
59 else
60 {
61 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
62 rc = VERR_NO_MEMORY;
63 }
64 }
65
66 return rc;
67}
68
69void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
70{
71 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
72
73 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
74 {
75 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
76 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
77 }
78
79 RTMemFree(pAioMgr->pahReqsFree);
80}
81
82/**
83 * Sorts the endpoint list with insertion sort.
84 */
85static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
86{
87 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
88
89 pEpPrev = pAioMgr->pEndpointsHead;
90 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
91
92 while (pEpCurr)
93 {
94 /* Remember the next element to sort because the list might change. */
95 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
96
97 /* Unlink the current element from the list. */
98 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
99 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
100
101 if (pPrev)
102 pPrev->AioMgr.pEndpointNext = pNext;
103 else
104 pAioMgr->pEndpointsHead = pNext;
105
106 if (pNext)
107 pNext->AioMgr.pEndpointPrev = pPrev;
108
109 /* Go back until we reached the place to insert the current endpoint into. */
110 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
111 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
112
113 /* Link the endpoint into the list. */
114 if (pEpPrev)
115 pNext = pEpPrev->AioMgr.pEndpointNext;
116 else
117 pNext = pAioMgr->pEndpointsHead;
118
119 pEpCurr->AioMgr.pEndpointNext = pNext;
120 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
121
122 if (pNext)
123 pNext->AioMgr.pEndpointPrev = pEpCurr;
124
125 if (pEpPrev)
126 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
127 else
128 pAioMgr->pEndpointsHead = pEpCurr;
129
130 pEpCurr = pEpNextToSort;
131 }
132
133#ifdef DEBUG
134 /* Validate sorting alogrithm */
135 unsigned cEndpoints = 0;
136 pEpCurr = pAioMgr->pEndpointsHead;
137
138 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
139 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
140
141 while (pEpCurr)
142 {
143 cEndpoints++;
144
145 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
146 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
147
148 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
149 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
150
151 pEpCurr = pNext;
152 }
153
154 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
155
156#endif
157}
158
159/**
160 * Removes an endpoint from the currently assigned manager.
161 *
162 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
163 * FALSE otherwise.
164 * @param pEndpointRemove The endpoint to remove.
165 */
166static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
167{
168 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
169 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
170 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
171
172 pAioMgr->cEndpoints--;
173
174 if (pPrev)
175 pPrev->AioMgr.pEndpointNext = pNext;
176 else
177 pAioMgr->pEndpointsHead = pNext;
178
179 if (pNext)
180 pNext->AioMgr.pEndpointPrev = pPrev;
181
182 /* Make sure that there is no request pending on this manager for the endpoint. */
183 if (!pEndpointRemove->AioMgr.cRequestsActive)
184 {
185 Assert(!pEndpointRemove->pFlushReq);
186
187 /* Reopen the file so that the new endpoint can reassociate with the file */
188 RTFileClose(pEndpointRemove->File);
189 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
190 AssertRC(rc);
191 return false;
192 }
193
194 return true;
195}
196
197/**
198 * Creates a new I/O manager and spreads the I/O load of the endpoints
199 * between the given I/O manager and the new one.
200 *
201 * @returns nothing.
202 * @param pAioMgr The I/O manager with high I/O load.
203 */
204static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
205{
206 PPDMACEPFILEMGR pAioMgrNew = NULL;
207 int rc = VINF_SUCCESS;
208
209 /* Splitting can't be done with only one open endpoint. */
210 if (pAioMgr->cEndpoints > 1)
211 {
212 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
213 &pAioMgrNew, false);
214 if (RT_SUCCESS(rc))
215 {
216 /* We will sort the list by request count per second. */
217 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
218
219 /* Now move some endpoints to the new manager. */
220 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
221 unsigned cReqsOther = 0;
222 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
223
224 while (pCurr)
225 {
226 if (cReqsHere <= cReqsOther)
227 {
228 /*
229 * The other manager has more requests to handle now.
230 * We will keep the current endpoint.
231 */
232 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
233 cReqsHere += pCurr->AioMgr.cReqsPerSec;
234 pCurr = pCurr->AioMgr.pEndpointNext;
235 }
236 else
237 {
238 /* Move to other endpoint. */
239 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
240 cReqsOther += pCurr->AioMgr.cReqsPerSec;
241
242 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
243
244 pCurr = pCurr->AioMgr.pEndpointNext;
245
246 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
247
248 if (fReqsPending)
249 {
250 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
251 pMove->AioMgr.fMoving = true;
252 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
253 }
254 else
255 {
256 pMove->AioMgr.fMoving = false;
257 pMove->AioMgr.pAioMgrDst = NULL;
258 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
259 }
260 }
261 }
262 }
263 else
264 {
265 /* Don't process further but leave a log entry about reduced performance. */
266 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
267 }
268 }
269}
270
271/**
272 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
273 *
274 * @returns VBox status code
275 * @param pAioMgr The I/O manager the error ocurred on.
276 * @param rc The error code.
277 */
278static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
279{
280 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
281 pAioMgr, rc));
282 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
283 LogRel(("AIOMgr: Please contact the product vendor\n"));
284
285 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
286
287 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
288 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
289
290 AssertMsgFailed(("Implement\n"));
291 return VINF_SUCCESS;
292}
293
294/**
295 * Put a list of tasks in the pending request list of an endpoint.
296 */
297DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
298{
299 /* Add the rest of the tasks to the pending list */
300 if (!pEndpoint->AioMgr.pReqsPendingHead)
301 {
302 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
303 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
304 }
305 else
306 {
307 Assert(pEndpoint->AioMgr.pReqsPendingTail);
308 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
309 }
310
311 /* Update the tail. */
312 while (pTaskHead->pNext)
313 pTaskHead = pTaskHead->pNext;
314
315 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
316}
317
318/**
319 * Put one task in the pending request list of an endpoint.
320 */
321DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
322{
323 /* Add the rest of the tasks to the pending list */
324 if (!pEndpoint->AioMgr.pReqsPendingHead)
325 {
326 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
327 pEndpoint->AioMgr.pReqsPendingHead = pTask;
328 }
329 else
330 {
331 Assert(pEndpoint->AioMgr.pReqsPendingTail);
332 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
333 }
334
335 pEndpoint->AioMgr.pReqsPendingTail = pTask;
336}
337
338/**
339 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
340 */
341static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
342 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
343 PRTFILEAIOREQ pahReqs, size_t cReqs)
344{
345 int rc;
346
347 pAioMgr->cRequestsActive += cReqs;
348 pEndpoint->AioMgr.cRequestsActive += cReqs;
349
350 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
351 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
352
353 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
354 if (RT_FAILURE(rc))
355 {
356 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
357 {
358 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
359
360 /*
361 * We run out of resources.
362 * Need to check which requests got queued
363 * and put the rest on the pending list again.
364 */
365 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
366 {
367 pEpClass->fOutOfResourcesWarningPrinted = true;
368 LogRel(("AIOMgr: The operating system doesn't have enough resources "
369 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
370 }
371
372 for (size_t i = 0; i < cReqs; i++)
373 {
374 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
375
376 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
377 {
378 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
379 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
380
381 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
382
383 /* Put the entry on the free array */
384 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
385 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
386
387 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
388 pAioMgr->cRequestsActive--;
389 pEndpoint->AioMgr.cRequestsActive--;
390 }
391 }
392 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
393 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
394 }
395 else
396 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
397 }
398
399 return rc;
400}
401
402static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
403 PPDMACEPFILEMGR pAioMgr,
404 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
405{
406 RTFILEAIOREQ apReqs[20];
407 unsigned cRequests = 0;
408 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
409 int rc = VINF_SUCCESS;
410 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
411
412 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
413 ("Trying to process request lists of a non active endpoint!\n"));
414
415 /* Go through the list and queue the requests until we get a flush request */
416 while ( pTaskHead
417 && !pEndpoint->pFlushReq
418 && (cMaxRequests > 0)
419 && RT_SUCCESS(rc))
420 {
421 PPDMACTASKFILE pCurr = pTaskHead;
422
423 pTaskHead = pTaskHead->pNext;
424
425 pCurr->pNext = NULL;
426
427 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
428 ("Endpoints do not match\n"));
429
430 switch (pCurr->enmTransferType)
431 {
432 case PDMACTASKFILETRANSFER_FLUSH:
433 {
434 /* If there is no data transfer request this flush request finished immediately. */
435 if (!pEndpoint->AioMgr.cRequestsActive)
436 {
437 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
438 pdmacFileTaskFree(pEndpoint, pCurr);
439 }
440 else
441 {
442 pEndpoint->pFlushReq = pCurr;
443 }
444 break;
445 }
446 case PDMACTASKFILETRANSFER_READ:
447 case PDMACTASKFILETRANSFER_WRITE:
448 {
449 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
450 void *pvBuf = pCurr->DataSeg.pvSeg;
451
452 /* Get a request handle. */
453 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
454 {
455 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
456 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
457 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
458 }
459 else
460 {
461 rc = RTFileAioReqCreate(&hReq);
462 AssertRC(rc);
463 }
464
465 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
466
467 /* Check if the alignment requirements are met.
468 * Offset, transfer size and buffer address
469 * need to be on a 512 boundary. */
470 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
471 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
472 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
473
474 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
475 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
476 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
477 offStart, cbToTransfer, pEndpoint->cbFile));
478
479 pCurr->fPrefetch = false;
480
481 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
482 || RT_UNLIKELY(offStart != pCurr->Off)
483 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
484 {
485 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
486 pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));
487
488 /* Create bounce buffer. */
489 pCurr->fBounceBuffer = true;
490
491 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
492 pCurr->Off, offStart));
493 pCurr->uBounceBufOffset = pCurr->Off - offStart;
494
495 /** @todo: I think we need something like a RTMemAllocAligned method here.
496 * Current assumption is that the maximum alignment is 4096byte
497 * (GPT disk on Windows)
498 * so we can use RTMemPageAlloc here.
499 */
500 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
501 AssertPtr(pCurr->pvBounceBuffer);
502 pvBuf = pCurr->pvBounceBuffer;
503
504 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
505 {
506 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
507 || RT_UNLIKELY(offStart != pCurr->Off))
508 {
509 /* We have to fill the buffer first before we can update the data. */
510 LogFlow(("Prefetching data for task %#p\n", pCurr));
511 pCurr->fPrefetch = true;
512 enmTransferType = PDMACTASKFILETRANSFER_READ;
513 }
514 else
515 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
516 }
517 }
518 else
519 pCurr->fBounceBuffer = false;
520
521 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
522 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
523
524 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
525 {
526 /* Grow the file if needed. */
527 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
528 {
529 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
530 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
531 }
532
533 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
534 offStart, pvBuf, cbToTransfer, pCurr);
535 }
536 else
537 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
538 offStart, pvBuf, cbToTransfer, pCurr);
539 AssertRC(rc);
540
541 apReqs[cRequests] = hReq;
542 pEndpoint->AioMgr.cReqsProcessed++;
543 cMaxRequests--;
544 cRequests++;
545 if (cRequests == RT_ELEMENTS(apReqs))
546 {
547 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
548 cRequests = 0;
549 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
550 ("Unexpected return code\n"));
551 }
552 break;
553 }
554 default:
555 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
556 }
557 }
558
559 if (cRequests)
560 {
561 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
562 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
563 ("Unexpected return code rc=%Rrc\n", rc));
564 }
565
566 if (pTaskHead)
567 {
568 /* Add the rest of the tasks to the pending list */
569 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
570
571 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
572 {
573 /*
574 * The I/O manager has no room left for more requests
575 * but there are still requests to process.
576 * Create a new I/O manager and let it handle some endpoints.
577 */
578 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
579 }
580 }
581
582 /* Insufficient resources are not fatal. */
583 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
584 rc = VINF_SUCCESS;
585
586 return rc;
587}
588
589/**
590 * Adds all pending requests for the given endpoint
591 * until a flush request is encountered or there is no
592 * request anymore.
593 *
594 * @returns VBox status code.
595 * @param pAioMgr The async I/O manager for the endpoint
596 * @param pEndpoint The endpoint to get the requests from.
597 */
598static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
599 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
600{
601 int rc = VINF_SUCCESS;
602 PPDMACTASKFILE pTasksHead = NULL;
603
604 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
605 ("Trying to process request lists of a non active endpoint!\n"));
606
607 Assert(!pEndpoint->pFlushReq);
608
609 /* Check the pending list first */
610 if (pEndpoint->AioMgr.pReqsPendingHead)
611 {
612 LogFlow(("Queuing pending requests first\n"));
613
614 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
615 /*
616 * Clear the list as the processing routine will insert them into the list
617 * again if it gets a flush request.
618 */
619 pEndpoint->AioMgr.pReqsPendingHead = NULL;
620 pEndpoint->AioMgr.pReqsPendingTail = NULL;
621 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
622 AssertRC(rc);
623 }
624
625 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
626 {
627 /* Now the request queue. */
628 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
629 if (pTasksHead)
630 {
631 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
632 AssertRC(rc);
633 }
634 }
635
636 return rc;
637}
638
639static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
640{
641 int rc = VINF_SUCCESS;
642 bool fNotifyWaiter = false;
643
644 LogFlowFunc((": Enter\n"));
645
646 Assert(pAioMgr->fBlockingEventPending);
647
648 switch (pAioMgr->enmBlockingEvent)
649 {
650 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
651 {
652 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
653 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
654
655 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
656
657 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
658 pEndpointNew->AioMgr.pEndpointPrev = NULL;
659 if (pAioMgr->pEndpointsHead)
660 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
661 pAioMgr->pEndpointsHead = pEndpointNew;
662
663 /* Assign the completion point to this file. */
664 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
665 fNotifyWaiter = true;
666 pAioMgr->cEndpoints++;
667 break;
668 }
669 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
670 {
671 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
672 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
673
674 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
675 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
676 break;
677 }
678 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
679 {
680 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
681 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
682
683 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
684
685 /* Make sure all tasks finished. Process the queues a last time first. */
686 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
687 AssertRC(rc);
688
689 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
690 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
691 break;
692 }
693 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
694 {
695 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
696 if (!pAioMgr->cRequestsActive)
697 fNotifyWaiter = true;
698 break;
699 }
700 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
701 {
702 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
703 break;
704 }
705 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
706 {
707 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
708 fNotifyWaiter = true;
709 break;
710 }
711 default:
712 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
713 }
714
715 if (fNotifyWaiter)
716 {
717 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
718 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
719
720 /* Release the waiting thread. */
721 LogFlow(("Signalling waiter\n"));
722 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
723 AssertRC(rc);
724 }
725
726 LogFlowFunc((": Leave\n"));
727 return rc;
728}
729
730/**
731 * Checks all endpoints for pending events or new requests.
732 *
733 * @returns VBox status code.
734 * @param pAioMgr The I/O manager handle.
735 */
736static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
737{
738 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
739 int rc = VINF_SUCCESS;
740 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
741
742 while (pEndpoint)
743 {
744 if (!pEndpoint->pFlushReq && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE))
745 {
746 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
747 if (RT_FAILURE(rc))
748 return rc;
749 }
750 else if (!pEndpoint->AioMgr.cRequestsActive)
751 {
752 /* Reopen the file so that the new endpoint can reassociate with the file */
753 RTFileClose(pEndpoint->File);
754 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
755 AssertRC(rc);
756
757 if (pEndpoint->AioMgr.fMoving)
758 {
759 pEndpoint->AioMgr.fMoving = false;
760 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
761 }
762 else
763 {
764 Assert(pAioMgr->fBlockingEventPending);
765 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
766
767 /* Release the waiting thread. */
768 LogFlow(("Signalling waiter\n"));
769 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
770 AssertRC(rc);
771 }
772 }
773
774 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
775 }
776
777 return rc;
778}
779
/**
 * Helper macro for checking for error codes.
 *
 * If rc indicates a failure the error handler is invoked and the enclosing
 * function returns the status of the handler. Wrapped in do/while(0) so the
 * macro behaves like a single statement, also in unbraced if/else bodies.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler((pAioMgr), (rc), RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
787
788/**
789 * The normal I/O manager using the RTFileAio* API
790 *
791 * @returns VBox status code.
792 * @param ThreadSelf Handle of the thread.
793 * @param pvUser Opaque user data.
794 */
795int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
796{
797 int rc = VINF_SUCCESS;
798 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
799 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
800
801 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
802 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
803 {
804 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
805 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
806 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
807 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
808 AssertRC(rc);
809
810 LogFlow(("Got woken up\n"));
811 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
812
813 /* Check for an external blocking event first. */
814 if (pAioMgr->fBlockingEventPending)
815 {
816 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
817 CHECK_RC(pAioMgr, rc);
818 }
819
820 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
821 {
822 /* We got woken up because an endpoint issued new requests. Queue them. */
823 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
824 CHECK_RC(pAioMgr, rc);
825
826 while (pAioMgr->cRequestsActive)
827 {
828 RTFILEAIOREQ apReqs[20];
829 uint32_t cReqsCompleted = 0;
830 size_t cReqsWait;
831
832 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
833 cReqsWait = RT_ELEMENTS(apReqs);
834 else
835 cReqsWait = pAioMgr->cRequestsActive;
836
837 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
838
839 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
840 cReqsWait,
841 RT_INDEFINITE_WAIT, apReqs,
842 RT_ELEMENTS(apReqs), &cReqsCompleted);
843 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
844 CHECK_RC(pAioMgr, rc);
845
846 LogFlow(("%d tasks completed\n", cReqsCompleted));
847
848 for (uint32_t i = 0; i < cReqsCompleted; i++)
849 {
850 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
851 size_t cbTransfered = 0;
852 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
853 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
854
855 pEndpoint = pTask->pEndpoint;
856
857 /*
858 * It is possible that the request failed on Linux with kernels < 2.6.23
859 * if the passed buffer was allocated with remap_pfn_range or if the file
860 * is on an NFS endpoint which does not support async and direct I/O at the same time.
861 * The endpoint will be migrated to a failsafe manager in case a request fails.
862 */
863 if (RT_FAILURE(rcReq))
864 {
865 /* Free bounce buffers and the IPRT request. */
866 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
867 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
868
869 pAioMgr->cRequestsActive--;
870 pEndpoint->AioMgr.cRequestsActive--;
871 pEndpoint->AioMgr.cReqsProcessed++;
872
873 if (pTask->fBounceBuffer)
874 RTMemFree(pTask->pvBounceBuffer);
875
876 /* Queue the request on the pending list. */
877 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
878 pEndpoint->AioMgr.pReqsPendingHead = pTask;
879
880 /* Create a new failsafe manager if neccessary. */
881 if (!pEndpoint->AioMgr.fMoving)
882 {
883 PPDMACEPFILEMGR pAioMgrFailsafe;
884
885 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
886 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
887
888 pEndpoint->AioMgr.fMoving = true;
889
890 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
891 &pAioMgrFailsafe, true);
892 AssertRC(rc);
893
894 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
895
896 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
897 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
898 }
899
900 /* If this was the last request for the endpoint migrate it to the new manager. */
901 if (!pEndpoint->AioMgr.cRequestsActive)
902 {
903 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
904 Assert(!fReqsPending);
905
906 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
907 AssertRC(rc);
908 }
909 }
910 else
911 {
912 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
913 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
914 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
915
916 if (pTask->fPrefetch)
917 {
918 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
919 Assert(pTask->fBounceBuffer);
920
921 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
922 pTask->DataSeg.pvSeg,
923 pTask->DataSeg.cbSeg);
924
925 /* Write it now. */
926 pTask->fPrefetch = false;
927 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
928 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
929
930 /* Grow the file if needed. */
931 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
932 {
933 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
934 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
935 }
936
937 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
938 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
939 AssertRC(rc);
940 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
941 AssertRC(rc);
942 }
943 else
944 {
945 if (pTask->fBounceBuffer)
946 {
947 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
948 memcpy(pTask->DataSeg.pvSeg,
949 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
950 pTask->DataSeg.cbSeg);
951
952 RTMemPageFree(pTask->pvBounceBuffer);
953 }
954
955 /* Put the entry on the free array */
956 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
957 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
958
959 pAioMgr->cRequestsActive--;
960 pEndpoint->AioMgr.cRequestsActive--;
961 pEndpoint->AioMgr.cReqsProcessed++;
962
963 /* Call completion callback */
964 pTask->pfnCompleted(pTask, pTask->pvUser);
965 pdmacFileTaskFree(pEndpoint, pTask);
966
967 /*
968 * If there is no request left on the endpoint but a flush request is set
969 * it completed now and we notify the owner.
970 * Furthermore we look for new requests and continue.
971 */
972 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
973 {
974 /* Call completion callback */
975 pTask = pEndpoint->pFlushReq;
976 pEndpoint->pFlushReq = NULL;
977
978 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
979
980 pTask->pfnCompleted(pTask, pTask->pvUser);
981 pdmacFileTaskFree(pEndpoint, pTask);
982 }
983 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
984 {
985 /* If the endpoint is about to be migrated do it now. */
986 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
987 Assert(!fReqsPending);
988
989 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
990 AssertRC(rc);
991 }
992 }
993 } /* request completed successfully */
994 } /* for every completed request */
995
996 /* Check for an external blocking event before we go to sleep again. */
997 if (pAioMgr->fBlockingEventPending)
998 {
999 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1000 CHECK_RC(pAioMgr, rc);
1001 }
1002
1003 /* Update load statistics. */
1004 uint64_t uMillisCurr = RTTimeMilliTS();
1005 if (uMillisCurr > uMillisEnd)
1006 {
1007 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1008
1009 /* Calculate timespan. */
1010 uMillisCurr -= uMillisEnd;
1011
1012 while (pEndpointCurr)
1013 {
1014 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1015 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1016 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1017 }
1018
1019 /* Set new update interval */
1020 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1021 }
1022
1023 /* Check endpoints for new requests. */
1024 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1025 CHECK_RC(pAioMgr, rc);
1026 } /* while requests are active. */
1027 } /* if still running */
1028 } /* while running */
1029
1030 return rc;
1031}
1032
1033#undef CHECK_RC
1034
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette