VirtualBox

source: vbox/trunk/src/VBox/VMM/PDMAsyncCompletionFileNormal.cpp@ 24912

Last change on this file since 24912 was 24530, checked in by vboxsync, 15 years ago

AsyncCompletion: Fix assertion when closing endpoints

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 41.0 KB
Line 
1/* $Id: PDMAsyncCompletionFileNormal.cpp 24530 2009-11-09 23:16:52Z vboxsync $ */
2/** @file
3 * PDM Async I/O - Transport data asynchronous in R3 using EMT.
4 * Async File I/O manager.
5 */
6
7/*
8 * Copyright (C) 2006-2008 Sun Microsystems, Inc.
9 *
10 * This file is part of VirtualBox Open Source Edition (OSE), as
11 * available from http://www.virtualbox.org. This file is free software;
12 * you can redistribute it and/or modify it under the terms of the GNU
13 * General Public License (GPL) as published by the Free Software
14 * Foundation, in version 2 as it comes in the "COPYING" file of the
15 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
16 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
17 *
18 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
19 * Clara, CA 95054 USA or visit http://www.sun.com if you need
20 * additional information or have any questions.
21 */
22#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23#define RT_STRICT
24#include <iprt/types.h>
25#include <iprt/asm.h>
26#include <iprt/file.h>
27#include <iprt/mem.h>
28#include <iprt/string.h>
29#include <iprt/assert.h>
30#include <VBox/log.h>
31
32#include "PDMAsyncCompletionFileInternal.h"
33
34/** The update period for the I/O load statistics in ms. */
35#define PDMACEPFILEMGR_LOAD_UPDATE_PERIOD 1000
36/** Maximum number of requests a manager will handle. */
37#define PDMACEPFILEMGR_REQS_MAX 512 /* @todo: Find better solution wrt. the request number*/
38
39int pdmacFileAioMgrNormalInit(PPDMACEPFILEMGR pAioMgr)
40{
41 int rc = VINF_SUCCESS;
42
43 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, RTFILEAIO_UNLIMITED_REQS);
44 if (rc == VERR_OUT_OF_RANGE)
45 rc = RTFileAioCtxCreate(&pAioMgr->hAioCtx, PDMACEPFILEMGR_REQS_MAX);
46
47 if (RT_SUCCESS(rc))
48 {
49 /* Initialize request handle array. */
50 pAioMgr->iFreeEntryNext = 0;
51 pAioMgr->iFreeReqNext = 0;
52 pAioMgr->cReqEntries = PDMACEPFILEMGR_REQS_MAX + 1;
53 pAioMgr->pahReqsFree = (RTFILEAIOREQ *)RTMemAllocZ(pAioMgr->cReqEntries * sizeof(RTFILEAIOREQ));
54
55 if (pAioMgr->pahReqsFree)
56 {
57 return VINF_SUCCESS;
58 }
59 else
60 {
61 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
62 rc = VERR_NO_MEMORY;
63 }
64 }
65
66 return rc;
67}
68
69void pdmacFileAioMgrNormalDestroy(PPDMACEPFILEMGR pAioMgr)
70{
71 RTFileAioCtxDestroy(pAioMgr->hAioCtx);
72
73 while (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
74 {
75 RTFileAioReqDestroy(pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext]);
76 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
77 }
78
79 RTMemFree(pAioMgr->pahReqsFree);
80}
81
82/**
83 * Sorts the endpoint list with insertion sort.
84 */
85static void pdmacFileAioMgrNormalEndpointsSortByLoad(PPDMACEPFILEMGR pAioMgr)
86{
87 PPDMASYNCCOMPLETIONENDPOINTFILE pEpPrev, pEpCurr, pEpNextToSort;
88
89 pEpPrev = pAioMgr->pEndpointsHead;
90 pEpCurr = pEpPrev->AioMgr.pEndpointNext;
91
92 while (pEpCurr)
93 {
94 /* Remember the next element to sort because the list might change. */
95 pEpNextToSort = pEpCurr->AioMgr.pEndpointNext;
96
97 /* Unlink the current element from the list. */
98 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
99 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
100
101 if (pPrev)
102 pPrev->AioMgr.pEndpointNext = pNext;
103 else
104 pAioMgr->pEndpointsHead = pNext;
105
106 if (pNext)
107 pNext->AioMgr.pEndpointPrev = pPrev;
108
109 /* Go back until we reached the place to insert the current endpoint into. */
110 while (pEpPrev && (pEpPrev->AioMgr.cReqsPerSec < pEpCurr->AioMgr.cReqsPerSec))
111 pEpPrev = pEpPrev->AioMgr.pEndpointPrev;
112
113 /* Link the endpoint into the list. */
114 if (pEpPrev)
115 pNext = pEpPrev->AioMgr.pEndpointNext;
116 else
117 pNext = pAioMgr->pEndpointsHead;
118
119 pEpCurr->AioMgr.pEndpointNext = pNext;
120 pEpCurr->AioMgr.pEndpointPrev = pEpPrev;
121
122 if (pNext)
123 pNext->AioMgr.pEndpointPrev = pEpCurr;
124
125 if (pEpPrev)
126 pEpPrev->AioMgr.pEndpointNext = pEpCurr;
127 else
128 pAioMgr->pEndpointsHead = pEpCurr;
129
130 pEpCurr = pEpNextToSort;
131 }
132
133#ifdef DEBUG
134 /* Validate sorting alogrithm */
135 unsigned cEndpoints = 0;
136 pEpCurr = pAioMgr->pEndpointsHead;
137
138 AssertMsg(pEpCurr, ("No endpoint in the list?\n"));
139 AssertMsg(!pEpCurr->AioMgr.pEndpointPrev, ("First element in the list points to previous element\n"));
140
141 while (pEpCurr)
142 {
143 cEndpoints++;
144
145 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEpCurr->AioMgr.pEndpointNext;
146 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEpCurr->AioMgr.pEndpointPrev;
147
148 Assert(!pNext || pNext->AioMgr.cReqsPerSec <= pEpCurr->AioMgr.cReqsPerSec);
149 Assert(!pPrev || pPrev->AioMgr.cReqsPerSec >= pEpCurr->AioMgr.cReqsPerSec);
150
151 pEpCurr = pNext;
152 }
153
154 AssertMsg(cEndpoints == pAioMgr->cEndpoints, ("Endpoints lost during sort!\n"));
155
156#endif
157}
158
159/**
160 * Removes an endpoint from the currently assigned manager.
161 *
162 * @returns TRUE if there are still requests pending on the current manager for this endpoint.
163 * FALSE otherwise.
164 * @param pEndpointRemove The endpoint to remove.
165 */
166static bool pdmacFileAioMgrNormalRemoveEndpoint(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove)
167{
168 PPDMASYNCCOMPLETIONENDPOINTFILE pPrev = pEndpointRemove->AioMgr.pEndpointPrev;
169 PPDMASYNCCOMPLETIONENDPOINTFILE pNext = pEndpointRemove->AioMgr.pEndpointNext;
170 PPDMACEPFILEMGR pAioMgr = pEndpointRemove->pAioMgr;
171
172 pAioMgr->cEndpoints--;
173
174 if (pPrev)
175 pPrev->AioMgr.pEndpointNext = pNext;
176 else
177 pAioMgr->pEndpointsHead = pNext;
178
179 if (pNext)
180 pNext->AioMgr.pEndpointPrev = pPrev;
181
182 /* Make sure that there is no request pending on this manager for the endpoint. */
183 if (!pEndpointRemove->AioMgr.cRequestsActive)
184 {
185 Assert(!pEndpointRemove->pFlushReq);
186
187 /* Reopen the file so that the new endpoint can reassociate with the file */
188 RTFileClose(pEndpointRemove->File);
189 int rc = RTFileOpen(&pEndpointRemove->File, pEndpointRemove->Core.pszUri, pEndpointRemove->fFlags);
190 AssertRC(rc);
191 return false;
192 }
193
194 return true;
195}
196
197/**
198 * Creates a new I/O manager and spreads the I/O load of the endpoints
199 * between the given I/O manager and the new one.
200 *
201 * @returns nothing.
202 * @param pAioMgr The I/O manager with high I/O load.
203 */
204static void pdmacFileAioMgrNormalBalanceLoad(PPDMACEPFILEMGR pAioMgr)
205{
206 PPDMACEPFILEMGR pAioMgrNew = NULL;
207 int rc = VINF_SUCCESS;
208
209 /* Splitting can't be done with only one open endpoint. */
210 if (pAioMgr->cEndpoints > 1)
211 {
212 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass,
213 &pAioMgrNew, false);
214 if (RT_SUCCESS(rc))
215 {
216 /* We will sort the list by request count per second. */
217 pdmacFileAioMgrNormalEndpointsSortByLoad(pAioMgr);
218
219 /* Now move some endpoints to the new manager. */
220 unsigned cReqsHere = pAioMgr->pEndpointsHead->AioMgr.cReqsPerSec;
221 unsigned cReqsOther = 0;
222 PPDMASYNCCOMPLETIONENDPOINTFILE pCurr = pAioMgr->pEndpointsHead->AioMgr.pEndpointNext;
223
224 while (pCurr)
225 {
226 if (cReqsHere <= cReqsOther)
227 {
228 /*
229 * The other manager has more requests to handle now.
230 * We will keep the current endpoint.
231 */
232 Log(("Keeping endpoint %#p{%s} with %u reqs/s\n", pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
233 cReqsHere += pCurr->AioMgr.cReqsPerSec;
234 pCurr = pCurr->AioMgr.pEndpointNext;
235 }
236 else
237 {
238 /* Move to other endpoint. */
239 Log(("Moving endpoint %#p{%s} with %u reqs/s to other manager\n", pCurr, pCurr->Core.pszUri, pCurr->AioMgr.cReqsPerSec));
240 cReqsOther += pCurr->AioMgr.cReqsPerSec;
241
242 PPDMASYNCCOMPLETIONENDPOINTFILE pMove = pCurr;
243
244 pCurr = pCurr->AioMgr.pEndpointNext;
245
246 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pMove);
247
248 if (fReqsPending)
249 {
250 pMove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
251 pMove->AioMgr.fMoving = true;
252 pMove->AioMgr.pAioMgrDst = pAioMgrNew;
253 }
254 else
255 {
256 pMove->AioMgr.fMoving = false;
257 pMove->AioMgr.pAioMgrDst = NULL;
258 pdmacFileAioMgrAddEndpoint(pAioMgrNew, pMove);
259 }
260 }
261 }
262 }
263 else
264 {
265 /* Don't process further but leave a log entry about reduced performance. */
266 LogRel(("AIOMgr: Could not create new I/O manager (rc=%Rrc). Expect reduced performance\n", rc));
267 }
268 }
269}
270
271/**
272 * Error handler which will create the failsafe managers and destroy the failed I/O manager.
273 *
274 * @returns VBox status code
275 * @param pAioMgr The I/O manager the error ocurred on.
276 * @param rc The error code.
277 */
278static int pdmacFileAioMgrNormalErrorHandler(PPDMACEPFILEMGR pAioMgr, int rc, RT_SRC_POS_DECL)
279{
280 LogRel(("AIOMgr: I/O manager %#p encountered a critical error (rc=%Rrc) during operation. Falling back to failsafe mode. Expect reduced performance\n",
281 pAioMgr, rc));
282 LogRel(("AIOMgr: Error happened in %s:(%u){%s}\n", RT_SRC_POS_ARGS));
283 LogRel(("AIOMgr: Please contact the product vendor\n"));
284
285 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pAioMgr->pEndpointsHead->Core.pEpClass;
286
287 pAioMgr->enmState = PDMACEPFILEMGRSTATE_FAULT;
288 ASMAtomicWriteBool(&pEpClassFile->fFailsafe, true);
289
290 AssertMsgFailed(("Implement\n"));
291 return VINF_SUCCESS;
292}
293
294/**
295 * Put a list of tasks in the pending request list of an endpoint.
296 */
297DECLINLINE(void) pdmacFileAioMgrEpAddTaskList(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTaskHead)
298{
299 /* Add the rest of the tasks to the pending list */
300 if (!pEndpoint->AioMgr.pReqsPendingHead)
301 {
302 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
303 pEndpoint->AioMgr.pReqsPendingHead = pTaskHead;
304 }
305 else
306 {
307 Assert(pEndpoint->AioMgr.pReqsPendingTail);
308 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTaskHead;
309 }
310
311 /* Update the tail. */
312 while (pTaskHead->pNext)
313 pTaskHead = pTaskHead->pNext;
314
315 pEndpoint->AioMgr.pReqsPendingTail = pTaskHead;
316}
317
318/**
319 * Put one task in the pending request list of an endpoint.
320 */
321DECLINLINE(void) pdmacFileAioMgrEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
322{
323 /* Add the rest of the tasks to the pending list */
324 if (!pEndpoint->AioMgr.pReqsPendingHead)
325 {
326 Assert(!pEndpoint->AioMgr.pReqsPendingTail);
327 pEndpoint->AioMgr.pReqsPendingHead = pTask;
328 }
329 else
330 {
331 Assert(pEndpoint->AioMgr.pReqsPendingTail);
332 pEndpoint->AioMgr.pReqsPendingTail->pNext = pTask;
333 }
334
335 pEndpoint->AioMgr.pReqsPendingTail = pTask;
336}
337
338/**
339 * Wrapper around RTFIleAioCtxSubmit() which is also doing error handling.
340 */
341static int pdmacFileAioMgrNormalReqsEnqueue(PPDMACEPFILEMGR pAioMgr,
342 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
343 PRTFILEAIOREQ pahReqs, size_t cReqs)
344{
345 int rc;
346
347 pAioMgr->cRequestsActive += cReqs;
348 pEndpoint->AioMgr.cRequestsActive += cReqs;
349
350 LogFlow(("Enqueuing %d requests. I/O manager has a total of %d active requests now\n", cReqs, pAioMgr->cRequestsActive));
351 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
352
353 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, pahReqs, cReqs);
354 if (RT_FAILURE(rc))
355 {
356 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
357 {
358 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
359
360 /*
361 * We run out of resources.
362 * Need to check which requests got queued
363 * and put the rest on the pending list again.
364 */
365 if (RT_UNLIKELY(!pEpClass->fOutOfResourcesWarningPrinted))
366 {
367 pEpClass->fOutOfResourcesWarningPrinted = true;
368 LogRel(("AIOMgr: The operating system doesn't have enough resources "
369 "to handle the I/O load of the VM. Expect reduced I/O performance\n"));
370 }
371
372 for (size_t i = 0; i < cReqs; i++)
373 {
374 int rcReq = RTFileAioReqGetRC(pahReqs[i], NULL);
375
376 if (rcReq != VERR_FILE_AIO_IN_PROGRESS)
377 {
378 AssertMsg(rcReq == VERR_FILE_AIO_NOT_SUBMITTED,
379 ("Request returned unexpected return code: rc=%Rrc\n", rcReq));
380
381 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(pahReqs[i]);
382
383 /* Put the entry on the free array */
384 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = pahReqs[i];
385 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
386
387 pdmacFileAioMgrEpAddTask(pEndpoint, pTask);
388 pAioMgr->cRequestsActive--;
389 pEndpoint->AioMgr.cRequestsActive--;
390 }
391 }
392 LogFlow(("Removed requests. I/O manager has a total of %d active requests now\n", pAioMgr->cRequestsActive));
393 LogFlow(("Endpoint has a total of %d active requests now\n", pEndpoint->AioMgr.cRequestsActive));
394 }
395 else
396 AssertMsgFailed(("Unexpected return code rc=%Rrc\n", rc));
397 }
398
399 return rc;
400}
401
402static int pdmacFileAioMgrNormalProcessTaskList(PPDMACTASKFILE pTaskHead,
403 PPDMACEPFILEMGR pAioMgr,
404 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
405{
406 RTFILEAIOREQ apReqs[20];
407 unsigned cRequests = 0;
408 unsigned cMaxRequests = PDMACEPFILEMGR_REQS_MAX - pAioMgr->cRequestsActive;
409 int rc = VINF_SUCCESS;
410 PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
411
412 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
413 ("Trying to process request lists of a non active endpoint!\n"));
414
415 /* Go through the list and queue the requests until we get a flush request */
416 while ( pTaskHead
417 && !pEndpoint->pFlushReq
418 && (cMaxRequests > 0)
419 && RT_SUCCESS(rc))
420 {
421 PPDMACTASKFILE pCurr = pTaskHead;
422
423 pTaskHead = pTaskHead->pNext;
424
425 pCurr->pNext = NULL;
426
427 AssertMsg(VALID_PTR(pCurr->pEndpoint) && (pCurr->pEndpoint == pEndpoint),
428 ("Endpoints do not match\n"));
429
430 switch (pCurr->enmTransferType)
431 {
432 case PDMACTASKFILETRANSFER_FLUSH:
433 {
434 /* If there is no data transfer request this flush request finished immediately. */
435 if (!pEndpoint->AioMgr.cRequestsActive)
436 {
437 pCurr->pfnCompleted(pCurr, pCurr->pvUser);
438 pdmacFileTaskFree(pEndpoint, pCurr);
439 }
440 else
441 {
442 pEndpoint->pFlushReq = pCurr;
443 }
444 break;
445 }
446 case PDMACTASKFILETRANSFER_READ:
447 case PDMACTASKFILETRANSFER_WRITE:
448 {
449 RTFILEAIOREQ hReq = NIL_RTFILEAIOREQ;
450 void *pvBuf = pCurr->DataSeg.pvSeg;
451
452 /* Get a request handle. */
453 if (pAioMgr->iFreeReqNext != pAioMgr->iFreeEntryNext)
454 {
455 hReq = pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext];
456 pAioMgr->pahReqsFree[pAioMgr->iFreeReqNext] = NIL_RTFILEAIOREQ;
457 pAioMgr->iFreeReqNext = (pAioMgr->iFreeReqNext + 1) % pAioMgr->cReqEntries;
458 }
459 else
460 {
461 rc = RTFileAioReqCreate(&hReq);
462 AssertRC(rc);
463 }
464
465 AssertMsg(hReq != NIL_RTFILEAIOREQ, ("Out of request handles\n"));
466
467 /* Check if the alignment requirements are met.
468 * Offset, transfer size and buffer address
469 * need to be on a 512 boundary. */
470 RTFOFF offStart = pCurr->Off & ~(RTFOFF)(512-1);
471 size_t cbToTransfer = RT_ALIGN_Z(pCurr->DataSeg.cbSeg + (pCurr->Off - offStart), 512);
472 PDMACTASKFILETRANSFER enmTransferType = pCurr->enmTransferType;
473
474 AssertMsg( pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE
475 || (uint64_t)(offStart + cbToTransfer) <= pEndpoint->cbFile,
476 ("Read exceeds file size offStart=%RTfoff cbToTransfer=%d cbFile=%llu\n",
477 offStart, cbToTransfer, pEndpoint->cbFile));
478
479 pCurr->fPrefetch = false;
480
481 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
482 || RT_UNLIKELY(offStart != pCurr->Off)
483 || ((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) != (RTR3UINTPTR)pvBuf))
484 {
485 LogFlow(("Using bounce buffer for task %#p cbToTransfer=%zd cbSeg=%zd offStart=%RTfoff off=%RTfoff\n",
486 pCurr, cbToTransfer, pCurr->DataSeg.cbSeg, offStart, pCurr->Off));
487
488 /* Create bounce buffer. */
489 pCurr->fBounceBuffer = true;
490
491 AssertMsg(pCurr->Off >= offStart, ("Overflow in calculation Off=%llu offStart=%llu\n",
492 pCurr->Off, offStart));
493 pCurr->uBounceBufOffset = pCurr->Off - offStart;
494
495 /** @todo: I think we need something like a RTMemAllocAligned method here.
496 * Current assumption is that the maximum alignment is 4096byte
497 * (GPT disk on Windows)
498 * so we can use RTMemPageAlloc here.
499 */
500 pCurr->pvBounceBuffer = RTMemPageAlloc(cbToTransfer);
501 AssertPtr(pCurr->pvBounceBuffer);
502 pvBuf = pCurr->pvBounceBuffer;
503
504 if (pCurr->enmTransferType == PDMACTASKFILETRANSFER_WRITE)
505 {
506 if ( RT_UNLIKELY(cbToTransfer != pCurr->DataSeg.cbSeg)
507 || RT_UNLIKELY(offStart != pCurr->Off))
508 {
509 /* We have to fill the buffer first before we can update the data. */
510 LogFlow(("Prefetching data for task %#p\n", pCurr));
511 pCurr->fPrefetch = true;
512 enmTransferType = PDMACTASKFILETRANSFER_READ;
513 }
514 else
515 memcpy(pvBuf, pCurr->DataSeg.pvSeg, pCurr->DataSeg.cbSeg);
516 }
517 }
518 else
519 pCurr->fBounceBuffer = false;
520
521 AssertMsg((pEpClassFile->uBitmaskAlignment & (RTR3UINTPTR)pvBuf) == (RTR3UINTPTR)pvBuf,
522 ("AIO: Alignment restrictions not met! pvBuf=%p uBitmaskAlignment=%p\n", pvBuf, pEpClassFile->uBitmaskAlignment));
523
524 if (enmTransferType == PDMACTASKFILETRANSFER_WRITE)
525 {
526 /* Grow the file if needed. */
527 if (RT_UNLIKELY((uint64_t)(pCurr->Off + pCurr->DataSeg.cbSeg) > pEndpoint->cbFile))
528 {
529 ASMAtomicWriteU64(&pEndpoint->cbFile, pCurr->Off + pCurr->DataSeg.cbSeg);
530 RTFileSetSize(pEndpoint->File, pCurr->Off + pCurr->DataSeg.cbSeg);
531 }
532
533 rc = RTFileAioReqPrepareWrite(hReq, pEndpoint->File,
534 offStart, pvBuf, cbToTransfer, pCurr);
535 }
536 else
537 rc = RTFileAioReqPrepareRead(hReq, pEndpoint->File,
538 offStart, pvBuf, cbToTransfer, pCurr);
539 AssertRC(rc);
540
541 apReqs[cRequests] = hReq;
542 pEndpoint->AioMgr.cReqsProcessed++;
543 cMaxRequests--;
544 cRequests++;
545 if (cRequests == RT_ELEMENTS(apReqs))
546 {
547 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
548 cRequests = 0;
549 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
550 ("Unexpected return code\n"));
551 }
552 break;
553 }
554 default:
555 AssertMsgFailed(("Invalid transfer type %d\n", pCurr->enmTransferType));
556 }
557 }
558
559 if (cRequests)
560 {
561 rc = pdmacFileAioMgrNormalReqsEnqueue(pAioMgr, pEndpoint, apReqs, cRequests);
562 AssertMsg(RT_SUCCESS(rc) || (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES),
563 ("Unexpected return code rc=%Rrc\n", rc));
564 }
565
566 if (pTaskHead)
567 {
568 /* Add the rest of the tasks to the pending list */
569 pdmacFileAioMgrEpAddTaskList(pEndpoint, pTaskHead);
570
571 if (RT_UNLIKELY(!cMaxRequests && !pEndpoint->pFlushReq))
572 {
573 /*
574 * The I/O manager has no room left for more requests
575 * but there are still requests to process.
576 * Create a new I/O manager and let it handle some endpoints.
577 */
578 pdmacFileAioMgrNormalBalanceLoad(pAioMgr);
579 }
580 }
581
582 /* Insufficient resources are not fatal. */
583 if (rc == VERR_FILE_AIO_INSUFFICIENT_RESSOURCES)
584 rc = VINF_SUCCESS;
585
586 return rc;
587}
588
589/**
590 * Adds all pending requests for the given endpoint
591 * until a flush request is encountered or there is no
592 * request anymore.
593 *
594 * @returns VBox status code.
595 * @param pAioMgr The async I/O manager for the endpoint
596 * @param pEndpoint The endpoint to get the requests from.
597 */
598static int pdmacFileAioMgrNormalQueueReqs(PPDMACEPFILEMGR pAioMgr,
599 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
600{
601 int rc = VINF_SUCCESS;
602 PPDMACTASKFILE pTasksHead = NULL;
603
604 AssertMsg(pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE,
605 ("Trying to process request lists of a non active endpoint!\n"));
606
607 Assert(!pEndpoint->pFlushReq);
608
609 /* Check the pending list first */
610 if (pEndpoint->AioMgr.pReqsPendingHead)
611 {
612 LogFlow(("Queuing pending requests first\n"));
613
614 pTasksHead = pEndpoint->AioMgr.pReqsPendingHead;
615 /*
616 * Clear the list as the processing routine will insert them into the list
617 * again if it gets a flush request.
618 */
619 pEndpoint->AioMgr.pReqsPendingHead = NULL;
620 pEndpoint->AioMgr.pReqsPendingTail = NULL;
621 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
622 AssertRC(rc);
623 }
624
625 if (!pEndpoint->pFlushReq && !pEndpoint->AioMgr.pReqsPendingHead)
626 {
627 /* Now the request queue. */
628 pTasksHead = pdmacFileEpGetNewTasks(pEndpoint);
629 if (pTasksHead)
630 {
631 rc = pdmacFileAioMgrNormalProcessTaskList(pTasksHead, pAioMgr, pEndpoint);
632 AssertRC(rc);
633 }
634 }
635
636 return rc;
637}
638
639static int pdmacFileAioMgrNormalProcessBlockingEvent(PPDMACEPFILEMGR pAioMgr)
640{
641 int rc = VINF_SUCCESS;
642 bool fNotifyWaiter = false;
643
644 LogFlowFunc((": Enter\n"));
645
646 Assert(pAioMgr->fBlockingEventPending);
647
648 switch (pAioMgr->enmBlockingEvent)
649 {
650 case PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT:
651 {
652 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointNew = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
653 AssertMsg(VALID_PTR(pEndpointNew), ("Adding endpoint event without a endpoint to add\n"));
654
655 pEndpointNew->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
656
657 pEndpointNew->AioMgr.pEndpointNext = pAioMgr->pEndpointsHead;
658 pEndpointNew->AioMgr.pEndpointPrev = NULL;
659 if (pAioMgr->pEndpointsHead)
660 pAioMgr->pEndpointsHead->AioMgr.pEndpointPrev = pEndpointNew;
661 pAioMgr->pEndpointsHead = pEndpointNew;
662
663 /* Assign the completion point to this file. */
664 rc = RTFileAioCtxAssociateWithFile(pAioMgr->hAioCtx, pEndpointNew->File);
665 fNotifyWaiter = true;
666 pAioMgr->cEndpoints++;
667 break;
668 }
669 case PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT:
670 {
671 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointRemove = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
672 AssertMsg(VALID_PTR(pEndpointRemove), ("Removing endpoint event without a endpoint to remove\n"));
673
674 pEndpointRemove->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_REMOVING;
675 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointRemove);
676 break;
677 }
678 case PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT:
679 {
680 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointClose = (PPDMASYNCCOMPLETIONENDPOINTFILE)ASMAtomicReadPtr((void * volatile *)&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
681 AssertMsg(VALID_PTR(pEndpointClose), ("Close endpoint event without a endpoint to close\n"));
682
683 if (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
684 {
685 LogFlowFunc((": Closing endpoint %#p{%s}\n", pEndpointClose, pEndpointClose->Core.pszUri));
686
687 /* Make sure all tasks finished. Process the queues a last time first. */
688 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpointClose);
689 AssertRC(rc);
690
691 pEndpointClose->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING;
692 fNotifyWaiter = !pdmacFileAioMgrNormalRemoveEndpoint(pEndpointClose);
693 }
694 else if ( (pEndpointClose->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_CLOSING)
695 && (!pEndpointClose->AioMgr.cRequestsActive))
696 fNotifyWaiter = true;
697 break;
698 }
699 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN:
700 {
701 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SHUTDOWN;
702 if (!pAioMgr->cRequestsActive)
703 fNotifyWaiter = true;
704 break;
705 }
706 case PDMACEPFILEAIOMGRBLOCKINGEVENT_SUSPEND:
707 {
708 pAioMgr->enmState = PDMACEPFILEMGRSTATE_SUSPENDING;
709 break;
710 }
711 case PDMACEPFILEAIOMGRBLOCKINGEVENT_RESUME:
712 {
713 pAioMgr->enmState = PDMACEPFILEMGRSTATE_RUNNING;
714 fNotifyWaiter = true;
715 break;
716 }
717 default:
718 AssertReleaseMsgFailed(("Invalid event type %d\n", pAioMgr->enmBlockingEvent));
719 }
720
721 if (fNotifyWaiter)
722 {
723 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
724 pAioMgr->enmBlockingEvent = PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID;
725
726 /* Release the waiting thread. */
727 LogFlow(("Signalling waiter\n"));
728 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
729 AssertRC(rc);
730 }
731
732 LogFlowFunc((": Leave\n"));
733 return rc;
734}
735
736/**
737 * Checks all endpoints for pending events or new requests.
738 *
739 * @returns VBox status code.
740 * @param pAioMgr The I/O manager handle.
741 */
742static int pdmacFileAioMgrNormalCheckEndpoints(PPDMACEPFILEMGR pAioMgr)
743{
744 /* Check the assigned endpoints for new tasks if there isn't a flush request active at the moment. */
745 int rc = VINF_SUCCESS;
746 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint = pAioMgr->pEndpointsHead;
747
748 while (pEndpoint)
749 {
750 if (!pEndpoint->pFlushReq
751 && (pEndpoint->enmState == PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE)
752 && !pEndpoint->AioMgr.fMoving)
753 {
754 rc = pdmacFileAioMgrNormalQueueReqs(pAioMgr, pEndpoint);
755 if (RT_FAILURE(rc))
756 return rc;
757 }
758 else if (!pEndpoint->AioMgr.cRequestsActive)
759 {
760 /* Reopen the file so that the new endpoint can reassociate with the file */
761 RTFileClose(pEndpoint->File);
762 rc = RTFileOpen(&pEndpoint->File, pEndpoint->Core.pszUri, pEndpoint->fFlags);
763 AssertRC(rc);
764
765 if (pEndpoint->AioMgr.fMoving)
766 {
767 pEndpoint->AioMgr.fMoving = false;
768 pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
769 }
770 else
771 {
772 Assert(pAioMgr->fBlockingEventPending);
773 ASMAtomicWriteBool(&pAioMgr->fBlockingEventPending, false);
774
775 /* Release the waiting thread. */
776 LogFlow(("Signalling waiter\n"));
777 rc = RTSemEventSignal(pAioMgr->EventSemBlock);
778 AssertRC(rc);
779 }
780 }
781
782 pEndpoint = pEndpoint->AioMgr.pEndpointNext;
783 }
784
785 return rc;
786}
787
/**
 * Helper macro for checking for error codes.
 *
 * On a failure status the error handler is called with the current source
 * position and the enclosing function returns the handler's status.
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * can be used safely in unbraced if/else bodies.
 */
#define CHECK_RC(pAioMgr, rc) \
    do \
    { \
        if (RT_FAILURE(rc)) \
        { \
            int rc2 = pdmacFileAioMgrNormalErrorHandler(pAioMgr, rc, RT_SRC_POS); \
            return rc2; \
        } \
    } while (0)
795
796/**
797 * The normal I/O manager using the RTFileAio* API
798 *
799 * @returns VBox status code.
800 * @param ThreadSelf Handle of the thread.
801 * @param pvUser Opaque user data.
802 */
803int pdmacFileAioMgrNormal(RTTHREAD ThreadSelf, void *pvUser)
804{
805 int rc = VINF_SUCCESS;
806 PPDMACEPFILEMGR pAioMgr = (PPDMACEPFILEMGR)pvUser;
807 uint64_t uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
808
809 while ( (pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING)
810 || (pAioMgr->enmState == PDMACEPFILEMGRSTATE_SUSPENDING))
811 {
812 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, true);
813 if (!ASMAtomicReadBool(&pAioMgr->fWokenUp))
814 rc = RTSemEventWait(pAioMgr->EventSem, RT_INDEFINITE_WAIT);
815 ASMAtomicWriteBool(&pAioMgr->fWaitingEventSem, false);
816 AssertRC(rc);
817
818 LogFlow(("Got woken up\n"));
819 ASMAtomicWriteBool(&pAioMgr->fWokenUp, false);
820
821 /* Check for an external blocking event first. */
822 if (pAioMgr->fBlockingEventPending)
823 {
824 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
825 CHECK_RC(pAioMgr, rc);
826 }
827
828 if (RT_LIKELY(pAioMgr->enmState == PDMACEPFILEMGRSTATE_RUNNING))
829 {
830 /* We got woken up because an endpoint issued new requests. Queue them. */
831 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
832 CHECK_RC(pAioMgr, rc);
833
834 while (pAioMgr->cRequestsActive)
835 {
836 RTFILEAIOREQ apReqs[20];
837 uint32_t cReqsCompleted = 0;
838 size_t cReqsWait;
839
840 if (pAioMgr->cRequestsActive > RT_ELEMENTS(apReqs))
841 cReqsWait = RT_ELEMENTS(apReqs);
842 else
843 cReqsWait = pAioMgr->cRequestsActive;
844
845 LogFlow(("Waiting for %d of %d tasks to complete\n", pAioMgr->cRequestsActive, cReqsWait));
846
847 rc = RTFileAioCtxWait(pAioMgr->hAioCtx,
848 cReqsWait,
849 RT_INDEFINITE_WAIT, apReqs,
850 RT_ELEMENTS(apReqs), &cReqsCompleted);
851 if (RT_FAILURE(rc) && (rc != VERR_INTERRUPTED))
852 CHECK_RC(pAioMgr, rc);
853
854 LogFlow(("%d tasks completed\n", cReqsCompleted));
855
856 for (uint32_t i = 0; i < cReqsCompleted; i++)
857 {
858 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint;
859 size_t cbTransfered = 0;
860 int rcReq = RTFileAioReqGetRC(apReqs[i], &cbTransfered);
861 PPDMACTASKFILE pTask = (PPDMACTASKFILE)RTFileAioReqGetUser(apReqs[i]);
862
863 pEndpoint = pTask->pEndpoint;
864
865 /*
866 * It is possible that the request failed on Linux with kernels < 2.6.23
867 * if the passed buffer was allocated with remap_pfn_range or if the file
868 * is on an NFS endpoint which does not support async and direct I/O at the same time.
869 * The endpoint will be migrated to a failsafe manager in case a request fails.
870 */
871 if (RT_FAILURE(rcReq))
872 {
873 /* Free bounce buffers and the IPRT request. */
874 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
875 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
876
877 pAioMgr->cRequestsActive--;
878 pEndpoint->AioMgr.cRequestsActive--;
879 pEndpoint->AioMgr.cReqsProcessed++;
880
881 if (pTask->fBounceBuffer)
882 RTMemFree(pTask->pvBounceBuffer);
883
884 /* Queue the request on the pending list. */
885 pTask->pNext = pEndpoint->AioMgr.pReqsPendingHead;
886 pEndpoint->AioMgr.pReqsPendingHead = pTask;
887
888 /* Create a new failsafe manager if neccessary. */
889 if (!pEndpoint->AioMgr.fMoving)
890 {
891 PPDMACEPFILEMGR pAioMgrFailsafe;
892
893 LogRel(("%s: Request %#p failed with rc=%Rrc, migrating endpoint %s to failsafe manager.\n",
894 RTThreadGetName(pAioMgr->Thread), pTask, rcReq, pEndpoint->Core.pszUri));
895
896 pEndpoint->AioMgr.fMoving = true;
897
898 rc = pdmacFileAioMgrCreate((PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass,
899 &pAioMgrFailsafe, true);
900 AssertRC(rc);
901
902 pEndpoint->AioMgr.pAioMgrDst = pAioMgrFailsafe;
903
904 /* Update the flags to open the file with. Disable async I/O and enable the host cache. */
905 pEndpoint->fFlags &= ~(RTFILE_O_ASYNC_IO | RTFILE_O_NO_CACHE);
906 }
907
908 /* If this was the last request for the endpoint migrate it to the new manager. */
909 if (!pEndpoint->AioMgr.cRequestsActive)
910 {
911 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
912 Assert(!fReqsPending);
913
914 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
915 AssertRC(rc);
916 }
917 }
918 else
919 {
920 AssertMsg(( (cbTransfered == pTask->DataSeg.cbSeg)
921 || (pTask->fBounceBuffer && (cbTransfered >= pTask->DataSeg.cbSeg))),
922 ("Task didn't completed successfully (rc=%Rrc) or was incomplete (cbTransfered=%u)\n", rcReq, cbTransfered));
923
924 if (pTask->fPrefetch)
925 {
926 Assert(pTask->enmTransferType == PDMACTASKFILETRANSFER_WRITE);
927 Assert(pTask->fBounceBuffer);
928
929 memcpy(((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
930 pTask->DataSeg.pvSeg,
931 pTask->DataSeg.cbSeg);
932
933 /* Write it now. */
934 pTask->fPrefetch = false;
935 size_t cbToTransfer = RT_ALIGN_Z(pTask->DataSeg.cbSeg, 512);
936 RTFOFF offStart = pTask->Off & ~(RTFOFF)(512-1);
937
938 /* Grow the file if needed. */
939 if (RT_UNLIKELY((uint64_t)(pTask->Off + pTask->DataSeg.cbSeg) > pEndpoint->cbFile))
940 {
941 ASMAtomicWriteU64(&pEndpoint->cbFile, pTask->Off + pTask->DataSeg.cbSeg);
942 RTFileSetSize(pEndpoint->File, pTask->Off + pTask->DataSeg.cbSeg);
943 }
944
945 rc = RTFileAioReqPrepareWrite(apReqs[i], pEndpoint->File,
946 offStart, pTask->pvBounceBuffer, cbToTransfer, pTask);
947 AssertRC(rc);
948 rc = RTFileAioCtxSubmit(pAioMgr->hAioCtx, &apReqs[i], 1);
949 AssertRC(rc);
950 }
951 else
952 {
953 if (pTask->fBounceBuffer)
954 {
955 if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
956 memcpy(pTask->DataSeg.pvSeg,
957 ((uint8_t *)pTask->pvBounceBuffer) + pTask->uBounceBufOffset,
958 pTask->DataSeg.cbSeg);
959
960 RTMemPageFree(pTask->pvBounceBuffer);
961 }
962
963 /* Put the entry on the free array */
964 pAioMgr->pahReqsFree[pAioMgr->iFreeEntryNext] = apReqs[i];
965 pAioMgr->iFreeEntryNext = (pAioMgr->iFreeEntryNext + 1) % pAioMgr->cReqEntries;
966
967 pAioMgr->cRequestsActive--;
968 pEndpoint->AioMgr.cRequestsActive--;
969 pEndpoint->AioMgr.cReqsProcessed++;
970
971 /* Call completion callback */
972 pTask->pfnCompleted(pTask, pTask->pvUser);
973 pdmacFileTaskFree(pEndpoint, pTask);
974
975 /*
976 * If there is no request left on the endpoint but a flush request is set
977 * it completed now and we notify the owner.
978 * Furthermore we look for new requests and continue.
979 */
980 if (!pEndpoint->AioMgr.cRequestsActive && pEndpoint->pFlushReq)
981 {
982 /* Call completion callback */
983 pTask = pEndpoint->pFlushReq;
984 pEndpoint->pFlushReq = NULL;
985
986 AssertMsg(pTask->pEndpoint == pEndpoint, ("Endpoint of the flush request does not match assigned one\n"));
987
988 pTask->pfnCompleted(pTask, pTask->pvUser);
989 pdmacFileTaskFree(pEndpoint, pTask);
990 }
991 else if (RT_UNLIKELY(!pEndpoint->AioMgr.cRequestsActive && pEndpoint->AioMgr.fMoving))
992 {
993 /* If the endpoint is about to be migrated do it now. */
994 bool fReqsPending = pdmacFileAioMgrNormalRemoveEndpoint(pEndpoint);
995 Assert(!fReqsPending);
996
997 rc = pdmacFileAioMgrAddEndpoint(pEndpoint->AioMgr.pAioMgrDst, pEndpoint);
998 AssertRC(rc);
999 }
1000 }
1001 } /* request completed successfully */
1002 } /* for every completed request */
1003
1004 /* Check for an external blocking event before we go to sleep again. */
1005 if (pAioMgr->fBlockingEventPending)
1006 {
1007 rc = pdmacFileAioMgrNormalProcessBlockingEvent(pAioMgr);
1008 CHECK_RC(pAioMgr, rc);
1009 }
1010
1011 /* Update load statistics. */
1012 uint64_t uMillisCurr = RTTimeMilliTS();
1013 if (uMillisCurr > uMillisEnd)
1014 {
1015 PPDMASYNCCOMPLETIONENDPOINTFILE pEndpointCurr = pAioMgr->pEndpointsHead;
1016
1017 /* Calculate timespan. */
1018 uMillisCurr -= uMillisEnd;
1019
1020 while (pEndpointCurr)
1021 {
1022 pEndpointCurr->AioMgr.cReqsPerSec = pEndpointCurr->AioMgr.cReqsProcessed / (uMillisCurr + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD);
1023 pEndpointCurr->AioMgr.cReqsProcessed = 0;
1024 pEndpointCurr = pEndpointCurr->AioMgr.pEndpointNext;
1025 }
1026
1027 /* Set new update interval */
1028 uMillisEnd = RTTimeMilliTS() + PDMACEPFILEMGR_LOAD_UPDATE_PERIOD;
1029 }
1030
1031 /* Check endpoints for new requests. */
1032 rc = pdmacFileAioMgrNormalCheckEndpoints(pAioMgr);
1033 CHECK_RC(pAioMgr, rc);
1034 } /* while requests are active. */
1035 } /* if still running */
1036 } /* while running */
1037
1038 return rc;
1039}
1040
1041#undef CHECK_RC
1042
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use