VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/fileaio-posix.cpp@ 76553

Last change on this file since 76553 was 76553, checked in by vboxsync, 5 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.8 KB
Line 
1/* $Id: fileaio-posix.cpp 76553 2019-01-01 01:45:53Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for POSIX compliant host platforms.
4 */
5
6/*
7 * Copyright (C) 2006-2019 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#define LOG_GROUP RTLOGGROUP_DIR
32#include <iprt/asm.h>
33#include <iprt/file.h>
34#include <iprt/mem.h>
35#include <iprt/assert.h>
36#include <iprt/string.h>
37#include <iprt/err.h>
38#include <iprt/log.h>
39#include <iprt/thread.h>
40#include <iprt/semaphore.h>
41#include "internal/fileaio.h"
42
43#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
44# include <sys/types.h>
45# include <sys/sysctl.h> /* for sysctlbyname */
46#endif
47#if defined(RT_OS_FREEBSD)
48# include <fcntl.h> /* O_SYNC */
49#endif
50#include <aio.h>
51#include <errno.h>
52#include <time.h>
53
/*
 * Not every host defines AIO_LISTIO_MAX (Linux does not).
 * Fall back to a very large value in that case so the limit
 * never becomes the restricting factor.
 */
#ifndef AIO_LISTIO_MAX
# define AIO_LISTIO_MAX UINT32_MAX
#endif

#if 0 /* Debugging aid: force a tiny batch limit. */
# undef AIO_LISTIO_MAX
# define AIO_LISTIO_MAX 16
#endif

/** Value of RTFILEAIOREQINTERNAL::iWaitingList when the request occupies no slot of the waiting array. */
#define RTFILEAIOCTX_WAIT_ENTRY_INVALID (~0U)

/** Builds without logging get an empty statement instead of the rtFileAioCtxDump function. */
#ifndef LOG_ENABLED
# define rtFileAioCtxDump(pCtxInt) do {} while (0)
#endif
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * Async I/O request state.
82 */
83typedef struct RTFILEAIOREQINTERNAL
84{
85 /** The aio control block. FIRST ELEMENT! */
86 struct aiocb AioCB;
87 /** Next element in the chain. */
88 struct RTFILEAIOREQINTERNAL *pNext;
89 /** Previous element in the chain. */
90 struct RTFILEAIOREQINTERNAL *pPrev;
91 /** Current state the request is in. */
92 RTFILEAIOREQSTATE enmState;
93 /** Flag whether this is a flush request. */
94 bool fFlush;
95 /** Flag indicating if the request was canceled. */
96 volatile bool fCanceled;
97 /** Opaque user data. */
98 void *pvUser;
99 /** Number of bytes actually transferred. */
100 size_t cbTransfered;
101 /** Status code. */
102 int Rc;
103 /** Completion context we are assigned to. */
104 struct RTFILEAIOCTXINTERNAL *pCtxInt;
105 /** Entry in the waiting list the request is in. */
106 unsigned iWaitingList;
107 /** Magic value (RTFILEAIOREQ_MAGIC). */
108 uint32_t u32Magic;
109} RTFILEAIOREQINTERNAL, *PRTFILEAIOREQINTERNAL;
110
111/**
112 * Async I/O completion context state.
113 */
114typedef struct RTFILEAIOCTXINTERNAL
115{
116 /** Current number of requests active on this context. */
117 volatile int32_t cRequests;
118 /** Maximum number of requests this context can handle. */
119 uint32_t cMaxRequests;
120 /** The ID of the thread which is currently waiting for requests. */
121 volatile RTTHREAD hThreadWait;
122 /** Flag whether the thread was woken up. */
123 volatile bool fWokenUp;
124 /** Flag whether the thread is currently waiting in the syscall. */
125 volatile bool fWaiting;
126 /** Flags given during creation. */
127 uint32_t fFlags;
128 /** Magic value (RTFILEAIOCTX_MAGIC). */
129 uint32_t u32Magic;
130 /** Flag whether the thread was woken up due to a internal event. */
131 volatile bool fWokenUpInternal;
132 /** List of new requests which needs to be inserted into apReqs by the
133 * waiting thread. */
134 volatile PRTFILEAIOREQINTERNAL apReqsNewHead[5];
135 /** Special entry for requests which are canceled. Because only one
136 * request can be canceled at a time and the thread canceling the request
137 * has to wait we need only one entry. */
138 volatile PRTFILEAIOREQINTERNAL pReqToCancel;
139 /** Event semaphore the canceling thread is waiting for completion of
140 * the operation. */
141 RTSEMEVENT SemEventCancel;
142 /** Head of submitted elements waiting to get into the array. */
143 PRTFILEAIOREQINTERNAL pReqsWaitHead;
144 /** Tail of submitted elements waiting to get into the array. */
145 PRTFILEAIOREQINTERNAL pReqsWaitTail;
146 /** Maximum number of elements in the waiting array. */
147 unsigned cReqsWaitMax;
148 /** First free slot in the waiting list. */
149 unsigned iFirstFree;
150 /** List of requests we are currently waiting on.
151 * Size depends on cMaxRequests and AIO_LISTIO_MAX. */
152 volatile PRTFILEAIOREQINTERNAL apReqs[1];
153} RTFILEAIOCTXINTERNAL, *PRTFILEAIOCTXINTERNAL;
154
155/**
156 * Internal worker for waking up the waiting thread.
157 */
158static void rtFileAioCtxWakeup(PRTFILEAIOCTXINTERNAL pCtxInt)
159{
160 /*
161 * Read the thread handle before the status flag.
162 * If we read the handle after the flag we might
163 * end up with an invalid handle because the thread
164 * waiting in RTFileAioCtxWakeup() might get scheduled
165 * before we read the flag and returns.
166 * We can ensure that the handle is valid if fWaiting is true
167 * when reading the handle before the status flag.
168 */
169 RTTHREAD hThread;
170 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
171 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
172 if (fWaiting)
173 {
174 /*
175 * If a thread waits the handle must be valid.
176 * It is possible that the thread returns from
177 * aio_suspend() before the signal is send.
178 * This is no problem because we already set fWokenUp
179 * to true which will let the thread return VERR_INTERRUPTED
180 * and the next call to RTFileAioCtxWait() will not
181 * return VERR_INTERRUPTED because signals are not saved
182 * and will simply vanish if the destination thread can't
183 * receive it.
184 */
185 Assert(hThread != NIL_RTTHREAD);
186 RTThreadPoke(hThread);
187 }
188}
189
190/**
191 * Internal worker processing events and inserting new requests into the waiting list.
192 */
193static int rtFileAioCtxProcessEvents(PRTFILEAIOCTXINTERNAL pCtxInt)
194{
195 int rc = VINF_SUCCESS;
196
197 /* Process new requests first. */
198 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, false);
199 if (fWokenUp)
200 {
201 for (unsigned iSlot = 0; iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead); iSlot++)
202 {
203 PRTFILEAIOREQINTERNAL pReqHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[iSlot], NULL, PRTFILEAIOREQINTERNAL);
204
205 while ( (pCtxInt->iFirstFree < pCtxInt->cReqsWaitMax)
206 && pReqHead)
207 {
208 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
209 pCtxInt->apReqs[pCtxInt->iFirstFree] = pReqHead;
210 pReqHead->iWaitingList = pCtxInt->iFirstFree;
211 pReqHead = pReqHead->pNext;
212
213 /* Clear pointer to next and previous element just for safety. */
214 pCtxInt->apReqs[pCtxInt->iFirstFree]->pNext = NULL;
215 pCtxInt->apReqs[pCtxInt->iFirstFree]->pPrev = NULL;
216 pCtxInt->iFirstFree++;
217
218 Assert( (pCtxInt->iFirstFree <= pCtxInt->cMaxRequests)
219 && (pCtxInt->iFirstFree <= pCtxInt->cReqsWaitMax));
220 }
221
222 /* Append the rest to the wait list. */
223 if (pReqHead)
224 {
225 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
226 if (!pCtxInt->pReqsWaitHead)
227 {
228 Assert(!pCtxInt->pReqsWaitTail);
229 pCtxInt->pReqsWaitHead = pReqHead;
230 pReqHead->pPrev = NULL;
231 }
232 else
233 {
234 AssertPtr(pCtxInt->pReqsWaitTail);
235
236 pCtxInt->pReqsWaitTail->pNext = pReqHead;
237 pReqHead->pPrev = pCtxInt->pReqsWaitTail;
238 }
239
240 /* Update tail. */
241 while (pReqHead->pNext)
242 {
243 RTFIELAIOREQ_ASSERT_STATE(pReqHead->pNext, SUBMITTED);
244 pReqHead = pReqHead->pNext;
245 }
246
247 pCtxInt->pReqsWaitTail = pReqHead;
248 pCtxInt->pReqsWaitTail->pNext = NULL;
249 }
250 }
251
252 /* Check if a request needs to be canceled. */
253 PRTFILEAIOREQINTERNAL pReqToCancel = ASMAtomicReadPtrT(&pCtxInt->pReqToCancel, PRTFILEAIOREQINTERNAL);
254 if (pReqToCancel)
255 {
256 /* The request can be in the array waiting for completion or still in the list because it is full. */
257 if (pReqToCancel->iWaitingList != RTFILEAIOCTX_WAIT_ENTRY_INVALID)
258 {
259 /* Put it out of the waiting list. */
260 pCtxInt->apReqs[pReqToCancel->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
261 pCtxInt->apReqs[pReqToCancel->iWaitingList]->iWaitingList = pReqToCancel->iWaitingList;
262 }
263 else
264 {
265 /* Unlink from the waiting list. */
266 PRTFILEAIOREQINTERNAL pPrev = pReqToCancel->pPrev;
267 PRTFILEAIOREQINTERNAL pNext = pReqToCancel->pNext;
268
269 if (pNext)
270 pNext->pPrev = pPrev;
271 else
272 {
273 /* We canceled the tail. */
274 pCtxInt->pReqsWaitTail = pPrev;
275 }
276
277 if (pPrev)
278 pPrev->pNext = pNext;
279 else
280 {
281 /* We canceled the head. */
282 pCtxInt->pReqsWaitHead = pNext;
283 }
284 }
285
286 ASMAtomicDecS32(&pCtxInt->cRequests);
287 AssertMsg(pCtxInt->cRequests >= 0, ("Canceled request not which is not in this context\n"));
288 RTSemEventSignal(pCtxInt->SemEventCancel);
289 }
290 }
291 else
292 {
293 if (ASMAtomicXchgBool(&pCtxInt->fWokenUp, false))
294 rc = VERR_INTERRUPTED;
295 }
296
297 return rc;
298}
299
300RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
301{
302 int rcBSD = 0;
303 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
304
305#if defined(RT_OS_DARWIN)
306 int cReqsOutstandingMax = 0;
307 size_t cbParameter = sizeof(int);
308
309 rcBSD = sysctlbyname("kern.aioprocmax", /* name */
310 &cReqsOutstandingMax, /* Where to store the old value. */
311 &cbParameter, /* Size of the memory pointed to. */
312 NULL, /* Where the new value is located. */
313 0); /* Where the size of the new value is stored. */
314 if (rcBSD == -1)
315 return RTErrConvertFromErrno(errno);
316
317 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
318 pAioLimits->cbBufferAlignment = 0;
319#elif defined(RT_OS_FREEBSD)
320 /*
321 * The AIO API is implemented in a kernel module which is not
322 * loaded by default.
323 * If it is loaded there are additional sysctl parameters.
324 */
325 int cReqsOutstandingMax = 0;
326 size_t cbParameter = sizeof(int);
327
328 rcBSD = sysctlbyname("vfs.aio.max_aio_per_proc", /* name */
329 &cReqsOutstandingMax, /* Where to store the old value. */
330 &cbParameter, /* Size of the memory pointed to. */
331 NULL, /* Where the new value is located. */
332 0); /* Where the size of the new value is stored. */
333 if (rcBSD == -1)
334 {
335 /* ENOENT means the value is unknown thus the module is not loaded. */
336 if (errno == ENOENT)
337 return VERR_NOT_SUPPORTED;
338 else
339 return RTErrConvertFromErrno(errno);
340 }
341
342 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
343 pAioLimits->cbBufferAlignment = 0;
344#else
345 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
346 pAioLimits->cbBufferAlignment = 0;
347#endif
348
349 return VINF_SUCCESS;
350}
351
352RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
353{
354 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
355
356 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
357 if (RT_UNLIKELY(!pReqInt))
358 return VERR_NO_MEMORY;
359
360 pReqInt->pCtxInt = NULL;
361 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
362 pReqInt->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
363 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
364
365 *phReq = (RTFILEAIOREQ)pReqInt;
366
367 return VINF_SUCCESS;
368}
369
370
371RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
372{
373 /*
374 * Validate the handle and ignore nil.
375 */
376 if (hReq == NIL_RTFILEAIOREQ)
377 return VINF_SUCCESS;
378 PRTFILEAIOREQINTERNAL pReqInt = hReq;
379 RTFILEAIOREQ_VALID_RETURN(pReqInt);
380 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
381
382 /*
383 * Trash the magic and free it.
384 */
385 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
386 RTMemFree(pReqInt);
387 return VINF_SUCCESS;
388}
389
390/**
391 * Worker setting up the request.
392 */
393DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
394 unsigned uTransferDirection,
395 RTFOFF off, void *pvBuf, size_t cbTransfer,
396 void *pvUser)
397{
398 /*
399 * Validate the input.
400 */
401 PRTFILEAIOREQINTERNAL pReqInt = hReq;
402 RTFILEAIOREQ_VALID_RETURN(pReqInt);
403 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
404 Assert(hFile != NIL_RTFILE);
405 AssertPtr(pvBuf);
406 Assert(off >= 0);
407 Assert(cbTransfer > 0);
408
409 memset(&pReqInt->AioCB, 0, sizeof(struct aiocb));
410 pReqInt->fFlush = false;
411 pReqInt->AioCB.aio_lio_opcode = uTransferDirection;
412 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
413 pReqInt->AioCB.aio_offset = off;
414 pReqInt->AioCB.aio_nbytes = cbTransfer;
415 pReqInt->AioCB.aio_buf = pvBuf;
416 pReqInt->pvUser = pvUser;
417 pReqInt->pCtxInt = NULL;
418 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
419 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
420
421 return VINF_SUCCESS;
422}
423
424
425RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
426 void *pvBuf, size_t cbRead, void *pvUser)
427{
428 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_READ,
429 off, pvBuf, cbRead, pvUser);
430}
431
432
433RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
434 void const *pvBuf, size_t cbWrite, void *pvUser)
435{
436 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_WRITE,
437 off, (void *)pvBuf, cbWrite, pvUser);
438}
439
440
441RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
442{
443 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)hReq;
444
445 RTFILEAIOREQ_VALID_RETURN(pReqInt);
446 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
447 Assert(hFile != NIL_RTFILE);
448
449 pReqInt->fFlush = true;
450 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
451 pReqInt->AioCB.aio_offset = 0;
452 pReqInt->AioCB.aio_nbytes = 0;
453 pReqInt->AioCB.aio_buf = NULL;
454 pReqInt->pvUser = pvUser;
455 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
456 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
457
458 return VINF_SUCCESS;
459}
460
461
462RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
463{
464 PRTFILEAIOREQINTERNAL pReqInt = hReq;
465 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
466
467 return pReqInt->pvUser;
468}
469
470
471RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
472{
473 PRTFILEAIOREQINTERNAL pReqInt = hReq;
474 RTFILEAIOREQ_VALID_RETURN(pReqInt);
475 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
476
477 ASMAtomicXchgBool(&pReqInt->fCanceled, true);
478
479 int rcPosix = aio_cancel(pReqInt->AioCB.aio_fildes, &pReqInt->AioCB);
480
481 if (rcPosix == AIO_CANCELED)
482 {
483 PRTFILEAIOCTXINTERNAL pCtxInt = pReqInt->pCtxInt;
484 /*
485 * Notify the waiting thread that the request was canceled.
486 */
487 AssertMsg(VALID_PTR(pCtxInt),
488 ("Invalid state. Request was canceled but wasn't submitted\n"));
489
490 Assert(!pCtxInt->pReqToCancel);
491 ASMAtomicWritePtr(&pCtxInt->pReqToCancel, pReqInt);
492 rtFileAioCtxWakeup(pCtxInt);
493
494 /* Wait for acknowledge. */
495 int rc = RTSemEventWait(pCtxInt->SemEventCancel, RT_INDEFINITE_WAIT);
496 AssertRC(rc);
497
498 ASMAtomicWriteNullPtr(&pCtxInt->pReqToCancel);
499 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
500 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
501 return VINF_SUCCESS;
502 }
503 else if (rcPosix == AIO_ALLDONE)
504 return VERR_FILE_AIO_COMPLETED;
505 else if (rcPosix == AIO_NOTCANCELED)
506 return VERR_FILE_AIO_IN_PROGRESS;
507 else
508 return RTErrConvertFromErrno(errno);
509}
510
511
512RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
513{
514 PRTFILEAIOREQINTERNAL pReqInt = hReq;
515 RTFILEAIOREQ_VALID_RETURN(pReqInt);
516 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
517 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
518 AssertPtrNull(pcbTransfered);
519
520 if ( (RT_SUCCESS(pReqInt->Rc))
521 && (pcbTransfered))
522 *pcbTransfered = pReqInt->cbTransfered;
523
524 return pReqInt->Rc;
525}
526
527
528RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
529 uint32_t fFlags)
530{
531 PRTFILEAIOCTXINTERNAL pCtxInt;
532 unsigned cReqsWaitMax;
533
534 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
535 AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
536
537 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
538 return VERR_OUT_OF_RANGE;
539
540 cReqsWaitMax = RT_MIN(cAioReqsMax, AIO_LISTIO_MAX);
541
542 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ( sizeof(RTFILEAIOCTXINTERNAL)
543 + cReqsWaitMax * sizeof(PRTFILEAIOREQINTERNAL));
544 if (RT_UNLIKELY(!pCtxInt))
545 return VERR_NO_MEMORY;
546
547 /* Create event semaphore. */
548 int rc = RTSemEventCreate(&pCtxInt->SemEventCancel);
549 if (RT_FAILURE(rc))
550 {
551 RTMemFree(pCtxInt);
552 return rc;
553 }
554
555 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
556 pCtxInt->cMaxRequests = cAioReqsMax;
557 pCtxInt->cReqsWaitMax = cReqsWaitMax;
558 pCtxInt->fFlags = fFlags;
559 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
560
561 return VINF_SUCCESS;
562}
563
564
565RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
566{
567 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
568
569 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
570
571 if (RT_UNLIKELY(pCtxInt->cRequests))
572 return VERR_FILE_AIO_BUSY;
573
574 RTSemEventDestroy(pCtxInt->SemEventCancel);
575 RTMemFree(pCtxInt);
576
577 return VINF_SUCCESS;
578}
579
580
581RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
582{
583 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
584
585 if (hAioCtx == NIL_RTFILEAIOCTX)
586 return RTFILEAIO_UNLIMITED_REQS;
587 return pCtxInt->cMaxRequests;
588}
589
590RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
591{
592 NOREF(hAioCtx); NOREF(hFile);
593 return VINF_SUCCESS;
594}
595
#ifdef LOG_ENABLED
/**
 * Writes the complete state of an async I/O context to the flow log.
 *
 * @param   pCtxInt     The context to dump.
 */
static void rtFileAioCtxDump(PRTFILEAIOCTXINTERNAL pCtxInt)
{
    unsigned idx;

    /* Counters and wakeup state. */
    LogFlow(("cRequests=%d\n", pCtxInt->cRequests));
    LogFlow(("cMaxRequests=%u\n", pCtxInt->cMaxRequests));
    LogFlow(("hThreadWait=%#p\n", pCtxInt->hThreadWait));
    LogFlow(("fWokenUp=%RTbool\n", pCtxInt->fWokenUp));
    LogFlow(("fWaiting=%RTbool\n", pCtxInt->fWaiting));
    LogFlow(("fWokenUpInternal=%RTbool\n", pCtxInt->fWokenUpInternal));

    /* Chains of new requests not yet picked up by the waiter. */
    idx = 0;
    while (idx < RT_ELEMENTS(pCtxInt->apReqsNewHead))
    {
        LogFlow(("apReqsNewHead[%u]=%#p\n", idx, pCtxInt->apReqsNewHead[idx]));
        idx++;
    }

    /* Cancel slot, overflow list and array bookkeeping. */
    LogFlow(("pReqToCancel=%#p\n", pCtxInt->pReqToCancel));
    LogFlow(("pReqsWaitHead=%#p\n", pCtxInt->pReqsWaitHead));
    LogFlow(("pReqsWaitTail=%#p\n", pCtxInt->pReqsWaitTail));
    LogFlow(("cReqsWaitMax=%u\n", pCtxInt->cReqsWaitMax));
    LogFlow(("iFirstFree=%u\n", pCtxInt->iFirstFree));

    /* The waiting array itself. */
    idx = 0;
    while (idx < pCtxInt->cReqsWaitMax)
    {
        LogFlow(("apReqs[%u]=%#p\n", idx, pCtxInt->apReqs[idx]));
        idx++;
    }
}
#endif
619
620RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
621{
622 int rc = VINF_SUCCESS;
623 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
624
625 /* Parameter checks */
626 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
627 AssertReturn(cReqs != 0, VERR_INVALID_POINTER);
628 AssertPtrReturn(pahReqs, VERR_INVALID_PARAMETER);
629
630 rtFileAioCtxDump(pCtxInt);
631
632 /* Check that we don't exceed the limit */
633 if (ASMAtomicUoReadS32(&pCtxInt->cRequests) + cReqs > pCtxInt->cMaxRequests)
634 return VERR_FILE_AIO_LIMIT_EXCEEDED;
635
636 PRTFILEAIOREQINTERNAL pHead = NULL;
637
638 do
639 {
640 int rcPosix = 0;
641 size_t cReqsSubmit = 0;
642 size_t i = 0;
643 PRTFILEAIOREQINTERNAL pReqInt;
644
645 while ( (i < cReqs)
646 && (i < AIO_LISTIO_MAX))
647 {
648 pReqInt = pahReqs[i];
649 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
650 {
651 /* Undo everything and stop submitting. */
652 for (size_t iUndo = 0; iUndo < i; iUndo++)
653 {
654 pReqInt = pahReqs[iUndo];
655 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
656 pReqInt->pCtxInt = NULL;
657
658 /* Unlink from the list again. */
659 PRTFILEAIOREQINTERNAL pNext, pPrev;
660 pNext = pReqInt->pNext;
661 pPrev = pReqInt->pPrev;
662 if (pNext)
663 pNext->pPrev = pPrev;
664 if (pPrev)
665 pPrev->pNext = pNext;
666 else
667 pHead = pNext;
668 }
669 rc = VERR_INVALID_HANDLE;
670 break;
671 }
672
673 pReqInt->pCtxInt = pCtxInt;
674
675 if (pReqInt->fFlush)
676 break;
677
678 /* Link them together. */
679 pReqInt->pNext = pHead;
680 if (pHead)
681 pHead->pPrev = pReqInt;
682 pReqInt->pPrev = NULL;
683 pHead = pReqInt;
684 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
685
686 cReqsSubmit++;
687 i++;
688 }
689
690 if (cReqsSubmit)
691 {
692 rcPosix = lio_listio(LIO_NOWAIT, (struct aiocb **)pahReqs, cReqsSubmit, NULL);
693 if (RT_UNLIKELY(rcPosix < 0))
694 {
695 size_t cReqsSubmitted = cReqsSubmit;
696
697 if (errno == EAGAIN)
698 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
699 else
700 rc = RTErrConvertFromErrno(errno);
701
702 /* Check which ones were not submitted. */
703 for (i = 0; i < cReqsSubmit; i++)
704 {
705 pReqInt = pahReqs[i];
706
707 rcPosix = aio_error(&pReqInt->AioCB);
708
709 if ((rcPosix != EINPROGRESS) && (rcPosix != 0))
710 {
711 cReqsSubmitted--;
712
713#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
714 if (errno == EINVAL)
715#else
716 if (rcPosix == EINVAL)
717#endif
718 {
719 /* Was not submitted. */
720 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
721 }
722 else
723 {
724 /* An error occurred. */
725 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
726
727 /*
728 * Looks like Apple and glibc interpret the standard in different ways.
729 * glibc returns the error code which would be in errno but Apple returns
730 * -1 and sets errno to the appropriate value
731 */
732#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
733 Assert(rcPosix == -1);
734 pReqInt->Rc = RTErrConvertFromErrno(errno);
735#elif defined(RT_OS_LINUX)
736 pReqInt->Rc = RTErrConvertFromErrno(rcPosix);
737#endif
738 pReqInt->cbTransfered = 0;
739 }
740 /* Unlink from the list. */
741 PRTFILEAIOREQINTERNAL pNext, pPrev;
742 pNext = pReqInt->pNext;
743 pPrev = pReqInt->pPrev;
744 if (pNext)
745 pNext->pPrev = pPrev;
746 if (pPrev)
747 pPrev->pNext = pNext;
748 else
749 pHead = pNext;
750
751 pReqInt->pNext = NULL;
752 pReqInt->pPrev = NULL;
753 }
754 }
755 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
756 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
757 break;
758 }
759
760 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmit);
761 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
762 cReqs -= cReqsSubmit;
763 pahReqs += cReqsSubmit;
764 }
765
766 /*
767 * Check if we have a flush request now.
768 * If not we hit the AIO_LISTIO_MAX limit
769 * and will continue submitting requests
770 * above.
771 */
772 if (cReqs && RT_SUCCESS_NP(rc))
773 {
774 pReqInt = pahReqs[0];
775
776 if (pReqInt->fFlush)
777 {
778 /*
779 * lio_listio does not work with flush requests so
780 * we have to use aio_fsync directly.
781 */
782 rcPosix = aio_fsync(O_SYNC, &pReqInt->AioCB);
783 if (RT_UNLIKELY(rcPosix < 0))
784 {
785 if (errno == EAGAIN)
786 {
787 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
788 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
789 }
790 else
791 {
792 rc = RTErrConvertFromErrno(errno);
793 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
794 pReqInt->Rc = rc;
795 }
796 pReqInt->cbTransfered = 0;
797 break;
798 }
799
800 /* Link them together. */
801 pReqInt->pNext = pHead;
802 if (pHead)
803 pHead->pPrev = pReqInt;
804 pReqInt->pPrev = NULL;
805 pHead = pReqInt;
806 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
807
808 ASMAtomicIncS32(&pCtxInt->cRequests);
809 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
810 cReqs--;
811 pahReqs++;
812 }
813 }
814 } while ( cReqs
815 && RT_SUCCESS_NP(rc));
816
817 if (pHead)
818 {
819 /*
820 * Forward successfully submitted requests to the thread waiting for requests.
821 * We search for a free slot first and if we don't find one
822 * we will grab the first one and append our list to the existing entries.
823 */
824 unsigned iSlot = 0;
825 while ( (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
826 && !ASMAtomicCmpXchgPtr(&pCtxInt->apReqsNewHead[iSlot], pHead, NULL))
827 iSlot++;
828
829 if (iSlot == RT_ELEMENTS(pCtxInt->apReqsNewHead))
830 {
831 /* Nothing found. */
832 PRTFILEAIOREQINTERNAL pOldHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[0], NULL, PRTFILEAIOREQINTERNAL);
833
834 /* Find the end of the current head and link the old list to the current. */
835 PRTFILEAIOREQINTERNAL pTail = pHead;
836 while (pTail->pNext)
837 pTail = pTail->pNext;
838
839 pTail->pNext = pOldHead;
840
841 ASMAtomicWritePtr(&pCtxInt->apReqsNewHead[0], pHead);
842 }
843
844 /* Set the internal wakeup flag and wakeup the thread if possible. */
845 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, true);
846 if (!fWokenUp)
847 rtFileAioCtxWakeup(pCtxInt);
848 }
849
850 rtFileAioCtxDump(pCtxInt);
851
852 return rc;
853}
854
855
856RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
857 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
858{
859 int rc = VINF_SUCCESS;
860 int cRequestsCompleted = 0;
861 PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)hAioCtx;
862 struct timespec Timeout;
863 struct timespec *pTimeout = NULL;
864 uint64_t StartNanoTS = 0;
865
866 LogFlowFunc(("hAioCtx=%#p cMinReqs=%zu cMillies=%u pahReqs=%#p cReqs=%zu pcbReqs=%#p\n",
867 hAioCtx, cMinReqs, cMillies, pahReqs, cReqs, pcReqs));
868
869 /* Check parameters. */
870 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
871 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
872 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
873 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
874 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
875
876 rtFileAioCtxDump(pCtxInt);
877
878 int32_t cRequestsWaiting = ASMAtomicReadS32(&pCtxInt->cRequests);
879
880 if ( RT_UNLIKELY(cRequestsWaiting <= 0)
881 && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
882 return VERR_FILE_AIO_NO_REQUEST;
883
884 if (RT_UNLIKELY(cMinReqs > (uint32_t)cRequestsWaiting))
885 return VERR_INVALID_PARAMETER;
886
887 if (cMillies != RT_INDEFINITE_WAIT)
888 {
889 Timeout.tv_sec = cMillies / 1000;
890 Timeout.tv_nsec = (cMillies % 1000) * 1000000;
891 pTimeout = &Timeout;
892 StartNanoTS = RTTimeNanoTS();
893 }
894
895 /* Wait for at least one. */
896 if (!cMinReqs)
897 cMinReqs = 1;
898
899 /* For the wakeup call. */
900 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
901 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
902
903 /* Update the waiting list once before we enter the loop. */
904 rc = rtFileAioCtxProcessEvents(pCtxInt);
905
906 while ( cMinReqs
907 && RT_SUCCESS_NP(rc))
908 {
909#ifdef RT_STRICT
910 if (RT_UNLIKELY(!pCtxInt->iFirstFree))
911 {
912 for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
913 RTAssertMsg2Weak("wait[%d] = %#p\n", i, pCtxInt->apReqs[i]);
914
915 AssertMsgFailed(("No request to wait for. pReqsWaitHead=%#p pReqsWaitTail=%#p\n",
916 pCtxInt->pReqsWaitHead, pCtxInt->pReqsWaitTail));
917 }
918#endif
919
920 LogFlow(("Waiting for %d requests to complete\n", pCtxInt->iFirstFree));
921 rtFileAioCtxDump(pCtxInt);
922
923 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
924 int rcPosix = aio_suspend((const struct aiocb * const *)pCtxInt->apReqs,
925 pCtxInt->iFirstFree, pTimeout);
926 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
927 if (rcPosix < 0)
928 {
929 LogFlow(("aio_suspend failed %d nent=%u\n", errno, pCtxInt->iFirstFree));
930 /* Check that this is an external wakeup event. */
931 if (errno == EINTR)
932 rc = rtFileAioCtxProcessEvents(pCtxInt);
933 else
934 rc = RTErrConvertFromErrno(errno);
935 }
936 else
937 {
938 /* Requests finished. */
939 unsigned iReqCurr = 0;
940 unsigned cDone = 0;
941
942 /* Remove completed requests from the waiting list. */
943 while ( (iReqCurr < pCtxInt->iFirstFree)
944 && (cDone < cReqs))
945 {
946 PRTFILEAIOREQINTERNAL pReq = pCtxInt->apReqs[iReqCurr];
947 int rcReq = aio_error(&pReq->AioCB);
948
949 if (rcReq != EINPROGRESS)
950 {
951 /* Completed store the return code. */
952 if (rcReq == 0)
953 {
954 pReq->Rc = VINF_SUCCESS;
955 /* Call aio_return() to free resources. */
956 pReq->cbTransfered = aio_return(&pReq->AioCB);
957 }
958 else
959 {
960#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
961 pReq->Rc = RTErrConvertFromErrno(errno);
962#else
963 pReq->Rc = RTErrConvertFromErrno(rcReq);
964#endif
965 }
966
967 /* Mark the request as finished. */
968 RTFILEAIOREQ_SET_STATE(pReq, COMPLETED);
969 cDone++;
970
971 /* If there are other entries waiting put the head into the now free entry. */
972 if (pCtxInt->pReqsWaitHead)
973 {
974 PRTFILEAIOREQINTERNAL pReqInsert = pCtxInt->pReqsWaitHead;
975
976 pCtxInt->pReqsWaitHead = pReqInsert->pNext;
977 if (!pCtxInt->pReqsWaitHead)
978 {
979 /* List is empty now. Clear tail too. */
980 pCtxInt->pReqsWaitTail = NULL;
981 }
982
983 pReqInsert->iWaitingList = pReq->iWaitingList;
984 pCtxInt->apReqs[pReqInsert->iWaitingList] = pReqInsert;
985 iReqCurr++;
986 }
987 else
988 {
989 /*
990 * Move the last entry into the current position to avoid holes
991 * but only if it is not the last element already.
992 */
993 if (pReq->iWaitingList < pCtxInt->iFirstFree - 1)
994 {
995 pCtxInt->apReqs[pReq->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
996 pCtxInt->apReqs[pReq->iWaitingList]->iWaitingList = pReq->iWaitingList;
997 }
998 else
999 pCtxInt->iFirstFree--;
1000
1001 pCtxInt->apReqs[pCtxInt->iFirstFree] = NULL;
1002 }
1003
1004 /* Put the request into the completed list. */
1005 pahReqs[cRequestsCompleted++] = pReq;
1006 pReq->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
1007 }
1008 else
1009 iReqCurr++;
1010 }
1011
1012 AssertMsg((cDone <= cReqs), ("Overflow cReqs=%u cMinReqs=%u cDone=%u\n",
1013 cReqs, cDone));
1014 cReqs -= cDone;
1015 cMinReqs = RT_MAX(cMinReqs, cDone) - cDone;
1016 ASMAtomicSubS32(&pCtxInt->cRequests, cDone);
1017
1018 AssertMsg(pCtxInt->cRequests >= 0, ("Finished more requests than currently active\n"));
1019
1020 if (!cMinReqs)
1021 break;
1022
1023 if (cMillies != RT_INDEFINITE_WAIT)
1024 {
1025 uint64_t TimeDiff;
1026
1027 /* Recalculate the timeout. */
1028 TimeDiff = RTTimeSystemNanoTS() - StartNanoTS;
1029 Timeout.tv_sec = Timeout.tv_sec - (TimeDiff / 1000000);
1030 Timeout.tv_nsec = Timeout.tv_nsec - (TimeDiff % 1000000);
1031 }
1032
1033 /* Check for new elements. */
1034 rc = rtFileAioCtxProcessEvents(pCtxInt);
1035 }
1036 }
1037
1038 *pcReqs = cRequestsCompleted;
1039 Assert(pCtxInt->hThreadWait == RTThreadSelf());
1040 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
1041
1042 rtFileAioCtxDump(pCtxInt);
1043
1044 return rc;
1045}
1046
1047
1048RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
1049{
1050 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
1051 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
1052
1053 /** @todo r=bird: Define the protocol for how to resume work after calling
1054 * this function. */
1055
1056 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
1057 if (!fWokenUp)
1058 rtFileAioCtxWakeup(pCtxInt);
1059
1060 return VINF_SUCCESS;
1061}
1062
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact · Privacy policy · Terms of Use