VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/posix/fileaio-posix.cpp

Last change on this file was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 37.1 KB
Line 
1/* $Id: fileaio-posix.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for POSIX compliant host platforms.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP RTLOGGROUP_DIR
42#include <iprt/asm.h>
43#include <iprt/file.h>
44#include <iprt/mem.h>
45#include <iprt/assert.h>
46#include <iprt/string.h>
47#include <iprt/err.h>
48#include <iprt/log.h>
49#include <iprt/thread.h>
50#include <iprt/semaphore.h>
51#include "internal/fileaio.h"
52
53#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
54# include <sys/types.h>
55# include <sys/sysctl.h> /* for sysctlbyname */
56#endif
57#if defined(RT_OS_FREEBSD)
58# include <fcntl.h> /* O_SYNC */
59#endif
60#include <aio.h>
61#include <errno.h>
62#include <time.h>
63
64/*
65 * Linux does not define this value.
66 * Just define it with really big
67 * value.
68 */
69#ifndef AIO_LISTIO_MAX
70# define AIO_LISTIO_MAX UINT32_MAX
71#endif
72
73#if 0 /* Only used for debugging */
74# undef AIO_LISTIO_MAX
75# define AIO_LISTIO_MAX 16
76#endif
77
78/** Invalid entry in the waiting array. */
79#define RTFILEAIOCTX_WAIT_ENTRY_INVALID (~0U)
80
81/** No-op replacement for rtFileAioCtxDump for non debug builds */
82#ifndef LOG_ENABLED
83# define rtFileAioCtxDump(pCtxInt) do {} while (0)
84#endif
85
86
87/*********************************************************************************************************************************
88* Structures and Typedefs *
89*********************************************************************************************************************************/
90/**
91 * Async I/O request state.
92 */
93typedef struct RTFILEAIOREQINTERNAL
94{
95 /** The aio control block. FIRST ELEMENT! */
96 struct aiocb AioCB;
97 /** Next element in the chain. */
98 struct RTFILEAIOREQINTERNAL *pNext;
99 /** Previous element in the chain. */
100 struct RTFILEAIOREQINTERNAL *pPrev;
101 /** Current state the request is in. */
102 RTFILEAIOREQSTATE enmState;
103 /** Flag whether this is a flush request. */
104 bool fFlush;
105 /** Flag indicating if the request was canceled. */
106 volatile bool fCanceled;
107 /** Opaque user data. */
108 void *pvUser;
109 /** Number of bytes actually transferred. */
110 size_t cbTransfered;
111 /** Status code. */
112 int Rc;
113 /** Completion context we are assigned to. */
114 struct RTFILEAIOCTXINTERNAL *pCtxInt;
115 /** Entry in the waiting list the request is in. */
116 unsigned iWaitingList;
117 /** Magic value (RTFILEAIOREQ_MAGIC). */
118 uint32_t u32Magic;
119} RTFILEAIOREQINTERNAL, *PRTFILEAIOREQINTERNAL;
120
121/**
122 * Async I/O completion context state.
123 */
124typedef struct RTFILEAIOCTXINTERNAL
125{
126 /** Current number of requests active on this context. */
127 volatile int32_t cRequests;
128 /** Maximum number of requests this context can handle. */
129 uint32_t cMaxRequests;
130 /** The ID of the thread which is currently waiting for requests. */
131 volatile RTTHREAD hThreadWait;
132 /** Flag whether the thread was woken up. */
133 volatile bool fWokenUp;
134 /** Flag whether the thread is currently waiting in the syscall. */
135 volatile bool fWaiting;
136 /** Flags given during creation. */
137 uint32_t fFlags;
138 /** Magic value (RTFILEAIOCTX_MAGIC). */
139 uint32_t u32Magic;
140 /** Flag whether the thread was woken up due to a internal event. */
141 volatile bool fWokenUpInternal;
142 /** List of new requests which needs to be inserted into apReqs by the
143 * waiting thread. */
144 volatile PRTFILEAIOREQINTERNAL apReqsNewHead[5];
145 /** Special entry for requests which are canceled. Because only one
146 * request can be canceled at a time and the thread canceling the request
147 * has to wait we need only one entry. */
148 volatile PRTFILEAIOREQINTERNAL pReqToCancel;
149 /** Event semaphore the canceling thread is waiting for completion of
150 * the operation. */
151 RTSEMEVENT SemEventCancel;
152 /** Head of submitted elements waiting to get into the array. */
153 PRTFILEAIOREQINTERNAL pReqsWaitHead;
154 /** Tail of submitted elements waiting to get into the array. */
155 PRTFILEAIOREQINTERNAL pReqsWaitTail;
156 /** Maximum number of elements in the waiting array. */
157 unsigned cReqsWaitMax;
158 /** First free slot in the waiting list. */
159 unsigned iFirstFree;
160 /** List of requests we are currently waiting on.
161 * Size depends on cMaxRequests and AIO_LISTIO_MAX. */
162 volatile PRTFILEAIOREQINTERNAL apReqs[1];
163} RTFILEAIOCTXINTERNAL, *PRTFILEAIOCTXINTERNAL;
164
165/**
166 * Internal worker for waking up the waiting thread.
167 */
168static void rtFileAioCtxWakeup(PRTFILEAIOCTXINTERNAL pCtxInt)
169{
170 /*
171 * Read the thread handle before the status flag.
172 * If we read the handle after the flag we might
173 * end up with an invalid handle because the thread
174 * waiting in RTFileAioCtxWakeup() might get scheduled
175 * before we read the flag and returns.
176 * We can ensure that the handle is valid if fWaiting is true
177 * when reading the handle before the status flag.
178 */
179 RTTHREAD hThread;
180 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
181 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
182 if (fWaiting)
183 {
184 /*
185 * If a thread waits the handle must be valid.
186 * It is possible that the thread returns from
187 * aio_suspend() before the signal is send.
188 * This is no problem because we already set fWokenUp
189 * to true which will let the thread return VERR_INTERRUPTED
190 * and the next call to RTFileAioCtxWait() will not
191 * return VERR_INTERRUPTED because signals are not saved
192 * and will simply vanish if the destination thread can't
193 * receive it.
194 */
195 Assert(hThread != NIL_RTTHREAD);
196 RTThreadPoke(hThread);
197 }
198}
199
200/**
201 * Internal worker processing events and inserting new requests into the waiting list.
202 */
203static int rtFileAioCtxProcessEvents(PRTFILEAIOCTXINTERNAL pCtxInt)
204{
205 int rc = VINF_SUCCESS;
206
207 /* Process new requests first. */
208 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, false);
209 if (fWokenUp)
210 {
211 for (unsigned iSlot = 0; iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead); iSlot++)
212 {
213 PRTFILEAIOREQINTERNAL pReqHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[iSlot], NULL, PRTFILEAIOREQINTERNAL);
214
215 while ( (pCtxInt->iFirstFree < pCtxInt->cReqsWaitMax)
216 && pReqHead)
217 {
218 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
219 pCtxInt->apReqs[pCtxInt->iFirstFree] = pReqHead;
220 pReqHead->iWaitingList = pCtxInt->iFirstFree;
221 pReqHead = pReqHead->pNext;
222
223 /* Clear pointer to next and previous element just for safety. */
224 pCtxInt->apReqs[pCtxInt->iFirstFree]->pNext = NULL;
225 pCtxInt->apReqs[pCtxInt->iFirstFree]->pPrev = NULL;
226 pCtxInt->iFirstFree++;
227
228 Assert( (pCtxInt->iFirstFree <= pCtxInt->cMaxRequests)
229 && (pCtxInt->iFirstFree <= pCtxInt->cReqsWaitMax));
230 }
231
232 /* Append the rest to the wait list. */
233 if (pReqHead)
234 {
235 RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
236 if (!pCtxInt->pReqsWaitHead)
237 {
238 Assert(!pCtxInt->pReqsWaitTail);
239 pCtxInt->pReqsWaitHead = pReqHead;
240 pReqHead->pPrev = NULL;
241 }
242 else
243 {
244 AssertPtr(pCtxInt->pReqsWaitTail);
245
246 pCtxInt->pReqsWaitTail->pNext = pReqHead;
247 pReqHead->pPrev = pCtxInt->pReqsWaitTail;
248 }
249
250 /* Update tail. */
251 while (pReqHead->pNext)
252 {
253 RTFIELAIOREQ_ASSERT_STATE(pReqHead->pNext, SUBMITTED);
254 pReqHead = pReqHead->pNext;
255 }
256
257 pCtxInt->pReqsWaitTail = pReqHead;
258 pCtxInt->pReqsWaitTail->pNext = NULL;
259 }
260 }
261
262 /* Check if a request needs to be canceled. */
263 PRTFILEAIOREQINTERNAL pReqToCancel = ASMAtomicReadPtrT(&pCtxInt->pReqToCancel, PRTFILEAIOREQINTERNAL);
264 if (pReqToCancel)
265 {
266 /* The request can be in the array waiting for completion or still in the list because it is full. */
267 if (pReqToCancel->iWaitingList != RTFILEAIOCTX_WAIT_ENTRY_INVALID)
268 {
269 /* Put it out of the waiting list. */
270 pCtxInt->apReqs[pReqToCancel->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
271 pCtxInt->apReqs[pReqToCancel->iWaitingList]->iWaitingList = pReqToCancel->iWaitingList;
272 }
273 else
274 {
275 /* Unlink from the waiting list. */
276 PRTFILEAIOREQINTERNAL pPrev = pReqToCancel->pPrev;
277 PRTFILEAIOREQINTERNAL pNext = pReqToCancel->pNext;
278
279 if (pNext)
280 pNext->pPrev = pPrev;
281 else
282 {
283 /* We canceled the tail. */
284 pCtxInt->pReqsWaitTail = pPrev;
285 }
286
287 if (pPrev)
288 pPrev->pNext = pNext;
289 else
290 {
291 /* We canceled the head. */
292 pCtxInt->pReqsWaitHead = pNext;
293 }
294 }
295
296 ASMAtomicDecS32(&pCtxInt->cRequests);
297 AssertMsg(pCtxInt->cRequests >= 0, ("Canceled request not which is not in this context\n"));
298 RTSemEventSignal(pCtxInt->SemEventCancel);
299 }
300 }
301 else
302 {
303 if (ASMAtomicXchgBool(&pCtxInt->fWokenUp, false))
304 rc = VERR_INTERRUPTED;
305 }
306
307 return rc;
308}
309
310RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
311{
312 int rcBSD = 0;
313 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
314
315#if defined(RT_OS_DARWIN)
316 int cReqsOutstandingMax = 0;
317 size_t cbParameter = sizeof(int);
318
319 rcBSD = sysctlbyname("kern.aioprocmax", /* name */
320 &cReqsOutstandingMax, /* Where to store the old value. */
321 &cbParameter, /* Size of the memory pointed to. */
322 NULL, /* Where the new value is located. */
323 0); /* Where the size of the new value is stored. */
324 if (rcBSD == -1)
325 return RTErrConvertFromErrno(errno);
326
327 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
328 pAioLimits->cbBufferAlignment = 0;
329#elif defined(RT_OS_FREEBSD)
330 /*
331 * The AIO API is implemented in a kernel module which is not
332 * loaded by default.
333 * If it is loaded there are additional sysctl parameters.
334 */
335 int cReqsOutstandingMax = 0;
336 size_t cbParameter = sizeof(int);
337
338 rcBSD = sysctlbyname("vfs.aio.max_aio_per_proc", /* name */
339 &cReqsOutstandingMax, /* Where to store the old value. */
340 &cbParameter, /* Size of the memory pointed to. */
341 NULL, /* Where the new value is located. */
342 0); /* Where the size of the new value is stored. */
343 if (rcBSD == -1)
344 {
345 /* ENOENT means the value is unknown thus the module is not loaded. */
346 if (errno == ENOENT)
347 return VERR_NOT_SUPPORTED;
348 else
349 return RTErrConvertFromErrno(errno);
350 }
351
352 pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
353 pAioLimits->cbBufferAlignment = 0;
354#else
355 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
356 pAioLimits->cbBufferAlignment = 0;
357#endif
358
359 return VINF_SUCCESS;
360}
361
362RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
363{
364 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
365
366 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
367 if (RT_UNLIKELY(!pReqInt))
368 return VERR_NO_MEMORY;
369
370 pReqInt->pCtxInt = NULL;
371 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
372 pReqInt->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
373 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
374
375 *phReq = (RTFILEAIOREQ)pReqInt;
376
377 return VINF_SUCCESS;
378}
379
380
381RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
382{
383 /*
384 * Validate the handle and ignore nil.
385 */
386 if (hReq == NIL_RTFILEAIOREQ)
387 return VINF_SUCCESS;
388 PRTFILEAIOREQINTERNAL pReqInt = hReq;
389 RTFILEAIOREQ_VALID_RETURN(pReqInt);
390 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
391
392 /*
393 * Trash the magic and free it.
394 */
395 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
396 RTMemFree(pReqInt);
397 return VINF_SUCCESS;
398}
399
400/**
401 * Worker setting up the request.
402 */
403DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
404 unsigned uTransferDirection,
405 RTFOFF off, void *pvBuf, size_t cbTransfer,
406 void *pvUser)
407{
408 /*
409 * Validate the input.
410 */
411 PRTFILEAIOREQINTERNAL pReqInt = hReq;
412 RTFILEAIOREQ_VALID_RETURN(pReqInt);
413 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
414 Assert(hFile != NIL_RTFILE);
415 AssertPtr(pvBuf);
416 Assert(off >= 0);
417 Assert(cbTransfer > 0);
418
419 memset(&pReqInt->AioCB, 0, sizeof(struct aiocb));
420 pReqInt->fFlush = false;
421 pReqInt->AioCB.aio_lio_opcode = uTransferDirection;
422 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
423 pReqInt->AioCB.aio_offset = off;
424 pReqInt->AioCB.aio_nbytes = cbTransfer;
425 pReqInt->AioCB.aio_buf = pvBuf;
426 pReqInt->pvUser = pvUser;
427 pReqInt->pCtxInt = NULL;
428 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
429 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
430
431 return VINF_SUCCESS;
432}
433
434
435RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
436 void *pvBuf, size_t cbRead, void *pvUser)
437{
438 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_READ,
439 off, pvBuf, cbRead, pvUser);
440}
441
442
443RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
444 void const *pvBuf, size_t cbWrite, void *pvUser)
445{
446 return rtFileAioReqPrepareTransfer(hReq, hFile, LIO_WRITE,
447 off, (void *)pvBuf, cbWrite, pvUser);
448}
449
450
451RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
452{
453 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)hReq;
454
455 RTFILEAIOREQ_VALID_RETURN(pReqInt);
456 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
457 Assert(hFile != NIL_RTFILE);
458
459 pReqInt->fFlush = true;
460 pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
461 pReqInt->AioCB.aio_offset = 0;
462 pReqInt->AioCB.aio_nbytes = 0;
463 pReqInt->AioCB.aio_buf = NULL;
464 pReqInt->pvUser = pvUser;
465 pReqInt->Rc = VERR_FILE_AIO_IN_PROGRESS;
466 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
467
468 return VINF_SUCCESS;
469}
470
471
472RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
473{
474 PRTFILEAIOREQINTERNAL pReqInt = hReq;
475 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
476
477 return pReqInt->pvUser;
478}
479
480
481RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
482{
483 PRTFILEAIOREQINTERNAL pReqInt = hReq;
484 RTFILEAIOREQ_VALID_RETURN(pReqInt);
485 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
486
487 ASMAtomicXchgBool(&pReqInt->fCanceled, true);
488
489 int rcPosix = aio_cancel(pReqInt->AioCB.aio_fildes, &pReqInt->AioCB);
490
491 if (rcPosix == AIO_CANCELED)
492 {
493 PRTFILEAIOCTXINTERNAL pCtxInt = pReqInt->pCtxInt;
494 /*
495 * Notify the waiting thread that the request was canceled.
496 */
497 AssertMsg(RT_VALID_PTR(pCtxInt), ("Invalid state. Request was canceled but wasn't submitted\n"));
498
499 Assert(!pCtxInt->pReqToCancel);
500 ASMAtomicWritePtr(&pCtxInt->pReqToCancel, pReqInt);
501 rtFileAioCtxWakeup(pCtxInt);
502
503 /* Wait for acknowledge. */
504 int rc = RTSemEventWait(pCtxInt->SemEventCancel, RT_INDEFINITE_WAIT);
505 AssertRC(rc);
506
507 ASMAtomicWriteNullPtr(&pCtxInt->pReqToCancel);
508 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
509 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
510 return VINF_SUCCESS;
511 }
512 else if (rcPosix == AIO_ALLDONE)
513 return VERR_FILE_AIO_COMPLETED;
514 else if (rcPosix == AIO_NOTCANCELED)
515 return VERR_FILE_AIO_IN_PROGRESS;
516 else
517 return RTErrConvertFromErrno(errno);
518}
519
520
521RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
522{
523 PRTFILEAIOREQINTERNAL pReqInt = hReq;
524 RTFILEAIOREQ_VALID_RETURN(pReqInt);
525 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
526 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
527 AssertPtrNull(pcbTransfered);
528
529 if ( (RT_SUCCESS(pReqInt->Rc))
530 && (pcbTransfered))
531 *pcbTransfered = pReqInt->cbTransfered;
532
533 return pReqInt->Rc;
534}
535
536
537RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
538 uint32_t fFlags)
539{
540 PRTFILEAIOCTXINTERNAL pCtxInt;
541 unsigned cReqsWaitMax;
542
543 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
544 AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
545
546 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
547 return VERR_OUT_OF_RANGE;
548
549 cReqsWaitMax = RT_MIN(cAioReqsMax, AIO_LISTIO_MAX);
550
551 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ( sizeof(RTFILEAIOCTXINTERNAL)
552 + cReqsWaitMax * sizeof(PRTFILEAIOREQINTERNAL));
553 if (RT_UNLIKELY(!pCtxInt))
554 return VERR_NO_MEMORY;
555
556 /* Create event semaphore. */
557 int rc = RTSemEventCreate(&pCtxInt->SemEventCancel);
558 if (RT_FAILURE(rc))
559 {
560 RTMemFree(pCtxInt);
561 return rc;
562 }
563
564 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
565 pCtxInt->cMaxRequests = cAioReqsMax;
566 pCtxInt->cReqsWaitMax = cReqsWaitMax;
567 pCtxInt->fFlags = fFlags;
568 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
569
570 return VINF_SUCCESS;
571}
572
573
574RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
575{
576 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
577
578 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
579
580 if (RT_UNLIKELY(pCtxInt->cRequests))
581 return VERR_FILE_AIO_BUSY;
582
583 RTSemEventDestroy(pCtxInt->SemEventCancel);
584 RTMemFree(pCtxInt);
585
586 return VINF_SUCCESS;
587}
588
589
590RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
591{
592 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
593
594 if (hAioCtx == NIL_RTFILEAIOCTX)
595 return RTFILEAIO_UNLIMITED_REQS;
596 return pCtxInt->cMaxRequests;
597}
598
599RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
600{
601 NOREF(hAioCtx); NOREF(hFile);
602 return VINF_SUCCESS;
603}
604
605#ifdef LOG_ENABLED
606/**
607 * Dumps the state of a async I/O context.
608 */
609static void rtFileAioCtxDump(PRTFILEAIOCTXINTERNAL pCtxInt)
610{
611 LogFlow(("cRequests=%d\n", pCtxInt->cRequests));
612 LogFlow(("cMaxRequests=%u\n", pCtxInt->cMaxRequests));
613 LogFlow(("hThreadWait=%#p\n", pCtxInt->hThreadWait));
614 LogFlow(("fWokenUp=%RTbool\n", pCtxInt->fWokenUp));
615 LogFlow(("fWaiting=%RTbool\n", pCtxInt->fWaiting));
616 LogFlow(("fWokenUpInternal=%RTbool\n", pCtxInt->fWokenUpInternal));
617 for (unsigned i = 0; i < RT_ELEMENTS(pCtxInt->apReqsNewHead); i++)
618 LogFlow(("apReqsNewHead[%u]=%#p\n", i, pCtxInt->apReqsNewHead[i]));
619 LogFlow(("pReqToCancel=%#p\n", pCtxInt->pReqToCancel));
620 LogFlow(("pReqsWaitHead=%#p\n", pCtxInt->pReqsWaitHead));
621 LogFlow(("pReqsWaitTail=%#p\n", pCtxInt->pReqsWaitTail));
622 LogFlow(("cReqsWaitMax=%u\n", pCtxInt->cReqsWaitMax));
623 LogFlow(("iFirstFree=%u\n", pCtxInt->iFirstFree));
624 for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
625 LogFlow(("apReqs[%u]=%#p\n", i, pCtxInt->apReqs[i]));
626}
627#endif
628
629RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
630{
631 int rc = VINF_SUCCESS;
632 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
633
634 /* Parameter checks */
635 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
636 AssertReturn(cReqs != 0, VERR_INVALID_POINTER);
637 AssertPtrReturn(pahReqs, VERR_INVALID_PARAMETER);
638
639 rtFileAioCtxDump(pCtxInt);
640
641 /* Check that we don't exceed the limit */
642 if (ASMAtomicUoReadS32(&pCtxInt->cRequests) + cReqs > pCtxInt->cMaxRequests)
643 return VERR_FILE_AIO_LIMIT_EXCEEDED;
644
645 PRTFILEAIOREQINTERNAL pHead = NULL;
646
647 do
648 {
649 int rcPosix = 0;
650 size_t cReqsSubmit = 0;
651 size_t i = 0;
652 PRTFILEAIOREQINTERNAL pReqInt;
653
654 while ( (i < cReqs)
655 && (i < AIO_LISTIO_MAX))
656 {
657 pReqInt = pahReqs[i];
658 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
659 {
660 /* Undo everything and stop submitting. */
661 for (size_t iUndo = 0; iUndo < i; iUndo++)
662 {
663 pReqInt = pahReqs[iUndo];
664 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
665 pReqInt->pCtxInt = NULL;
666
667 /* Unlink from the list again. */
668 PRTFILEAIOREQINTERNAL pNext, pPrev;
669 pNext = pReqInt->pNext;
670 pPrev = pReqInt->pPrev;
671 if (pNext)
672 pNext->pPrev = pPrev;
673 if (pPrev)
674 pPrev->pNext = pNext;
675 else
676 pHead = pNext;
677 }
678 rc = VERR_INVALID_HANDLE;
679 break;
680 }
681
682 pReqInt->pCtxInt = pCtxInt;
683
684 if (pReqInt->fFlush)
685 break;
686
687 /* Link them together. */
688 pReqInt->pNext = pHead;
689 if (pHead)
690 pHead->pPrev = pReqInt;
691 pReqInt->pPrev = NULL;
692 pHead = pReqInt;
693 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
694
695 cReqsSubmit++;
696 i++;
697 }
698
699 if (cReqsSubmit)
700 {
701 rcPosix = lio_listio(LIO_NOWAIT, (struct aiocb **)pahReqs, cReqsSubmit, NULL);
702 if (RT_UNLIKELY(rcPosix < 0))
703 {
704 size_t cReqsSubmitted = cReqsSubmit;
705
706 if (errno == EAGAIN)
707 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
708 else
709 rc = RTErrConvertFromErrno(errno);
710
711 /* Check which ones were not submitted. */
712 for (i = 0; i < cReqsSubmit; i++)
713 {
714 pReqInt = pahReqs[i];
715
716 rcPosix = aio_error(&pReqInt->AioCB);
717
718 if ((rcPosix != EINPROGRESS) && (rcPosix != 0))
719 {
720 cReqsSubmitted--;
721
722#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
723 if (errno == EINVAL)
724#else
725 if (rcPosix == EINVAL)
726#endif
727 {
728 /* Was not submitted. */
729 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
730 }
731 else
732 {
733 /* An error occurred. */
734 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
735
736 /*
737 * Looks like Apple and glibc interpret the standard in different ways.
738 * glibc returns the error code which would be in errno but Apple returns
739 * -1 and sets errno to the appropriate value
740 */
741#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
742 Assert(rcPosix == -1);
743 pReqInt->Rc = RTErrConvertFromErrno(errno);
744#elif defined(RT_OS_LINUX)
745 pReqInt->Rc = RTErrConvertFromErrno(rcPosix);
746#endif
747 pReqInt->cbTransfered = 0;
748 }
749 /* Unlink from the list. */
750 PRTFILEAIOREQINTERNAL pNext, pPrev;
751 pNext = pReqInt->pNext;
752 pPrev = pReqInt->pPrev;
753 if (pNext)
754 pNext->pPrev = pPrev;
755 if (pPrev)
756 pPrev->pNext = pNext;
757 else
758 pHead = pNext;
759
760 pReqInt->pNext = NULL;
761 pReqInt->pPrev = NULL;
762 }
763 }
764 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
765 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
766 break;
767 }
768
769 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmit);
770 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
771 cReqs -= cReqsSubmit;
772 pahReqs += cReqsSubmit;
773 }
774
775 /*
776 * Check if we have a flush request now.
777 * If not we hit the AIO_LISTIO_MAX limit
778 * and will continue submitting requests
779 * above.
780 */
781 if (cReqs && RT_SUCCESS_NP(rc))
782 {
783 pReqInt = pahReqs[0];
784
785 if (pReqInt->fFlush)
786 {
787 /*
788 * lio_listio does not work with flush requests so
789 * we have to use aio_fsync directly.
790 */
791 rcPosix = aio_fsync(O_SYNC, &pReqInt->AioCB);
792 if (RT_UNLIKELY(rcPosix < 0))
793 {
794 if (errno == EAGAIN)
795 {
796 rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
797 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
798 }
799 else
800 {
801 rc = RTErrConvertFromErrno(errno);
802 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
803 pReqInt->Rc = rc;
804 }
805 pReqInt->cbTransfered = 0;
806 break;
807 }
808
809 /* Link them together. */
810 pReqInt->pNext = pHead;
811 if (pHead)
812 pHead->pPrev = pReqInt;
813 pReqInt->pPrev = NULL;
814 pHead = pReqInt;
815 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
816
817 ASMAtomicIncS32(&pCtxInt->cRequests);
818 AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
819 cReqs--;
820 pahReqs++;
821 }
822 }
823 } while ( cReqs
824 && RT_SUCCESS_NP(rc));
825
826 if (pHead)
827 {
828 /*
829 * Forward successfully submitted requests to the thread waiting for requests.
830 * We search for a free slot first and if we don't find one
831 * we will grab the first one and append our list to the existing entries.
832 */
833 unsigned iSlot = 0;
834 while ( (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
835 && !ASMAtomicCmpXchgPtr(&pCtxInt->apReqsNewHead[iSlot], pHead, NULL))
836 iSlot++;
837
838 if (iSlot == RT_ELEMENTS(pCtxInt->apReqsNewHead))
839 {
840 /* Nothing found. */
841 PRTFILEAIOREQINTERNAL pOldHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[0], NULL, PRTFILEAIOREQINTERNAL);
842
843 /* Find the end of the current head and link the old list to the current. */
844 PRTFILEAIOREQINTERNAL pTail = pHead;
845 while (pTail->pNext)
846 pTail = pTail->pNext;
847
848 pTail->pNext = pOldHead;
849
850 ASMAtomicWritePtr(&pCtxInt->apReqsNewHead[0], pHead);
851 }
852
853 /* Set the internal wakeup flag and wakeup the thread if possible. */
854 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, true);
855 if (!fWokenUp)
856 rtFileAioCtxWakeup(pCtxInt);
857 }
858
859 rtFileAioCtxDump(pCtxInt);
860
861 return rc;
862}
863
864
865RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
866 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
867{
868 int rc = VINF_SUCCESS;
869 int cRequestsCompleted = 0;
870 PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)hAioCtx;
871 struct timespec Timeout;
872 struct timespec *pTimeout = NULL;
873 uint64_t StartNanoTS = 0;
874
875 LogFlowFunc(("hAioCtx=%#p cMinReqs=%zu cMillies=%u pahReqs=%#p cReqs=%zu pcbReqs=%#p\n",
876 hAioCtx, cMinReqs, cMillies, pahReqs, cReqs, pcReqs));
877
878 /* Check parameters. */
879 AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
880 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
881 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
882 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
883 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
884
885 rtFileAioCtxDump(pCtxInt);
886
887 int32_t cRequestsWaiting = ASMAtomicReadS32(&pCtxInt->cRequests);
888
889 if ( RT_UNLIKELY(cRequestsWaiting <= 0)
890 && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
891 return VERR_FILE_AIO_NO_REQUEST;
892
893 if (RT_UNLIKELY(cMinReqs > (uint32_t)cRequestsWaiting))
894 return VERR_INVALID_PARAMETER;
895
896 if (cMillies != RT_INDEFINITE_WAIT)
897 {
898 Timeout.tv_sec = cMillies / 1000;
899 Timeout.tv_nsec = (cMillies % 1000) * 1000000;
900 pTimeout = &Timeout;
901 StartNanoTS = RTTimeNanoTS();
902 }
903
904 /* Wait for at least one. */
905 if (!cMinReqs)
906 cMinReqs = 1;
907
908 /* For the wakeup call. */
909 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
910 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
911
912 /* Update the waiting list once before we enter the loop. */
913 rc = rtFileAioCtxProcessEvents(pCtxInt);
914
915 while ( cMinReqs
916 && RT_SUCCESS_NP(rc))
917 {
918#ifdef RT_STRICT
919 if (RT_UNLIKELY(!pCtxInt->iFirstFree))
920 {
921 for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
922 RTAssertMsg2Weak("wait[%d] = %#p\n", i, pCtxInt->apReqs[i]);
923
924 AssertMsgFailed(("No request to wait for. pReqsWaitHead=%#p pReqsWaitTail=%#p\n",
925 pCtxInt->pReqsWaitHead, pCtxInt->pReqsWaitTail));
926 }
927#endif
928
929 LogFlow(("Waiting for %d requests to complete\n", pCtxInt->iFirstFree));
930 rtFileAioCtxDump(pCtxInt);
931
932 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
933 int rcPosix = aio_suspend((const struct aiocb * const *)pCtxInt->apReqs,
934 pCtxInt->iFirstFree, pTimeout);
935 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
936 if (rcPosix < 0)
937 {
938 LogFlow(("aio_suspend failed %d nent=%u\n", errno, pCtxInt->iFirstFree));
939 /* Check that this is an external wakeup event. */
940 if (errno == EINTR)
941 rc = rtFileAioCtxProcessEvents(pCtxInt);
942 else
943 rc = RTErrConvertFromErrno(errno);
944 }
945 else
946 {
947 /* Requests finished. */
948 unsigned iReqCurr = 0;
949 unsigned cDone = 0;
950
951 /* Remove completed requests from the waiting list. */
952 while ( (iReqCurr < pCtxInt->iFirstFree)
953 && (cDone < cReqs))
954 {
955 PRTFILEAIOREQINTERNAL pReq = pCtxInt->apReqs[iReqCurr];
956 int rcReq = aio_error(&pReq->AioCB);
957
958 if (rcReq != EINPROGRESS)
959 {
960 /* Completed store the return code. */
961 if (rcReq == 0)
962 {
963 pReq->Rc = VINF_SUCCESS;
964 /* Call aio_return() to free resources. */
965 pReq->cbTransfered = aio_return(&pReq->AioCB);
966 }
967 else
968 {
969#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
970 pReq->Rc = RTErrConvertFromErrno(errno);
971#else
972 pReq->Rc = RTErrConvertFromErrno(rcReq);
973#endif
974 }
975
976 /* Mark the request as finished. */
977 RTFILEAIOREQ_SET_STATE(pReq, COMPLETED);
978 cDone++;
979
980 /* If there are other entries waiting put the head into the now free entry. */
981 if (pCtxInt->pReqsWaitHead)
982 {
983 PRTFILEAIOREQINTERNAL pReqInsert = pCtxInt->pReqsWaitHead;
984
985 pCtxInt->pReqsWaitHead = pReqInsert->pNext;
986 if (!pCtxInt->pReqsWaitHead)
987 {
988 /* List is empty now. Clear tail too. */
989 pCtxInt->pReqsWaitTail = NULL;
990 }
991
992 pReqInsert->iWaitingList = pReq->iWaitingList;
993 pCtxInt->apReqs[pReqInsert->iWaitingList] = pReqInsert;
994 iReqCurr++;
995 }
996 else
997 {
998 /*
999 * Move the last entry into the current position to avoid holes
1000 * but only if it is not the last element already.
1001 */
1002 if (pReq->iWaitingList < pCtxInt->iFirstFree - 1)
1003 {
1004 pCtxInt->apReqs[pReq->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
1005 pCtxInt->apReqs[pReq->iWaitingList]->iWaitingList = pReq->iWaitingList;
1006 }
1007 else
1008 pCtxInt->iFirstFree--;
1009
1010 pCtxInt->apReqs[pCtxInt->iFirstFree] = NULL;
1011 }
1012
1013 /* Put the request into the completed list. */
1014 pahReqs[cRequestsCompleted++] = pReq;
1015 pReq->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
1016 }
1017 else
1018 iReqCurr++;
1019 }
1020
1021 AssertMsg((cDone <= cReqs), ("Overflow cReqs=%u cMinReqs=%u cDone=%u\n",
1022 cReqs, cDone));
1023 cReqs -= cDone;
1024 cMinReqs = RT_MAX(cMinReqs, cDone) - cDone;
1025 ASMAtomicSubS32(&pCtxInt->cRequests, cDone);
1026
1027 AssertMsg(pCtxInt->cRequests >= 0, ("Finished more requests than currently active\n"));
1028
1029 if (!cMinReqs)
1030 break;
1031
1032 if (cMillies != RT_INDEFINITE_WAIT)
1033 {
1034 uint64_t TimeDiff;
1035
1036 /* Recalculate the timeout. */
1037 TimeDiff = RTTimeSystemNanoTS() - StartNanoTS;
1038 Timeout.tv_sec = Timeout.tv_sec - (TimeDiff / 1000000);
1039 Timeout.tv_nsec = Timeout.tv_nsec - (TimeDiff % 1000000);
1040 }
1041
1042 /* Check for new elements. */
1043 rc = rtFileAioCtxProcessEvents(pCtxInt);
1044 }
1045 }
1046
1047 *pcReqs = cRequestsCompleted;
1048 Assert(pCtxInt->hThreadWait == RTThreadSelf());
1049 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
1050
1051 rtFileAioCtxDump(pCtxInt);
1052
1053 return rc;
1054}
1055
1056
1057RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
1058{
1059 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
1060 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
1061
1062 /** @todo r=bird: Define the protocol for how to resume work after calling
1063 * this function. */
1064
1065 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
1066 if (!fWokenUp)
1067 rtFileAioCtxWakeup(pCtxInt);
1068
1069 return VINF_SUCCESS;
1070}
1071
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use