VirtualBox

source: vbox/trunk/src/VBox/Runtime/r3/linux/fileaio-linux.cpp@ 103795

Last change on this file since 103795 was 98103, checked in by vboxsync, 2 years ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 26.9 KB
Line 
1/* $Id: fileaio-linux.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * IPRT - File async I/O, native implementation for the Linux host platform.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37/** @page pg_rtfileaio_linux RTFile Async I/O - Linux Implementation Notes
38 * @internal
39 *
40 * Linux implements the kernel async I/O API through the io_* syscalls. They are
41 * not exposed in the glibc (the aio_* API uses userspace threads and blocking
42 * I/O operations to simulate async behavior). There is an external library
43 * called libaio which implements these syscalls but because we don't want to
44 * have another dependency and this library is not installed by default and the
45 * interface is really simple we use the kernel interface directly using wrapper
46 * functions.
47 *
48 * The interface has some limitations. The first one is that the file must be
49 * opened with O_DIRECT. This disables caching done by the kernel which can be
50 * compensated if the user of this API implements caching itself. The next
51 * limitation is that data buffers must be aligned at a 512 byte boundary or the
52 * request will fail.
53 */
54/** @todo r=bird: What's this about "must be opened with O_DIRECT"? An
55 * explanation would be nice, esp. seeing what Linus is quoted saying
56 * about it in the open man page... */
57
58
59/*********************************************************************************************************************************
60* Header Files *
61*********************************************************************************************************************************/
62#define LOG_GROUP RTLOGGROUP_FILE
63#include <iprt/asm.h>
64#include <iprt/mem.h>
65#include <iprt/assert.h>
66#include <iprt/string.h>
67#include <iprt/err.h>
68#include <iprt/log.h>
69#include <iprt/thread.h>
70#include "internal/fileaio.h"
71
72#include <unistd.h>
73#include <sys/syscall.h>
74#include <errno.h>
75
76#include <iprt/file.h>
77
78
79/*********************************************************************************************************************************
80* Structures and Typedefs *
81*********************************************************************************************************************************/
/**
 * Handle of a kernel async I/O context.
 *
 * Filled in by the io_setup syscall and handed back to the other io_* syscalls.
 */
typedef unsigned long LNXKAIOCONTEXT;

/**
 * Opcodes stored in LNXKAIOIOCB::u16IoOpCode.
 */
enum
{
    /** Read request (used by RTFileAioReqPrepareRead). */
    LNXKAIO_IOCB_CMD_READ   = 0,
    /** Write request (used by RTFileAioReqPrepareWrite). */
    LNXKAIO_IOCB_CMD_WRITE  = 1,
    /** Flush request (used by RTFileAioReqPrepareFlush). */
    LNXKAIO_IOCB_CMD_FSYNC  = 2,
    /** Presumably the fdatasync flavour of the flush; not used in this file. */
    LNXKAIO_IOCB_CMD_FDSYNC = 3
};
95
96/**
97 * The iocb structure of a request which is passed to the kernel.
98 *
99 * We redefined this here because the version in the header lacks padding
100 * for 32bit.
101 */
102typedef struct LNXKAIOIOCB
103{
104 /** Opaque pointer to data which is returned on an I/O event. */
105 void *pvUser;
106#ifdef RT_ARCH_X86
107 uint32_t u32Padding0;
108#endif
109 /** Contains the request number and is set by the kernel. */
110 uint32_t u32Key;
111 /** Reserved. */
112 uint32_t u32Reserved0;
113 /** The I/O opcode. */
114 uint16_t u16IoOpCode;
115 /** Request priority. */
116 int16_t i16Priority;
117 /** The file descriptor. */
118 uint32_t uFileDesc;
119 /** The userspace pointer to the buffer containing/receiving the data. */
120 void *pvBuf;
121#ifdef RT_ARCH_X86
122 uint32_t u32Padding1;
123#endif
124 /** How many bytes to transfer. */
125#if ARCH_BITS == 32
126 uint32_t cbTransfer;
127 uint32_t u32Padding2;
128#elif ARCH_BITS == 64
129 uint64_t cbTransfer;
130#else
131# error "Unknown architecture"
132#endif
133 /** At which offset to start the transfer. */
134 int64_t off;
135 /** Reserved. */
136 uint64_t u64Reserved1;
137 /** Flags */
138 uint32_t fFlags;
139 /** Readyness signal file descriptor. */
140 uint32_t u32ResFd;
141} LNXKAIOIOCB, *PLNXKAIOIOCB;
142
143/**
144 * I/O event structure to notify about completed requests.
145 * Redefined here too because of the padding.
146 */
147typedef struct LNXKAIOIOEVENT
148{
149 /** The pvUser field from the iocb. */
150 void *pvUser;
151#if ARCH_BITS == 32
152 uint32_t u32Padding0;
153#endif
154 /** The LNXKAIOIOCB object this event is for. */
155 PLNXKAIOIOCB *pIoCB;
156#if ARCH_BITS == 32
157 uint32_t u32Padding1;
158#endif
159 /** The result code of the operation .*/
160#if ARCH_BITS == 32
161 int32_t rc;
162 uint32_t u32Padding2;
163#elif ARCH_BITS == 64
164 int64_t rc;
165#else
166# error "Unknown architecture"
167#endif
168 /** Secondary result code. */
169#if ARCH_BITS == 32
170 int32_t rc2;
171 uint32_t u32Padding3;
172#elif ARCH_BITS == 64
173 int64_t rc2;
174#else
175# error "Unknown architecture"
176#endif
177} LNXKAIOIOEVENT, *PLNXKAIOIOEVENT;
178
179
180/**
181 * Async I/O completion context state.
182 */
183typedef struct RTFILEAIOCTXINTERNAL
184{
185 /** Handle to the async I/O context. */
186 LNXKAIOCONTEXT AioContext;
187 /** Maximum number of requests this context can handle. */
188 int cRequestsMax;
189 /** Current number of requests active on this context. */
190 volatile int32_t cRequests;
191 /** The ID of the thread which is currently waiting for requests. */
192 volatile RTTHREAD hThreadWait;
193 /** Flag whether the thread was woken up. */
194 volatile bool fWokenUp;
195 /** Flag whether the thread is currently waiting in the syscall. */
196 volatile bool fWaiting;
197 /** Flags given during creation. */
198 uint32_t fFlags;
199 /** Magic value (RTFILEAIOCTX_MAGIC). */
200 uint32_t u32Magic;
201} RTFILEAIOCTXINTERNAL;
202/** Pointer to an internal context structure. */
203typedef RTFILEAIOCTXINTERNAL *PRTFILEAIOCTXINTERNAL;
204
205/**
206 * Async I/O request state.
207 */
208typedef struct RTFILEAIOREQINTERNAL
209{
210 /** The aio control block. This must be the FIRST elment in
211 * the structure! (see notes below) */
212 LNXKAIOIOCB AioCB;
213 /** Current state the request is in. */
214 RTFILEAIOREQSTATE enmState;
215 /** The I/O context this request is associated with. */
216 LNXKAIOCONTEXT AioContext;
217 /** Return code the request completed with. */
218 int Rc;
219 /** Number of bytes actually transferred. */
220 size_t cbTransfered;
221 /** Completion context we are assigned to. */
222 PRTFILEAIOCTXINTERNAL pCtxInt;
223 /** Magic value (RTFILEAIOREQ_MAGIC). */
224 uint32_t u32Magic;
225} RTFILEAIOREQINTERNAL;
226/** Pointer to an internal request structure. */
227typedef RTFILEAIOREQINTERNAL *PRTFILEAIOREQINTERNAL;
228
229
230/*********************************************************************************************************************************
231* Defined Constants And Macros *
232*********************************************************************************************************************************/
/** Upper bound on the number of events fetched by a single io_getevents call;
 *  also the size of the on-stack event array in RTFileAioCtxWait. */
#define AIO_MAXIMUM_REQUESTS_PER_CONTEXT 64
235
236
237/**
238 * Creates a new async I/O context.
239 */
240DECLINLINE(int) rtFileAsyncIoLinuxCreate(unsigned cEvents, LNXKAIOCONTEXT *pAioContext)
241{
242 int rc = syscall(__NR_io_setup, cEvents, pAioContext);
243 if (RT_UNLIKELY(rc == -1))
244 {
245 if (errno == EAGAIN)
246 return VERR_FILE_AIO_INSUFFICIENT_EVENTS;
247 else
248 return RTErrConvertFromErrno(errno);
249 }
250
251 return VINF_SUCCESS;
252}
253
254/**
255 * Destroys a async I/O context.
256 */
257DECLINLINE(int) rtFileAsyncIoLinuxDestroy(LNXKAIOCONTEXT AioContext)
258{
259 int rc = syscall(__NR_io_destroy, AioContext);
260 if (RT_UNLIKELY(rc == -1))
261 return RTErrConvertFromErrno(errno);
262
263 return VINF_SUCCESS;
264}
265
266/**
267 * Submits an array of I/O requests to the kernel.
268 */
269DECLINLINE(int) rtFileAsyncIoLinuxSubmit(LNXKAIOCONTEXT AioContext, long cReqs, LNXKAIOIOCB **ppIoCB, int *pcSubmitted)
270{
271 int rc = syscall(__NR_io_submit, AioContext, cReqs, ppIoCB);
272 if (RT_UNLIKELY(rc == -1))
273 return RTErrConvertFromErrno(errno);
274
275 *pcSubmitted = rc;
276
277 return VINF_SUCCESS;
278}
279
280/**
281 * Cancels a I/O request.
282 */
283DECLINLINE(int) rtFileAsyncIoLinuxCancel(LNXKAIOCONTEXT AioContext, PLNXKAIOIOCB pIoCB, PLNXKAIOIOEVENT pIoResult)
284{
285 int rc = syscall(__NR_io_cancel, AioContext, pIoCB, pIoResult);
286 if (RT_UNLIKELY(rc == -1))
287 return RTErrConvertFromErrno(errno);
288
289 return VINF_SUCCESS;
290}
291
292/**
293 * Waits for I/O events.
294 * @returns Number of events (natural number w/ 0), IPRT error code (negative).
295 */
296DECLINLINE(int) rtFileAsyncIoLinuxGetEvents(LNXKAIOCONTEXT AioContext, long cReqsMin, long cReqs,
297 PLNXKAIOIOEVENT paIoResults, struct timespec *pTimeout)
298{
299 int rc = syscall(__NR_io_getevents, AioContext, cReqsMin, cReqs, paIoResults, pTimeout);
300 if (RT_UNLIKELY(rc == -1))
301 return RTErrConvertFromErrno(errno);
302
303 return rc;
304}
305
306RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
307{
308 int rc = VINF_SUCCESS;
309 AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
310
311 /*
312 * Check if the API is implemented by creating a
313 * completion port.
314 */
315 LNXKAIOCONTEXT AioContext = 0;
316 rc = rtFileAsyncIoLinuxCreate(1, &AioContext);
317 if (RT_FAILURE(rc))
318 return rc;
319
320 rc = rtFileAsyncIoLinuxDestroy(AioContext);
321 if (RT_FAILURE(rc))
322 return rc;
323
324 /* Supported - fill in the limits. The alignment is the only restriction. */
325 pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
326 pAioLimits->cbBufferAlignment = 512;
327
328 return VINF_SUCCESS;
329}
330
331
332RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
333{
334 AssertPtrReturn(phReq, VERR_INVALID_POINTER);
335
336 /*
337 * Allocate a new request and initialize it.
338 */
339 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(*pReqInt));
340 if (RT_UNLIKELY(!pReqInt))
341 return VERR_NO_MEMORY;
342
343 pReqInt->pCtxInt = NULL;
344 pReqInt->u32Magic = RTFILEAIOREQ_MAGIC;
345 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
346
347 *phReq = (RTFILEAIOREQ)pReqInt;
348 return VINF_SUCCESS;
349}
350
351
352RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
353{
354 /*
355 * Validate the handle and ignore nil.
356 */
357 if (hReq == NIL_RTFILEAIOREQ)
358 return VINF_SUCCESS;
359 PRTFILEAIOREQINTERNAL pReqInt = hReq;
360 RTFILEAIOREQ_VALID_RETURN(pReqInt);
361 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
362
363 /*
364 * Trash the magic and free it.
365 */
366 ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
367 RTMemFree(pReqInt);
368 return VINF_SUCCESS;
369}
370
371
372/**
373 * Worker setting up the request.
374 */
375DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
376 uint16_t uTransferDirection,
377 RTFOFF off, void *pvBuf, size_t cbTransfer,
378 void *pvUser)
379{
380 /*
381 * Validate the input.
382 */
383 PRTFILEAIOREQINTERNAL pReqInt = hReq;
384 RTFILEAIOREQ_VALID_RETURN(pReqInt);
385 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
386 Assert(hFile != NIL_RTFILE);
387
388 if (uTransferDirection != LNXKAIO_IOCB_CMD_FSYNC)
389 {
390 AssertPtr(pvBuf);
391 Assert(off >= 0);
392 Assert(cbTransfer > 0);
393 }
394
395 /*
396 * Setup the control block and clear the finished flag.
397 */
398 pReqInt->AioCB.u16IoOpCode = uTransferDirection;
399 pReqInt->AioCB.uFileDesc = RTFileToNative(hFile);
400 pReqInt->AioCB.off = off;
401 pReqInt->AioCB.cbTransfer = cbTransfer;
402 pReqInt->AioCB.pvBuf = pvBuf;
403 pReqInt->AioCB.pvUser = pvUser;
404
405 pReqInt->pCtxInt = NULL;
406 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
407
408 return VINF_SUCCESS;
409}
410
411
412RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
413 void *pvBuf, size_t cbRead, void *pvUser)
414{
415 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_READ,
416 off, pvBuf, cbRead, pvUser);
417}
418
419
420RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
421 void const *pvBuf, size_t cbWrite, void *pvUser)
422{
423 return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_WRITE,
424 off, (void *)pvBuf, cbWrite, pvUser);
425}
426
427
428RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
429{
430 PRTFILEAIOREQINTERNAL pReqInt = hReq;
431 RTFILEAIOREQ_VALID_RETURN(pReqInt);
432 AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_HANDLE);
433 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
434
435 return rtFileAioReqPrepareTransfer(pReqInt, hFile, LNXKAIO_IOCB_CMD_FSYNC,
436 0, NULL, 0, pvUser);
437}
438
439
440RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
441{
442 PRTFILEAIOREQINTERNAL pReqInt = hReq;
443 RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
444
445 return pReqInt->AioCB.pvUser;
446}
447
448
449RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
450{
451 PRTFILEAIOREQINTERNAL pReqInt = hReq;
452 RTFILEAIOREQ_VALID_RETURN(pReqInt);
453 RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
454
455 LNXKAIOIOEVENT AioEvent;
456 int rc = rtFileAsyncIoLinuxCancel(pReqInt->AioContext, &pReqInt->AioCB, &AioEvent);
457 if (RT_SUCCESS(rc))
458 {
459 /*
460 * Decrement request count because the request will never arrive at the
461 * completion port.
462 */
463 AssertMsg(RT_VALID_PTR(pReqInt->pCtxInt), ("Invalid state. Request was canceled but wasn't submitted\n"));
464
465 ASMAtomicDecS32(&pReqInt->pCtxInt->cRequests);
466 pReqInt->Rc = VERR_FILE_AIO_CANCELED;
467 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
468 return VINF_SUCCESS;
469 }
470 if (rc == VERR_TRY_AGAIN)
471 return VERR_FILE_AIO_IN_PROGRESS;
472 return rc;
473}
474
475
476RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
477{
478 PRTFILEAIOREQINTERNAL pReqInt = hReq;
479 RTFILEAIOREQ_VALID_RETURN(pReqInt);
480 AssertPtrNull(pcbTransfered);
481 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
482 RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
483
484 if ( pcbTransfered
485 && RT_SUCCESS(pReqInt->Rc))
486 *pcbTransfered = pReqInt->cbTransfered;
487
488 return pReqInt->Rc;
489}
490
491
492RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
493 uint32_t fFlags)
494{
495 PRTFILEAIOCTXINTERNAL pCtxInt;
496 AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
497 AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
498
499 /* The kernel interface needs a maximum. */
500 if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
501 return VERR_OUT_OF_RANGE;
502
503 pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOCTXINTERNAL));
504 if (RT_UNLIKELY(!pCtxInt))
505 return VERR_NO_MEMORY;
506
507 /* Init the event handle. */
508 int rc = rtFileAsyncIoLinuxCreate(cAioReqsMax, &pCtxInt->AioContext);
509 if (RT_SUCCESS(rc))
510 {
511 pCtxInt->fWokenUp = false;
512 pCtxInt->fWaiting = false;
513 pCtxInt->hThreadWait = NIL_RTTHREAD;
514 pCtxInt->cRequestsMax = cAioReqsMax;
515 pCtxInt->fFlags = fFlags;
516 pCtxInt->u32Magic = RTFILEAIOCTX_MAGIC;
517 *phAioCtx = (RTFILEAIOCTX)pCtxInt;
518 }
519 else
520 RTMemFree(pCtxInt);
521
522 return rc;
523}
524
525
526RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
527{
528 /* Validate the handle and ignore nil. */
529 if (hAioCtx == NIL_RTFILEAIOCTX)
530 return VINF_SUCCESS;
531 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
532 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
533
534 /* Cannot destroy a busy context. */
535 if (RT_UNLIKELY(pCtxInt->cRequests))
536 return VERR_FILE_AIO_BUSY;
537
538 /* The native bit first, then mark it as dead and free it. */
539 int rc = rtFileAsyncIoLinuxDestroy(pCtxInt->AioContext);
540 if (RT_FAILURE(rc))
541 return rc;
542 ASMAtomicUoWriteU32(&pCtxInt->u32Magic, RTFILEAIOCTX_MAGIC_DEAD);
543 RTMemFree(pCtxInt);
544
545 return VINF_SUCCESS;
546}
547
548
549RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
550{
551 /* Nil means global here. */
552 if (hAioCtx == NIL_RTFILEAIOCTX)
553 return RTFILEAIO_UNLIMITED_REQS; /** @todo r=bird: I'm a bit puzzled by this return value since it
554 * is completely useless in RTFileAioCtxCreate. */
555
556 /* Return 0 if the handle is invalid, it's better than garbage I think... */
557 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
558 RTFILEAIOCTX_VALID_RETURN_RC(pCtxInt, 0);
559
560 return pCtxInt->cRequestsMax;
561}
562
563RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
564{
565 /* Nothing to do. */
566 NOREF(hAioCtx); NOREF(hFile);
567 return VINF_SUCCESS;
568}
569
570RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
571{
572 int rc = VINF_SUCCESS;
573
574 /*
575 * Parameter validation.
576 */
577 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
578 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
579 AssertReturn(cReqs > 0, VERR_INVALID_PARAMETER);
580 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
581 uint32_t i = cReqs;
582 PRTFILEAIOREQINTERNAL pReqInt = NULL;
583
584 /*
585 * Validate requests and associate with the context.
586 */
587 while (i-- > 0)
588 {
589 pReqInt = pahReqs[i];
590 if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
591 {
592 /* Undo everything and stop submitting. */
593 size_t iUndo = cReqs;
594 while (iUndo-- > i)
595 {
596 pReqInt = pahReqs[iUndo];
597 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
598 pReqInt->pCtxInt = NULL;
599 }
600 return VERR_INVALID_HANDLE;
601 }
602
603 pReqInt->AioContext = pCtxInt->AioContext;
604 pReqInt->pCtxInt = pCtxInt;
605 RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
606 }
607
608 do
609 {
610 /*
611 * We cast pahReqs to the Linux iocb structure to avoid copying the requests
612 * into a temporary array. This is possible because the iocb structure is
613 * the first element in the request structure (see PRTFILEAIOCTXINTERNAL).
614 */
615 int cReqsSubmitted = 0;
616 rc = rtFileAsyncIoLinuxSubmit(pCtxInt->AioContext, cReqs,
617 (PLNXKAIOIOCB *)pahReqs,
618 &cReqsSubmitted);
619 if (RT_FAILURE(rc))
620 {
621 /*
622 * We encountered an error.
623 * This means that the first IoCB
624 * is not correctly initialized
625 * (invalid buffer alignment or bad file descriptor).
626 * Revert every request into the prepared state except
627 * the first one which will switch to completed.
628 * Another reason could be insufficient resources.
629 */
630 i = cReqs;
631 while (i-- > 0)
632 {
633 /* Already validated. */
634 pReqInt = pahReqs[i];
635 pReqInt->pCtxInt = NULL;
636 pReqInt->AioContext = 0;
637 RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
638 }
639
640 if (rc == VERR_TRY_AGAIN)
641 return VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
642 else
643 {
644 /* The first request failed. */
645 pReqInt = pahReqs[0];
646 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
647 pReqInt->Rc = rc;
648 pReqInt->cbTransfered = 0;
649 return rc;
650 }
651 }
652
653 /* Advance. */
654 cReqs -= cReqsSubmitted;
655 pahReqs += cReqsSubmitted;
656 ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
657
658 } while (cReqs);
659
660 return rc;
661}
662
663
664RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
665 PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
666{
667 /*
668 * Validate the parameters, making sure to always set pcReqs.
669 */
670 AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
671 *pcReqs = 0; /* always set */
672 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
673 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
674 AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
675 AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
676 AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
677
678 /*
679 * Can't wait if there are not requests around.
680 */
681 if ( RT_UNLIKELY(ASMAtomicUoReadS32(&pCtxInt->cRequests) == 0)
682 && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
683 return VERR_FILE_AIO_NO_REQUEST;
684
685 /*
686 * Convert the timeout if specified.
687 */
688 struct timespec *pTimeout = NULL;
689 struct timespec Timeout = {0,0};
690 uint64_t StartNanoTS = 0;
691 if (cMillies != RT_INDEFINITE_WAIT)
692 {
693 Timeout.tv_sec = cMillies / 1000;
694 Timeout.tv_nsec = cMillies % 1000 * 1000000;
695 pTimeout = &Timeout;
696 StartNanoTS = RTTimeNanoTS();
697 }
698
699 /* Wait for at least one. */
700 if (!cMinReqs)
701 cMinReqs = 1;
702
703 /* For the wakeup call. */
704 Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
705 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
706
707 /*
708 * Loop until we're woken up, hit an error (incl timeout), or
709 * have collected the desired number of requests.
710 */
711 int rc = VINF_SUCCESS;
712 int cRequestsCompleted = 0;
713 while (!pCtxInt->fWokenUp)
714 {
715 LNXKAIOIOEVENT aPortEvents[AIO_MAXIMUM_REQUESTS_PER_CONTEXT];
716 int cRequestsToWait = RT_MIN(cReqs, AIO_MAXIMUM_REQUESTS_PER_CONTEXT);
717 ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
718 rc = rtFileAsyncIoLinuxGetEvents(pCtxInt->AioContext, cMinReqs, cRequestsToWait, &aPortEvents[0], pTimeout);
719 ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
720 if (RT_FAILURE(rc))
721 break;
722 uint32_t const cDone = rc;
723 rc = VINF_SUCCESS;
724
725 /*
726 * Process received events / requests.
727 */
728 for (uint32_t i = 0; i < cDone; i++)
729 {
730 /*
731 * The iocb is the first element in our request structure.
732 * So we can safely cast it directly to the handle (see above)
733 */
734 PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)aPortEvents[i].pIoCB;
735 AssertPtr(pReqInt);
736 Assert(pReqInt->u32Magic == RTFILEAIOREQ_MAGIC);
737
738 /** @todo aeichner: The rc field contains the result code
739 * like you can find in errno for the normal read/write ops.
740 * But there is a second field called rc2. I don't know the
741 * purpose for it yet.
742 */
743 if (RT_UNLIKELY(aPortEvents[i].rc < 0))
744 pReqInt->Rc = RTErrConvertFromErrno(-aPortEvents[i].rc); /* Convert to positive value. */
745 else
746 {
747 pReqInt->Rc = VINF_SUCCESS;
748 pReqInt->cbTransfered = aPortEvents[i].rc;
749 }
750
751 /* Mark the request as finished. */
752 RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
753
754 pahReqs[cRequestsCompleted++] = (RTFILEAIOREQ)pReqInt;
755 }
756
757 /*
758 * Done Yet? If not advance and try again.
759 */
760 if (cDone >= cMinReqs)
761 break;
762 cMinReqs -= cDone;
763 cReqs -= cDone;
764
765 if (cMillies != RT_INDEFINITE_WAIT)
766 {
767 /* The API doesn't return ETIMEDOUT, so we have to fix that ourselves. */
768 uint64_t NanoTS = RTTimeNanoTS();
769 uint64_t cMilliesElapsed = (NanoTS - StartNanoTS) / 1000000;
770 if (cMilliesElapsed >= cMillies)
771 {
772 rc = VERR_TIMEOUT;
773 break;
774 }
775
776 /* The syscall supposedly updates it, but we're paranoid. :-) */
777 Timeout.tv_sec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) / 1000;
778 Timeout.tv_nsec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) % 1000 * 1000000;
779 }
780 }
781
782 /*
783 * Update the context state and set the return value.
784 */
785 *pcReqs = cRequestsCompleted;
786 ASMAtomicSubS32(&pCtxInt->cRequests, cRequestsCompleted);
787 Assert(pCtxInt->hThreadWait == RTThreadSelf());
788 ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
789
790 /*
791 * Clear the wakeup flag and set rc.
792 */
793 if ( pCtxInt->fWokenUp
794 && RT_SUCCESS(rc))
795 {
796 ASMAtomicXchgBool(&pCtxInt->fWokenUp, false);
797 rc = VERR_INTERRUPTED;
798 }
799
800 return rc;
801}
802
803
804RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
805{
806 PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
807 RTFILEAIOCTX_VALID_RETURN(pCtxInt);
808
809 /** @todo r=bird: Define the protocol for how to resume work after calling
810 * this function. */
811
812 bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
813
814 /*
815 * Read the thread handle before the status flag.
816 * If we read the handle after the flag we might
817 * end up with an invalid handle because the thread
818 * waiting in RTFileAioCtxWakeup() might get scheduled
819 * before we read the flag and returns.
820 * We can ensure that the handle is valid if fWaiting is true
821 * when reading the handle before the status flag.
822 */
823 RTTHREAD hThread;
824 ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
825 bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
826 if ( !fWokenUp
827 && fWaiting)
828 {
829 /*
830 * If a thread waits the handle must be valid.
831 * It is possible that the thread returns from
832 * rtFileAsyncIoLinuxGetEvents() before the signal
833 * is send.
834 * This is no problem because we already set fWokenUp
835 * to true which will let the thread return VERR_INTERRUPTED
836 * and the next call to RTFileAioCtxWait() will not
837 * return VERR_INTERRUPTED because signals are not saved
838 * and will simply vanish if the destination thread can't
839 * receive it.
840 */
841 Assert(hThread != NIL_RTTHREAD);
842 RTThreadPoke(hThread);
843 }
844
845 return VINF_SUCCESS;
846}
847
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette