VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 73768

Last change on this file since 73768 was 73502, checked in by vboxsync, 6 years ago

IPRT: GCC 8.2.0 fixes

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 163.4 KB
Line 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2017 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
 33 * Defined as 1 if we're compiling with MSVC (_MSC_VER >= 1400) and using its intrinsics.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# pragma warning(push)
44# pragma warning(disable:4668) /* Several incorrect __cplusplus uses. */
45# pragma warning(disable:4255) /* Incorrect __slwpcb prototype. */
46# include <intrin.h>
47# pragma warning(pop)
48 /* Emit the intrinsics at all optimization levels. */
49# pragma intrinsic(_ReadWriteBarrier)
50# pragma intrinsic(__cpuid)
51# pragma intrinsic(__stosd)
52# pragma intrinsic(__stosw)
53# pragma intrinsic(__stosb)
54# pragma intrinsic(_BitScanForward)
55# pragma intrinsic(_BitScanReverse)
56# pragma intrinsic(_bittest)
57# pragma intrinsic(_bittestandset)
58# pragma intrinsic(_bittestandreset)
59# pragma intrinsic(_bittestandcomplement)
60# pragma intrinsic(_byteswap_ushort)
61# pragma intrinsic(_byteswap_ulong)
62# pragma intrinsic(_interlockedbittestandset)
63# pragma intrinsic(_interlockedbittestandreset)
64# pragma intrinsic(_InterlockedAnd)
65# pragma intrinsic(_InterlockedOr)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedExchange64)
80# pragma intrinsic(_InterlockedExchangeAdd64)
81# pragma intrinsic(_InterlockedAnd64)
82# pragma intrinsic(_InterlockedOr64)
83# pragma intrinsic(_InterlockedIncrement64)
84# pragma intrinsic(_InterlockedDecrement64)
85# endif
86#endif
87
88/*
89 * Include #pragma aux definitions for Watcom C/C++.
90 */
91#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
92# include "asm-watcom-x86-16.h"
93#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
94# include "asm-watcom-x86-32.h"
95#endif
96
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
 102 * @remarks The difference between ordered and unordered atomic operations is that
 103 * the former will complete outstanding reads and writes before continuing
 104 * while the latter doesn't make any promises about the order. Ordered
 105 * operations don't, it seems, make any 100% promise with regard to whether
 106 * the operation will complete before any subsequent memory access.
 107 * (please, correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
110 * are unordered (note the Uo).
111 *
112 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
113 * or even optimize assembler instructions away. For instance, in the following code
114 * the second rdmsr instruction is optimized away because gcc treats that instruction
115 * as deterministic:
116 *
117 * @code
118 * static inline uint64_t rdmsr_low(int idx)
119 * {
120 * uint32_t low;
121 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
122 * }
123 * ...
124 * uint32_t msr1 = rdmsr_low(1);
125 * foo(msr1);
126 * msr1 = rdmsr_low(1);
127 * bar(msr1);
128 * @endcode
129 *
130 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
131 * use the result of the first call as input parameter for bar() as well. For rdmsr this
132 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
133 * machine status information in general.
134 *
135 * @{
136 */
137
138
139/** @def RT_INLINE_ASM_GCC_4_3_X_X86
 140 * Used to work around some 4.3.x register allocation issues in this version of
 141 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
 142 * definitely not for 5.x */
143#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
144# define RT_INLINE_ASM_GCC_4_3_X_X86 1
145#else
146# define RT_INLINE_ASM_GCC_4_3_X_X86 0
147#endif
148
149/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
 150 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
 151 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
 152 * mode, x86.
 153 *
 154 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
 155 * when in PIC mode on x86.
 156 */
/* The #ifndef guard lets a build system predefine this to force either choice. */
157#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
158# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
159# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
160# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
161# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
162# elif ( (defined(PIC) || defined(__PIC__)) \
 163 && defined(RT_ARCH_X86) \
 164 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
 165 || defined(RT_OS_DARWIN)) )
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
167# else
168# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
169# endif
170#endif
171
172
173/** @def ASMReturnAddress
 174 * Gets the return address of the current (or calling if you like) function or method.
 175 */
176#ifdef _MSC_VER
 177# ifdef __cplusplus
178extern "C"
 179# endif
180void * _ReturnAddress(void);
 181# pragma intrinsic(_ReturnAddress)
 182# define ASMReturnAddress() _ReturnAddress()
 183#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
 184# define ASMReturnAddress() __builtin_return_address(0)
 185#elif defined(__WATCOMC__)
/* Deliberately expands to an unresolved symbol so any use fails at link time. */
 186# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
 187#else
 188# error "Unsupported compiler."
 189#endif
190
191
192/**
 193 * Compiler memory barrier.
 194 *
 195 * Ensure that the compiler does not use any cached (register/tmp stack) memory
 196 * values or any outstanding writes when returning from this function.
 197 *
 198 * This function must be used if non-volatile data is modified by a
 199 * device or the VMM. Typical cases are port access, MMIO access,
 200 * trapping instruction, etc.
 201 *
 202 * @remarks This is a compiler-only barrier; it emits no fence instruction and
 203 *          does not order accesses performed by the CPU.
 204 */
202#if RT_INLINE_ASM_GNU_STYLE
203# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
204#elif RT_INLINE_ASM_USES_INTRIN
205# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
206#elif defined(__WATCOMC__)
207void ASMCompilerBarrier(void);
208#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
209DECLINLINE(void) ASMCompilerBarrier(void)
210{
211 __asm
212 {
213 }
214}
215#endif
216
217
218/** @def ASMBreakpoint
 219 * Debugger Breakpoint.
 220 * @deprecated Use RT_BREAKPOINT instead.
 221 * @internal
 222 */
223#define ASMBreakpoint() RT_BREAKPOINT()
224
225
226/**
 227 * Spinloop hint for platforms that have these, empty function on the other
 228 * platforms.
 229 *
 230 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
 231 * spin locks.
 232 */
233#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
234DECLASM(void) ASMNopPause(void);
235#else
236DECLINLINE(void) ASMNopPause(void)
237{
238# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
239# if RT_INLINE_ASM_GNU_STYLE
/* 0xF3 0x90 is PAUSE (REP NOP); emitted as raw bytes so old assemblers cope. */
240 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
241# else
242 __asm {
243 _emit 0f3h
244 _emit 090h
245 }
246# endif
247# else
248 /* dummy */
249# endif
250}
251#endif
252
253
254/**
 255 * Atomically Exchange an unsigned 8-bit value, ordered.
 256 *
 257 * @returns Current *pu8 value
 258 * @param pu8 Pointer to the 8-bit variable to update.
 259 * @param u8 The 8-bit value to assign to *pu8.
 260 */
261#if RT_INLINE_ASM_EXTERNAL
262DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8);
263#else
264DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
265{
266# if RT_INLINE_ASM_GNU_STYLE
/* x86 xchg with a memory operand is implicitly locked, so no LOCK prefix is needed. */
267 __asm__ __volatile__("xchgb %0, %1\n\t"
268 : "=m" (*pu8),
269 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
270 : "1" (u8),
271 "m" (*pu8));
272# else
273 __asm
274 {
275# ifdef RT_ARCH_AMD64
276 mov rdx, [pu8]
277 mov al, [u8]
278 xchg [rdx], al
279 mov [u8], al
280# else
281 mov edx, [pu8]
282 mov al, [u8]
283 xchg [edx], al
284 mov [u8], al
285# endif
286 }
287# endif
/* u8 now holds the previous *pu8 value in all branches. */
288 return u8;
289}
290#endif
291
292
293/**
294 * Atomically Exchange a signed 8-bit value, ordered.
295 *
296 * @returns Current *pu8 value
297 * @param pi8 Pointer to the 8-bit variable to update.
298 * @param i8 The 8-bit value to assign to *pi8.
299 */
300DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8)
301{
302 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
303}
304
305
306/**
307 * Atomically Exchange a bool value, ordered.
308 *
309 * @returns Current *pf value
310 * @param pf Pointer to the 8-bit variable to update.
311 * @param f The 8-bit value to assign to *pi8.
312 */
313DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f)
314{
315#ifdef _MSC_VER
316 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
317#else
318 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
319#endif
320}
321
322
323/**
 324 * Atomically Exchange an unsigned 16-bit value, ordered.
 325 *
 326 * @returns Current *pu16 value
 327 * @param pu16 Pointer to the 16-bit variable to update.
 328 * @param u16 The 16-bit value to assign to *pu16.
 329 */
330#if RT_INLINE_ASM_EXTERNAL
331DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16);
332#else
333DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
334{
335# if RT_INLINE_ASM_GNU_STYLE
/* x86 xchg with a memory operand is implicitly locked. */
336 __asm__ __volatile__("xchgw %0, %1\n\t"
337 : "=m" (*pu16),
338 "=r" (u16)
339 : "1" (u16),
340 "m" (*pu16));
341# else
342 __asm
343 {
344# ifdef RT_ARCH_AMD64
345 mov rdx, [pu16]
346 mov ax, [u16]
347 xchg [rdx], ax
348 mov [u16], ax
349# else
350 mov edx, [pu16]
351 mov ax, [u16]
352 xchg [edx], ax
353 mov [u16], ax
354# endif
355 }
356# endif
/* u16 now holds the previous *pu16 value in all branches. */
357 return u16;
358}
359#endif
360
361
362/**
363 * Atomically Exchange a signed 16-bit value, ordered.
364 *
365 * @returns Current *pu16 value
366 * @param pi16 Pointer to the 16-bit variable to update.
367 * @param i16 The 16-bit value to assign to *pi16.
368 */
369DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16)
370{
371 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
372}
373
374
375/**
 376 * Atomically Exchange an unsigned 32-bit value, ordered.
 377 *
 378 * @returns Current *pu32 value
 379 * @param pu32 Pointer to the 32-bit variable to update.
 380 * @param u32 The 32-bit value to assign to *pu32.
 381 *
 382 * @remarks Does not work on 286 and earlier.
 383 */
384#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
385DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32);
386#else
387DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
388{
389# if RT_INLINE_ASM_GNU_STYLE
/* x86 xchg with a memory operand is implicitly locked. */
390 __asm__ __volatile__("xchgl %0, %1\n\t"
391 : "=m" (*pu32),
392 "=r" (u32)
393 : "1" (u32),
394 "m" (*pu32));
395
396# elif RT_INLINE_ASM_USES_INTRIN
397 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
398
399# else
400 __asm
401 {
402# ifdef RT_ARCH_AMD64
403 mov rdx, [pu32]
404 mov eax, u32
405 xchg [rdx], eax
406 mov [u32], eax
407# else
408 mov edx, [pu32]
409 mov eax, u32
410 xchg [edx], eax
411 mov [u32], eax
412# endif
413 }
414# endif
/* u32 now holds the previous *pu32 value in all branches. */
415 return u32;
416}
417#endif
418
419
420/**
421 * Atomically Exchange a signed 32-bit value, ordered.
422 *
423 * @returns Current *pu32 value
424 * @param pi32 Pointer to the 32-bit variable to update.
425 * @param i32 The 32-bit value to assign to *pi32.
426 */
427DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32)
428{
429 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
430}
431
432
433/**
 434 * Atomically Exchange an unsigned 64-bit value, ordered.
 435 *
 436 * @returns Current *pu64 value
 437 * @param pu64 Pointer to the 64-bit variable to update.
 438 * @param u64 The 64-bit value to assign to *pu64.
 439 *
 440 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
 441 */
442#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
 443 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
444DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64);
445#else
446DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
447{
448# if defined(RT_ARCH_AMD64)
449# if RT_INLINE_ASM_USES_INTRIN
450 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
451
452# elif RT_INLINE_ASM_GNU_STYLE
453 __asm__ __volatile__("xchgq %0, %1\n\t"
454 : "=m" (*pu64),
455 "=r" (u64)
456 : "1" (u64),
457 "m" (*pu64));
458# else
459 __asm
460 {
461 mov rdx, [pu64]
462 mov rax, [u64]
463 xchg [rdx], rax
464 mov [u64], rax
465 }
466# endif
467# else /* !RT_ARCH_AMD64 */
/* 32-bit x86 has no 64-bit xchg; emulate it with a lock cmpxchg8b retry loop. */
468# if RT_INLINE_ASM_GNU_STYLE
469# if defined(PIC) || defined(__PIC__)
/* PIC mode: EBX is reserved (GOT pointer), so the low dword is kept in memory
   and swapped into EBX only around the cmpxchg8b. */
470 uint32_t u32EBX = (uint32_t)u64;
471 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
472 "xchgl %%ebx, %3\n\t"
473 "1:\n\t"
474 "lock; cmpxchg8b (%5)\n\t"
475 "jnz 1b\n\t"
476 "movl %3, %%ebx\n\t"
477 /*"xchgl %%esi, %5\n\t"*/
478 : "=A" (u64),
479 "=m" (*pu64)
480 : "0" (*pu64),
481 "m" ( u32EBX ),
482 "c" ( (uint32_t)(u64 >> 32) ),
483 "S" (pu64));
484# else /* !PIC */
485 __asm__ __volatile__("1:\n\t"
486 "lock; cmpxchg8b %1\n\t"
487 "jnz 1b\n\t"
488 : "=A" (u64),
489 "=m" (*pu64)
490 : "0" (*pu64),
491 "b" ( (uint32_t)u64 ),
492 "c" ( (uint32_t)(u64 >> 32) ));
493# endif
494# else
495 __asm
496 {
497 mov ebx, dword ptr [u64]
498 mov ecx, dword ptr [u64 + 4]
499 mov edi, pu64
500 mov eax, dword ptr [edi]
501 mov edx, dword ptr [edi + 4]
502 retry:
503 lock cmpxchg8b [edi]
504 jnz retry
505 mov dword ptr [u64], eax
506 mov dword ptr [u64 + 4], edx
507 }
508# endif
509# endif /* !RT_ARCH_AMD64 */
/* u64 now holds the previous *pu64 value in all branches. */
510 return u64;
511}
512#endif
513
514
515/**
516 * Atomically Exchange an signed 64-bit value, ordered.
517 *
518 * @returns Current *pi64 value
519 * @param pi64 Pointer to the 64-bit variable to update.
520 * @param i64 The 64-bit value to assign to *pi64.
521 */
522DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64)
523{
524 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
525}
526
527
528/**
529 * Atomically Exchange a pointer value, ordered.
530 *
531 * @returns Current *ppv value
532 * @param ppv Pointer to the pointer variable to update.
533 * @param pv The pointer value to assign to *ppv.
534 */
535DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv)
536{
537#if ARCH_BITS == 32 || ARCH_BITS == 16
538 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
539#elif ARCH_BITS == 64
540 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
541#else
542# error "ARCH_BITS is bogus"
543#endif
544}
545
546
547/**
 548 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
 549 *
 550 * @returns Current *pv value
 551 * @param ppv Pointer to the pointer variable to update.
 552 * @param pv The pointer value to assign to *ppv.
 553 * @param Type The type of *ppv, sans volatile.
 554 */
555#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
/* GCC: a statement expression with throwaway locals gives compile-time type
   checking of both arguments before punning everything down to void pointers. */
556# define ASMAtomicXchgPtrT(ppv, pv, Type) \
 557 __extension__ \
 558 ({\
 559 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
 560 Type const pvTypeChecked = (pv); \
 561 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
 562 pvTypeCheckedRet; \
 563 })
564#else
/* Other compilers: plain cast, no extra type checking. */
565# define ASMAtomicXchgPtrT(ppv, pv, Type) \
 566 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
567#endif
568
569
570/**
571 * Atomically Exchange a raw-mode context pointer value, ordered.
572 *
573 * @returns Current *ppv value
574 * @param ppvRC Pointer to the pointer variable to update.
575 * @param pvRC The pointer value to assign to *ppv.
576 */
577DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC)
578{
579 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
580}
581
582
583/**
584 * Atomically Exchange a ring-0 pointer value, ordered.
585 *
586 * @returns Current *ppv value
587 * @param ppvR0 Pointer to the pointer variable to update.
588 * @param pvR0 The pointer value to assign to *ppv.
589 */
590DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0)
591{
592#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
593 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
594#elif R0_ARCH_BITS == 64
595 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
596#else
597# error "R0_ARCH_BITS is bogus"
598#endif
599}
600
601
602/**
603 * Atomically Exchange a ring-3 pointer value, ordered.
604 *
605 * @returns Current *ppv value
606 * @param ppvR3 Pointer to the pointer variable to update.
607 * @param pvR3 The pointer value to assign to *ppv.
608 */
609DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3)
610{
611#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
612 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
613#elif R3_ARCH_BITS == 64
614 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
615#else
616# error "R3_ARCH_BITS is bogus"
617#endif
618}
619
620
621/** @def ASMAtomicXchgHandle
 622 * Atomically Exchange a typical IPRT handle value, ordered.
 623 *
 624 * @param ph Pointer to the value to update.
 625 * @param hNew The new value to assigned to *pu.
 626 * @param phRes Where to store the current *ph value.
 627 *
 628 * @remarks This doesn't currently work for all handles (like RTFILE).
 629 */
/* The AssertCompile checks guard the type punning: the handle must be exactly
   register sized before it is exchanged as a plain integer. */
630#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
 632 do { \
 633 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
 634 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
 635 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
 636 } while (0)
637#elif HC_ARCH_BITS == 64
638# define ASMAtomicXchgHandle(ph, hNew, phRes) \
 639 do { \
 640 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
 641 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
 642 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
 643 } while (0)
644#else
645# error HC_ARCH_BITS
646#endif
647
648
649/**
 650 * Atomically Exchange a value which size might differ
 651 * between platforms or compilers, ordered.
 652 *
 653 * @param pu Pointer to the variable to update.
 654 * @param uNew The value to assign to *pu.
 655 * @todo This is busted as its missing the result argument.
 656 */
/* Dispatches on sizeof(*(pu)) at compile time; the old value is discarded
   (see the @todo above) - use ASMAtomicXchgSizeCorrect when you need it. */
657#define ASMAtomicXchgSize(pu, uNew) \
 658 do { \
 659 switch (sizeof(*(pu))) { \
 660 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
 661 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
 662 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
 663 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
 664 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
 665 } \
 666 } while (0)
667
/**
 * Atomically Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * Dispatches on sizeof(*(pu)) at compile time and stores the previous value
 * in *puRes (unlike ASMAtomicXchgSize, which discards it).
 *
 * @param   pu      Pointer to the variable to update.
 * @param   uNew    The value to assign to *pu.
 * @param   puRes   Where to store the current *pu value.
 */
#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
    do { \
        switch (sizeof(*(pu))) { \
            case 1: *(uint8_t  RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t  RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
            case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
            case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
            case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
            default: AssertMsgFailed(("ASMAtomicXchgSizeCorrect: size %d is not supported\n", sizeof(*(pu)))); \
        } \
    } while (0)
686
687
688
689/**
 690 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
 691 *
 692 * @returns true if xchg was done.
 693 * @returns false if xchg wasn't done.
 694 *
 695 * @param pu8 Pointer to the value to update.
 696 * @param u8New The new value to assigned to *pu8.
 697 * @param u8Old The old value to *pu8 compare with.
 698 *
 699 * @remarks x86: Requires a 486 or later.
 700 */
701#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
702DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old);
703#else
704DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old)
705{
706 uint8_t u8Ret;
/* cmpxchgb compares AL (u8Old) with *pu8; ZF signals success, captured via setz. */
707 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
708 "setz %1\n\t"
709 : "=m" (*pu8),
710 "=qm" (u8Ret),
711 "=a" (u8Old)
712 : "q" (u8New),
713 "2" (u8Old),
714 "m" (*pu8));
715 return (bool)u8Ret;
716}
717#endif
718
719
720/**
721 * Atomically Compare and Exchange a signed 8-bit value, ordered.
722 *
723 * @returns true if xchg was done.
724 * @returns false if xchg wasn't done.
725 *
726 * @param pi8 Pointer to the value to update.
727 * @param i8New The new value to assigned to *pi8.
728 * @param i8Old The old value to *pi8 compare with.
729 *
730 * @remarks x86: Requires a 486 or later.
731 */
732DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old)
733{
734 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
735}
736
737
738/**
739 * Atomically Compare and Exchange a bool value, ordered.
740 *
741 * @returns true if xchg was done.
742 * @returns false if xchg wasn't done.
743 *
744 * @param pf Pointer to the value to update.
745 * @param fNew The new value to assigned to *pf.
746 * @param fOld The old value to *pf compare with.
747 *
748 * @remarks x86: Requires a 486 or later.
749 */
750DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld)
751{
752 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
753}
754
755
756/**
 757 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
 758 *
 759 * @returns true if xchg was done.
 760 * @returns false if xchg wasn't done.
 761 *
 762 * @param pu32 Pointer to the value to update.
 763 * @param u32New The new value to assigned to *pu32.
 764 * @param u32Old The old value to *pu32 compare with.
 765 *
 766 * @remarks x86: Requires a 486 or later.
 767 */
768#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
769DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old);
770#else
771DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old)
772{
773# if RT_INLINE_ASM_GNU_STYLE
774 uint8_t u8Ret;
/* cmpxchgl compares EAX (u32Old) with *pu32; ZF signals success, captured via setz. */
775 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
776 "setz %1\n\t"
777 : "=m" (*pu32),
778 "=qm" (u8Ret),
779 "=a" (u32Old)
780 : "r" (u32New),
781 "2" (u32Old),
782 "m" (*pu32));
783 return (bool)u8Ret;
784
785# elif RT_INLINE_ASM_USES_INTRIN
786 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;

787
788# else
789 uint32_t u32Ret;
790 __asm
791 {
792# ifdef RT_ARCH_AMD64
793 mov rdx, [pu32]
794# else
795 mov edx, [pu32]
796# endif
797 mov eax, [u32Old]
798 mov ecx, [u32New]
799# ifdef RT_ARCH_AMD64
800 lock cmpxchg [rdx], ecx
801# else
802 lock cmpxchg [edx], ecx
803# endif
804 setz al
805 movzx eax, al
806 mov [u32Ret], eax
807 }
808 return !!u32Ret;
809# endif
810}
811#endif
812
813
814/**
815 * Atomically Compare and Exchange a signed 32-bit value, ordered.
816 *
817 * @returns true if xchg was done.
818 * @returns false if xchg wasn't done.
819 *
820 * @param pi32 Pointer to the value to update.
821 * @param i32New The new value to assigned to *pi32.
822 * @param i32Old The old value to *pi32 compare with.
823 *
824 * @remarks x86: Requires a 486 or later.
825 */
826DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old)
827{
828 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
829}
830
831
832/**
833 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
834 *
835 * @returns true if xchg was done.
836 * @returns false if xchg wasn't done.
837 *
838 * @param pu64 Pointer to the 64-bit variable to update.
839 * @param u64New The 64-bit value to assign to *pu64.
840 * @param u64Old The value to compare with.
841 *
842 * @remarks x86: Requires a Pentium or later.
843 */
844#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
845 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
846DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old);
847#else
848DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old)
849{
850# if RT_INLINE_ASM_USES_INTRIN
851 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
852
853# elif defined(RT_ARCH_AMD64)
854# if RT_INLINE_ASM_GNU_STYLE
855 uint8_t u8Ret;
856 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
857 "setz %1\n\t"
858 : "=m" (*pu64),
859 "=qm" (u8Ret),
860 "=a" (u64Old)
861 : "r" (u64New),
862 "2" (u64Old),
863 "m" (*pu64));
864 return (bool)u8Ret;
865# else
866 bool fRet;
867 __asm
868 {
869 mov rdx, [pu32]
870 mov rax, [u64Old]
871 mov rcx, [u64New]
872 lock cmpxchg [rdx], rcx
873 setz al
874 mov [fRet], al
875 }
876 return fRet;
877# endif
878# else /* !RT_ARCH_AMD64 */
879 uint32_t u32Ret;
880# if RT_INLINE_ASM_GNU_STYLE
881# if defined(PIC) || defined(__PIC__)
882 uint32_t u32EBX = (uint32_t)u64New;
883 uint32_t u32Spill;
884 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
885 "lock; cmpxchg8b (%6)\n\t"
886 "setz %%al\n\t"
887 "movl %4, %%ebx\n\t"
888 "movzbl %%al, %%eax\n\t"
889 : "=a" (u32Ret),
890 "=d" (u32Spill),
891# if RT_GNUC_PREREQ(4, 3)
892 "+m" (*pu64)
893# else
894 "=m" (*pu64)
895# endif
896 : "A" (u64Old),
897 "m" ( u32EBX ),
898 "c" ( (uint32_t)(u64New >> 32) ),
899 "S" (pu64));
900# else /* !PIC */
901 uint32_t u32Spill;
902 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
903 "setz %%al\n\t"
904 "movzbl %%al, %%eax\n\t"
905 : "=a" (u32Ret),
906 "=d" (u32Spill),
907 "+m" (*pu64)
908 : "A" (u64Old),
909 "b" ( (uint32_t)u64New ),
910 "c" ( (uint32_t)(u64New >> 32) ));
911# endif
912 return (bool)u32Ret;
913# else
914 __asm
915 {
916 mov ebx, dword ptr [u64New]
917 mov ecx, dword ptr [u64New + 4]
918 mov edi, [pu64]
919 mov eax, dword ptr [u64Old]
920 mov edx, dword ptr [u64Old + 4]
921 lock cmpxchg8b [edi]
922 setz al
923 movzx eax, al
924 mov dword ptr [u32Ret], eax
925 }
926 return !!u32Ret;
927# endif
928# endif /* !RT_ARCH_AMD64 */
929}
930#endif
931
932
933/**
934 * Atomically Compare and exchange a signed 64-bit value, ordered.
935 *
936 * @returns true if xchg was done.
937 * @returns false if xchg wasn't done.
938 *
939 * @param pi64 Pointer to the 64-bit variable to update.
940 * @param i64 The 64-bit value to assign to *pu64.
941 * @param i64Old The value to compare with.
942 *
943 * @remarks x86: Requires a Pentium or later.
944 */
945DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old)
946{
947 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
948}
949
950
951/**
952 * Atomically Compare and Exchange a pointer value, ordered.
953 *
954 * @returns true if xchg was done.
955 * @returns false if xchg wasn't done.
956 *
957 * @param ppv Pointer to the value to update.
958 * @param pvNew The new value to assigned to *ppv.
959 * @param pvOld The old value to *ppv compare with.
960 *
961 * @remarks x86: Requires a 486 or later.
962 */
963DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld)
964{
965#if ARCH_BITS == 32 || ARCH_BITS == 16
966 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
967#elif ARCH_BITS == 64
968 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
969#else
970# error "ARCH_BITS is bogus"
971#endif
972}
973
974
975/**
976 * Atomically Compare and Exchange a pointer value, ordered.
977 *
978 * @returns true if xchg was done.
979 * @returns false if xchg wasn't done.
980 *
981 * @param ppv Pointer to the value to update.
982 * @param pvNew The new value to assigned to *ppv.
983 * @param pvOld The old value to *ppv compare with.
984 *
985 * @remarks This is relatively type safe on GCC platforms.
986 * @remarks x86: Requires a 486 or later.
987 */
988#ifdef __GNUC__
989# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
990 __extension__ \
991 ({\
992 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
993 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
994 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
995 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
996 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
997 fMacroRet; \
998 })
999#else
1000# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1001 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1002#endif
1003
1004
/** @def ASMAtomicCmpXchgHandle
 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
 *
 * @param   ph      Pointer to the handle value to update.
 * @param   hNew    The new value to assign to *ph.
 * @param   hOld    The old value to compare *ph with.
 * @param   fRc     Where to store the result (bool).
 *
 * @remarks This doesn't currently work for all handles (like RTFILE).
 * @remarks x86: Requires a 486 or later.
 */
#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
        (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
    } while (0)
#elif HC_ARCH_BITS == 64
# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
    do { \
        AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
        (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
    } while (0)
#else
# error HC_ARCH_BITS
#endif
1031
1032
/** @def ASMAtomicCmpXchgSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers, ordered.
 *
 * @param   pu      Pointer to the value to update (4 or 8 bytes large).
 * @param   uNew    The new value to assign to *pu.
 * @param   uOld    The old value to compare *pu with.
 * @param   fRc     Where to store the result (bool); false for unsupported sizes.
 *
 * @remarks x86: Requires a 486 or later.
 */
#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                break; \
        } \
    } while (0)
1056
1057
/**
 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
 * passes back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   pu32        Pointer to the value to update.
 * @param   u32New      The new value to assign to *pu32.
 * @param   u32Old      The old value to compare *pu32 with.
 * @param   pu32Old     Where the actual old *pu32 value is stored (always
 *                      written, also on failure).
 *
 * @remarks x86: Requires a 486 or later.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old);
#else
DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* cmpxchg leaves the previous *pu32 in eax, which the "=a" output
       constraint routes straight into *pu32Old; setz captures ZF (success). */
    uint8_t u8Ret;
    __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
                         "setz  %1\n\t"
                         : "=m" (*pu32),
                           "=qm" (u8Ret),
                           "=a" (*pu32Old)
                         : "r" (u32New),
                           "a" (u32Old),
                           "m" (*pu32));
    return (bool)u8Ret;

# elif RT_INLINE_ASM_USES_INTRIN
    /* The intrinsic returns the previous value; success iff it equals u32Old. */
    return (*pu32Old =_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;

# else
    uint32_t u32Ret;
    __asm
    {
#  ifdef RT_ARCH_AMD64
        mov     rdx, [pu32]
#  else
        mov     edx, [pu32]
#  endif
        mov     eax, [u32Old]
        mov     ecx, [u32New]
#  ifdef RT_ARCH_AMD64
        lock cmpxchg [rdx], ecx
        mov     rdx, [pu32Old]
        mov     [rdx], eax
#  else
        lock cmpxchg [edx], ecx
        mov     edx, [pu32Old]
        mov     [edx], eax
#  endif
        setz    al
        movzx   eax, al
        mov     [u32Ret], eax
    }
    return !!u32Ret;
# endif
}
#endif
1120
1121
1122/**
1123 * Atomically Compare and Exchange a signed 32-bit value, additionally
1124 * passes back old value, ordered.
1125 *
1126 * @returns true if xchg was done.
1127 * @returns false if xchg wasn't done.
1128 *
1129 * @param pi32 Pointer to the value to update.
1130 * @param i32New The new value to assigned to *pi32.
1131 * @param i32Old The old value to *pi32 compare with.
1132 * @param pi32Old Pointer store the old value at.
1133 *
1134 * @remarks x86: Requires a 486 or later.
1135 */
1136DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old)
1137{
1138 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1139}
1140
1141
1142/**
1143 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1144 * passing back old value, ordered.
1145 *
1146 * @returns true if xchg was done.
1147 * @returns false if xchg wasn't done.
1148 *
1149 * @param pu64 Pointer to the 64-bit variable to update.
1150 * @param u64New The 64-bit value to assign to *pu64.
1151 * @param u64Old The value to compare with.
1152 * @param pu64Old Pointer store the old value at.
1153 *
1154 * @remarks x86: Requires a Pentium or later.
1155 */
1156#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1157 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1158DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old);
1159#else
1160DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old)
1161{
1162# if RT_INLINE_ASM_USES_INTRIN
1163 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1164
1165# elif defined(RT_ARCH_AMD64)
1166# if RT_INLINE_ASM_GNU_STYLE
1167 uint8_t u8Ret;
1168 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1169 "setz %1\n\t"
1170 : "=m" (*pu64),
1171 "=qm" (u8Ret),
1172 "=a" (*pu64Old)
1173 : "r" (u64New),
1174 "a" (u64Old),
1175 "m" (*pu64));
1176 return (bool)u8Ret;
1177# else
1178 bool fRet;
1179 __asm
1180 {
1181 mov rdx, [pu32]
1182 mov rax, [u64Old]
1183 mov rcx, [u64New]
1184 lock cmpxchg [rdx], rcx
1185 mov rdx, [pu64Old]
1186 mov [rdx], rax
1187 setz al
1188 mov [fRet], al
1189 }
1190 return fRet;
1191# endif
1192# else /* !RT_ARCH_AMD64 */
1193# if RT_INLINE_ASM_GNU_STYLE
1194 uint64_t u64Ret;
1195# if defined(PIC) || defined(__PIC__)
1196 /* NB: this code uses a memory clobber description, because the clean
1197 * solution with an output value for *pu64 makes gcc run out of registers.
1198 * This will cause suboptimal code, and anyone with a better solution is
1199 * welcome to improve this. */
1200 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1201 "lock; cmpxchg8b %3\n\t"
1202 "xchgl %%ebx, %1\n\t"
1203 : "=A" (u64Ret)
1204 : "DS" ((uint32_t)u64New),
1205 "c" ((uint32_t)(u64New >> 32)),
1206 "m" (*pu64),
1207 "0" (u64Old)
1208 : "memory" );
1209# else /* !PIC */
1210 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1211 : "=A" (u64Ret),
1212 "=m" (*pu64)
1213 : "b" ((uint32_t)u64New),
1214 "c" ((uint32_t)(u64New >> 32)),
1215 "m" (*pu64),
1216 "0" (u64Old));
1217# endif
1218 *pu64Old = u64Ret;
1219 return u64Ret == u64Old;
1220# else
1221 uint32_t u32Ret;
1222 __asm
1223 {
1224 mov ebx, dword ptr [u64New]
1225 mov ecx, dword ptr [u64New + 4]
1226 mov edi, [pu64]
1227 mov eax, dword ptr [u64Old]
1228 mov edx, dword ptr [u64Old + 4]
1229 lock cmpxchg8b [edi]
1230 mov ebx, [pu64Old]
1231 mov [ebx], eax
1232 setz al
1233 movzx eax, al
1234 add ebx, 4
1235 mov [ebx], edx
1236 mov dword ptr [u32Ret], eax
1237 }
1238 return !!u32Ret;
1239# endif
1240# endif /* !RT_ARCH_AMD64 */
1241}
1242#endif
1243
1244
1245/**
1246 * Atomically Compare and exchange a signed 64-bit value, additionally
1247 * passing back old value, ordered.
1248 *
1249 * @returns true if xchg was done.
1250 * @returns false if xchg wasn't done.
1251 *
1252 * @param pi64 Pointer to the 64-bit variable to update.
1253 * @param i64 The 64-bit value to assign to *pu64.
1254 * @param i64Old The value to compare with.
1255 * @param pi64Old Pointer store the old value at.
1256 *
1257 * @remarks x86: Requires a Pentium or later.
1258 */
1259DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old)
1260{
1261 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1262}
1263
1264/** @def ASMAtomicCmpXchgExHandle
1265 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1266 *
1267 * @param ph Pointer to the value to update.
1268 * @param hNew The new value to assigned to *pu.
1269 * @param hOld The old value to *pu compare with.
1270 * @param fRc Where to store the result.
1271 * @param phOldVal Pointer to where to store the old value.
1272 *
1273 * @remarks This doesn't currently work for all handles (like RTFILE).
1274 */
1275#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1279 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1281 } while (0)
1282#elif HC_ARCH_BITS == 64
1283# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1284 do { \
1285 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1286 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1287 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1288 } while (0)
1289#else
1290# error HC_ARCH_BITS
1291#endif
1292
1293
/** @def ASMAtomicCmpXchgExSize
 * Atomically Compare and Exchange a value which size might differ
 * between platforms or compilers. Additionally passes back old value.
 *
 * @param   pu          Pointer to the value to update (4 or 8 bytes large).
 * @param   uNew        The new value to assign to *pu.
 * @param   uOld        The old value to compare *pu with.
 * @param   fRc         Where to store the result (bool).
 * @param   puOldVal    Pointer to where to store the old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
/* Fixed: the expansion previously referenced the nonexistent 'uOldVal' (the
   parameter is 'puOldVal') and assigned 0 to the pointer instead of through
   it; the failure message also named the wrong macro. */
#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
    do { \
        switch (sizeof(*(pu))) { \
            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
                break; \
            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
                break; \
            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
                (fRc) = false; \
                *(puOldVal) = 0; \
                break; \
        } \
    } while (0)
1319
1320
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 * @param   ppvOld  Where to store the actual old value.
 *
 * @remarks x86: Requires a 486 or later.
 */
DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
                                           void RT_FAR * RT_FAR *ppvOld)
{
    /* A data pointer is the native word size here, so defer to the integer
       compare-and-exchange-ex of the matching width. */
#if ARCH_BITS == 32 || ARCH_BITS == 16
    return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
#elif ARCH_BITS == 64
    return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
#else
# error "ARCH_BITS is bogus"
#endif
}
1346
1347
/**
 * Atomically Compare and Exchange a pointer value, additionally
 * passing back old value, ordered.
 *
 * @returns true if xchg was done.
 * @returns false if xchg wasn't done.
 *
 * @param   ppv     Pointer to the value to update.
 * @param   pvNew   The new value to assign to *ppv.
 * @param   pvOld   The old value to compare *ppv with.
 * @param   ppvOld  Where to store the actual old value.
 *
 * @remarks This is relatively type safe on GCC platforms: the typed
 *          temporaries below make the compiler check the arguments against
 *          *ppv before everything is flattened to void *.
 * @remarks x86: Requires a 486 or later.
 */
#ifdef __GNUC__
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked    = (ppv); \
        __typeof__(*(ppv)) const pvNewTypeChecked    = (pvNew); \
        __typeof__(*(ppv)) const pvOldTypeChecked    = (pvOld); \
        __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
        bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
                                                   (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
                                                   (void **)ppvOldTypeChecked); \
        fMacroRet; \
    })
#else
# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
    ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
#endif
1380
1381
/**
 * Virtualization unfriendly serializing instruction, always exits.
 *
 * Executes CPUID (leaf 0) purely for its instruction-serializing side effect.
 */
#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
DECLASM(void) ASMSerializeInstructionCpuId(void);
#else
DECLINLINE(void) ASMSerializeInstructionCpuId(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    RTCCUINTREG xAX = 0;
#  ifdef RT_ARCH_AMD64
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "rbx", "rcx", "rdx", "memory");
#  elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
    /* i386 PIC: ebx cannot be listed as clobbered (it is presumably reserved
       for the GOT pointer), so preserve it manually around cpuid. */
    __asm__ __volatile__ ("push  %%ebx\n\t"
                          "cpuid\n\t"
                          "pop   %%ebx\n\t"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ecx", "edx", "memory");
#  else
    __asm__ __volatile__ ("cpuid"
                          : "=a" (xAX)
                          : "0" (xAX)
                          : "ebx", "ecx", "edx", "memory");
#  endif

# elif RT_INLINE_ASM_USES_INTRIN
    int aInfo[4];
    _ReadWriteBarrier();        /* keep the compiler from reordering around the cpuid */
    __cpuid(aInfo, 0);

# else
    __asm
    {
        push  ebx               /* cpuid clobbers ebx; restore it for the compiler */
        xor   eax, eax
        cpuid
        pop   ebx
    }
# endif
}
#endif
1427
/**
 * Virtualization friendly serializing instruction, though more expensive.
 *
 * Builds an interrupt-return frame on the stack and executes IRET to the very
 * next instruction; IRET serializes without the VM-exit CPUID would cause.
 */
#if RT_INLINE_ASM_EXTERNAL
DECLASM(void) ASMSerializeInstructionIRet(void);
#else
DECLINLINE(void) ASMSerializeInstructionIRet(void)
{
# if RT_INLINE_ASM_GNU_STYLE
#  ifdef RT_ARCH_AMD64
    /* 64-bit: iretq pops rip, cs, rflags, rsp and ss, so push all five,
       skipping the 128 byte red zone first. */
    __asm__ __volatile__ ("movq  %%rsp,%%r10\n\t"
                          "subq  $128, %%rsp\n\t" /*redzone*/
                          "mov   %%ss, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "pushq %%r10\n\t"
                          "pushfq\n\t"
                          "movl  %%cs, %%eax\n\t"
                          "pushq %%rax\n\t"
                          "leaq  1f(%%rip), %%rax\n\t"
                          "pushq %%rax\n\t"
                          "iretq\n\t"
                          "1:\n\t"
                          ::: "rax", "r10", "memory");
#  else
    /* 32-bit: iretl pops eip, cs and eflags. */
    __asm__ __volatile__ ("pushfl\n\t"
                          "pushl %%cs\n\t"
                          "pushl $1f\n\t"
                          "iretl\n\t"
                          "1:\n\t"
                          ::: "memory");
#  endif

# else
    __asm
    {
        pushfd
        push    cs
        push    la_ret
        iretd
    la_ret:
    }
# endif
}
#endif
1472
/**
 * Virtualization friendlier serializing instruction, may still cause exits.
 *
 * Uses RDTSCP for its (partially) serializing property; the TSC/AUX results
 * are discarded.
 */
#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
DECLASM(void) ASMSerializeInstructionRdTscp(void);
#else
DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
{
# if RT_INLINE_ASM_GNU_STYLE
    /* rdtscp is not supported by ancient linux build VM of course :-(
       hence the hand-encoded 0f 01 f9 opcode bytes below. */
#  ifdef RT_ARCH_AMD64
    /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
#  else
    /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
    __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
#  endif
# else
#  if RT_INLINE_ASM_USES_INTRIN >= 15
    uint32_t uIgnore;
    _ReadWriteBarrier();        /* keep the compiler from reordering around the rdtscp */
    (void)__rdtscp(&uIgnore);
    (void)uIgnore;
#  else
    __asm
    {
        rdtscp
    }
#  endif
# endif
}
#endif
1505
1506
/**
 * Serialize Instruction.
 *
 * Picks the cheapest suitable variant: IRET when running as a guest (CPUID
 * always causes a VM-exit, see above) or in 16-bit code, CPUID otherwise.
 */
#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
#elif defined(RT_ARCH_SPARC64)
RTDECL(void) ASMSerializeInstruction(void);
#else
# error "Port me"
#endif
1519
1520
/**
 * Memory fence, waits for any pending writes and reads to complete.
 */
DECLINLINE(void) ASMMemoryFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f0 = mfence, hand-encoded so pre-SSE2 assemblers accept it. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_mfence();
# else
    __asm
    {
        _emit   0x0f            /* mfence */
        _emit   0xae
        _emit   0xf0
    }
# endif
#elif ARCH_BITS == 16
    /* Old CPUs / 16-bit: a locked xchg acts as a full barrier; the value
       exchanged is irrelevant, only the bus lock matters. */
    uint16_t volatile u16;
    ASMAtomicXchgU16(&u16, 0);
#else
    uint32_t volatile u32;
    ASMAtomicXchgU32(&u32, 0);
#endif
}
1547
1548
/**
 * Write fence, waits for any pending writes to complete.
 */
DECLINLINE(void) ASMWriteFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae f8 = sfence, hand-encoded so pre-SSE assemblers accept it. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_sfence();
# else
    __asm
    {
        _emit   0x0f            /* sfence */
        _emit   0xae
        _emit   0xf8
    }
# endif
#else
    /* No sfence available: fall back to the full barrier. */
    ASMMemoryFence();
#endif
}
1571
1572
/**
 * Read fence, waits for any pending reads to complete.
 */
DECLINLINE(void) ASMReadFence(void)
{
#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
# if RT_INLINE_ASM_GNU_STYLE
    /* 0f ae e8 = lfence, hand-encoded so pre-SSE2 assemblers accept it. */
    __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
# elif RT_INLINE_ASM_USES_INTRIN
    _mm_lfence();
# else
    __asm
    {
        _emit   0x0f            /* lfence */
        _emit   0xae
        _emit   0xe8
    }
# endif
#else
    /* No lfence available: fall back to the full barrier. */
    ASMMemoryFence();
#endif
}
1595
1596
1597/**
1598 * Atomically reads an unsigned 8-bit value, ordered.
1599 *
1600 * @returns Current *pu8 value
1601 * @param pu8 Pointer to the 8-bit variable to read.
1602 */
1603DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8)
1604{
1605 ASMMemoryFence();
1606 return *pu8; /* byte reads are atomic on x86 */
1607}
1608
1609
1610/**
1611 * Atomically reads an unsigned 8-bit value, unordered.
1612 *
1613 * @returns Current *pu8 value
1614 * @param pu8 Pointer to the 8-bit variable to read.
1615 */
1616DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8)
1617{
1618 return *pu8; /* byte reads are atomic on x86 */
1619}
1620
1621
1622/**
1623 * Atomically reads a signed 8-bit value, ordered.
1624 *
1625 * @returns Current *pi8 value
1626 * @param pi8 Pointer to the 8-bit variable to read.
1627 */
1628DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8)
1629{
1630 ASMMemoryFence();
1631 return *pi8; /* byte reads are atomic on x86 */
1632}
1633
1634
1635/**
1636 * Atomically reads a signed 8-bit value, unordered.
1637 *
1638 * @returns Current *pi8 value
1639 * @param pi8 Pointer to the 8-bit variable to read.
1640 */
1641DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8)
1642{
1643 return *pi8; /* byte reads are atomic on x86 */
1644}
1645
1646
1647/**
1648 * Atomically reads an unsigned 16-bit value, ordered.
1649 *
1650 * @returns Current *pu16 value
1651 * @param pu16 Pointer to the 16-bit variable to read.
1652 */
1653DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16)
1654{
1655 ASMMemoryFence();
1656 Assert(!((uintptr_t)pu16 & 1));
1657 return *pu16;
1658}
1659
1660
1661/**
1662 * Atomically reads an unsigned 16-bit value, unordered.
1663 *
1664 * @returns Current *pu16 value
1665 * @param pu16 Pointer to the 16-bit variable to read.
1666 */
1667DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16)
1668{
1669 Assert(!((uintptr_t)pu16 & 1));
1670 return *pu16;
1671}
1672
1673
1674/**
1675 * Atomically reads a signed 16-bit value, ordered.
1676 *
1677 * @returns Current *pi16 value
1678 * @param pi16 Pointer to the 16-bit variable to read.
1679 */
1680DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16)
1681{
1682 ASMMemoryFence();
1683 Assert(!((uintptr_t)pi16 & 1));
1684 return *pi16;
1685}
1686
1687
1688/**
1689 * Atomically reads a signed 16-bit value, unordered.
1690 *
1691 * @returns Current *pi16 value
1692 * @param pi16 Pointer to the 16-bit variable to read.
1693 */
1694DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16)
1695{
1696 Assert(!((uintptr_t)pi16 & 1));
1697 return *pi16;
1698}
1699
1700
1701/**
1702 * Atomically reads an unsigned 32-bit value, ordered.
1703 *
1704 * @returns Current *pu32 value
1705 * @param pu32 Pointer to the 32-bit variable to read.
1706 */
1707DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32)
1708{
1709 ASMMemoryFence();
1710 Assert(!((uintptr_t)pu32 & 3));
1711#if ARCH_BITS == 16
1712 AssertFailed(); /** @todo 16-bit */
1713#endif
1714 return *pu32;
1715}
1716
1717
1718/**
1719 * Atomically reads an unsigned 32-bit value, unordered.
1720 *
1721 * @returns Current *pu32 value
1722 * @param pu32 Pointer to the 32-bit variable to read.
1723 */
1724DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32)
1725{
1726 Assert(!((uintptr_t)pu32 & 3));
1727#if ARCH_BITS == 16
1728 AssertFailed(); /** @todo 16-bit */
1729#endif
1730 return *pu32;
1731}
1732
1733
1734/**
1735 * Atomically reads a signed 32-bit value, ordered.
1736 *
1737 * @returns Current *pi32 value
1738 * @param pi32 Pointer to the 32-bit variable to read.
1739 */
1740DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32)
1741{
1742 ASMMemoryFence();
1743 Assert(!((uintptr_t)pi32 & 3));
1744#if ARCH_BITS == 16
1745 AssertFailed(); /** @todo 16-bit */
1746#endif
1747 return *pi32;
1748}
1749
1750
1751/**
1752 * Atomically reads a signed 32-bit value, unordered.
1753 *
1754 * @returns Current *pi32 value
1755 * @param pi32 Pointer to the 32-bit variable to read.
1756 */
1757DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32)
1758{
1759 Assert(!((uintptr_t)pi32 & 3));
1760#if ARCH_BITS == 16
1761 AssertFailed(); /** @todo 16-bit */
1762#endif
1763 return *pi32;
1764}
1765
1766
/**
 * Atomically reads an unsigned 64-bit value, ordered.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 *          (On 32-bit x86 the read is done with a cmpxchg8b comparing/writing
 *          0, which always performs a store cycle on the location.)
 * @remarks x86: Requires a Pentium or later.
 */
#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    /* 64-bit host: an aligned quadword read is atomic; just fence first. */
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    __asm__ __volatile__(  "mfence\n\t"
                           "movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mfence
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif*/
    ASMMemoryFence();
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
    /* 32-bit host: use lock cmpxchg8b with expected value 0/new value 0; the
       old content is returned in edx:eax whether the compare matches or not. */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* PIC: ebx is needed by the compiler, so swap it in/out manually. */
    uint32_t u32EBX = 0;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%5)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#   if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64)
#   else
                           "=m" (*pu64)
#   endif
                         : "0" (0ULL),
                           "m" (u32EBX),
                           "c" (0),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1846
1847
/**
 * Atomically reads an unsigned 64-bit value, unordered.
 *
 * @returns Current *pu64 value
 * @param   pu64    Pointer to the 64-bit variable to read.
 *                  The memory pointed to must be writable.
 *
 * @remarks This may fault if the memory is read-only!
 *          (On 32-bit x86 the read is done with a cmpxchg8b, which always
 *          performs a store cycle on the location.)
 * @remarks x86: Requires a Pentium or later.
 */
#if !defined(RT_ARCH_AMD64) \
 && (   (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
     || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64);
#else
DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64)
{
    uint64_t u64;
# ifdef RT_ARCH_AMD64
    /* 64-bit host: an aligned quadword read is atomic; no fence (unordered). */
    Assert(!((uintptr_t)pu64 & 7));
/*#  if RT_INLINE_ASM_GNU_STYLE
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("movq %1, %0\n\t"
                         : "=r" (u64)
                         : "m" (*pu64));
#  else
    __asm
    {
        mov     rdx, [pu64]
        mov     rax, [rdx]
        mov     [u64], rax
    }
#  endif */
    u64 = *pu64;
# else /* !RT_ARCH_AMD64 */
    /* 32-bit host: lock cmpxchg8b with expected/new value 0; the old content
       comes back in edx:eax regardless of the compare outcome. */
#  if RT_INLINE_ASM_GNU_STYLE
#   if defined(PIC) || defined(__PIC__)
    /* PIC: ebx is needed by the compiler, so swap it in/out manually. */
    uint32_t u32EBX = 0;
    uint32_t u32Spill;
    Assert(!((uintptr_t)pu64 & 7));
    __asm__ __volatile__("xor   %%eax,%%eax\n\t"
                         "xor   %%ecx,%%ecx\n\t"
                         "xor   %%edx,%%edx\n\t"
                         "xchgl %%ebx, %3\n\t"
                         "lock; cmpxchg8b (%4)\n\t"
                         "movl %3, %%ebx\n\t"
                         : "=A" (u64),
#   if RT_GNUC_PREREQ(4, 3)
                           "+m" (*pu64),
#   else
                           "=m" (*pu64),
#   endif
                           "=c" (u32Spill)
                         : "m" (u32EBX),
                           "S" (pu64));
#   else /* !PIC */
    __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
                         : "=A" (u64),
                           "+m" (*pu64)
                         : "0" (0ULL),
                           "b" (0),
                           "c" (0));
#   endif
#  else
    Assert(!((uintptr_t)pu64 & 7));
    __asm
    {
        xor     eax, eax
        xor     edx, edx
        mov     edi, pu64
        xor     ecx, ecx
        xor     ebx, ebx
        lock cmpxchg8b [edi]
        mov     dword ptr [u64], eax
        mov     dword ptr [u64 + 4], edx
    }
#  endif
# endif /* !RT_ARCH_AMD64 */
    return u64;
}
#endif
1929
1930
1931/**
1932 * Atomically reads a signed 64-bit value, ordered.
1933 *
1934 * @returns Current *pi64 value
1935 * @param pi64 Pointer to the 64-bit variable to read.
1936 * The memory pointed to must be writable.
1937 *
1938 * @remarks This may fault if the memory is read-only!
1939 * @remarks x86: Requires a Pentium or later.
1940 */
1941DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64)
1942{
1943 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
1944}
1945
1946
1947/**
1948 * Atomically reads a signed 64-bit value, unordered.
1949 *
1950 * @returns Current *pi64 value
1951 * @param pi64 Pointer to the 64-bit variable to read.
1952 * The memory pointed to must be writable.
1953 *
1954 * @remarks This will fault if the memory is read-only!
1955 * @remarks x86: Requires a Pentium or later.
1956 */
1957DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64)
1958{
1959 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
1960}
1961
1962
/**
 * Atomically reads a size_t value, ordered.
 *
 * @returns Current *pcb value
 * @param   pcb     Pointer to the size_t variable to read.
 */
DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb)
{
    /* size_t width follows ARCH_BITS; defer to the correctly sized read. */
#if ARCH_BITS == 64
    return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
#elif ARCH_BITS == 32
    return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
#elif ARCH_BITS == 16
    AssertCompileSize(size_t, 2);
    return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
#else
# error "Unsupported ARCH_BITS value"
#endif
}
1982
1983
1984/**
1985 * Atomically reads a size_t value, unordered.
1986 *
1987 * @returns Current *pcb value
1988 * @param pcb Pointer to the size_t variable to read.
1989 */
1990DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb)
1991{
1992#if ARCH_BITS == 64 || ARCH_BITS == 16
1993 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
1994#elif ARCH_BITS == 32
1995 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
1996#elif ARCH_BITS == 16
1997 AssertCompileSize(size_t, 2);
1998 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
1999#else
2000# error "Unsupported ARCH_BITS value"
2001#endif
2002}
2003
2004
/**
 * Atomically reads a pointer value, ordered.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 *
 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
 *          requires less typing (no casts).
 */
DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv)
{
    /* A data pointer is the native word size here; use the matching read. */
#if ARCH_BITS == 32 || ARCH_BITS == 16
    return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
#elif ARCH_BITS == 64
    return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
#else
# error "ARCH_BITS is bogus"
#endif
}
2024
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 *
 * @remarks On GCC the typed temporary makes the compiler verify that ppv
 *          really points to a Type-compatible pointer.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicReadPtrT(ppv, Type) \
    (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2044
2045
/**
 * Atomically reads a pointer value, unordered.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 *
 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
 *          requires less typing (no casts).
 */
DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv)
{
    /* A data pointer is the native word size here; use the matching read. */
#if ARCH_BITS == 32 || ARCH_BITS == 16
    return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
#elif ARCH_BITS == 64
    return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
#else
# error "ARCH_BITS is bogus"
#endif
}
2065
2066
/**
 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
 *
 * @returns Current *pv value
 * @param   ppv     Pointer to the pointer variable to read.
 * @param   Type    The type of *ppv, sans volatile.
 *
 * @remarks On GCC the typed temporary makes the compiler verify that ppv
 *          really points to a Type-compatible pointer.
 */
#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
# define ASMAtomicUoReadPtrT(ppv, Type) \
    __extension__ \
    ({\
        __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
        Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
        pvTypeChecked; \
     })
#else
# define ASMAtomicUoReadPtrT(ppv, Type) \
    (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
#endif
2086
2087
2088/**
2089 * Atomically reads a boolean value, ordered.
2090 *
2091 * @returns Current *pf value
2092 * @param pf Pointer to the boolean variable to read.
2093 */
2094DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf)
2095{
2096 ASMMemoryFence();
2097 return *pf; /* byte reads are atomic on x86 */
2098}
2099
2100
2101/**
2102 * Atomically reads a boolean value, unordered.
2103 *
2104 * @returns Current *pf value
2105 * @param pf Pointer to the boolean variable to read.
2106 */
2107DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf)
2108{
2109 return *pf; /* byte reads are atomic on x86 */
2110}
2111
2112
2113/**
2114 * Atomically read a typical IPRT handle value, ordered.
2115 *
2116 * @param ph Pointer to the handle variable to read.
2117 * @param phRes Where to store the result.
2118 *
2119 * @remarks This doesn't currently work for all handles (like RTFILE).
2120 */
2121#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2122# define ASMAtomicReadHandle(ph, phRes) \
2123 do { \
2124 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2125 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2126 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2127 } while (0)
2128#elif HC_ARCH_BITS == 64
2129# define ASMAtomicReadHandle(ph, phRes) \
2130 do { \
2131 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2132 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2133 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2134 } while (0)
2135#else
2136# error HC_ARCH_BITS
2137#endif
2138
2139
2140/**
2141 * Atomically read a typical IPRT handle value, unordered.
2142 *
2143 * @param ph Pointer to the handle variable to read.
2144 * @param phRes Where to store the result.
2145 *
2146 * @remarks This doesn't currently work for all handles (like RTFILE).
2147 */
2148#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2149# define ASMAtomicUoReadHandle(ph, phRes) \
2150 do { \
2151 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2152 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2153 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
2154 } while (0)
2155#elif HC_ARCH_BITS == 64
2156# define ASMAtomicUoReadHandle(ph, phRes) \
2157 do { \
2158 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2159 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2160 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
2161 } while (0)
2162#else
2163# error HC_ARCH_BITS
2164#endif
2165
2166
2167/**
2168 * Atomically read a value which size might differ
2169 * between platforms or compilers, ordered.
2170 *
2171 * @param pu Pointer to the variable to read.
2172 * @param puRes Where to store the result.
2173 */
2174#define ASMAtomicReadSize(pu, puRes) \
2175 do { \
2176 switch (sizeof(*(pu))) { \
2177 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2178 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2179 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2180 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2181 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2182 } \
2183 } while (0)
2184
2185
2186/**
2187 * Atomically read a value which size might differ
2188 * between platforms or compilers, unordered.
2189 *
2190 * @param pu Pointer to the variable to read.
2191 * @param puRes Where to store the result.
2192 */
2193#define ASMAtomicUoReadSize(pu, puRes) \
2194 do { \
2195 switch (sizeof(*(pu))) { \
2196 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2197 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2198 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2199 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
2200 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2201 } \
2202 } while (0)
2203
2204
2205/**
2206 * Atomically writes an unsigned 8-bit value, ordered.
2207 *
2208 * @param pu8 Pointer to the 8-bit variable.
2209 * @param u8 The 8-bit value to assign to *pu8.
2210 */
2211DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2212{
2213 ASMAtomicXchgU8(pu8, u8);
2214}
2215
2216
2217/**
2218 * Atomically writes an unsigned 8-bit value, unordered.
2219 *
2220 * @param pu8 Pointer to the 8-bit variable.
2221 * @param u8 The 8-bit value to assign to *pu8.
2222 */
2223DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8)
2224{
2225 *pu8 = u8; /* byte writes are atomic on x86 */
2226}
2227
2228
2229/**
2230 * Atomically writes a signed 8-bit value, ordered.
2231 *
2232 * @param pi8 Pointer to the 8-bit variable to read.
2233 * @param i8 The 8-bit value to assign to *pi8.
2234 */
2235DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2236{
2237 ASMAtomicXchgS8(pi8, i8);
2238}
2239
2240
2241/**
2242 * Atomically writes a signed 8-bit value, unordered.
2243 *
2244 * @param pi8 Pointer to the 8-bit variable to write.
2245 * @param i8 The 8-bit value to assign to *pi8.
2246 */
2247DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8)
2248{
2249 *pi8 = i8; /* byte writes are atomic on x86 */
2250}
2251
2252
2253/**
2254 * Atomically writes an unsigned 16-bit value, ordered.
2255 *
2256 * @param pu16 Pointer to the 16-bit variable to write.
2257 * @param u16 The 16-bit value to assign to *pu16.
2258 */
2259DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2260{
2261 ASMAtomicXchgU16(pu16, u16);
2262}
2263
2264
2265/**
2266 * Atomically writes an unsigned 16-bit value, unordered.
2267 *
2268 * @param pu16 Pointer to the 16-bit variable to write.
2269 * @param u16 The 16-bit value to assign to *pu16.
2270 */
2271DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16)
2272{
2273 Assert(!((uintptr_t)pu16 & 1));
2274 *pu16 = u16;
2275}
2276
2277
2278/**
2279 * Atomically writes a signed 16-bit value, ordered.
2280 *
2281 * @param pi16 Pointer to the 16-bit variable to write.
2282 * @param i16 The 16-bit value to assign to *pi16.
2283 */
2284DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2285{
2286 ASMAtomicXchgS16(pi16, i16);
2287}
2288
2289
2290/**
2291 * Atomically writes a signed 16-bit value, unordered.
2292 *
2293 * @param pi16 Pointer to the 16-bit variable to write.
2294 * @param i16 The 16-bit value to assign to *pi16.
2295 */
2296DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16)
2297{
2298 Assert(!((uintptr_t)pi16 & 1));
2299 *pi16 = i16;
2300}
2301
2302
2303/**
2304 * Atomically writes an unsigned 32-bit value, ordered.
2305 *
2306 * @param pu32 Pointer to the 32-bit variable to write.
2307 * @param u32 The 32-bit value to assign to *pu32.
2308 */
2309DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2310{
2311 ASMAtomicXchgU32(pu32, u32);
2312}
2313
2314
2315/**
2316 * Atomically writes an unsigned 32-bit value, unordered.
2317 *
2318 * @param pu32 Pointer to the 32-bit variable to write.
2319 * @param u32 The 32-bit value to assign to *pu32.
2320 */
2321DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32)
2322{
2323 Assert(!((uintptr_t)pu32 & 3));
2324#if ARCH_BITS >= 32
2325 *pu32 = u32;
2326#else
2327 ASMAtomicXchgU32(pu32, u32);
2328#endif
2329}
2330
2331
2332/**
2333 * Atomically writes a signed 32-bit value, ordered.
2334 *
2335 * @param pi32 Pointer to the 32-bit variable to write.
2336 * @param i32 The 32-bit value to assign to *pi32.
2337 */
2338DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2339{
2340 ASMAtomicXchgS32(pi32, i32);
2341}
2342
2343
2344/**
2345 * Atomically writes a signed 32-bit value, unordered.
2346 *
2347 * @param pi32 Pointer to the 32-bit variable to write.
2348 * @param i32 The 32-bit value to assign to *pi32.
2349 */
2350DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32)
2351{
2352 Assert(!((uintptr_t)pi32 & 3));
2353#if ARCH_BITS >= 32
2354 *pi32 = i32;
2355#else
2356 ASMAtomicXchgS32(pi32, i32);
2357#endif
2358}
2359
2360
2361/**
2362 * Atomically writes an unsigned 64-bit value, ordered.
2363 *
2364 * @param pu64 Pointer to the 64-bit variable to write.
2365 * @param u64 The 64-bit value to assign to *pu64.
2366 */
2367DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2368{
2369 ASMAtomicXchgU64(pu64, u64);
2370}
2371
2372
2373/**
2374 * Atomically writes an unsigned 64-bit value, unordered.
2375 *
2376 * @param pu64 Pointer to the 64-bit variable to write.
2377 * @param u64 The 64-bit value to assign to *pu64.
2378 */
2379DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64)
2380{
2381 Assert(!((uintptr_t)pu64 & 7));
2382#if ARCH_BITS == 64
2383 *pu64 = u64;
2384#else
2385 ASMAtomicXchgU64(pu64, u64);
2386#endif
2387}
2388
2389
2390/**
2391 * Atomically writes a signed 64-bit value, ordered.
2392 *
2393 * @param pi64 Pointer to the 64-bit variable to write.
2394 * @param i64 The 64-bit value to assign to *pi64.
2395 */
2396DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2397{
2398 ASMAtomicXchgS64(pi64, i64);
2399}
2400
2401
2402/**
2403 * Atomically writes a signed 64-bit value, unordered.
2404 *
2405 * @param pi64 Pointer to the 64-bit variable to write.
2406 * @param i64 The 64-bit value to assign to *pi64.
2407 */
2408DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64)
2409{
2410 Assert(!((uintptr_t)pi64 & 7));
2411#if ARCH_BITS == 64
2412 *pi64 = i64;
2413#else
2414 ASMAtomicXchgS64(pi64, i64);
2415#endif
2416}
2417
2418
2419/**
2420 * Atomically writes a size_t value, ordered.
2421 *
2422 * @returns nothing.
2423 * @param pcb Pointer to the size_t variable to write.
2424 * @param cb The value to assign to *pcb.
2425 */
2426DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb)
2427{
2428#if ARCH_BITS == 64
2429 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
2430#elif ARCH_BITS == 32
2431 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
2432#elif ARCH_BITS == 16
2433 AssertCompileSize(size_t, 2);
2434 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
2435#else
2436# error "Unsupported ARCH_BITS value"
2437#endif
2438}
2439
2440
2441/**
2442 * Atomically writes a boolean value, unordered.
2443 *
2444 * @param pf Pointer to the boolean variable to write.
2445 * @param f The boolean value to assign to *pf.
2446 */
2447DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f)
2448{
2449 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
2450}
2451
2452
2453/**
2454 * Atomically writes a boolean value, unordered.
2455 *
2456 * @param pf Pointer to the boolean variable to write.
2457 * @param f The boolean value to assign to *pf.
2458 */
2459DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f)
2460{
2461 *pf = f; /* byte writes are atomic on x86 */
2462}
2463
2464
2465/**
2466 * Atomically writes a pointer value, ordered.
2467 *
2468 * @param ppv Pointer to the pointer variable to write.
2469 * @param pv The pointer value to assign to *ppv.
2470 */
2471DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv)
2472{
2473#if ARCH_BITS == 32 || ARCH_BITS == 16
2474 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
2475#elif ARCH_BITS == 64
2476 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
2477#else
2478# error "ARCH_BITS is bogus"
2479#endif
2480}
2481
2482
2483/**
2484 * Atomically writes a pointer value, ordered.
2485 *
2486 * @param ppv Pointer to the pointer variable to write.
2487 * @param pv The pointer value to assign to *ppv. If NULL use
2488 * ASMAtomicWriteNullPtr or you'll land in trouble.
2489 *
2490 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2491 * NULL.
2492 */
2493#ifdef __GNUC__
2494# define ASMAtomicWritePtr(ppv, pv) \
2495 do \
2496 { \
2497 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
2498 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2499 \
2500 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2501 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2502 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2503 \
2504 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
2505 } while (0)
2506#else
2507# define ASMAtomicWritePtr(ppv, pv) \
2508 do \
2509 { \
2510 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2511 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2512 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2513 \
2514 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
2515 } while (0)
2516#endif
2517
2518
2519/**
2520 * Atomically sets a pointer to NULL, ordered.
2521 *
2522 * @param ppv Pointer to the pointer variable that should be set to NULL.
2523 *
2524 * @remarks This is relatively type safe on GCC platforms.
2525 */
2526#ifdef __GNUC__
2527# define ASMAtomicWriteNullPtr(ppv) \
2528 do \
2529 { \
2530 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2531 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2532 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2533 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
2534 } while (0)
2535#else
2536# define ASMAtomicWriteNullPtr(ppv) \
2537 do \
2538 { \
2539 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2540 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2541 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
2542 } while (0)
2543#endif
2544
2545
2546/**
2547 * Atomically writes a pointer value, unordered.
2548 *
2549 * @returns Current *pv value
2550 * @param ppv Pointer to the pointer variable.
2551 * @param pv The pointer value to assign to *ppv. If NULL use
2552 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2553 *
2554 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2555 * NULL.
2556 */
2557#ifdef __GNUC__
2558# define ASMAtomicUoWritePtr(ppv, pv) \
2559 do \
2560 { \
2561 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2562 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2563 \
2564 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2565 AssertCompile(sizeof(pv) == sizeof(void *)); \
2566 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2567 \
2568 *(ppvTypeChecked) = pvTypeChecked; \
2569 } while (0)
2570#else
2571# define ASMAtomicUoWritePtr(ppv, pv) \
2572 do \
2573 { \
2574 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2575 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
2576 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2577 *(ppv) = pv; \
2578 } while (0)
2579#endif
2580
2581
2582/**
2583 * Atomically sets a pointer to NULL, unordered.
2584 *
2585 * @param ppv Pointer to the pointer variable that should be set to NULL.
2586 *
2587 * @remarks This is relatively type safe on GCC platforms.
2588 */
2589#ifdef __GNUC__
2590# define ASMAtomicUoWriteNullPtr(ppv) \
2591 do \
2592 { \
2593 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2594 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2595 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2596 *(ppvTypeChecked) = NULL; \
2597 } while (0)
2598#else
2599# define ASMAtomicUoWriteNullPtr(ppv) \
2600 do \
2601 { \
2602 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
2603 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2604 *(ppv) = NULL; \
2605 } while (0)
2606#endif
2607
2608
2609/**
2610 * Atomically write a typical IPRT handle value, ordered.
2611 *
2612 * @param ph Pointer to the variable to update.
2613 * @param hNew The value to assign to *ph.
2614 *
2615 * @remarks This doesn't currently work for all handles (like RTFILE).
2616 */
2617#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2618# define ASMAtomicWriteHandle(ph, hNew) \
2619 do { \
2620 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2621 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
2622 } while (0)
2623#elif HC_ARCH_BITS == 64
2624# define ASMAtomicWriteHandle(ph, hNew) \
2625 do { \
2626 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2627 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
2628 } while (0)
2629#else
2630# error HC_ARCH_BITS
2631#endif
2632
2633
2634/**
2635 * Atomically write a typical IPRT handle value, unordered.
2636 *
2637 * @param ph Pointer to the variable to update.
2638 * @param hNew The value to assign to *ph.
2639 *
2640 * @remarks This doesn't currently work for all handles (like RTFILE).
2641 */
2642#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2643# define ASMAtomicUoWriteHandle(ph, hNew) \
2644 do { \
2645 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2646 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
2647 } while (0)
2648#elif HC_ARCH_BITS == 64
2649# define ASMAtomicUoWriteHandle(ph, hNew) \
2650 do { \
2651 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2652 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
2653 } while (0)
2654#else
2655# error HC_ARCH_BITS
2656#endif
2657
2658
2659/**
2660 * Atomically write a value which size might differ
2661 * between platforms or compilers, ordered.
2662 *
2663 * @param pu Pointer to the variable to update.
2664 * @param uNew The value to assign to *pu.
2665 */
2666#define ASMAtomicWriteSize(pu, uNew) \
2667 do { \
2668 switch (sizeof(*(pu))) { \
2669 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2670 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2671 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2672 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2673 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2674 } \
2675 } while (0)
2676
2677/**
2678 * Atomically write a value which size might differ
2679 * between platforms or compilers, unordered.
2680 *
2681 * @param pu Pointer to the variable to update.
2682 * @param uNew The value to assign to *pu.
2683 */
2684#define ASMAtomicUoWriteSize(pu, uNew) \
2685 do { \
2686 switch (sizeof(*(pu))) { \
2687 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
2688 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
2689 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2690 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2691 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2692 } \
2693 } while (0)
2694
2695
2696
2697/**
2698 * Atomically exchanges and adds to a 16-bit value, ordered.
2699 *
2700 * @returns The old value.
2701 * @param pu16 Pointer to the value.
2702 * @param u16 Number to add.
2703 *
2704 * @remarks Currently not implemented, just to make 16-bit code happy.
2705 * @remarks x86: Requires a 486 or later.
2706 */
2707DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16);
2708
2709
2710/**
2711 * Atomically exchanges and adds to a 32-bit value, ordered.
2712 *
2713 * @returns The old value.
2714 * @param pu32 Pointer to the value.
2715 * @param u32 Number to add.
2716 *
2717 * @remarks x86: Requires a 486 or later.
2718 */
2719#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2720DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
2721#else
2722DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2723{
2724# if RT_INLINE_ASM_USES_INTRIN
2725 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
2726 return u32;
2727
2728# elif RT_INLINE_ASM_GNU_STYLE
2729 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2730 : "=r" (u32),
2731 "=m" (*pu32)
2732 : "0" (u32),
2733 "m" (*pu32)
2734 : "memory");
2735 return u32;
2736# else
2737 __asm
2738 {
2739 mov eax, [u32]
2740# ifdef RT_ARCH_AMD64
2741 mov rdx, [pu32]
2742 lock xadd [rdx], eax
2743# else
2744 mov edx, [pu32]
2745 lock xadd [edx], eax
2746# endif
2747 mov [u32], eax
2748 }
2749 return u32;
2750# endif
2751}
2752#endif
2753
2754
2755/**
2756 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2757 *
2758 * @returns The old value.
2759 * @param pi32 Pointer to the value.
2760 * @param i32 Number to add.
2761 *
2762 * @remarks x86: Requires a 486 or later.
2763 */
2764DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2765{
2766 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
2767}
2768
2769
2770/**
2771 * Atomically exchanges and adds to a 64-bit value, ordered.
2772 *
2773 * @returns The old value.
2774 * @param pu64 Pointer to the value.
2775 * @param u64 Number to add.
2776 *
2777 * @remarks x86: Requires a Pentium or later.
2778 */
2779#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2780DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
2781#else
2782DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2783{
2784# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2785 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
2786 return u64;
2787
2788# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2789 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2790 : "=r" (u64),
2791 "=m" (*pu64)
2792 : "0" (u64),
2793 "m" (*pu64)
2794 : "memory");
2795 return u64;
2796# else
2797 uint64_t u64Old;
2798 for (;;)
2799 {
2800 uint64_t u64New;
2801 u64Old = ASMAtomicUoReadU64(pu64);
2802 u64New = u64Old + u64;
2803 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2804 break;
2805 ASMNopPause();
2806 }
2807 return u64Old;
2808# endif
2809}
2810#endif
2811
2812
2813/**
2814 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2815 *
2816 * @returns The old value.
2817 * @param pi64 Pointer to the value.
2818 * @param i64 Number to add.
2819 *
2820 * @remarks x86: Requires a Pentium or later.
2821 */
2822DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2823{
2824 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
2825}
2826
2827
2828/**
2829 * Atomically exchanges and adds to a size_t value, ordered.
2830 *
2831 * @returns The old value.
2832 * @param pcb Pointer to the size_t value.
2833 * @param cb Number to add.
2834 */
2835DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb)
2836{
2837#if ARCH_BITS == 64
2838 AssertCompileSize(size_t, 8);
2839 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
2840#elif ARCH_BITS == 32
2841 AssertCompileSize(size_t, 4);
2842 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
2843#elif ARCH_BITS == 16
2844 AssertCompileSize(size_t, 2);
2845 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
2846#else
2847# error "Unsupported ARCH_BITS value"
2848#endif
2849}
2850
2851
2852/**
2853 * Atomically exchanges and adds a value which size might differ between
2854 * platforms or compilers, ordered.
2855 *
2856 * @param pu Pointer to the variable to update.
2857 * @param uNew The value to add to *pu.
2858 * @param puOld Where to store the old value.
2859 */
2860#define ASMAtomicAddSize(pu, uNew, puOld) \
2861 do { \
2862 switch (sizeof(*(pu))) { \
2863 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2864 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
2865 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2866 } \
2867 } while (0)
2868
2869
2870
2871/**
2872 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2873 *
2874 * @returns The old value.
2875 * @param pu16 Pointer to the value.
2876 * @param u16 Number to subtract.
2877 *
2878 * @remarks x86: Requires a 486 or later.
2879 */
2880DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16)
2881{
2882 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2883}
2884
2885
2886/**
2887 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2888 *
2889 * @returns The old value.
2890 * @param pi16 Pointer to the value.
2891 * @param i16 Number to subtract.
2892 *
2893 * @remarks x86: Requires a 486 or later.
2894 */
2895DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16)
2896{
2897 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
2898}
2899
2900
2901/**
2902 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2903 *
2904 * @returns The old value.
2905 * @param pu32 Pointer to the value.
2906 * @param u32 Number to subtract.
2907 *
2908 * @remarks x86: Requires a 486 or later.
2909 */
2910DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
2911{
2912 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2913}
2914
2915
2916/**
2917 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2918 *
2919 * @returns The old value.
2920 * @param pi32 Pointer to the value.
2921 * @param i32 Number to subtract.
2922 *
2923 * @remarks x86: Requires a 486 or later.
2924 */
2925DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32)
2926{
2927 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
2928}
2929
2930
2931/**
2932 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2933 *
2934 * @returns The old value.
2935 * @param pu64 Pointer to the value.
2936 * @param u64 Number to subtract.
2937 *
2938 * @remarks x86: Requires a Pentium or later.
2939 */
2940DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
2941{
2942 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2943}
2944
2945
2946/**
2947 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2948 *
2949 * @returns The old value.
2950 * @param pi64 Pointer to the value.
2951 * @param i64 Number to subtract.
2952 *
2953 * @remarks x86: Requires a Pentium or later.
2954 */
2955DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64)
2956{
2957 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
2958}
2959
2960
2961/**
2962 * Atomically exchanges and subtracts to a size_t value, ordered.
2963 *
2964 * @returns The old value.
2965 * @param pcb Pointer to the size_t value.
2966 * @param cb Number to subtract.
2967 *
2968 * @remarks x86: Requires a 486 or later.
2969 */
2970DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb)
2971{
2972#if ARCH_BITS == 64
2973 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
2974#elif ARCH_BITS == 32
2975 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
2976#elif ARCH_BITS == 16
2977 AssertCompileSize(size_t, 2);
2978 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
2979#else
2980# error "Unsupported ARCH_BITS value"
2981#endif
2982}
2983
2984
2985/**
2986 * Atomically exchanges and subtracts a value which size might differ between
2987 * platforms or compilers, ordered.
2988 *
2989 * @param pu Pointer to the variable to update.
2990 * @param uNew The value to subtract to *pu.
2991 * @param puOld Where to store the old value.
2992 *
2993 * @remarks x86: Requires a 486 or later.
2994 */
2995#define ASMAtomicSubSize(pu, uNew, puOld) \
2996 do { \
2997 switch (sizeof(*(pu))) { \
2998 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
2999 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3000 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3001 } \
3002 } while (0)
3003
3004
3005
3006/**
3007 * Atomically increment a 16-bit value, ordered.
3008 *
3009 * @returns The new value.
3010 * @param pu16 Pointer to the value to increment.
3011 * @remarks Not implemented. Just to make 16-bit code happy.
3012 *
3013 * @remarks x86: Requires a 486 or later.
3014 */
3015DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16);
3016
3017
3018/**
3019 * Atomically increment a 32-bit value, ordered.
3020 *
3021 * @returns The new value.
3022 * @param pu32 Pointer to the value to increment.
3023 *
3024 * @remarks x86: Requires a 486 or later.
3025 */
3026#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3027DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32);
3028#else
3029DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32)
3030{
3031 uint32_t u32;
3032# if RT_INLINE_ASM_USES_INTRIN
3033 u32 = _InterlockedIncrement((long RT_FAR *)pu32);
3034 return u32;
3035
3036# elif RT_INLINE_ASM_GNU_STYLE
3037 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3038 : "=r" (u32),
3039 "=m" (*pu32)
3040 : "0" (1),
3041 "m" (*pu32)
3042 : "memory");
3043 return u32+1;
3044# else
3045 __asm
3046 {
3047 mov eax, 1
3048# ifdef RT_ARCH_AMD64
3049 mov rdx, [pu32]
3050 lock xadd [rdx], eax
3051# else
3052 mov edx, [pu32]
3053 lock xadd [edx], eax
3054# endif
3055 mov u32, eax
3056 }
3057 return u32+1;
3058# endif
3059}
3060#endif
3061
3062
3063/**
3064 * Atomically increment a signed 32-bit value, ordered.
3065 *
3066 * @returns The new value.
3067 * @param pi32 Pointer to the value to increment.
3068 *
3069 * @remarks x86: Requires a 486 or later.
3070 */
3071DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32)
3072{
3073 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
3074}
3075
3076
3077/**
3078 * Atomically increment a 64-bit value, ordered.
3079 *
3080 * @returns The new value.
3081 * @param pu64 Pointer to the value to increment.
3082 *
3083 * @remarks x86: Requires a Pentium or later.
3084 */
3085#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3086DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64);
3087#else
3088DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64)
3089{
3090# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3091 uint64_t u64;
3092 u64 = _InterlockedIncrement64((__int64 RT_FAR *)pu64);
3093 return u64;
3094
3095# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3096 uint64_t u64;
3097 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3098 : "=r" (u64),
3099 "=m" (*pu64)
3100 : "0" (1),
3101 "m" (*pu64)
3102 : "memory");
3103 return u64 + 1;
3104# else
3105 return ASMAtomicAddU64(pu64, 1) + 1;
3106# endif
3107}
3108#endif
3109
3110
3111/**
3112 * Atomically increment a signed 64-bit value, ordered.
3113 *
3114 * @returns The new value.
3115 * @param pi64 Pointer to the value to increment.
3116 *
3117 * @remarks x86: Requires a Pentium or later.
3118 */
3119DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64)
3120{
3121 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
3122}
3123
3124
3125/**
3126 * Atomically increment a size_t value, ordered.
3127 *
3128 * @returns The new value.
3129 * @param pcb Pointer to the value to increment.
3130 *
3131 * @remarks x86: Requires a 486 or later.
3132 */
3133DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb)
3134{
3135#if ARCH_BITS == 64
3136 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
3137#elif ARCH_BITS == 32
3138 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
3139#elif ARCH_BITS == 16
3140 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
3141#else
3142# error "Unsupported ARCH_BITS value"
3143#endif
3144}
3145
3146
3147
3148/**
3149 * Atomically decrement an unsigned 32-bit value, ordered.
3150 *
3151 * @returns The new value.
3152 * @param pu16 Pointer to the value to decrement.
3153 * @remarks Not implemented. Just to make 16-bit code happy.
3154 *
3155 * @remarks x86: Requires a 486 or later.
3156 */
3157DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16);
3158
3159
3160/**
3161 * Atomically decrement an unsigned 32-bit value, ordered.
3162 *
3163 * @returns The new value.
3164 * @param pu32 Pointer to the value to decrement.
3165 *
3166 * @remarks x86: Requires a 486 or later.
3167 */
3168#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3169DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32);
3170#else
3171DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32)
3172{
3173 uint32_t u32;
3174# if RT_INLINE_ASM_USES_INTRIN
3175 u32 = _InterlockedDecrement((long RT_FAR *)pu32);
3176 return u32;
3177
3178# elif RT_INLINE_ASM_GNU_STYLE
3179 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3180 : "=r" (u32),
3181 "=m" (*pu32)
3182 : "0" (-1),
3183 "m" (*pu32)
3184 : "memory");
3185 return u32-1;
3186# else
3187 __asm
3188 {
3189 mov eax, -1
3190# ifdef RT_ARCH_AMD64
3191 mov rdx, [pu32]
3192 lock xadd [rdx], eax
3193# else
3194 mov edx, [pu32]
3195 lock xadd [edx], eax
3196# endif
3197 mov u32, eax
3198 }
3199 return u32-1;
3200# endif
3201}
3202#endif
3203
3204
3205/**
3206 * Atomically decrement a signed 32-bit value, ordered.
3207 *
3208 * @returns The new value.
3209 * @param pi32 Pointer to the value to decrement.
3210 *
3211 * @remarks x86: Requires a 486 or later.
3212 */
3213DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32)
3214{
3215 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
3216}
3217
3218
3219/**
3220 * Atomically decrement an unsigned 64-bit value, ordered.
3221 *
3222 * @returns The new value.
3223 * @param pu64 Pointer to the value to decrement.
3224 *
3225 * @remarks x86: Requires a Pentium or later.
3226 */
3227#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3228DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64);
3229#else
3230DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64)
3231{
3232# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3233 uint64_t u64 = _InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
3234 return u64;
3235
3236# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3237 uint64_t u64;
3238 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3239 : "=r" (u64),
3240 "=m" (*pu64)
3241 : "0" (~(uint64_t)0),
3242 "m" (*pu64)
3243 : "memory");
3244 return u64-1;
3245# else
3246 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3247# endif
3248}
3249#endif
3250
3251
3252/**
3253 * Atomically decrement a signed 64-bit value, ordered.
3254 *
3255 * @returns The new value.
3256 * @param pi64 Pointer to the value to decrement.
3257 *
3258 * @remarks x86: Requires a Pentium or later.
3259 */
3260DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64)
3261{
3262 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
3263}
3264
3265
3266/**
3267 * Atomically decrement a size_t value, ordered.
3268 *
3269 * @returns The new value.
3270 * @param pcb Pointer to the value to decrement.
3271 *
3272 * @remarks x86: Requires a 486 or later.
3273 */
3274DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb)
3275{
3276#if ARCH_BITS == 64
3277 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
3278#elif ARCH_BITS == 32
3279 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
3280#elif ARCH_BITS == 16
3281 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
3282#else
3283# error "Unsupported ARCH_BITS value"
3284#endif
3285}
3286
3287
3288/**
3289 * Atomically Or an unsigned 32-bit value, ordered.
3290 *
3291 * @param pu32 Pointer to the pointer variable to OR u32 with.
3292 * @param u32 The value to OR *pu32 with.
3293 *
3294 * @remarks x86: Requires a 386 or later.
3295 */
3296#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3297DECLASM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3298#else
3299DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3300{
3301# if RT_INLINE_ASM_USES_INTRIN
3302 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
3303
3304# elif RT_INLINE_ASM_GNU_STYLE
3305 __asm__ __volatile__("lock; orl %1, %0\n\t"
3306 : "=m" (*pu32)
3307 : "ir" (u32),
3308 "m" (*pu32));
3309# else
3310 __asm
3311 {
3312 mov eax, [u32]
3313# ifdef RT_ARCH_AMD64
3314 mov rdx, [pu32]
3315 lock or [rdx], eax
3316# else
3317 mov edx, [pu32]
3318 lock or [edx], eax
3319# endif
3320 }
3321# endif
3322}
3323#endif
3324
3325
3326/**
3327 * Atomically Or a signed 32-bit value, ordered.
3328 *
3329 * @param pi32 Pointer to the pointer variable to OR u32 with.
3330 * @param i32 The value to OR *pu32 with.
3331 *
3332 * @remarks x86: Requires a 386 or later.
3333 */
3334DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3335{
3336 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3337}
3338
3339
3340/**
3341 * Atomically Or an unsigned 64-bit value, ordered.
3342 *
3343 * @param pu64 Pointer to the pointer variable to OR u64 with.
3344 * @param u64 The value to OR *pu64 with.
3345 *
3346 * @remarks x86: Requires a Pentium or later.
3347 */
3348#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3349DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3350#else
3351DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3352{
3353# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3354 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
3355
3356# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3357 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3358 : "=m" (*pu64)
3359 : "r" (u64),
3360 "m" (*pu64));
3361# else
3362 for (;;)
3363 {
3364 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3365 uint64_t u64New = u64Old | u64;
3366 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3367 break;
3368 ASMNopPause();
3369 }
3370# endif
3371}
3372#endif
3373
3374
3375/**
3376 * Atomically Or a signed 64-bit value, ordered.
3377 *
3378 * @param pi64 Pointer to the pointer variable to OR u64 with.
3379 * @param i64 The value to OR *pu64 with.
3380 *
3381 * @remarks x86: Requires a Pentium or later.
3382 */
3383DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3384{
3385 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3386}
3387
3388
3389/**
3390 * Atomically And an unsigned 32-bit value, ordered.
3391 *
3392 * @param pu32 Pointer to the pointer variable to AND u32 with.
3393 * @param u32 The value to AND *pu32 with.
3394 *
3395 * @remarks x86: Requires a 386 or later.
3396 */
3397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3398DECLASM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3399#else
3400DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3401{
3402# if RT_INLINE_ASM_USES_INTRIN
3403 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
3404
3405# elif RT_INLINE_ASM_GNU_STYLE
3406 __asm__ __volatile__("lock; andl %1, %0\n\t"
3407 : "=m" (*pu32)
3408 : "ir" (u32),
3409 "m" (*pu32));
3410# else
3411 __asm
3412 {
3413 mov eax, [u32]
3414# ifdef RT_ARCH_AMD64
3415 mov rdx, [pu32]
3416 lock and [rdx], eax
3417# else
3418 mov edx, [pu32]
3419 lock and [edx], eax
3420# endif
3421 }
3422# endif
3423}
3424#endif
3425
3426
3427/**
3428 * Atomically And a signed 32-bit value, ordered.
3429 *
3430 * @param pi32 Pointer to the pointer variable to AND i32 with.
3431 * @param i32 The value to AND *pi32 with.
3432 *
3433 * @remarks x86: Requires a 386 or later.
3434 */
3435DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3436{
3437 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3438}
3439
3440
3441/**
3442 * Atomically And an unsigned 64-bit value, ordered.
3443 *
3444 * @param pu64 Pointer to the pointer variable to AND u64 with.
3445 * @param u64 The value to AND *pu64 with.
3446 *
3447 * @remarks x86: Requires a Pentium or later.
3448 */
3449#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3450DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3451#else
3452DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3453{
3454# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3455 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
3456
3457# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3458 __asm__ __volatile__("lock; andq %1, %0\n\t"
3459 : "=m" (*pu64)
3460 : "r" (u64),
3461 "m" (*pu64));
3462# else
3463 for (;;)
3464 {
3465 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3466 uint64_t u64New = u64Old & u64;
3467 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3468 break;
3469 ASMNopPause();
3470 }
3471# endif
3472}
3473#endif
3474
3475
3476/**
3477 * Atomically And a signed 64-bit value, ordered.
3478 *
3479 * @param pi64 Pointer to the pointer variable to AND i64 with.
3480 * @param i64 The value to AND *pi64 with.
3481 *
3482 * @remarks x86: Requires a Pentium or later.
3483 */
3484DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3485{
3486 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3487}
3488
3489
3490/**
3491 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3492 *
3493 * @param pu32 Pointer to the pointer variable to OR u32 with.
3494 * @param u32 The value to OR *pu32 with.
3495 *
3496 * @remarks x86: Requires a 386 or later.
3497 */
3498#if RT_INLINE_ASM_EXTERNAL
3499DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3500#else
3501DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3502{
3503# if RT_INLINE_ASM_GNU_STYLE
3504 __asm__ __volatile__("orl %1, %0\n\t"
3505 : "=m" (*pu32)
3506 : "ir" (u32),
3507 "m" (*pu32));
3508# else
3509 __asm
3510 {
3511 mov eax, [u32]
3512# ifdef RT_ARCH_AMD64
3513 mov rdx, [pu32]
3514 or [rdx], eax
3515# else
3516 mov edx, [pu32]
3517 or [edx], eax
3518# endif
3519 }
3520# endif
3521}
3522#endif
3523
3524
3525/**
3526 * Atomically OR a signed 32-bit value, unordered.
3527 *
3528 * @param pi32 Pointer to the pointer variable to OR u32 with.
3529 * @param i32 The value to OR *pu32 with.
3530 *
3531 * @remarks x86: Requires a 386 or later.
3532 */
3533DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3534{
3535 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, i32);
3536}
3537
3538
3539/**
3540 * Atomically OR an unsigned 64-bit value, unordered.
3541 *
3542 * @param pu64 Pointer to the pointer variable to OR u64 with.
3543 * @param u64 The value to OR *pu64 with.
3544 *
3545 * @remarks x86: Requires a Pentium or later.
3546 */
3547#if RT_INLINE_ASM_EXTERNAL
3548DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3549#else
3550DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3551{
3552# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3553 __asm__ __volatile__("orq %1, %q0\n\t"
3554 : "=m" (*pu64)
3555 : "r" (u64),
3556 "m" (*pu64));
3557# else
3558 for (;;)
3559 {
3560 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3561 uint64_t u64New = u64Old | u64;
3562 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3563 break;
3564 ASMNopPause();
3565 }
3566# endif
3567}
3568#endif
3569
3570
3571/**
3572 * Atomically Or a signed 64-bit value, unordered.
3573 *
3574 * @param pi64 Pointer to the pointer variable to OR u64 with.
3575 * @param i64 The value to OR *pu64 with.
3576 *
3577 * @remarks x86: Requires a Pentium or later.
3578 */
3579DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3580{
3581 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, i64);
3582}
3583
3584
3585/**
3586 * Atomically And an unsigned 32-bit value, unordered.
3587 *
3588 * @param pu32 Pointer to the pointer variable to AND u32 with.
3589 * @param u32 The value to AND *pu32 with.
3590 *
3591 * @remarks x86: Requires a 386 or later.
3592 */
3593#if RT_INLINE_ASM_EXTERNAL
3594DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32);
3595#else
3596DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32)
3597{
3598# if RT_INLINE_ASM_GNU_STYLE
3599 __asm__ __volatile__("andl %1, %0\n\t"
3600 : "=m" (*pu32)
3601 : "ir" (u32),
3602 "m" (*pu32));
3603# else
3604 __asm
3605 {
3606 mov eax, [u32]
3607# ifdef RT_ARCH_AMD64
3608 mov rdx, [pu32]
3609 and [rdx], eax
3610# else
3611 mov edx, [pu32]
3612 and [edx], eax
3613# endif
3614 }
3615# endif
3616}
3617#endif
3618
3619
3620/**
3621 * Atomically And a signed 32-bit value, unordered.
3622 *
3623 * @param pi32 Pointer to the pointer variable to AND i32 with.
3624 * @param i32 The value to AND *pi32 with.
3625 *
3626 * @remarks x86: Requires a 386 or later.
3627 */
3628DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32)
3629{
3630 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3631}
3632
3633
3634/**
3635 * Atomically And an unsigned 64-bit value, unordered.
3636 *
3637 * @param pu64 Pointer to the pointer variable to AND u64 with.
3638 * @param u64 The value to AND *pu64 with.
3639 *
3640 * @remarks x86: Requires a Pentium or later.
3641 */
3642#if RT_INLINE_ASM_EXTERNAL
3643DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64);
3644#else
3645DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64)
3646{
3647# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3648 __asm__ __volatile__("andq %1, %0\n\t"
3649 : "=m" (*pu64)
3650 : "r" (u64),
3651 "m" (*pu64));
3652# else
3653 for (;;)
3654 {
3655 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3656 uint64_t u64New = u64Old & u64;
3657 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3658 break;
3659 ASMNopPause();
3660 }
3661# endif
3662}
3663#endif
3664
3665
3666/**
3667 * Atomically And a signed 64-bit value, unordered.
3668 *
3669 * @param pi64 Pointer to the pointer variable to AND i64 with.
3670 * @param i64 The value to AND *pi64 with.
3671 *
3672 * @remarks x86: Requires a Pentium or later.
3673 */
3674DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64)
3675{
3676 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3677}
3678
3679
3680/**
3681 * Atomically increment an unsigned 32-bit value, unordered.
3682 *
3683 * @returns the new value.
3684 * @param pu32 Pointer to the variable to increment.
3685 *
3686 * @remarks x86: Requires a 486 or later.
3687 */
3688#if RT_INLINE_ASM_EXTERNAL
3689DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32);
3690#else
3691DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32)
3692{
3693 uint32_t u32;
3694# if RT_INLINE_ASM_GNU_STYLE
3695 __asm__ __volatile__("xaddl %0, %1\n\t"
3696 : "=r" (u32),
3697 "=m" (*pu32)
3698 : "0" (1),
3699 "m" (*pu32)
3700 : "memory");
3701 return u32 + 1;
3702# else
3703 __asm
3704 {
3705 mov eax, 1
3706# ifdef RT_ARCH_AMD64
3707 mov rdx, [pu32]
3708 xadd [rdx], eax
3709# else
3710 mov edx, [pu32]
3711 xadd [edx], eax
3712# endif
3713 mov u32, eax
3714 }
3715 return u32 + 1;
3716# endif
3717}
3718#endif
3719
3720
3721/**
3722 * Atomically decrement an unsigned 32-bit value, unordered.
3723 *
3724 * @returns the new value.
3725 * @param pu32 Pointer to the variable to decrement.
3726 *
3727 * @remarks x86: Requires a 486 or later.
3728 */
3729#if RT_INLINE_ASM_EXTERNAL
3730DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32);
3731#else
3732DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32)
3733{
3734 uint32_t u32;
3735# if RT_INLINE_ASM_GNU_STYLE
3736 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3737 : "=r" (u32),
3738 "=m" (*pu32)
3739 : "0" (-1),
3740 "m" (*pu32)
3741 : "memory");
3742 return u32 - 1;
3743# else
3744 __asm
3745 {
3746 mov eax, -1
3747# ifdef RT_ARCH_AMD64
3748 mov rdx, [pu32]
3749 xadd [rdx], eax
3750# else
3751 mov edx, [pu32]
3752 xadd [edx], eax
3753# endif
3754 mov u32, eax
3755 }
3756 return u32 - 1;
3757# endif
3758}
3759#endif
3760
3761
/** @def RT_ASM_PAGE_SIZE
 * Page size used by the inline memory helpers below (ASMMemZeroPage,
 * ASMMemIsZeroPage, ASMProbeReadBuffer).
 * We try avoid dragging in iprt/param.h here.
 * @internal
 */
#if defined(RT_ARCH_SPARC64)
/* SPARC64 uses 0x2000 (8 KiB) pages. */
# define RT_ASM_PAGE_SIZE   0x2000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x2000
#   error "PAGE_SIZE is not 0x2000!"
#  endif
# endif
#else
/* All other supported architectures assume 0x1000 (4 KiB) pages. */
# define RT_ASM_PAGE_SIZE   0x1000
# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
#  if PAGE_SIZE != 0x1000
#   error "PAGE_SIZE is not 0x1000!"
#  endif
# endif
#endif
3781
3782/**
3783 * Zeros a 4K memory page.
3784 *
3785 * @param pv Pointer to the memory block. This must be page aligned.
3786 */
3787#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3788DECLASM(void) ASMMemZeroPage(volatile void RT_FAR *pv);
3789# else
3790DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv)
3791{
3792# if RT_INLINE_ASM_USES_INTRIN
3793# ifdef RT_ARCH_AMD64
3794 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3795# else
3796 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3797# endif
3798
3799# elif RT_INLINE_ASM_GNU_STYLE
3800 RTCCUINTREG uDummy;
3801# ifdef RT_ARCH_AMD64
3802 __asm__ __volatile__("rep stosq"
3803 : "=D" (pv),
3804 "=c" (uDummy)
3805 : "0" (pv),
3806 "c" (RT_ASM_PAGE_SIZE >> 3),
3807 "a" (0)
3808 : "memory");
3809# else
3810 __asm__ __volatile__("rep stosl"
3811 : "=D" (pv),
3812 "=c" (uDummy)
3813 : "0" (pv),
3814 "c" (RT_ASM_PAGE_SIZE >> 2),
3815 "a" (0)
3816 : "memory");
3817# endif
3818# else
3819 __asm
3820 {
3821# ifdef RT_ARCH_AMD64
3822 xor rax, rax
3823 mov ecx, 0200h
3824 mov rdi, [pv]
3825 rep stosq
3826# else
3827 xor eax, eax
3828 mov ecx, 0400h
3829 mov edi, [pv]
3830 rep stosd
3831# endif
3832 }
3833# endif
3834}
3835# endif
3836
3837
3838/**
3839 * Zeros a memory block with a 32-bit aligned size.
3840 *
3841 * @param pv Pointer to the memory block.
3842 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3843 */
3844#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3845DECLASM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb);
3846#else
3847DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb)
3848{
3849# if RT_INLINE_ASM_USES_INTRIN
3850# ifdef RT_ARCH_AMD64
3851 if (!(cb & 7))
3852 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
3853 else
3854# endif
3855 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
3856
3857# elif RT_INLINE_ASM_GNU_STYLE
3858 __asm__ __volatile__("rep stosl"
3859 : "=D" (pv),
3860 "=c" (cb)
3861 : "0" (pv),
3862 "1" (cb >> 2),
3863 "a" (0)
3864 : "memory");
3865# else
3866 __asm
3867 {
3868 xor eax, eax
3869# ifdef RT_ARCH_AMD64
3870 mov rcx, [cb]
3871 shr rcx, 2
3872 mov rdi, [pv]
3873# else
3874 mov ecx, [cb]
3875 shr ecx, 2
3876 mov edi, [pv]
3877# endif
3878 rep stosd
3879 }
3880# endif
3881}
3882#endif
3883
3884
3885/**
3886 * Fills a memory block with a 32-bit aligned size.
3887 *
3888 * @param pv Pointer to the memory block.
3889 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3890 * @param u32 The value to fill with.
3891 */
3892#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3893DECLASM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32);
3894#else
3895DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32)
3896{
3897# if RT_INLINE_ASM_USES_INTRIN
3898# ifdef RT_ARCH_AMD64
3899 if (!(cb & 7))
3900 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3901 else
3902# endif
3903 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
3904
3905# elif RT_INLINE_ASM_GNU_STYLE
3906 __asm__ __volatile__("rep stosl"
3907 : "=D" (pv),
3908 "=c" (cb)
3909 : "0" (pv),
3910 "1" (cb >> 2),
3911 "a" (u32)
3912 : "memory");
3913# else
3914 __asm
3915 {
3916# ifdef RT_ARCH_AMD64
3917 mov rcx, [cb]
3918 shr rcx, 2
3919 mov rdi, [pv]
3920# else
3921 mov ecx, [cb]
3922 shr ecx, 2
3923 mov edi, [pv]
3924# endif
3925 mov eax, [u32]
3926 rep stosd
3927 }
3928# endif
3929}
3930#endif
3931
3932
3933/**
3934 * Checks if a memory block is all zeros.
3935 *
3936 * @returns Pointer to the first non-zero byte.
3937 * @returns NULL if all zero.
3938 *
3939 * @param pv Pointer to the memory block.
3940 * @param cb Number of bytes in the block.
3941 *
3942 * @todo Fix name, it is a predicate function but it's not returning boolean!
3943 */
3944#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
3945 && !defined(RT_ARCH_SPARC64) \
3946 && !defined(RT_ARCH_SPARC)
3947DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb);
3948#else
3949DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb)
3950{
3951 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
3952 for (; cb; cb--, pb++)
3953 if (RT_LIKELY(*pb == 0))
3954 { /* likely */ }
3955 else
3956 return (void RT_FAR *)pb;
3957 return NULL;
3958}
3959#endif
3960
3961
3962/**
3963 * Checks if a memory block is all zeros.
3964 *
3965 * @returns true if zero, false if not.
3966 *
3967 * @param pv Pointer to the memory block.
3968 * @param cb Number of bytes in the block.
3969 *
3970 * @sa ASMMemFirstNonZero
3971 */
3972DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb)
3973{
3974 return ASMMemFirstNonZero(pv, cb) == NULL;
3975}
3976
3977
3978/**
3979 * Checks if a memory page is all zeros.
3980 *
3981 * @returns true / false.
3982 *
3983 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3984 * boundary
3985 */
3986DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage)
3987{
3988# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3989 union { RTCCUINTREG r; bool f; } uAX;
3990 RTCCUINTREG xCX, xDI;
3991 Assert(!((uintptr_t)pvPage & 15));
3992 __asm__ __volatile__("repe; "
3993# ifdef RT_ARCH_AMD64
3994 "scasq\n\t"
3995# else
3996 "scasl\n\t"
3997# endif
3998 "setnc %%al\n\t"
3999 : "=&c" (xCX),
4000 "=&D" (xDI),
4001 "=&a" (uAX.r)
4002 : "mr" (pvPage),
4003# ifdef RT_ARCH_AMD64
4004 "0" (RT_ASM_PAGE_SIZE/8),
4005# else
4006 "0" (RT_ASM_PAGE_SIZE/4),
4007# endif
4008 "1" (pvPage),
4009 "2" (0));
4010 return uAX.f;
4011# else
4012 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
4013 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
4014 Assert(!((uintptr_t)pvPage & 15));
4015 for (;;)
4016 {
4017 if (puPtr[0]) return false;
4018 if (puPtr[4]) return false;
4019
4020 if (puPtr[2]) return false;
4021 if (puPtr[6]) return false;
4022
4023 if (puPtr[1]) return false;
4024 if (puPtr[5]) return false;
4025
4026 if (puPtr[3]) return false;
4027 if (puPtr[7]) return false;
4028
4029 if (!--cLeft)
4030 return true;
4031 puPtr += 8;
4032 }
4033# endif
4034}
4035
4036
4037/**
4038 * Checks if a memory block is filled with the specified byte, returning the
4039 * first mismatch.
4040 *
4041 * This is sort of an inverted memchr.
4042 *
4043 * @returns Pointer to the byte which doesn't equal u8.
4044 * @returns NULL if all equal to u8.
4045 *
4046 * @param pv Pointer to the memory block.
4047 * @param cb Number of bytes in the block.
4048 * @param u8 The value it's supposed to be filled with.
4049 *
4050 * @remarks No alignment requirements.
4051 */
4052#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
4053 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL)) \
4054 && !defined(RT_ARCH_SPARC64) \
4055 && !defined(RT_ARCH_SPARC)
4056DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8);
4057#else
4058DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4059{
4060 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
4061 for (; cb; cb--, pb++)
4062 if (RT_LIKELY(*pb == u8))
4063 { /* likely */ }
4064 else
4065 return (void *)pb;
4066 return NULL;
4067}
4068#endif
4069
4070
4071/**
4072 * Checks if a memory block is filled with the specified byte.
4073 *
4074 * @returns true if all matching, false if not.
4075 *
4076 * @param pv Pointer to the memory block.
4077 * @param cb Number of bytes in the block.
4078 * @param u8 The value it's supposed to be filled with.
4079 *
4080 * @remarks No alignment requirements.
4081 */
4082DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8)
4083{
4084 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
4085}
4086
4087
4088/**
4089 * Checks if a memory block is filled with the specified 32-bit value.
4090 *
4091 * This is a sort of inverted memchr.
4092 *
4093 * @returns Pointer to the first value which doesn't equal u32.
4094 * @returns NULL if all equal to u32.
4095 *
4096 * @param pv Pointer to the memory block.
4097 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
4098 * @param u32 The value it's supposed to be filled with.
4099 */
4100DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32)
4101{
4102/** @todo rewrite this in inline assembly? */
4103 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
4104 for (; cb; cb -= 4, pu32++)
4105 if (RT_LIKELY(*pu32 == u32))
4106 { /* likely */ }
4107 else
4108 return (uint32_t RT_FAR *)pu32;
4109 return NULL;
4110}
4111
4112
4113/**
4114 * Probes a byte pointer for read access.
4115 *
4116 * While the function will not fault if the byte is not read accessible,
4117 * the idea is to do this in a safe place like before acquiring locks
4118 * and such like.
4119 *
4120 * Also, this functions guarantees that an eager compiler is not going
4121 * to optimize the probing away.
4122 *
4123 * @param pvByte Pointer to the byte.
4124 */
4125#if RT_INLINE_ASM_EXTERNAL
4126DECLASM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte);
4127#else
4128DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte)
4129{
4130 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4131 uint8_t u8;
4132# if RT_INLINE_ASM_GNU_STYLE
4133 __asm__ __volatile__("movb (%1), %0\n\t"
4134 : "=r" (u8)
4135 : "r" (pvByte));
4136# else
4137 __asm
4138 {
4139# ifdef RT_ARCH_AMD64
4140 mov rax, [pvByte]
4141 mov al, [rax]
4142# else
4143 mov eax, [pvByte]
4144 mov al, [eax]
4145# endif
4146 mov [u8], al
4147 }
4148# endif
4149 return u8;
4150}
4151#endif
4152
4153/**
4154 * Probes a buffer for read access page by page.
4155 *
4156 * While the function will fault if the buffer is not fully read
4157 * accessible, the idea is to do this in a safe place like before
4158 * acquiring locks and such like.
4159 *
4160 * Also, this functions guarantees that an eager compiler is not going
4161 * to optimize the probing away.
4162 *
4163 * @param pvBuf Pointer to the buffer.
4164 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4165 */
4166DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf)
4167{
4168 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4169 /* the first byte */
4170 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
4171 ASMProbeReadByte(pu8);
4172
4173 /* the pages in between pages. */
4174 while (cbBuf > RT_ASM_PAGE_SIZE)
4175 {
4176 ASMProbeReadByte(pu8);
4177 cbBuf -= RT_ASM_PAGE_SIZE;
4178 pu8 += RT_ASM_PAGE_SIZE;
4179 }
4180
4181 /* the last byte */
4182 ASMProbeReadByte(pu8 + cbBuf - 1);
4183}
4184
4185
4186
4187/** @defgroup grp_inline_bits Bit Operations
4188 * @{
4189 */
4190
4191
4192/**
4193 * Sets a bit in a bitmap.
4194 *
4195 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4196 * @param iBit The bit to set.
4197 *
4198 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4199 * However, doing so will yield better performance as well as avoiding
4200 * traps accessing the last bits in the bitmap.
4201 */
4202#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4203DECLASM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4204#else
4205DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4206{
4207# if RT_INLINE_ASM_USES_INTRIN
4208 _bittestandset((long RT_FAR *)pvBitmap, iBit);
4209
4210# elif RT_INLINE_ASM_GNU_STYLE
4211 __asm__ __volatile__("btsl %1, %0"
4212 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4213 : "Ir" (iBit),
4214 "m" (*(volatile long RT_FAR *)pvBitmap)
4215 : "memory");
4216# else
4217 __asm
4218 {
4219# ifdef RT_ARCH_AMD64
4220 mov rax, [pvBitmap]
4221 mov edx, [iBit]
4222 bts [rax], edx
4223# else
4224 mov eax, [pvBitmap]
4225 mov edx, [iBit]
4226 bts [eax], edx
4227# endif
4228 }
4229# endif
4230}
4231#endif
4232
4233
4234/**
4235 * Atomically sets a bit in a bitmap, ordered.
4236 *
4237 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4238 * the memory access isn't atomic!
4239 * @param iBit The bit to set.
4240 *
4241 * @remarks x86: Requires a 386 or later.
4242 */
4243#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4244DECLASM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4245#else
4246DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4247{
4248 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4249# if RT_INLINE_ASM_USES_INTRIN
4250 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4251# elif RT_INLINE_ASM_GNU_STYLE
4252 __asm__ __volatile__("lock; btsl %1, %0"
4253 : "=m" (*(volatile long *)pvBitmap)
4254 : "Ir" (iBit),
4255 "m" (*(volatile long *)pvBitmap)
4256 : "memory");
4257# else
4258 __asm
4259 {
4260# ifdef RT_ARCH_AMD64
4261 mov rax, [pvBitmap]
4262 mov edx, [iBit]
4263 lock bts [rax], edx
4264# else
4265 mov eax, [pvBitmap]
4266 mov edx, [iBit]
4267 lock bts [eax], edx
4268# endif
4269 }
4270# endif
4271}
4272#endif
4273
4274
4275/**
4276 * Clears a bit in a bitmap.
4277 *
4278 * @param pvBitmap Pointer to the bitmap.
4279 * @param iBit The bit to clear.
4280 *
4281 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4282 * However, doing so will yield better performance as well as avoiding
4283 * traps accessing the last bits in the bitmap.
4284 */
4285#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4286DECLASM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4287#else
4288DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4289{
4290# if RT_INLINE_ASM_USES_INTRIN
4291 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4292
4293# elif RT_INLINE_ASM_GNU_STYLE
4294 __asm__ __volatile__("btrl %1, %0"
4295 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4296 : "Ir" (iBit),
4297 "m" (*(volatile long RT_FAR *)pvBitmap)
4298 : "memory");
4299# else
4300 __asm
4301 {
4302# ifdef RT_ARCH_AMD64
4303 mov rax, [pvBitmap]
4304 mov edx, [iBit]
4305 btr [rax], edx
4306# else
4307 mov eax, [pvBitmap]
4308 mov edx, [iBit]
4309 btr [eax], edx
4310# endif
4311 }
4312# endif
4313}
4314#endif
4315
4316
4317/**
4318 * Atomically clears a bit in a bitmap, ordered.
4319 *
4320 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4321 * the memory access isn't atomic!
4322 * @param iBit The bit to toggle set.
4323 *
4324 * @remarks No memory barrier, take care on smp.
4325 * @remarks x86: Requires a 386 or later.
4326 */
4327#if RT_INLINE_ASM_EXTERNAL
4328DECLASM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4329#else
4330DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4331{
4332 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4333# if RT_INLINE_ASM_GNU_STYLE
4334 __asm__ __volatile__("lock; btrl %1, %0"
4335 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4336 : "Ir" (iBit),
4337 "m" (*(volatile long RT_FAR *)pvBitmap)
4338 : "memory");
4339# else
4340 __asm
4341 {
4342# ifdef RT_ARCH_AMD64
4343 mov rax, [pvBitmap]
4344 mov edx, [iBit]
4345 lock btr [rax], edx
4346# else
4347 mov eax, [pvBitmap]
4348 mov edx, [iBit]
4349 lock btr [eax], edx
4350# endif
4351 }
4352# endif
4353}
4354#endif
4355
4356
4357/**
4358 * Toggles a bit in a bitmap.
4359 *
4360 * @param pvBitmap Pointer to the bitmap.
4361 * @param iBit The bit to toggle.
4362 *
4363 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4364 * However, doing so will yield better performance as well as avoiding
4365 * traps accessing the last bits in the bitmap.
4366 */
4367#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4368DECLASM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4369#else
4370DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4371{
4372# if RT_INLINE_ASM_USES_INTRIN
4373 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4374# elif RT_INLINE_ASM_GNU_STYLE
4375 __asm__ __volatile__("btcl %1, %0"
4376 : "=m" (*(volatile long *)pvBitmap)
4377 : "Ir" (iBit),
4378 "m" (*(volatile long *)pvBitmap)
4379 : "memory");
4380# else
4381 __asm
4382 {
4383# ifdef RT_ARCH_AMD64
4384 mov rax, [pvBitmap]
4385 mov edx, [iBit]
4386 btc [rax], edx
4387# else
4388 mov eax, [pvBitmap]
4389 mov edx, [iBit]
4390 btc [eax], edx
4391# endif
4392 }
4393# endif
4394}
4395#endif
4396
4397
4398/**
4399 * Atomically toggles a bit in a bitmap, ordered.
4400 *
4401 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4402 * the memory access isn't atomic!
4403 * @param iBit The bit to test and set.
4404 *
4405 * @remarks x86: Requires a 386 or later.
4406 */
4407#if RT_INLINE_ASM_EXTERNAL
4408DECLASM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4409#else
4410DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4411{
4412 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4413# if RT_INLINE_ASM_GNU_STYLE
4414 __asm__ __volatile__("lock; btcl %1, %0"
4415 : "=m" (*(volatile long RT_FAR *)pvBitmap)
4416 : "Ir" (iBit),
4417 "m" (*(volatile long RT_FAR *)pvBitmap)
4418 : "memory");
4419# else
4420 __asm
4421 {
4422# ifdef RT_ARCH_AMD64
4423 mov rax, [pvBitmap]
4424 mov edx, [iBit]
4425 lock btc [rax], edx
4426# else
4427 mov eax, [pvBitmap]
4428 mov edx, [iBit]
4429 lock btc [eax], edx
4430# endif
4431 }
4432# endif
4433}
4434#endif
4435
4436
4437/**
4438 * Tests and sets a bit in a bitmap.
4439 *
4440 * @returns true if the bit was set.
4441 * @returns false if the bit was clear.
4442 *
4443 * @param pvBitmap Pointer to the bitmap.
4444 * @param iBit The bit to test and set.
4445 *
4446 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4447 * However, doing so will yield better performance as well as avoiding
4448 * traps accessing the last bits in the bitmap.
4449 */
4450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4451DECLASM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4452#else
4453DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4454{
4455 union { bool f; uint32_t u32; uint8_t u8; } rc;
4456# if RT_INLINE_ASM_USES_INTRIN
4457 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
4458
4459# elif RT_INLINE_ASM_GNU_STYLE
4460 __asm__ __volatile__("btsl %2, %1\n\t"
4461 "setc %b0\n\t"
4462 "andl $1, %0\n\t"
4463 : "=q" (rc.u32),
4464 "=m" (*(volatile long RT_FAR *)pvBitmap)
4465 : "Ir" (iBit),
4466 "m" (*(volatile long RT_FAR *)pvBitmap)
4467 : "memory");
4468# else
4469 __asm
4470 {
4471 mov edx, [iBit]
4472# ifdef RT_ARCH_AMD64
4473 mov rax, [pvBitmap]
4474 bts [rax], edx
4475# else
4476 mov eax, [pvBitmap]
4477 bts [eax], edx
4478# endif
4479 setc al
4480 and eax, 1
4481 mov [rc.u32], eax
4482 }
4483# endif
4484 return rc.f;
4485}
4486#endif
4487
4488
4489/**
4490 * Atomically tests and sets a bit in a bitmap, ordered.
4491 *
4492 * @returns true if the bit was set.
4493 * @returns false if the bit was clear.
4494 *
4495 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4496 * the memory access isn't atomic!
4497 * @param iBit The bit to set.
4498 *
4499 * @remarks x86: Requires a 386 or later.
4500 */
4501#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4502DECLASM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit);
4503#else
4504DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit)
4505{
4506 union { bool f; uint32_t u32; uint8_t u8; } rc;
4507 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4508# if RT_INLINE_ASM_USES_INTRIN
4509 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
4510# elif RT_INLINE_ASM_GNU_STYLE
4511 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4512 "setc %b0\n\t"
4513 "andl $1, %0\n\t"
4514 : "=q" (rc.u32),
4515 "=m" (*(volatile long RT_FAR *)pvBitmap)
4516 : "Ir" (iBit),
4517 "m" (*(volatile long RT_FAR *)pvBitmap)
4518 : "memory");
4519# else
4520 __asm
4521 {
4522 mov edx, [iBit]
4523# ifdef RT_ARCH_AMD64
4524 mov rax, [pvBitmap]
4525 lock bts [rax], edx
4526# else
4527 mov eax, [pvBitmap]
4528 lock bts [eax], edx
4529# endif
4530 setc al
4531 and eax, 1
4532 mov [rc.u32], eax
4533 }
4534# endif
4535 return rc.f;
4536}
4537#endif
4538
4539
4540/**
4541 * Tests and clears a bit in a bitmap.
4542 *
4543 * @returns true if the bit was set.
4544 * @returns false if the bit was clear.
4545 *
4546 * @param pvBitmap Pointer to the bitmap.
4547 * @param iBit The bit to test and clear.
4548 *
4549 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4550 * However, doing so will yield better performance as well as avoiding
4551 * traps accessing the last bits in the bitmap.
4552 */
4553#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4554DECLASM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4555#else
4556DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4557{
4558 union { bool f; uint32_t u32; uint8_t u8; } rc;
4559# if RT_INLINE_ASM_USES_INTRIN
4560 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
4561
4562# elif RT_INLINE_ASM_GNU_STYLE
4563 __asm__ __volatile__("btrl %2, %1\n\t"
4564 "setc %b0\n\t"
4565 "andl $1, %0\n\t"
4566 : "=q" (rc.u32),
4567 "=m" (*(volatile long RT_FAR *)pvBitmap)
4568 : "Ir" (iBit),
4569 "m" (*(volatile long RT_FAR *)pvBitmap)
4570 : "memory");
4571# else
4572 __asm
4573 {
4574 mov edx, [iBit]
4575# ifdef RT_ARCH_AMD64
4576 mov rax, [pvBitmap]
4577 btr [rax], edx
4578# else
4579 mov eax, [pvBitmap]
4580 btr [eax], edx
4581# endif
4582 setc al
4583 and eax, 1
4584 mov [rc.u32], eax
4585 }
4586# endif
4587 return rc.f;
4588}
4589#endif
4590
4591
4592/**
4593 * Atomically tests and clears a bit in a bitmap, ordered.
4594 *
4595 * @returns true if the bit was set.
4596 * @returns false if the bit was clear.
4597 *
4598 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4599 * the memory access isn't atomic!
4600 * @param iBit The bit to test and clear.
4601 *
4602 * @remarks No memory barrier, take care on smp.
4603 * @remarks x86: Requires a 386 or later.
4604 */
4605#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4606DECLASM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit);
4607#else
4608DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit)
4609{
4610 union { bool f; uint32_t u32; uint8_t u8; } rc;
4611 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4612# if RT_INLINE_ASM_USES_INTRIN
4613 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
4614
4615# elif RT_INLINE_ASM_GNU_STYLE
4616 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4617 "setc %b0\n\t"
4618 "andl $1, %0\n\t"
4619 : "=q" (rc.u32),
4620 "=m" (*(volatile long RT_FAR *)pvBitmap)
4621 : "Ir" (iBit),
4622 "m" (*(volatile long RT_FAR *)pvBitmap)
4623 : "memory");
4624# else
4625 __asm
4626 {
4627 mov edx, [iBit]
4628# ifdef RT_ARCH_AMD64
4629 mov rax, [pvBitmap]
4630 lock btr [rax], edx
4631# else
4632 mov eax, [pvBitmap]
4633 lock btr [eax], edx
4634# endif
4635 setc al
4636 and eax, 1
4637 mov [rc.u32], eax
4638 }
4639# endif
4640 return rc.f;
4641}
4642#endif
4643
4644
4645/**
4646 * Tests and toggles a bit in a bitmap.
4647 *
4648 * @returns true if the bit was set.
4649 * @returns false if the bit was clear.
4650 *
4651 * @param pvBitmap Pointer to the bitmap.
4652 * @param iBit The bit to test and toggle.
4653 *
4654 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4655 * However, doing so will yield better performance as well as avoiding
4656 * traps accessing the last bits in the bitmap.
4657 */
4658#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4659DECLASM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4660#else
4661DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4662{
4663 union { bool f; uint32_t u32; uint8_t u8; } rc;
4664# if RT_INLINE_ASM_USES_INTRIN
4665 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
4666
4667# elif RT_INLINE_ASM_GNU_STYLE
4668 __asm__ __volatile__("btcl %2, %1\n\t"
4669 "setc %b0\n\t"
4670 "andl $1, %0\n\t"
4671 : "=q" (rc.u32),
4672 "=m" (*(volatile long RT_FAR *)pvBitmap)
4673 : "Ir" (iBit),
4674 "m" (*(volatile long RT_FAR *)pvBitmap)
4675 : "memory");
4676# else
4677 __asm
4678 {
4679 mov edx, [iBit]
4680# ifdef RT_ARCH_AMD64
4681 mov rax, [pvBitmap]
4682 btc [rax], edx
4683# else
4684 mov eax, [pvBitmap]
4685 btc [eax], edx
4686# endif
4687 setc al
4688 and eax, 1
4689 mov [rc.u32], eax
4690 }
4691# endif
4692 return rc.f;
4693}
4694#endif
4695
4696
4697/**
4698 * Atomically tests and toggles a bit in a bitmap, ordered.
4699 *
4700 * @returns true if the bit was set.
4701 * @returns false if the bit was clear.
4702 *
4703 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4704 * the memory access isn't atomic!
4705 * @param iBit The bit to test and toggle.
4706 *
4707 * @remarks x86: Requires a 386 or later.
4708 */
4709#if RT_INLINE_ASM_EXTERNAL
4710DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit);
4711#else
4712DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit)
4713{
4714 union { bool f; uint32_t u32; uint8_t u8; } rc;
4715 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4716# if RT_INLINE_ASM_GNU_STYLE
4717 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4718 "setc %b0\n\t"
4719 "andl $1, %0\n\t"
4720 : "=q" (rc.u32),
4721 "=m" (*(volatile long RT_FAR *)pvBitmap)
4722 : "Ir" (iBit),
4723 "m" (*(volatile long RT_FAR *)pvBitmap)
4724 : "memory");
4725# else
4726 __asm
4727 {
4728 mov edx, [iBit]
4729# ifdef RT_ARCH_AMD64
4730 mov rax, [pvBitmap]
4731 lock btc [rax], edx
4732# else
4733 mov eax, [pvBitmap]
4734 lock btc [eax], edx
4735# endif
4736 setc al
4737 and eax, 1
4738 mov [rc.u32], eax
4739 }
4740# endif
4741 return rc.f;
4742}
4743#endif
4744
4745
4746/**
4747 * Tests if a bit in a bitmap is set.
4748 *
4749 * @returns true if the bit is set.
4750 * @returns false if the bit is clear.
4751 *
4752 * @param pvBitmap Pointer to the bitmap.
4753 * @param iBit The bit to test.
4754 *
4755 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4756 * However, doing so will yield better performance as well as avoiding
4757 * traps accessing the last bits in the bitmap.
4758 */
4759#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4760DECLASM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit);
4761#else
4762DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit)
4763{
4764 union { bool f; uint32_t u32; uint8_t u8; } rc;
4765# if RT_INLINE_ASM_USES_INTRIN
4766 rc.u32 = _bittest((long *)pvBitmap, iBit);
4767# elif RT_INLINE_ASM_GNU_STYLE
4768
4769 __asm__ __volatile__("btl %2, %1\n\t"
4770 "setc %b0\n\t"
4771 "andl $1, %0\n\t"
4772 : "=q" (rc.u32)
4773 : "m" (*(const volatile long RT_FAR *)pvBitmap),
4774 "Ir" (iBit)
4775 : "memory");
4776# else
4777 __asm
4778 {
4779 mov edx, [iBit]
4780# ifdef RT_ARCH_AMD64
4781 mov rax, [pvBitmap]
4782 bt [rax], edx
4783# else
4784 mov eax, [pvBitmap]
4785 bt [eax], edx
4786# endif
4787 setc al
4788 and eax, 1
4789 mov [rc.u32], eax
4790 }
4791# endif
4792 return rc.f;
4793}
4794#endif
4795
4796
4797/**
4798 * Clears a bit range within a bitmap.
4799 *
4800 * @param pvBitmap Pointer to the bitmap.
4801 * @param iBitStart The First bit to clear.
4802 * @param iBitEnd The first bit not to clear.
4803 */
4804DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4805{
4806 if (iBitStart < iBitEnd)
4807 {
4808 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4809 int32_t iStart = iBitStart & ~31;
4810 int32_t iEnd = iBitEnd & ~31;
4811 if (iStart == iEnd)
4812 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4813 else
4814 {
4815 /* bits in first dword. */
4816 if (iBitStart & 31)
4817 {
4818 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4819 pu32++;
4820 iBitStart = iStart + 32;
4821 }
4822
4823 /* whole dword. */
4824 if (iBitStart != iEnd)
4825 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4826
4827 /* bits in last dword. */
4828 if (iBitEnd & 31)
4829 {
4830 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4831 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4832 }
4833 }
4834 }
4835}
4836
4837
4838/**
4839 * Sets a bit range within a bitmap.
4840 *
4841 * @param pvBitmap Pointer to the bitmap.
4842 * @param iBitStart The First bit to set.
4843 * @param iBitEnd The first bit not to set.
4844 */
4845DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4846{
4847 if (iBitStart < iBitEnd)
4848 {
4849 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
4850 int32_t iStart = iBitStart & ~31;
4851 int32_t iEnd = iBitEnd & ~31;
4852 if (iStart == iEnd)
4853 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4854 else
4855 {
4856 /* bits in first dword. */
4857 if (iBitStart & 31)
4858 {
4859 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4860 pu32++;
4861 iBitStart = iStart + 32;
4862 }
4863
4864 /* whole dword. */
4865 if (iBitStart != iEnd)
4866 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4867
4868 /* bits in last dword. */
4869 if (iBitEnd & 31)
4870 {
4871 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
4872 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4873 }
4874 }
4875 }
4876}
4877
4878
4879/**
4880 * Finds the first clear bit in a bitmap.
4881 *
4882 * @returns Index of the first zero bit.
4883 * @returns -1 if no clear bit was found.
4884 * @param pvBitmap Pointer to the bitmap.
4885 * @param cBits The number of bits in the bitmap. Multiple of 32.
4886 */
4887#if RT_INLINE_ASM_EXTERNAL
4888DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
4889#else
4890DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
4891{
4892 if (cBits)
4893 {
4894 int32_t iBit;
4895# if RT_INLINE_ASM_GNU_STYLE
4896 RTCCUINTREG uEAX, uECX, uEDI;
4897 cBits = RT_ALIGN_32(cBits, 32);
4898 __asm__ __volatile__("repe; scasl\n\t"
4899 "je 1f\n\t"
4900# ifdef RT_ARCH_AMD64
4901 "lea -4(%%rdi), %%rdi\n\t"
4902 "xorl (%%rdi), %%eax\n\t"
4903 "subq %5, %%rdi\n\t"
4904# else
4905 "lea -4(%%edi), %%edi\n\t"
4906 "xorl (%%edi), %%eax\n\t"
4907 "subl %5, %%edi\n\t"
4908# endif
4909 "shll $3, %%edi\n\t"
4910 "bsfl %%eax, %%edx\n\t"
4911 "addl %%edi, %%edx\n\t"
4912 "1:\t\n"
4913 : "=d" (iBit),
4914 "=&c" (uECX),
4915 "=&D" (uEDI),
4916 "=&a" (uEAX)
4917 : "0" (0xffffffff),
4918 "mr" (pvBitmap),
4919 "1" (cBits >> 5),
4920 "2" (pvBitmap),
4921 "3" (0xffffffff));
4922# else
4923 cBits = RT_ALIGN_32(cBits, 32);
4924 __asm
4925 {
4926# ifdef RT_ARCH_AMD64
4927 mov rdi, [pvBitmap]
4928 mov rbx, rdi
4929# else
4930 mov edi, [pvBitmap]
4931 mov ebx, edi
4932# endif
4933 mov edx, 0ffffffffh
4934 mov eax, edx
4935 mov ecx, [cBits]
4936 shr ecx, 5
4937 repe scasd
4938 je done
4939
4940# ifdef RT_ARCH_AMD64
4941 lea rdi, [rdi - 4]
4942 xor eax, [rdi]
4943 sub rdi, rbx
4944# else
4945 lea edi, [edi - 4]
4946 xor eax, [edi]
4947 sub edi, ebx
4948# endif
4949 shl edi, 3
4950 bsf edx, eax
4951 add edx, edi
4952 done:
4953 mov [iBit], edx
4954 }
4955# endif
4956 return iBit;
4957 }
4958 return -1;
4959}
4960#endif
4961
4962
4963/**
4964 * Finds the next clear bit in a bitmap.
4965 *
4966 * @returns Index of the first zero bit.
4967 * @returns -1 if no clear bit was found.
4968 * @param pvBitmap Pointer to the bitmap.
4969 * @param cBits The number of bits in the bitmap. Multiple of 32.
4970 * @param iBitPrev The bit returned from the last search.
4971 * The search will start at iBitPrev + 1.
4972 */
4973#if RT_INLINE_ASM_EXTERNAL
4974DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4975#else
4976DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4977{
4978 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
4979 int iBit = ++iBitPrev & 31;
4980 if (iBit)
4981 {
4982 /*
4983 * Inspect the 32-bit word containing the unaligned bit.
4984 */
4985 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4986
4987# if RT_INLINE_ASM_USES_INTRIN
4988 unsigned long ulBit = 0;
4989 if (_BitScanForward(&ulBit, u32))
4990 return ulBit + iBitPrev;
4991# else
4992# if RT_INLINE_ASM_GNU_STYLE
4993 __asm__ __volatile__("bsf %1, %0\n\t"
4994 "jnz 1f\n\t"
4995 "movl $-1, %0\n\t"
4996 "1:\n\t"
4997 : "=r" (iBit)
4998 : "r" (u32));
4999# else
5000 __asm
5001 {
5002 mov edx, [u32]
5003 bsf eax, edx
5004 jnz done
5005 mov eax, 0ffffffffh
5006 done:
5007 mov [iBit], eax
5008 }
5009# endif
5010 if (iBit >= 0)
5011 return iBit + iBitPrev;
5012# endif
5013
5014 /*
5015 * Skip ahead and see if there is anything left to search.
5016 */
5017 iBitPrev |= 31;
5018 iBitPrev++;
5019 if (cBits <= (uint32_t)iBitPrev)
5020 return -1;
5021 }
5022
5023 /*
5024 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5025 */
5026 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5027 if (iBit >= 0)
5028 iBit += iBitPrev;
5029 return iBit;
5030}
5031#endif
5032
5033
5034/**
5035 * Finds the first set bit in a bitmap.
5036 *
5037 * @returns Index of the first set bit.
5038 * @returns -1 if no clear bit was found.
5039 * @param pvBitmap Pointer to the bitmap.
5040 * @param cBits The number of bits in the bitmap. Multiple of 32.
5041 */
5042#if RT_INLINE_ASM_EXTERNAL
5043DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits);
5044#else
5045DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits)
5046{
5047 if (cBits)
5048 {
5049 int32_t iBit;
5050# if RT_INLINE_ASM_GNU_STYLE
5051 RTCCUINTREG uEAX, uECX, uEDI;
5052 cBits = RT_ALIGN_32(cBits, 32);
5053 __asm__ __volatile__("repe; scasl\n\t"
5054 "je 1f\n\t"
5055# ifdef RT_ARCH_AMD64
5056 "lea -4(%%rdi), %%rdi\n\t"
5057 "movl (%%rdi), %%eax\n\t"
5058 "subq %5, %%rdi\n\t"
5059# else
5060 "lea -4(%%edi), %%edi\n\t"
5061 "movl (%%edi), %%eax\n\t"
5062 "subl %5, %%edi\n\t"
5063# endif
5064 "shll $3, %%edi\n\t"
5065 "bsfl %%eax, %%edx\n\t"
5066 "addl %%edi, %%edx\n\t"
5067 "1:\t\n"
5068 : "=d" (iBit),
5069 "=&c" (uECX),
5070 "=&D" (uEDI),
5071 "=&a" (uEAX)
5072 : "0" (0xffffffff),
5073 "mr" (pvBitmap),
5074 "1" (cBits >> 5),
5075 "2" (pvBitmap),
5076 "3" (0));
5077# else
5078 cBits = RT_ALIGN_32(cBits, 32);
5079 __asm
5080 {
5081# ifdef RT_ARCH_AMD64
5082 mov rdi, [pvBitmap]
5083 mov rbx, rdi
5084# else
5085 mov edi, [pvBitmap]
5086 mov ebx, edi
5087# endif
5088 mov edx, 0ffffffffh
5089 xor eax, eax
5090 mov ecx, [cBits]
5091 shr ecx, 5
5092 repe scasd
5093 je done
5094# ifdef RT_ARCH_AMD64
5095 lea rdi, [rdi - 4]
5096 mov eax, [rdi]
5097 sub rdi, rbx
5098# else
5099 lea edi, [edi - 4]
5100 mov eax, [edi]
5101 sub edi, ebx
5102# endif
5103 shl edi, 3
5104 bsf edx, eax
5105 add edx, edi
5106 done:
5107 mov [iBit], edx
5108 }
5109# endif
5110 return iBit;
5111 }
5112 return -1;
5113}
5114#endif
5115
5116
5117/**
5118 * Finds the next set bit in a bitmap.
5119 *
5120 * @returns Index of the next set bit.
5121 * @returns -1 if no set bit was found.
5122 * @param pvBitmap Pointer to the bitmap.
5123 * @param cBits The number of bits in the bitmap. Multiple of 32.
5124 * @param iBitPrev The bit returned from the last search.
5125 * The search will start at iBitPrev + 1.
5126 */
5127#if RT_INLINE_ASM_EXTERNAL
5128DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5129#else
5130DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5131{
5132 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
5133 int iBit = ++iBitPrev & 31;
5134 if (iBit)
5135 {
5136 /*
5137 * Inspect the 32-bit word containing the unaligned bit.
5138 */
5139 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5140
5141# if RT_INLINE_ASM_USES_INTRIN
5142 unsigned long ulBit = 0;
5143 if (_BitScanForward(&ulBit, u32))
5144 return ulBit + iBitPrev;
5145# else
5146# if RT_INLINE_ASM_GNU_STYLE
5147 __asm__ __volatile__("bsf %1, %0\n\t"
5148 "jnz 1f\n\t"
5149 "movl $-1, %0\n\t"
5150 "1:\n\t"
5151 : "=r" (iBit)
5152 : "r" (u32));
5153# else
5154 __asm
5155 {
5156 mov edx, [u32]
5157 bsf eax, edx
5158 jnz done
5159 mov eax, 0ffffffffh
5160 done:
5161 mov [iBit], eax
5162 }
5163# endif
5164 if (iBit >= 0)
5165 return iBit + iBitPrev;
5166# endif
5167
5168 /*
5169 * Skip ahead and see if there is anything left to search.
5170 */
5171 iBitPrev |= 31;
5172 iBitPrev++;
5173 if (cBits <= (uint32_t)iBitPrev)
5174 return -1;
5175 }
5176
5177 /*
5178 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5179 */
5180 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5181 if (iBit >= 0)
5182 iBit += iBitPrev;
5183 return iBit;
5184}
5185#endif
5186
5187
5188/**
5189 * Finds the first bit which is set in the given 32-bit integer.
5190 * Bits are numbered from 1 (least significant) to 32.
5191 *
5192 * @returns index [1..32] of the first set bit.
5193 * @returns 0 if all bits are cleared.
5194 * @param u32 Integer to search for set bits.
5195 * @remarks Similar to ffs() in BSD.
5196 */
5197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5198DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5199#else
5200DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5201{
5202# if RT_INLINE_ASM_USES_INTRIN
5203 unsigned long iBit;
5204 if (_BitScanForward(&iBit, u32))
5205 iBit++;
5206 else
5207 iBit = 0;
5208# elif RT_INLINE_ASM_GNU_STYLE
5209 uint32_t iBit;
5210 __asm__ __volatile__("bsf %1, %0\n\t"
5211 "jnz 1f\n\t"
5212 "xorl %0, %0\n\t"
5213 "jmp 2f\n"
5214 "1:\n\t"
5215 "incl %0\n"
5216 "2:\n\t"
5217 : "=r" (iBit)
5218 : "rm" (u32));
5219# else
5220 uint32_t iBit;
5221 _asm
5222 {
5223 bsf eax, [u32]
5224 jnz found
5225 xor eax, eax
5226 jmp done
5227 found:
5228 inc eax
5229 done:
5230 mov [iBit], eax
5231 }
5232# endif
5233 return iBit;
5234}
5235#endif
5236
5237
5238/**
5239 * Finds the first bit which is set in the given 32-bit integer.
5240 * Bits are numbered from 1 (least significant) to 32.
5241 *
5242 * @returns index [1..32] of the first set bit.
5243 * @returns 0 if all bits are cleared.
5244 * @param i32 Integer to search for set bits.
5245 * @remark Similar to ffs() in BSD.
5246 */
5247DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5248{
5249 return ASMBitFirstSetU32((uint32_t)i32);
5250}
5251
5252
5253/**
5254 * Finds the first bit which is set in the given 64-bit integer.
5255 *
5256 * Bits are numbered from 1 (least significant) to 64.
5257 *
5258 * @returns index [1..64] of the first set bit.
5259 * @returns 0 if all bits are cleared.
5260 * @param u64 Integer to search for set bits.
5261 * @remarks Similar to ffs() in BSD.
5262 */
5263#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5264DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5265#else
5266DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5267{
5268# if RT_INLINE_ASM_USES_INTRIN
5269 unsigned long iBit;
5270# if ARCH_BITS == 64
5271 if (_BitScanForward64(&iBit, u64))
5272 iBit++;
5273 else
5274 iBit = 0;
5275# else
5276 if (_BitScanForward(&iBit, (uint32_t)u64))
5277 iBit++;
5278 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5279 iBit += 33;
5280 else
5281 iBit = 0;
5282# endif
5283# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5284 uint64_t iBit;
5285 __asm__ __volatile__("bsfq %1, %0\n\t"
5286 "jnz 1f\n\t"
5287 "xorl %k0, %k0\n\t"
5288 "jmp 2f\n"
5289 "1:\n\t"
5290 "incl %k0\n"
5291 "2:\n\t"
5292 : "=r" (iBit)
5293 : "rm" (u64));
5294# else
5295 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5296 if (!iBit)
5297 {
5298 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5299 if (iBit)
5300 iBit += 32;
5301 }
5302# endif
5303 return (unsigned)iBit;
5304}
5305#endif
5306
5307
5308/**
5309 * Finds the first bit which is set in the given 16-bit integer.
5310 *
5311 * Bits are numbered from 1 (least significant) to 16.
5312 *
5313 * @returns index [1..16] of the first set bit.
5314 * @returns 0 if all bits are cleared.
5315 * @param u16 Integer to search for set bits.
5316 * @remarks For 16-bit bs3kit code.
5317 */
5318#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5319DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5320#else
5321DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5322{
5323 return ASMBitFirstSetU32((uint32_t)u16);
5324}
5325#endif
5326
5327
5328/**
5329 * Finds the last bit which is set in the given 32-bit integer.
5330 * Bits are numbered from 1 (least significant) to 32.
5331 *
5332 * @returns index [1..32] of the last set bit.
5333 * @returns 0 if all bits are cleared.
5334 * @param u32 Integer to search for set bits.
5335 * @remark Similar to fls() in BSD.
5336 */
5337#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5338DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5339#else
5340DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5341{
5342# if RT_INLINE_ASM_USES_INTRIN
5343 unsigned long iBit;
5344 if (_BitScanReverse(&iBit, u32))
5345 iBit++;
5346 else
5347 iBit = 0;
5348# elif RT_INLINE_ASM_GNU_STYLE
5349 uint32_t iBit;
5350 __asm__ __volatile__("bsrl %1, %0\n\t"
5351 "jnz 1f\n\t"
5352 "xorl %0, %0\n\t"
5353 "jmp 2f\n"
5354 "1:\n\t"
5355 "incl %0\n"
5356 "2:\n\t"
5357 : "=r" (iBit)
5358 : "rm" (u32));
5359# else
5360 uint32_t iBit;
5361 _asm
5362 {
5363 bsr eax, [u32]
5364 jnz found
5365 xor eax, eax
5366 jmp done
5367 found:
5368 inc eax
5369 done:
5370 mov [iBit], eax
5371 }
5372# endif
5373 return iBit;
5374}
5375#endif
5376
5377
5378/**
5379 * Finds the last bit which is set in the given 32-bit integer.
5380 * Bits are numbered from 1 (least significant) to 32.
5381 *
5382 * @returns index [1..32] of the last set bit.
5383 * @returns 0 if all bits are cleared.
5384 * @param i32 Integer to search for set bits.
5385 * @remark Similar to fls() in BSD.
5386 */
5387DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5388{
5389 return ASMBitLastSetU32((uint32_t)i32);
5390}
5391
5392
5393/**
5394 * Finds the last bit which is set in the given 64-bit integer.
5395 *
5396 * Bits are numbered from 1 (least significant) to 64.
5397 *
5398 * @returns index [1..64] of the last set bit.
5399 * @returns 0 if all bits are cleared.
5400 * @param u64 Integer to search for set bits.
5401 * @remark Similar to fls() in BSD.
5402 */
5403#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5404DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5405#else
5406DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5407{
5408# if RT_INLINE_ASM_USES_INTRIN
5409 unsigned long iBit;
5410# if ARCH_BITS == 64
5411 if (_BitScanReverse64(&iBit, u64))
5412 iBit++;
5413 else
5414 iBit = 0;
5415# else
5416 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5417 iBit += 33;
5418 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5419 iBit++;
5420 else
5421 iBit = 0;
5422# endif
5423# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5424 uint64_t iBit;
5425 __asm__ __volatile__("bsrq %1, %0\n\t"
5426 "jnz 1f\n\t"
5427 "xorl %k0, %k0\n\t"
5428 "jmp 2f\n"
5429 "1:\n\t"
5430 "incl %k0\n"
5431 "2:\n\t"
5432 : "=r" (iBit)
5433 : "rm" (u64));
5434# else
5435 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5436 if (iBit)
5437 iBit += 32;
5438 else
5439 iBit = ASMBitLastSetU32((uint32_t)u64);
5440#endif
5441 return (unsigned)iBit;
5442}
5443#endif
5444
5445
5446/**
5447 * Finds the last bit which is set in the given 16-bit integer.
5448 *
5449 * Bits are numbered from 1 (least significant) to 16.
5450 *
5451 * @returns index [1..16] of the last set bit.
5452 * @returns 0 if all bits are cleared.
5453 * @param u16 Integer to search for set bits.
5454 * @remarks For 16-bit bs3kit code.
5455 */
5456#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5457DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5458#else
5459DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5460{
5461 return ASMBitLastSetU32((uint32_t)u16);
5462}
5463#endif
5464
5465
5466/**
5467 * Reverse the byte order of the given 16-bit integer.
5468 *
5469 * @returns Revert
5470 * @param u16 16-bit integer value.
5471 */
5472#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5473DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5474#else
5475DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5476{
5477# if RT_INLINE_ASM_USES_INTRIN
5478 u16 = _byteswap_ushort(u16);
5479# elif RT_INLINE_ASM_GNU_STYLE
5480 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5481# else
5482 _asm
5483 {
5484 mov ax, [u16]
5485 ror ax, 8
5486 mov [u16], ax
5487 }
5488# endif
5489 return u16;
5490}
5491#endif
5492
5493
5494/**
5495 * Reverse the byte order of the given 32-bit integer.
5496 *
5497 * @returns Revert
5498 * @param u32 32-bit integer value.
5499 */
5500#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5501DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5502#else
5503DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5504{
5505# if RT_INLINE_ASM_USES_INTRIN
5506 u32 = _byteswap_ulong(u32);
5507# elif RT_INLINE_ASM_GNU_STYLE
5508 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5509# else
5510 _asm
5511 {
5512 mov eax, [u32]
5513 bswap eax
5514 mov [u32], eax
5515 }
5516# endif
5517 return u32;
5518}
5519#endif
5520
5521
5522/**
5523 * Reverse the byte order of the given 64-bit integer.
5524 *
5525 * @returns Revert
5526 * @param u64 64-bit integer value.
5527 */
5528DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5529{
5530#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5531 u64 = _byteswap_uint64(u64);
5532#else
5533 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5534 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5535#endif
5536 return u64;
5537}
5538
5539
5540/**
5541 * Rotate 32-bit unsigned value to the left by @a cShift.
5542 *
5543 * @returns Rotated value.
5544 * @param u32 The value to rotate.
5545 * @param cShift How many bits to rotate by.
5546 */
5547#ifdef __WATCOMC__
5548DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5549#else
5550DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5551{
5552# if RT_INLINE_ASM_USES_INTRIN
5553 return _rotl(u32, cShift);
5554# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5555 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5556 return u32;
5557# else
5558 cShift &= 31;
5559 return (u32 << cShift) | (u32 >> (32 - cShift));
5560# endif
5561}
5562#endif
5563
5564
5565/**
5566 * Rotate 32-bit unsigned value to the right by @a cShift.
5567 *
5568 * @returns Rotated value.
5569 * @param u32 The value to rotate.
5570 * @param cShift How many bits to rotate by.
5571 */
5572#ifdef __WATCOMC__
5573DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5574#else
5575DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5576{
5577# if RT_INLINE_ASM_USES_INTRIN
5578 return _rotr(u32, cShift);
5579# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5580 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5581 return u32;
5582# else
5583 cShift &= 31;
5584 return (u32 >> cShift) | (u32 << (32 - cShift));
5585# endif
5586}
5587#endif
5588
5589
5590/**
5591 * Rotate 64-bit unsigned value to the left by @a cShift.
5592 *
5593 * @returns Rotated value.
5594 * @param u64 The value to rotate.
5595 * @param cShift How many bits to rotate by.
5596 */
5597DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5598{
5599#if RT_INLINE_ASM_USES_INTRIN
5600 return _rotl64(u64, cShift);
5601#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5602 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5603 return u64;
5604#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5605 uint32_t uSpill;
5606 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5607 "jz 1f\n\t"
5608 "xchgl %%eax, %%edx\n\t"
5609 "1:\n\t"
5610 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5611 "jz 2f\n\t"
5612 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5613 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5614 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5615 "2:\n\t" /* } */
5616 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5617 : "0" (u64),
5618 "1" (cShift));
5619 return u64;
5620#else
5621 cShift &= 63;
5622 return (u64 << cShift) | (u64 >> (64 - cShift));
5623#endif
5624}
5625
5626
5627/**
5628 * Rotate 64-bit unsigned value to the right by @a cShift.
5629 *
5630 * @returns Rotated value.
5631 * @param u64 The value to rotate.
5632 * @param cShift How many bits to rotate by.
5633 */
5634DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5635{
5636#if RT_INLINE_ASM_USES_INTRIN
5637 return _rotr64(u64, cShift);
5638#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5639 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5640 return u64;
5641#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5642 uint32_t uSpill;
5643 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5644 "jz 1f\n\t"
5645 "xchgl %%eax, %%edx\n\t"
5646 "1:\n\t"
5647 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5648 "jz 2f\n\t"
5649 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5650 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5651 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5652 "2:\n\t" /* } */
5653 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5654 : "0" (u64),
5655 "1" (cShift));
5656 return u64;
5657#else
5658 cShift &= 63;
5659 return (u64 >> cShift) | (u64 << (64 - cShift));
5660#endif
5661}
5662
5663/** @} */
5664
5665
5666/** @} */
5667
5668#endif
5669
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use