VirtualBox

source: vbox/trunk/include/iprt/asm.h @ 7638

Last change on this file was r7638, checked in by vboxsync, 16 years ago

Another one.

1/** @file
2 * innotek Portable Runtime - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2007 innotek GmbH
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @todo #include <iprt/param.h> for PAGE_SIZE. */
33/** @def RT_INLINE_ASM_USES_INTRIN
34 * Defined as 1 if we're using the Microsoft compiler intrinsics (_MSC_VER >= 1400).
35 * Otherwise defined as 0.
36 */
37
38#ifdef _MSC_VER
39# if _MSC_VER >= 1400
40# define RT_INLINE_ASM_USES_INTRIN 1
41# include <intrin.h>
42 /* Emit the intrinsics at all optimization levels. */
43# pragma intrinsic(_ReadWriteBarrier)
44# pragma intrinsic(__cpuid)
45# pragma intrinsic(_enable)
46# pragma intrinsic(_disable)
47# pragma intrinsic(__rdtsc)
48# pragma intrinsic(__readmsr)
49# pragma intrinsic(__writemsr)
50# pragma intrinsic(__outbyte)
51# pragma intrinsic(__outword)
52# pragma intrinsic(__outdword)
53# pragma intrinsic(__inbyte)
54# pragma intrinsic(__inword)
55# pragma intrinsic(__indword)
56# pragma intrinsic(__invlpg)
57# pragma intrinsic(__stosd)
58# pragma intrinsic(__stosw)
59# pragma intrinsic(__stosb)
60# pragma intrinsic(__readcr0)
61# pragma intrinsic(__readcr2)
62# pragma intrinsic(__readcr3)
63# pragma intrinsic(__readcr4)
64# pragma intrinsic(__writecr0)
65# pragma intrinsic(__writecr3)
66# pragma intrinsic(__writecr4)
67# pragma intrinsic(_BitScanForward)
68# pragma intrinsic(_BitScanReverse)
69# pragma intrinsic(_bittest)
70# pragma intrinsic(_bittestandset)
71# pragma intrinsic(_bittestandreset)
72# pragma intrinsic(_bittestandcomplement)
73# pragma intrinsic(_byteswap_ushort)
74# pragma intrinsic(_byteswap_ulong)
75# pragma intrinsic(_interlockedbittestandset)
76# pragma intrinsic(_interlockedbittestandreset)
77# pragma intrinsic(_InterlockedAnd)
78# pragma intrinsic(_InterlockedOr)
79# pragma intrinsic(_InterlockedIncrement)
80# pragma intrinsic(_InterlockedDecrement)
81# pragma intrinsic(_InterlockedExchange)
82# pragma intrinsic(_InterlockedExchangeAdd)
83# pragma intrinsic(_InterlockedCompareExchange)
84# pragma intrinsic(_InterlockedCompareExchange64)
85# ifdef RT_ARCH_AMD64
86# pragma intrinsic(__stosq)
87# pragma intrinsic(__readcr8)
88# pragma intrinsic(__writecr8)
89# pragma intrinsic(_byteswap_uint64)
90# pragma intrinsic(_InterlockedExchange64)
91# endif
92# endif
93#endif
94#ifndef RT_INLINE_ASM_USES_INTRIN
95# define RT_INLINE_ASM_USES_INTRIN 0
96#endif
97
98
99
100/** @defgroup grp_asm ASM - Assembly Routines
101 * @ingroup grp_rt
102 *
103 * @remarks The difference between ordered and unordered atomic operations is that
104 * the former will complete outstanding reads and writes before continuing,
105 * while the latter makes no promises about the order. Ordered
106 * operations do not, it seems, make any 100% promise as to whether
107 * the operation will complete before any subsequent memory access.
108 * (Please correct this if it is wrong.)
109 *
110 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
111 * are unordered (note the Uo).
112 *
113 * @{
114 */
115
116/** @def RT_INLINE_ASM_EXTERNAL
117 * Defined as 1 if the compiler does not support inline assembly.
118 * The ASM* functions will then be implemented in an external .asm file.
119 *
120 * @remark Microsoft's AMD64 compiler does not support inline assembly, which is
121 * why the external implementations are used on that platform.
122 */
123#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
124# define RT_INLINE_ASM_EXTERNAL 1
125#else
126# define RT_INLINE_ASM_EXTERNAL 0
127#endif
128
129/** @def RT_INLINE_ASM_GNU_STYLE
130 * Defined as 1 if the compiler understands GNU-style inline assembly.
131 */
132#if defined(_MSC_VER)
133# define RT_INLINE_ASM_GNU_STYLE 0
134#else
135# define RT_INLINE_ASM_GNU_STYLE 1
136#endif
137
138
139/** @todo Find a more appropriate home for these structures? */
140#pragma pack(1)
141/** IDTR */
142typedef struct RTIDTR
143{
144 /** Size of the IDT. */
145 uint16_t cbIdt;
146 /** Address of the IDT. */
147 uintptr_t pIdt;
148} RTIDTR, *PRTIDTR;
149#pragma pack()
150
151#pragma pack(1)
152/** GDTR */
153typedef struct RTGDTR
154{
155 /** Size of the GDT. */
156 uint16_t cbGdt;
157 /** Address of the GDT. */
158 uintptr_t pGdt;
159} RTGDTR, *PRTGDTR;
160#pragma pack()
161
162
163/** @def ASMReturnAddress
164 * Gets the return address of the current (or calling if you like) function or method.
165 */
166#ifdef _MSC_VER
167# ifdef __cplusplus
168extern "C"
169# endif
170void * _ReturnAddress(void);
171# pragma intrinsic(_ReturnAddress)
172# define ASMReturnAddress() _ReturnAddress()
173#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
174# define ASMReturnAddress() __builtin_return_address(0)
175#else
176# error "Unsupported compiler."
177#endif
178
179
180/**
181 * Gets the content of the IDTR CPU register.
182 * @param pIdtr Where to store the IDTR contents.
183 */
184#if RT_INLINE_ASM_EXTERNAL
185DECLASM(void) ASMGetIDTR(PRTIDTR pIdtr);
186#else
187DECLINLINE(void) ASMGetIDTR(PRTIDTR pIdtr)
188{
189# if RT_INLINE_ASM_GNU_STYLE
190 __asm__ __volatile__ ("sidt %0" : "=m" (*pIdtr));
191# else
192 __asm
193 {
194# ifdef RT_ARCH_AMD64
195 mov rax, [pIdtr]
196 sidt [rax]
197# else
198 mov eax, [pIdtr]
199 sidt [eax]
200# endif
201 }
202# endif
203}
204#endif
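
/*
 * Usage sketch (illustrative only, not part of the original header): capturing
 * the current IDT base and size with ASMGetIDTR(), e.g. for logging CPU state.
 * The helper name rtSampleGetIdtBase is made up for this example.
 */
#if 0 /* example, not compiled */
static uintptr_t rtSampleGetIdtBase(void)
{
    RTIDTR Idtr;
    ASMGetIDTR(&Idtr);          /* fills in cbIdt (size) and pIdt (address), see RTIDTR above */
    return Idtr.pIdt;           /* the linear address of the IDT */
}
#endif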
205
206
207/**
208 * Sets the content of the IDTR CPU register.
209 * @param pIdtr Where to load the IDTR contents from.
210 */
211#if RT_INLINE_ASM_EXTERNAL
212DECLASM(void) ASMSetIDTR(const RTIDTR *pIdtr);
213#else
214DECLINLINE(void) ASMSetIDTR(const RTIDTR *pIdtr)
215{
216# if RT_INLINE_ASM_GNU_STYLE
217 __asm__ __volatile__ ("lidt %0" : : "m" (*pIdtr));
218# else
219 __asm
220 {
221# ifdef RT_ARCH_AMD64
222 mov rax, [pIdtr]
223 lidt [rax]
224# else
225 mov eax, [pIdtr]
226 lidt [eax]
227# endif
228 }
229# endif
230}
231#endif
232
233
234/**
235 * Gets the content of the GDTR CPU register.
236 * @param pGdtr Where to store the GDTR contents.
237 */
238#if RT_INLINE_ASM_EXTERNAL
239DECLASM(void) ASMGetGDTR(PRTGDTR pGdtr);
240#else
241DECLINLINE(void) ASMGetGDTR(PRTGDTR pGdtr)
242{
243# if RT_INLINE_ASM_GNU_STYLE
244 __asm__ __volatile__ ("sgdt %0" : "=m" (*pGdtr));
245# else
246 __asm
247 {
248# ifdef RT_ARCH_AMD64
249 mov rax, [pGdtr]
250 sgdt [rax]
251# else
252 mov eax, [pGdtr]
253 sgdt [eax]
254# endif
255 }
256# endif
257}
258#endif
259
260/**
261 * Get the CS register.
262 * @returns CS.
263 */
264#if RT_INLINE_ASM_EXTERNAL
265DECLASM(RTSEL) ASMGetCS(void);
266#else
267DECLINLINE(RTSEL) ASMGetCS(void)
268{
269 RTSEL SelCS;
270# if RT_INLINE_ASM_GNU_STYLE
271 __asm__ __volatile__("movw %%cs, %0\n\t" : "=r" (SelCS));
272# else
273 __asm
274 {
275 mov ax, cs
276 mov [SelCS], ax
277 }
278# endif
279 return SelCS;
280}
281#endif
282
283
284/**
285 * Get the DS register.
286 * @returns DS.
287 */
288#if RT_INLINE_ASM_EXTERNAL
289DECLASM(RTSEL) ASMGetDS(void);
290#else
291DECLINLINE(RTSEL) ASMGetDS(void)
292{
293 RTSEL SelDS;
294# if RT_INLINE_ASM_GNU_STYLE
295 __asm__ __volatile__("movw %%ds, %0\n\t" : "=r" (SelDS));
296# else
297 __asm
298 {
299 mov ax, ds
300 mov [SelDS], ax
301 }
302# endif
303 return SelDS;
304}
305#endif
306
307
308/**
309 * Get the ES register.
310 * @returns ES.
311 */
312#if RT_INLINE_ASM_EXTERNAL
313DECLASM(RTSEL) ASMGetES(void);
314#else
315DECLINLINE(RTSEL) ASMGetES(void)
316{
317 RTSEL SelES;
318# if RT_INLINE_ASM_GNU_STYLE
319 __asm__ __volatile__("movw %%es, %0\n\t" : "=r" (SelES));
320# else
321 __asm
322 {
323 mov ax, es
324 mov [SelES], ax
325 }
326# endif
327 return SelES;
328}
329#endif
330
331
332/**
333 * Get the FS register.
334 * @returns FS.
335 */
336#if RT_INLINE_ASM_EXTERNAL
337DECLASM(RTSEL) ASMGetFS(void);
338#else
339DECLINLINE(RTSEL) ASMGetFS(void)
340{
341 RTSEL SelFS;
342# if RT_INLINE_ASM_GNU_STYLE
343 __asm__ __volatile__("movw %%fs, %0\n\t" : "=r" (SelFS));
344# else
345 __asm
346 {
347 mov ax, fs
348 mov [SelFS], ax
349 }
350# endif
351 return SelFS;
352}
353#endif
354
355
356/**
357 * Get the GS register.
358 * @returns GS.
359 */
360#if RT_INLINE_ASM_EXTERNAL
361DECLASM(RTSEL) ASMGetGS(void);
362#else
363DECLINLINE(RTSEL) ASMGetGS(void)
364{
365 RTSEL SelGS;
366# if RT_INLINE_ASM_GNU_STYLE
367 __asm__ __volatile__("movw %%gs, %0\n\t" : "=r" (SelGS));
368# else
369 __asm
370 {
371 mov ax, gs
372 mov [SelGS], ax
373 }
374# endif
375 return SelGS;
376}
377#endif
378
379
380/**
381 * Get the SS register.
382 * @returns SS.
383 */
384#if RT_INLINE_ASM_EXTERNAL
385DECLASM(RTSEL) ASMGetSS(void);
386#else
387DECLINLINE(RTSEL) ASMGetSS(void)
388{
389 RTSEL SelSS;
390# if RT_INLINE_ASM_GNU_STYLE
391 __asm__ __volatile__("movw %%ss, %0\n\t" : "=r" (SelSS));
392# else
393 __asm
394 {
395 mov ax, ss
396 mov [SelSS], ax
397 }
398# endif
399 return SelSS;
400}
401#endif
402
403
404/**
405 * Get the TR register.
406 * @returns TR.
407 */
408#if RT_INLINE_ASM_EXTERNAL
409DECLASM(RTSEL) ASMGetTR(void);
410#else
411DECLINLINE(RTSEL) ASMGetTR(void)
412{
413 RTSEL SelTR;
414# if RT_INLINE_ASM_GNU_STYLE
415 __asm__ __volatile__("str %w0\n\t" : "=r" (SelTR));
416# else
417 __asm
418 {
419 str ax
420 mov [SelTR], ax
421 }
422# endif
423 return SelTR;
424}
425#endif
426
427
428/**
429 * Get the [RE]FLAGS register.
430 * @returns [RE]FLAGS.
431 */
432#if RT_INLINE_ASM_EXTERNAL
433DECLASM(RTCCUINTREG) ASMGetFlags(void);
434#else
435DECLINLINE(RTCCUINTREG) ASMGetFlags(void)
436{
437 RTCCUINTREG uFlags;
438# if RT_INLINE_ASM_GNU_STYLE
439# ifdef RT_ARCH_AMD64
440 __asm__ __volatile__("pushfq\n\t"
441 "popq %0\n\t"
442 : "=g" (uFlags));
443# else
444 __asm__ __volatile__("pushfl\n\t"
445 "popl %0\n\t"
446 : "=g" (uFlags));
447# endif
448# else
449 __asm
450 {
451# ifdef RT_ARCH_AMD64
452 pushfq
453 pop [uFlags]
454# else
455 pushfd
456 pop [uFlags]
457# endif
458 }
459# endif
460 return uFlags;
461}
462#endif
463
464
465/**
466 * Set the [RE]FLAGS register.
467 * @param uFlags The new [RE]FLAGS value.
468 */
469#if RT_INLINE_ASM_EXTERNAL
470DECLASM(void) ASMSetFlags(RTCCUINTREG uFlags);
471#else
472DECLINLINE(void) ASMSetFlags(RTCCUINTREG uFlags)
473{
474# if RT_INLINE_ASM_GNU_STYLE
475# ifdef RT_ARCH_AMD64
476 __asm__ __volatile__("pushq %0\n\t"
477 "popfq\n\t"
478 : : "g" (uFlags));
479# else
480 __asm__ __volatile__("pushl %0\n\t"
481 "popfl\n\t"
482 : : "g" (uFlags));
483# endif
484# else
485 __asm
486 {
487# ifdef RT_ARCH_AMD64
488 push [uFlags]
489 popfq
490# else
491 push [uFlags]
492 popfd
493# endif
494 }
495# endif
496}
497#endif
498
499
500/**
501 * Gets the content of the CPU timestamp counter register.
502 *
503 * @returns TSC.
504 */
505#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
506DECLASM(uint64_t) ASMReadTSC(void);
507#else
508DECLINLINE(uint64_t) ASMReadTSC(void)
509{
510 RTUINT64U u;
511# if RT_INLINE_ASM_GNU_STYLE
512 __asm__ __volatile__ ("rdtsc\n\t" : "=a" (u.s.Lo), "=d" (u.s.Hi));
513# else
514# if RT_INLINE_ASM_USES_INTRIN
515 u.u = __rdtsc();
516# else
517 __asm
518 {
519 rdtsc
520 mov [u.s.Lo], eax
521 mov [u.s.Hi], edx
522 }
523# endif
524# endif
525 return u.u;
526}
527#endif
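
/*
 * Usage sketch (illustrative only, not part of the original header): timing a
 * piece of code in raw TSC ticks with ASMReadTSC(). Note that rdtsc is not
 * serializing and the tick rate may vary with CPU frequency, so this is only a
 * coarse measurement. The helper name is made up for this example.
 */
#if 0 /* example, not compiled */
static uint64_t rtSampleMeasureTicks(void (*pfnWork)(void))
{
    uint64_t const uStart = ASMReadTSC();
    pfnWork();
    return ASMReadTSC() - uStart;   /* elapsed TSC ticks (approximate) */
}
#endif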
528
529
530/**
531 * Performs the cpuid instruction returning all registers.
532 *
533 * @param uOperator CPUID operation (eax).
534 * @param pvEAX Where to store eax.
535 * @param pvEBX Where to store ebx.
536 * @param pvECX Where to store ecx.
537 * @param pvEDX Where to store edx.
538 * @remark We're using void pointers to ease the use of special bitfield structures and such.
539 */
540#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
541DECLASM(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
542#else
543DECLINLINE(void) ASMCpuId(uint32_t uOperator, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
544{
545# if RT_INLINE_ASM_GNU_STYLE
546# ifdef RT_ARCH_AMD64
547 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
548 __asm__ ("cpuid\n\t"
549 : "=a" (uRAX),
550 "=b" (uRBX),
551 "=c" (uRCX),
552 "=d" (uRDX)
553 : "0" (uOperator));
554 *(uint32_t *)pvEAX = (uint32_t)uRAX;
555 *(uint32_t *)pvEBX = (uint32_t)uRBX;
556 *(uint32_t *)pvECX = (uint32_t)uRCX;
557 *(uint32_t *)pvEDX = (uint32_t)uRDX;
558# else
559 __asm__ ("xchgl %%ebx, %1\n\t"
560 "cpuid\n\t"
561 "xchgl %%ebx, %1\n\t"
562 : "=a" (*(uint32_t *)pvEAX),
563 "=r" (*(uint32_t *)pvEBX),
564 "=c" (*(uint32_t *)pvECX),
565 "=d" (*(uint32_t *)pvEDX)
566 : "0" (uOperator));
567# endif
568
569# elif RT_INLINE_ASM_USES_INTRIN
570 int aInfo[4];
571 __cpuid(aInfo, uOperator);
572 *(uint32_t *)pvEAX = aInfo[0];
573 *(uint32_t *)pvEBX = aInfo[1];
574 *(uint32_t *)pvECX = aInfo[2];
575 *(uint32_t *)pvEDX = aInfo[3];
576
577# else
578 uint32_t uEAX;
579 uint32_t uEBX;
580 uint32_t uECX;
581 uint32_t uEDX;
582 __asm
583 {
584 push ebx
585 mov eax, [uOperator]
586 cpuid
587 mov [uEAX], eax
588 mov [uEBX], ebx
589 mov [uECX], ecx
590 mov [uEDX], edx
591 pop ebx
592 }
593 *(uint32_t *)pvEAX = uEAX;
594 *(uint32_t *)pvEBX = uEBX;
595 *(uint32_t *)pvECX = uECX;
596 *(uint32_t *)pvEDX = uEDX;
597# endif
598}
599#endif
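
/*
 * Usage sketch (illustrative only, not part of the original header): fetching
 * the 12 character CPU vendor string via standard CPUID leaf 0, where the
 * string is returned in EBX, EDX, ECX order. The helper name and buffer
 * convention (13 bytes, zero terminated) are made up for this example.
 */
#if 0 /* example, not compiled */
static void rtSampleQueryCpuVendor(char *pszVendor /* at least 13 bytes */)
{
    union { uint32_t au32[3]; char ach[12]; } uBuf;
    uint32_t uMaxLeaf;
    unsigned i;
    ASMCpuId(0, &uMaxLeaf /*eax*/, &uBuf.au32[0] /*ebx*/, &uBuf.au32[2] /*ecx*/, &uBuf.au32[1] /*edx*/);
    for (i = 0; i < 12; i++)
        pszVendor[i] = uBuf.ach[i];
    pszVendor[12] = '\0';
}
#endif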
600
601
602/**
603 * Performs the cpuid instruction returning all registers.
604 * Some CPUID subfunctions take ECX as an additional parameter (currently known for EAX=4).
605 *
606 * @param uOperator CPUID operation (eax).
607 * @param uIdxECX The ECX index (sub-leaf) loaded into ecx before the cpuid instruction.
608 * @param pvEAX Where to store eax.
609 * @param pvEBX Where to store ebx.
610 * @param pvECX Where to store ecx.
611 * @param pvEDX Where to store edx.
612 * @remark We're using void pointers to ease the use of special bitfield structures and such.
613 */
614#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
615DECLASM(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX);
616#else
617DECLINLINE(void) ASMCpuId_Idx_ECX(uint32_t uOperator, uint32_t uIdxECX, void *pvEAX, void *pvEBX, void *pvECX, void *pvEDX)
618{
619# if RT_INLINE_ASM_GNU_STYLE
620# ifdef RT_ARCH_AMD64
621 RTCCUINTREG uRAX, uRBX, uRCX, uRDX;
622 __asm__ ("cpuid\n\t"
623 : "=a" (uRAX),
624 "=b" (uRBX),
625 "=c" (uRCX),
626 "=d" (uRDX)
627 : "0" (uOperator),
628 "2" (uIdxECX));
629 *(uint32_t *)pvEAX = (uint32_t)uRAX;
630 *(uint32_t *)pvEBX = (uint32_t)uRBX;
631 *(uint32_t *)pvECX = (uint32_t)uRCX;
632 *(uint32_t *)pvEDX = (uint32_t)uRDX;
633# else
634 __asm__ ("xchgl %%ebx, %1\n\t"
635 "cpuid\n\t"
636 "xchgl %%ebx, %1\n\t"
637 : "=a" (*(uint32_t *)pvEAX),
638 "=r" (*(uint32_t *)pvEBX),
639 "=c" (*(uint32_t *)pvECX),
640 "=d" (*(uint32_t *)pvEDX)
641 : "0" (uOperator),
642 "2" (uIdxECX));
643# endif
644
645# elif RT_INLINE_ASM_USES_INTRIN
646 int aInfo[4];
647 /** @todo The __cpuid intrinsic takes no ECX index, so uIdxECX is ignored here; newer compilers provide __cpuidex for this. */
648 __cpuid(aInfo, uOperator);
649 *(uint32_t *)pvEAX = aInfo[0];
650 *(uint32_t *)pvEBX = aInfo[1];
651 *(uint32_t *)pvECX = aInfo[2];
652 *(uint32_t *)pvEDX = aInfo[3];
653
654# else
655 uint32_t uEAX;
656 uint32_t uEBX;
657 uint32_t uECX;
658 uint32_t uEDX;
659 __asm
660 {
661 push ebx
662 mov eax, [uOperator]
663 mov ecx, [uIdxECX]
664 cpuid
665 mov [uEAX], eax
666 mov [uEBX], ebx
667 mov [uECX], ecx
668 mov [uEDX], edx
669 pop ebx
670 }
671 *(uint32_t *)pvEAX = uEAX;
672 *(uint32_t *)pvEBX = uEBX;
673 *(uint32_t *)pvECX = uECX;
674 *(uint32_t *)pvEDX = uEDX;
675# endif
676}
677#endif
678
679
680/**
681 * Performs the cpuid instruction returning ecx and edx.
682 *
683 * @param uOperator CPUID operation (eax).
684 * @param pvECX Where to store ecx.
685 * @param pvEDX Where to store edx.
686 * @remark We're using void pointers to ease the use of special bitfield structures and such.
687 */
688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
689DECLASM(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX);
690#else
691DECLINLINE(void) ASMCpuId_ECX_EDX(uint32_t uOperator, void *pvECX, void *pvEDX)
692{
693 uint32_t uEBX;
694 ASMCpuId(uOperator, &uOperator, &uEBX, pvECX, pvEDX);
695}
696#endif
697
698
699/**
700 * Performs the cpuid instruction returning edx.
701 *
702 * @param uOperator CPUID operation (eax).
703 * @returns EDX after cpuid operation.
704 */
705#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
706DECLASM(uint32_t) ASMCpuId_EDX(uint32_t uOperator);
707#else
708DECLINLINE(uint32_t) ASMCpuId_EDX(uint32_t uOperator)
709{
710 RTCCUINTREG xDX;
711# if RT_INLINE_ASM_GNU_STYLE
712# ifdef RT_ARCH_AMD64
713 RTCCUINTREG uSpill;
714 __asm__ ("cpuid"
715 : "=a" (uSpill),
716 "=d" (xDX)
717 : "0" (uOperator)
718 : "rbx", "rcx");
719# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: PIC by default. */
720 __asm__ ("push %%ebx\n\t"
721 "cpuid\n\t"
722 "pop %%ebx\n\t"
723 : "=a" (uOperator),
724 "=d" (xDX)
725 : "0" (uOperator)
726 : "ecx");
727# else
728 __asm__ ("cpuid"
729 : "=a" (uOperator),
730 "=d" (xDX)
731 : "0" (uOperator)
732 : "ebx", "ecx");
733# endif
734
735# elif RT_INLINE_ASM_USES_INTRIN
736 int aInfo[4];
737 __cpuid(aInfo, uOperator);
738 xDX = aInfo[3];
739
740# else
741 __asm
742 {
743 push ebx
744 mov eax, [uOperator]
745 cpuid
746 mov [xDX], edx
747 pop ebx
748 }
749# endif
750 return (uint32_t)xDX;
751}
752#endif
753
754
755/**
756 * Performs the cpuid instruction returning ecx.
757 *
758 * @param uOperator CPUID operation (eax).
759 * @returns ECX after cpuid operation.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(uint32_t) ASMCpuId_ECX(uint32_t uOperator);
763#else
764DECLINLINE(uint32_t) ASMCpuId_ECX(uint32_t uOperator)
765{
766 RTCCUINTREG xCX;
767# if RT_INLINE_ASM_GNU_STYLE
768# ifdef RT_ARCH_AMD64
769 RTCCUINTREG uSpill;
770 __asm__ ("cpuid"
771 : "=a" (uSpill),
772 "=c" (xCX)
773 : "0" (uOperator)
774 : "rbx", "rdx");
775# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__) /* darwin: 4.0.1 compiler option / bug? */
776 __asm__ ("push %%ebx\n\t"
777 "cpuid\n\t"
778 "pop %%ebx\n\t"
779 : "=a" (uOperator),
780 "=c" (xCX)
781 : "0" (uOperator)
782 : "edx");
783# else
784 __asm__ ("cpuid"
785 : "=a" (uOperator),
786 "=c" (xCX)
787 : "0" (uOperator)
788 : "ebx", "edx");
789
790# endif
791
792# elif RT_INLINE_ASM_USES_INTRIN
793 int aInfo[4];
794 __cpuid(aInfo, uOperator);
795 xCX = aInfo[2];
796
797# else
798 __asm
799 {
800 push ebx
801 mov eax, [uOperator]
802 cpuid
803 mov [xCX], ecx
804 pop ebx
805 }
806# endif
807 return (uint32_t)xCX;
808}
809#endif
810
811
812/**
813 * Checks if the current CPU supports CPUID.
814 *
815 * @returns true if CPUID is supported.
816 */
817DECLINLINE(bool) ASMHasCpuId(void)
818{
819#ifdef RT_ARCH_AMD64
820 return true; /* ASSUME that all amd64 compatible CPUs have cpuid. */
821#else /* !RT_ARCH_AMD64 */
822 bool fRet = false;
823# if RT_INLINE_ASM_GNU_STYLE
824 uint32_t u1;
825 uint32_t u2;
826 __asm__ ("pushf\n\t"
827 "pop %1\n\t"
828 "mov %1, %2\n\t"
829 "xorl $0x200000, %1\n\t"
830 "push %1\n\t"
831 "popf\n\t"
832 "pushf\n\t"
833 "pop %1\n\t"
834 "cmpl %1, %2\n\t"
835 "setne %0\n\t"
836 "push %2\n\t"
837 "popf\n\t"
838 : "=m" (fRet), "=r" (u1), "=r" (u2));
839# else
840 __asm
841 {
842 pushfd
843 pop eax
844 mov ebx, eax
845 xor eax, 0200000h
846 push eax
847 popfd
848 pushfd
849 pop eax
850 cmp eax, ebx
851 setne fRet
852 push ebx
853 popfd
854 }
855# endif
856 return fRet;
857#endif /* !RT_ARCH_AMD64 */
858}
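
/*
 * Usage sketch (illustrative only, not part of the original header): combining
 * ASMHasCpuId() with ASMCpuId_EDX() to test a well known feature bit of
 * standard leaf 1 (EDX bit 26 = SSE2 per the Intel/AMD manuals). The helper
 * name is made up for this example.
 */
#if 0 /* example, not compiled */
static bool rtSampleHasSse2(void)
{
    if (!ASMHasCpuId())
        return false;
    return (ASMCpuId_EDX(1) & RT_BIT(26)) != 0;     /* CPUID.01h:EDX[26] = SSE2 */
}
#endif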
859
860
861/**
862 * Gets the APIC ID of the current CPU.
863 *
864 * @returns the APIC ID.
865 */
866#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
867DECLASM(uint8_t) ASMGetApicId(void);
868#else
869DECLINLINE(uint8_t) ASMGetApicId(void)
870{
871 RTCCUINTREG xBX;
872# if RT_INLINE_ASM_GNU_STYLE
873# ifdef RT_ARCH_AMD64
874 RTCCUINTREG uSpill;
875 __asm__ ("cpuid"
876 : "=a" (uSpill),
877 "=b" (xBX)
878 : "0" (1)
879 : "rcx", "rdx");
880# elif (defined(PIC) || defined(RT_OS_DARWIN)) && defined(__i386__)
881 RTCCUINTREG uSpill;
882 __asm__ ("mov %%ebx,%1\n\t"
883 "cpuid\n\t"
884 "xchgl %%ebx,%1\n\t"
885 : "=a" (uSpill),
886 "=r" (xBX)
887 : "0" (1)
888 : "ecx", "edx");
889# else
890 RTCCUINTREG uSpill;
891 __asm__ ("cpuid"
892 : "=a" (uSpill),
893 "=b" (xBX)
894 : "0" (1)
895 : "ecx", "edx");
896# endif
897
898# elif RT_INLINE_ASM_USES_INTRIN
899 int aInfo[4];
900 __cpuid(aInfo, 1);
901 xBX = aInfo[1];
902
903# else
904 __asm
905 {
906 push ebx
907 mov eax, 1
908 cpuid
909 mov [xBX], ebx
910 pop ebx
911 }
912# endif
913 return (uint8_t)(xBX >> 24);
914}
915#endif
916
917/**
918 * Get cr0.
919 * @returns cr0.
920 */
921#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
922DECLASM(RTCCUINTREG) ASMGetCR0(void);
923#else
924DECLINLINE(RTCCUINTREG) ASMGetCR0(void)
925{
926 RTCCUINTREG uCR0;
927# if RT_INLINE_ASM_USES_INTRIN
928 uCR0 = __readcr0();
929
930# elif RT_INLINE_ASM_GNU_STYLE
931# ifdef RT_ARCH_AMD64
932 __asm__ ("movq %%cr0, %0\t\n" : "=r" (uCR0));
933# else
934 __asm__ ("movl %%cr0, %0\t\n" : "=r" (uCR0));
935# endif
936# else
937 __asm
938 {
939# ifdef RT_ARCH_AMD64
940 mov rax, cr0
941 mov [uCR0], rax
942# else
943 mov eax, cr0
944 mov [uCR0], eax
945# endif
946 }
947# endif
948 return uCR0;
949}
950#endif
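
/*
 * Usage sketch (illustrative only, not part of the original header): reading
 * CR0 with ASMGetCR0() and checking the protection (PE, bit 0) and paging
 * (PG, bit 31) bits. Only meaningful in a privileged (ring-0 / raw-mode)
 * context; the helper name is made up for this example.
 */
#if 0 /* example, not compiled */
static bool rtSampleIsPagingEnabled(void)
{
    RTCCUINTREG const uCr0 = ASMGetCR0();
    return (uCr0 & RT_BIT(31))  /* CR0.PG */
        && (uCr0 & RT_BIT(0));  /* CR0.PE */
}
#endif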
951
952
953/**
954 * Sets the CR0 register.
955 * @param uCR0 The new CR0 value.
956 */
957#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
958DECLASM(void) ASMSetCR0(RTCCUINTREG uCR0);
959#else
960DECLINLINE(void) ASMSetCR0(RTCCUINTREG uCR0)
961{
962# if RT_INLINE_ASM_USES_INTRIN
963 __writecr0(uCR0);
964
965# elif RT_INLINE_ASM_GNU_STYLE
966# ifdef RT_ARCH_AMD64
967 __asm__ __volatile__("movq %0, %%cr0\n\t" :: "r" (uCR0));
968# else
969 __asm__ __volatile__("movl %0, %%cr0\n\t" :: "r" (uCR0));
970# endif
971# else
972 __asm
973 {
974# ifdef RT_ARCH_AMD64
975 mov rax, [uCR0]
976 mov cr0, rax
977# else
978 mov eax, [uCR0]
979 mov cr0, eax
980# endif
981 }
982# endif
983}
984#endif
985
986
987/**
988 * Get cr2.
989 * @returns cr2.
990 */
991#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
992DECLASM(RTCCUINTREG) ASMGetCR2(void);
993#else
994DECLINLINE(RTCCUINTREG) ASMGetCR2(void)
995{
996 RTCCUINTREG uCR2;
997# if RT_INLINE_ASM_USES_INTRIN
998 uCR2 = __readcr2();
999
1000# elif RT_INLINE_ASM_GNU_STYLE
1001# ifdef RT_ARCH_AMD64
1002 __asm__ ("movq %%cr2, %0\t\n" : "=r" (uCR2));
1003# else
1004 __asm__ ("movl %%cr2, %0\t\n" : "=r" (uCR2));
1005# endif
1006# else
1007 __asm
1008 {
1009# ifdef RT_ARCH_AMD64
1010 mov rax, cr2
1011 mov [uCR2], rax
1012# else
1013 mov eax, cr2
1014 mov [uCR2], eax
1015# endif
1016 }
1017# endif
1018 return uCR2;
1019}
1020#endif
1021
1022
1023/**
1024 * Sets the CR2 register.
1025 * @param uCR2 The new CR2 value.
1026 */
1027#if RT_INLINE_ASM_EXTERNAL
1028DECLASM(void) ASMSetCR2(RTCCUINTREG uCR2);
1029#else
1030DECLINLINE(void) ASMSetCR2(RTCCUINTREG uCR2)
1031{
1032# if RT_INLINE_ASM_GNU_STYLE
1033# ifdef RT_ARCH_AMD64
1034 __asm__ __volatile__("movq %0, %%cr2\n\t" :: "r" (uCR2));
1035# else
1036 __asm__ __volatile__("movl %0, %%cr2\n\t" :: "r" (uCR2));
1037# endif
1038# else
1039 __asm
1040 {
1041# ifdef RT_ARCH_AMD64
1042 mov rax, [uCR2]
1043 mov cr2, rax
1044# else
1045 mov eax, [uCR2]
1046 mov cr2, eax
1047# endif
1048 }
1049# endif
1050}
1051#endif
1052
1053
1054/**
1055 * Get cr3.
1056 * @returns cr3.
1057 */
1058#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1059DECLASM(RTCCUINTREG) ASMGetCR3(void);
1060#else
1061DECLINLINE(RTCCUINTREG) ASMGetCR3(void)
1062{
1063 RTCCUINTREG uCR3;
1064# if RT_INLINE_ASM_USES_INTRIN
1065 uCR3 = __readcr3();
1066
1067# elif RT_INLINE_ASM_GNU_STYLE
1068# ifdef RT_ARCH_AMD64
1069 __asm__ ("movq %%cr3, %0\t\n" : "=r" (uCR3));
1070# else
1071 __asm__ ("movl %%cr3, %0\t\n" : "=r" (uCR3));
1072# endif
1073# else
1074 __asm
1075 {
1076# ifdef RT_ARCH_AMD64
1077 mov rax, cr3
1078 mov [uCR3], rax
1079# else
1080 mov eax, cr3
1081 mov [uCR3], eax
1082# endif
1083 }
1084# endif
1085 return uCR3;
1086}
1087#endif
1088
1089
1090/**
1091 * Sets the CR3 register.
1092 *
1093 * @param uCR3 New CR3 value.
1094 */
1095#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1096DECLASM(void) ASMSetCR3(RTCCUINTREG uCR3);
1097#else
1098DECLINLINE(void) ASMSetCR3(RTCCUINTREG uCR3)
1099{
1100# if RT_INLINE_ASM_USES_INTRIN
1101 __writecr3(uCR3);
1102
1103# elif RT_INLINE_ASM_GNU_STYLE
1104# ifdef RT_ARCH_AMD64
1105 __asm__ __volatile__ ("movq %0, %%cr3\n\t" : : "r" (uCR3));
1106# else
1107 __asm__ __volatile__ ("movl %0, %%cr3\n\t" : : "r" (uCR3));
1108# endif
1109# else
1110 __asm
1111 {
1112# ifdef RT_ARCH_AMD64
1113 mov rax, [uCR3]
1114 mov cr3, rax
1115# else
1116 mov eax, [uCR3]
1117 mov cr3, eax
1118# endif
1119 }
1120# endif
1121}
1122#endif
1123
1124
1125/**
1126 * Reloads the CR3 register.
1127 */
1128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1129DECLASM(void) ASMReloadCR3(void);
1130#else
1131DECLINLINE(void) ASMReloadCR3(void)
1132{
1133# if RT_INLINE_ASM_USES_INTRIN
1134 __writecr3(__readcr3());
1135
1136# elif RT_INLINE_ASM_GNU_STYLE
1137 RTCCUINTREG u;
1138# ifdef RT_ARCH_AMD64
1139 __asm__ __volatile__ ("movq %%cr3, %0\n\t"
1140 "movq %0, %%cr3\n\t"
1141 : "=r" (u));
1142# else
1143 __asm__ __volatile__ ("movl %%cr3, %0\n\t"
1144 "movl %0, %%cr3\n\t"
1145 : "=r" (u));
1146# endif
1147# else
1148 __asm
1149 {
1150# ifdef RT_ARCH_AMD64
1151 mov rax, cr3
1152 mov cr3, rax
1153# else
1154 mov eax, cr3
1155 mov cr3, eax
1156# endif
1157 }
1158# endif
1159}
1160#endif
1161
1162
1163/**
1164 * Get cr4.
1165 * @returns cr4.
1166 */
1167#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1168DECLASM(RTCCUINTREG) ASMGetCR4(void);
1169#else
1170DECLINLINE(RTCCUINTREG) ASMGetCR4(void)
1171{
1172 RTCCUINTREG uCR4;
1173# if RT_INLINE_ASM_USES_INTRIN
1174 uCR4 = __readcr4();
1175
1176# elif RT_INLINE_ASM_GNU_STYLE
1177# ifdef RT_ARCH_AMD64
1178 __asm__ ("movq %%cr4, %0\t\n" : "=r" (uCR4));
1179# else
1180 __asm__ ("movl %%cr4, %0\t\n" : "=r" (uCR4));
1181# endif
1182# else
1183 __asm
1184 {
1185# ifdef RT_ARCH_AMD64
1186 mov rax, cr4
1187 mov [uCR4], rax
1188# else
1189 push eax /* just in case */
1190 /*mov eax, cr4*/
1191 _emit 0x0f
1192 _emit 0x20
1193 _emit 0xe0
1194 mov [uCR4], eax
1195 pop eax
1196# endif
1197 }
1198# endif
1199 return uCR4;
1200}
1201#endif
1202
1203
1204/**
1205 * Sets the CR4 register.
1206 *
1207 * @param uCR4 New CR4 value.
1208 */
1209#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1210DECLASM(void) ASMSetCR4(RTCCUINTREG uCR4);
1211#else
1212DECLINLINE(void) ASMSetCR4(RTCCUINTREG uCR4)
1213{
1214# if RT_INLINE_ASM_USES_INTRIN
1215 __writecr4(uCR4);
1216
1217# elif RT_INLINE_ASM_GNU_STYLE
1218# ifdef RT_ARCH_AMD64
1219 __asm__ __volatile__ ("movq %0, %%cr4\n\t" : : "r" (uCR4));
1220# else
1221 __asm__ __volatile__ ("movl %0, %%cr4\n\t" : : "r" (uCR4));
1222# endif
1223# else
1224 __asm
1225 {
1226# ifdef RT_ARCH_AMD64
1227 mov rax, [uCR4]
1228 mov cr4, rax
1229# else
1230 mov eax, [uCR4]
1231 _emit 0x0F
1232 _emit 0x22
1233 _emit 0xE0 /* mov cr4, eax */
1234# endif
1235 }
1236# endif
1237}
1238#endif
1239
1240
1241/**
1242 * Get cr8.
1243 * @returns cr8.
1244 * @remark The lock prefix hack for access from non-64-bit modes is NOT used and 0 is returned.
1245 */
1246#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1247DECLASM(RTCCUINTREG) ASMGetCR8(void);
1248#else
1249DECLINLINE(RTCCUINTREG) ASMGetCR8(void)
1250{
1251# ifdef RT_ARCH_AMD64
1252 RTCCUINTREG uCR8;
1253# if RT_INLINE_ASM_USES_INTRIN
1254 uCR8 = __readcr8();
1255
1256# elif RT_INLINE_ASM_GNU_STYLE
1257 __asm__ ("movq %%cr8, %0\t\n" : "=r" (uCR8));
1258# else
1259 __asm
1260 {
1261 mov rax, cr8
1262 mov [uCR8], rax
1263 }
1264# endif
1265 return uCR8;
1266# else /* !RT_ARCH_AMD64 */
1267 return 0;
1268# endif /* !RT_ARCH_AMD64 */
1269}
1270#endif
1271
1272
1273/**
1274 * Enables interrupts (EFLAGS.IF).
1275 */
1276#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1277DECLASM(void) ASMIntEnable(void);
1278#else
1279DECLINLINE(void) ASMIntEnable(void)
1280{
1281# if RT_INLINE_ASM_GNU_STYLE
1282 __asm("sti\n");
1283# elif RT_INLINE_ASM_USES_INTRIN
1284 _enable();
1285# else
1286 __asm sti
1287# endif
1288}
1289#endif
1290
1291
1292/**
1293 * Disables interrupts (!EFLAGS.IF).
1294 */
1295#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1296DECLASM(void) ASMIntDisable(void);
1297#else
1298DECLINLINE(void) ASMIntDisable(void)
1299{
1300# if RT_INLINE_ASM_GNU_STYLE
1301 __asm("cli\n");
1302# elif RT_INLINE_ASM_USES_INTRIN
1303 _disable();
1304# else
1305 __asm cli
1306# endif
1307}
1308#endif
1309
1310
1311/**
1312 * Disables interrupts and returns previous xFLAGS.
1313 */
1314#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1315DECLASM(RTCCUINTREG) ASMIntDisableFlags(void);
1316#else
1317DECLINLINE(RTCCUINTREG) ASMIntDisableFlags(void)
1318{
1319 RTCCUINTREG xFlags;
1320# if RT_INLINE_ASM_GNU_STYLE
1321# ifdef RT_ARCH_AMD64
1322 __asm__ __volatile__("pushfq\n\t"
1323 "cli\n\t"
1324 "popq %0\n\t"
1325 : "=rm" (xFlags));
1326# else
1327 __asm__ __volatile__("pushfl\n\t"
1328 "cli\n\t"
1329 "popl %0\n\t"
1330 : "=rm" (xFlags));
1331# endif
1332# elif RT_INLINE_ASM_USES_INTRIN && !defined(RT_ARCH_X86)
1333 xFlags = ASMGetFlags();
1334 _disable();
1335# else
1336 __asm {
1337 pushfd
1338 cli
1339 pop [xFlags]
1340 }
1341# endif
1342 return xFlags;
1343}
1344#endif
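
/*
 * Usage sketch (illustrative only, not part of the original header): the usual
 * save / disable / restore pattern built from ASMIntDisableFlags() and
 * ASMSetFlags(), protecting a short critical section from interrupts on the
 * current CPU. The helper name and the protected work are made up.
 */
#if 0 /* example, not compiled */
static void rtSampleBumpCounter(volatile uint32_t *pu32Counter)
{
    RTCCUINTREG const fSavedFlags = ASMIntDisableFlags();  /* cli, returns previous [RE]FLAGS */
    *pu32Counter += 1;                                     /* short critical section */
    ASMSetFlags(fSavedFlags);                              /* restores the previous IF state */
}
#endif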
1345
1346
1347/**
1348 * Reads a machine specific register.
1349 *
1350 * @returns Register content.
1351 * @param uRegister Register to read.
1352 */
1353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1354DECLASM(uint64_t) ASMRdMsr(uint32_t uRegister);
1355#else
1356DECLINLINE(uint64_t) ASMRdMsr(uint32_t uRegister)
1357{
1358 RTUINT64U u;
1359# if RT_INLINE_ASM_GNU_STYLE
1360 __asm__ ("rdmsr\n\t"
1361 : "=a" (u.s.Lo),
1362 "=d" (u.s.Hi)
1363 : "c" (uRegister));
1364
1365# elif RT_INLINE_ASM_USES_INTRIN
1366 u.u = __readmsr(uRegister);
1367
1368# else
1369 __asm
1370 {
1371 mov ecx, [uRegister]
1372 rdmsr
1373 mov [u.s.Lo], eax
1374 mov [u.s.Hi], edx
1375 }
1376# endif
1377
1378 return u.u;
1379}
1380#endif
1381
1382
1383/**
1384 * Writes a machine specific register.
1385 *
1387 * @param uRegister Register to write to.
1388 * @param u64Val Value to write.
1389 */
1390#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1391DECLASM(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val);
1392#else
1393DECLINLINE(void) ASMWrMsr(uint32_t uRegister, uint64_t u64Val)
1394{
1395 RTUINT64U u;
1396
1397 u.u = u64Val;
1398# if RT_INLINE_ASM_GNU_STYLE
1399 __asm__ __volatile__("wrmsr\n\t"
1400 ::"a" (u.s.Lo),
1401 "d" (u.s.Hi),
1402 "c" (uRegister));
1403
1404# elif RT_INLINE_ASM_USES_INTRIN
1405 __writemsr(uRegister, u.u);
1406
1407# else
1408 __asm
1409 {
1410 mov ecx, [uRegister]
1411 mov edx, [u.s.Hi]
1412 mov eax, [u.s.Lo]
1413 wrmsr
1414 }
1415# endif
1416}
1417#endif
1418
1419
1420/**
1421 * Reads low part of a machine specific register.
1422 *
1423 * @returns Register content.
1424 * @param uRegister Register to read.
1425 */
1426#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1427DECLASM(uint32_t) ASMRdMsr_Low(uint32_t uRegister);
1428#else
1429DECLINLINE(uint32_t) ASMRdMsr_Low(uint32_t uRegister)
1430{
1431 uint32_t u32;
1432# if RT_INLINE_ASM_GNU_STYLE
1433 __asm__ ("rdmsr\n\t"
1434 : "=a" (u32)
1435 : "c" (uRegister)
1436 : "edx");
1437
1438# elif RT_INLINE_ASM_USES_INTRIN
1439 u32 = (uint32_t)__readmsr(uRegister);
1440
1441# else
1442 __asm
1443 {
1444 mov ecx, [uRegister]
1445 rdmsr
1446 mov [u32], eax
1447 }
1448# endif
1449
1450 return u32;
1451}
1452#endif
1453
1454
1455/**
1456 * Reads high part of a machine specific register.
1457 *
1458 * @returns Register content.
1459 * @param uRegister Register to read.
1460 */
1461#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1462DECLASM(uint32_t) ASMRdMsr_High(uint32_t uRegister);
1463#else
1464DECLINLINE(uint32_t) ASMRdMsr_High(uint32_t uRegister)
1465{
1466 uint32_t u32;
1467# if RT_INLINE_ASM_GNU_STYLE
1468 __asm__ ("rdmsr\n\t"
1469 : "=d" (u32)
1470 : "c" (uRegister)
1471 : "eax");
1472
1473# elif RT_INLINE_ASM_USES_INTRIN
1474 u32 = (uint32_t)(__readmsr(uRegister) >> 32);
1475
1476# else
1477 __asm
1478 {
1479 mov ecx, [uRegister]
1480 rdmsr
1481 mov [u32], edx
1482 }
1483# endif
1484
1485 return u32;
1486}
1487#endif
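
/*
 * Usage sketch (illustrative only, not part of the original header): a ring-0
 * read of the IA32_APIC_BASE MSR (0x1b) with ASMRdMsr(), extracting the global
 * enable bit (bit 11) and the page aligned base address (bit layout per the
 * Intel/AMD manuals). The helper name is made up for this example.
 */
#if 0 /* example, not compiled */
static uint64_t rtSampleGetApicBase(bool *pfEnabled)
{
    uint64_t const uApicBase = ASMRdMsr(0x1b /* IA32_APIC_BASE */);
    *pfEnabled = (uApicBase & RT_BIT(11)) != 0;    /* APIC global enable */
    return uApicBase & ~(uint64_t)0xfff;           /* page aligned base (reserved high bits not masked here) */
}
#endif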
1488
1489
1490/**
1491 * Gets dr7.
1492 *
1493 * @returns dr7.
1494 */
1495#if RT_INLINE_ASM_EXTERNAL
1496DECLASM(RTCCUINTREG) ASMGetDR7(void);
1497#else
1498DECLINLINE(RTCCUINTREG) ASMGetDR7(void)
1499{
1500 RTCCUINTREG uDR7;
1501# if RT_INLINE_ASM_GNU_STYLE
1502# ifdef RT_ARCH_AMD64
1503 __asm__ ("movq %%dr7, %0\n\t" : "=r" (uDR7));
1504# else
1505 __asm__ ("movl %%dr7, %0\n\t" : "=r" (uDR7));
1506# endif
1507# else
1508 __asm
1509 {
1510# ifdef RT_ARCH_AMD64
1511 mov rax, dr7
1512 mov [uDR7], rax
1513# else
1514 mov eax, dr7
1515 mov [uDR7], eax
1516# endif
1517 }
1518# endif
1519 return uDR7;
1520}
1521#endif
1522
1523
1524/**
1525 * Gets dr6.
1526 *
1527 * @returns dr6.
1528 */
1529#if RT_INLINE_ASM_EXTERNAL
1530DECLASM(RTCCUINTREG) ASMGetDR6(void);
1531#else
1532DECLINLINE(RTCCUINTREG) ASMGetDR6(void)
1533{
1534 RTCCUINTREG uDR6;
1535# if RT_INLINE_ASM_GNU_STYLE
1536# ifdef RT_ARCH_AMD64
1537 __asm__ ("movq %%dr6, %0\n\t" : "=r" (uDR6));
1538# else
1539 __asm__ ("movl %%dr6, %0\n\t" : "=r" (uDR6));
1540# endif
1541# else
1542 __asm
1543 {
1544# ifdef RT_ARCH_AMD64
1545 mov rax, dr6
1546 mov [uDR6], rax
1547# else
1548 mov eax, dr6
1549 mov [uDR6], eax
1550# endif
1551 }
1552# endif
1553 return uDR6;
1554}
1555#endif
1556
1557
1558/**
1559 * Reads and clears DR6.
1560 *
1561 * @returns DR6.
1562 */
1563#if RT_INLINE_ASM_EXTERNAL
1564DECLASM(RTCCUINTREG) ASMGetAndClearDR6(void);
1565#else
1566DECLINLINE(RTCCUINTREG) ASMGetAndClearDR6(void)
1567{
1568 RTCCUINTREG uDR6;
1569# if RT_INLINE_ASM_GNU_STYLE
1570 RTCCUINTREG uNewValue = 0xffff0ff0; /* Bits 4-11 and 16-31 are 1's, bits 0-3, 12-15 and 32-63 are zero. */
1571# ifdef RT_ARCH_AMD64
1572 __asm__ ("movq %%dr6, %0\n\t"
1573 "movq %1, %%dr6\n\t"
1574 : "=r" (uDR6)
1575 : "r" (uNewValue));
1576# else
1577 __asm__ ("movl %%dr6, %0\n\t"
1578 "movl %1, %%dr6\n\t"
1579 : "=r" (uDR6)
1580 : "r" (uNewValue));
1581# endif
1582# else
1583 __asm
1584 {
1585# ifdef RT_ARCH_AMD64
1586 mov rax, dr6
1587 mov [uDR6], rax
1588 mov rcx, rax
1589 mov ecx, 0ffff0ff0h; /* Bits 4-11 and 16-31 are 1's, bits 0-3, 12-15 and 32-63 are zero. */
1590 mov dr6, rcx
1591# else
1592 mov eax, dr6
1593 mov [uDR6], eax
1594 mov ecx, 0ffff0ff0h; /* Bits 4-11 and 16-31 are 1's, bits 0-3 and 12-15 are zero. */
1595 mov dr6, ecx
1596# endif
1597 }
1598# endif
1599 return uDR6;
1600}
1601#endif
1602
1603
1604/**
1605 * Compiler memory barrier.
1606 *
1607 * Ensure that the compiler does not use any cached (register/tmp stack) memory
1608 * values or any outstanding writes when returning from this function.
1609 *
1610 * This function must be used if non-volatile data is modified by a
1611 * device or the VMM. Typical cases are port access, MMIO access,
1612 * trapping instructions, etc.
1613 */
1614#if RT_INLINE_ASM_GNU_STYLE
1615# define ASMCompilerBarrier() do { __asm__ __volatile__ ("" : : : "memory"); } while (0)
1616#elif RT_INLINE_ASM_USES_INTRIN
1617# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
1618#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
1619DECLINLINE(void) ASMCompilerBarrier(void)
1620{
1621 __asm
1622 {
1623 }
1624}
1625#endif
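
/*
 * Usage sketch (illustrative only, not part of the original header): why
 * ASMCompilerBarrier() matters when polling non-volatile memory that a device,
 * the VMM or an interrupt handler may change - without the barrier the
 * compiler could keep *pfDone cached in a register and spin forever. A real
 * wait loop would also pause or yield; names are made up for this example.
 */
#if 0 /* example, not compiled */
static void rtSampleWaitForFlag(uint32_t *pfDone)
{
    while (!*pfDone)
        ASMCompilerBarrier();   /* forces the compiler to reload *pfDone on each iteration */
}
#endif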
1626
1627
1628/**
1629 * Writes an 8-bit unsigned integer to an I/O port, ordered.
1630 *
1631 * @param Port I/O port to write to.
1632 * @param u8 8-bit integer to write.
1633 */
1634#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1635DECLASM(void) ASMOutU8(RTIOPORT Port, uint8_t u8);
1636#else
1637DECLINLINE(void) ASMOutU8(RTIOPORT Port, uint8_t u8)
1638{
1639# if RT_INLINE_ASM_GNU_STYLE
1640 __asm__ __volatile__("outb %b1, %w0\n\t"
1641 :: "Nd" (Port),
1642 "a" (u8));
1643
1644# elif RT_INLINE_ASM_USES_INTRIN
1645 __outbyte(Port, u8);
1646
1647# else
1648 __asm
1649 {
1650 mov dx, [Port]
1651 mov al, [u8]
1652 out dx, al
1653 }
1654# endif
1655}
1656#endif
1657
1658
1659/**
1660 * Gets an 8-bit unsigned integer from an I/O port, ordered.
1661 *
1662 * @returns 8-bit integer.
1663 * @param Port I/O port to read from.
1664 */
1665#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1666DECLASM(uint8_t) ASMInU8(RTIOPORT Port);
1667#else
1668DECLINLINE(uint8_t) ASMInU8(RTIOPORT Port)
1669{
1670 uint8_t u8;
1671# if RT_INLINE_ASM_GNU_STYLE
1672 __asm__ __volatile__("inb %w1, %b0\n\t"
1673 : "=a" (u8)
1674 : "Nd" (Port));
1675
1676# elif RT_INLINE_ASM_USES_INTRIN
1677 u8 = __inbyte(Port);
1678
1679# else
1680 __asm
1681 {
1682 mov dx, [Port]
1683 in al, dx
1684 mov [u8], al
1685 }
1686# endif
1687 return u8;
1688}
1689#endif
1690
1691
1692/**
1693 * Writes a 16-bit unsigned integer to an I/O port, ordered.
1694 *
1695 * @param Port I/O port to write to.
1696 * @param u16 16-bit integer to write.
1697 */
1698#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1699DECLASM(void) ASMOutU16(RTIOPORT Port, uint16_t u16);
1700#else
1701DECLINLINE(void) ASMOutU16(RTIOPORT Port, uint16_t u16)
1702{
1703# if RT_INLINE_ASM_GNU_STYLE
1704 __asm__ __volatile__("outw %w1, %w0\n\t"
1705 :: "Nd" (Port),
1706 "a" (u16));
1707
1708# elif RT_INLINE_ASM_USES_INTRIN
1709 __outword(Port, u16);
1710
1711# else
1712 __asm
1713 {
1714 mov dx, [Port]
1715 mov ax, [u16]
1716 out dx, ax
1717 }
1718# endif
1719}
1720#endif
1721
1722
1723/**
1724 * Gets a 16-bit unsigned integer from an I/O port, ordered.
1725 *
1726 * @returns 16-bit integer.
1727 * @param Port I/O port to read from.
1728 */
1729#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1730DECLASM(uint16_t) ASMInU16(RTIOPORT Port);
1731#else
1732DECLINLINE(uint16_t) ASMInU16(RTIOPORT Port)
1733{
1734 uint16_t u16;
1735# if RT_INLINE_ASM_GNU_STYLE
1736 __asm__ __volatile__("inw %w1, %w0\n\t"
1737 : "=a" (u16)
1738 : "Nd" (Port));
1739
1740# elif RT_INLINE_ASM_USES_INTRIN
1741 u16 = __inword(Port);
1742
1743# else
1744 __asm
1745 {
1746 mov dx, [Port]
1747 in ax, dx
1748 mov [u16], ax
1749 }
1750# endif
1751 return u16;
1752}
1753#endif
1754
1755
1756/**
1757 * Writes a 32-bit unsigned integer to an I/O port, ordered.
1758 *
1759 * @param Port I/O port to write to.
1760 * @param u32 32-bit integer to write.
1761 */
1762#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1763DECLASM(void) ASMOutU32(RTIOPORT Port, uint32_t u32);
1764#else
1765DECLINLINE(void) ASMOutU32(RTIOPORT Port, uint32_t u32)
1766{
1767# if RT_INLINE_ASM_GNU_STYLE
1768 __asm__ __volatile__("outl %1, %w0\n\t"
1769 :: "Nd" (Port),
1770 "a" (u32));
1771
1772# elif RT_INLINE_ASM_USES_INTRIN
1773 __outdword(Port, u32);
1774
1775# else
1776 __asm
1777 {
1778 mov dx, [Port]
1779 mov eax, [u32]
1780 out dx, eax
1781 }
1782# endif
1783}
1784#endif
1785
1786
1787/**
1788 * Gets a 32-bit unsigned integer from an I/O port, ordered.
1789 *
1790 * @returns 32-bit integer.
1791 * @param Port I/O port to read from.
1792 */
1793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1794DECLASM(uint32_t) ASMInU32(RTIOPORT Port);
1795#else
1796DECLINLINE(uint32_t) ASMInU32(RTIOPORT Port)
1797{
1798 uint32_t u32;
1799# if RT_INLINE_ASM_GNU_STYLE
1800 __asm__ __volatile__("inl %w1, %0\n\t"
1801 : "=a" (u32)
1802 : "Nd" (Port));
1803
1804# elif RT_INLINE_ASM_USES_INTRIN
1805 u32 = __indword(Port);
1806
1807# else
1808 __asm
1809 {
1810 mov dx, [Port]
1811 in eax, dx
1812 mov [u32], eax
1813 }
1814# endif
1815 return u32;
1816}
1817#endif
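
/*
 * Usage sketch (illustrative only, not part of the original header): the
 * classic CMOS/RTC access sequence using ASMOutU8()/ASMInU8() on index port
 * 0x70 and data port 0x71 (ring-0 only; note that bit 7 of the index port also
 * gates NMI on PC hardware). The helper name is made up for this example.
 */
#if 0 /* example, not compiled */
static uint8_t rtSampleReadCmosReg(uint8_t bReg)
{
    ASMOutU8(0x70, bReg);       /* select the CMOS register */
    return ASMInU8(0x71);       /* read its current value */
}
#endif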
1818
1819/** @todo string i/o */
1820
1821
1822/**
1823 * Atomically Exchange an unsigned 8-bit value, ordered.
1824 *
1825 * @returns Current *pu8 value
1826 * @param pu8 Pointer to the 8-bit variable to update.
1827 * @param u8 The 8-bit value to assign to *pu8.
1828 */
1829#if RT_INLINE_ASM_EXTERNAL
1830DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
1831#else
1832DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
1833{
1834# if RT_INLINE_ASM_GNU_STYLE
1835 __asm__ __volatile__("xchgb %0, %1\n\t"
1836 : "=m" (*pu8),
1837 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
1838 : "1" (u8));
1839# else
1840 __asm
1841 {
1842# ifdef RT_ARCH_AMD64
1843 mov rdx, [pu8]
1844 mov al, [u8]
1845 xchg [rdx], al
1846 mov [u8], al
1847# else
1848 mov edx, [pu8]
1849 mov al, [u8]
1850 xchg [edx], al
1851 mov [u8], al
1852# endif
1853 }
1854# endif
1855 return u8;
1856}
1857#endif
1858
1859
1860/**
1861 * Atomically Exchange a signed 8-bit value, ordered.
1862 *
1863 * @returns Current *pi8 value
1864 * @param pi8 Pointer to the 8-bit variable to update.
1865 * @param i8 The 8-bit value to assign to *pi8.
1866 */
1867DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
1868{
1869 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
1870}
1871
1872
1873/**
1874 * Atomically Exchange a bool value, ordered.
1875 *
1876 * @returns Current *pf value
1877 * @param pf Pointer to the boolean variable to update.
1878 * @param f The boolean value to assign to *pf.
1879 */
1880DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
1881{
1882#ifdef _MSC_VER
1883 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1884#else
1885 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
1886#endif
1887}
1888
1889
1890/**
1891 * Atomically Exchange an unsigned 16-bit value, ordered.
1892 *
1893 * @returns Current *pu16 value
1894 * @param pu16 Pointer to the 16-bit variable to update.
1895 * @param u16 The 16-bit value to assign to *pu16.
1896 */
1897#if RT_INLINE_ASM_EXTERNAL
1898DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
1899#else
1900DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
1901{
1902# if RT_INLINE_ASM_GNU_STYLE
1903 __asm__ __volatile__("xchgw %0, %1\n\t"
1904 : "=m" (*pu16),
1905 "=r" (u16)
1906 : "1" (u16));
1907# else
1908 __asm
1909 {
1910# ifdef RT_ARCH_AMD64
1911 mov rdx, [pu16]
1912 mov ax, [u16]
1913 xchg [rdx], ax
1914 mov [u16], ax
1915# else
1916 mov edx, [pu16]
1917 mov ax, [u16]
1918 xchg [edx], ax
1919 mov [u16], ax
1920# endif
1921 }
1922# endif
1923 return u16;
1924}
1925#endif
1926
1927
1928/**
1929 * Atomically Exchange a signed 16-bit value, ordered.
1930 *
1931 * @returns Current *pi16 value
1932 * @param pi16 Pointer to the 16-bit variable to update.
1933 * @param i16 The 16-bit value to assign to *pi16.
1934 */
1935DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
1936{
1937 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
1938}
1939
1940
1941/**
1942 * Atomically Exchange an unsigned 32-bit value, ordered.
1943 *
1944 * @returns Current *pu32 value
1945 * @param pu32 Pointer to the 32-bit variable to update.
1946 * @param u32 The 32-bit value to assign to *pu32.
1947 */
1948#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1949DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
1950#else
1951DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
1952{
1953# if RT_INLINE_ASM_GNU_STYLE
1954 __asm__ __volatile__("xchgl %0, %1\n\t"
1955 : "=m" (*pu32),
1956 "=r" (u32)
1957 : "1" (u32));
1958
1959# elif RT_INLINE_ASM_USES_INTRIN
1960 u32 = _InterlockedExchange((long *)pu32, u32);
1961
1962# else
1963 __asm
1964 {
1965# ifdef RT_ARCH_AMD64
1966 mov rdx, [pu32]
1967 mov eax, u32
1968 xchg [rdx], eax
1969 mov [u32], eax
1970# else
1971 mov edx, [pu32]
1972 mov eax, u32
1973 xchg [edx], eax
1974 mov [u32], eax
1975# endif
1976 }
1977# endif
1978 return u32;
1979}
1980#endif
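
/*
 * Usage sketch (illustrative only, not part of the original header): a minimal
 * test-and-set spinlock built on ASMAtomicXchgU32(), relying on the exchange
 * returning the previous value and being fully ordered. A real lock would add
 * a pause hint and back-off; the names are made up for this example.
 */
#if 0 /* example, not compiled */
static void rtSampleSpinLock(volatile uint32_t *pu32Lock)
{
    while (ASMAtomicXchgU32(pu32Lock, 1) != 0)
        /* spin until the previous value was 0 (unlocked) */;
}

static void rtSampleSpinUnlock(volatile uint32_t *pu32Lock)
{
    ASMAtomicXchgU32(pu32Lock, 0);  /* ordered store releases the lock */
}
#endif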
1981
1982
1983/**
1984 * Atomically Exchange a signed 32-bit value, ordered.
1985 *
1986 * @returns Current *pi32 value
1987 * @param pi32 Pointer to the 32-bit variable to update.
1988 * @param i32 The 32-bit value to assign to *pi32.
1989 */
1990DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
1991{
1992 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
1993}
1994
1995
1996/**
1997 * Atomically Exchange an unsigned 64-bit value, ordered.
1998 *
1999 * @returns Current *pu64 value
2000 * @param pu64 Pointer to the 64-bit variable to update.
2001 * @param u64 The 64-bit value to assign to *pu64.
2002 */
2003#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2004DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
2005#else
2006DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
2007{
2008# if defined(RT_ARCH_AMD64)
2009# if RT_INLINE_ASM_USES_INTRIN
2010 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
2011
2012# elif RT_INLINE_ASM_GNU_STYLE
2013 __asm__ __volatile__("xchgq %0, %1\n\t"
2014 : "=m" (*pu64),
2015 "=r" (u64)
2016 : "1" (u64));
2017# else
2018 __asm
2019 {
2020 mov rdx, [pu64]
2021 mov rax, [u64]
2022 xchg [rdx], rax
2023 mov [u64], rax
2024 }
2025# endif
2026# else /* !RT_ARCH_AMD64 */
2027# if RT_INLINE_ASM_GNU_STYLE
2028# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2029 uint32_t u32 = (uint32_t)u64;
2030 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
2031 "xchgl %%ebx, %3\n\t"
2032 "1:\n\t"
2033 "lock; cmpxchg8b (%5)\n\t"
2034 "jnz 1b\n\t"
2035 "xchgl %%ebx, %3\n\t"
2036 /*"xchgl %%esi, %5\n\t"*/
2037 : "=A" (u64),
2038 "=m" (*pu64)
2039 : "0" (*pu64),
2040 "m" ( u32 ),
2041 "c" ( (uint32_t)(u64 >> 32) ),
2042 "S" (pu64) );
2043# else /* !PIC */
2044 __asm__ __volatile__("1:\n\t"
2045 "lock; cmpxchg8b %1\n\t"
2046 "jnz 1b\n\t"
2047 : "=A" (u64),
2048 "=m" (*pu64)
2049 : "0" (*pu64),
2050 "b" ( (uint32_t)u64 ),
2051 "c" ( (uint32_t)(u64 >> 32) ));
2052# endif
2053# else
2054 __asm
2055 {
2056 mov ebx, dword ptr [u64]
2057 mov ecx, dword ptr [u64 + 4]
2058 mov edi, pu64
2059 mov eax, dword ptr [edi]
2060 mov edx, dword ptr [edi + 4]
2061 retry:
2062 lock cmpxchg8b [edi]
2063 jnz retry
2064 mov dword ptr [u64], eax
2065 mov dword ptr [u64 + 4], edx
2066 }
2067# endif
2068# endif /* !RT_ARCH_AMD64 */
2069 return u64;
2070}
2071#endif
2072
2073
2074/**
2075 * Atomically Exchange a signed 64-bit value, ordered.
2076 *
2077 * @returns Current *pi64 value
2078 * @param pi64 Pointer to the 64-bit variable to update.
2079 * @param i64 The 64-bit value to assign to *pi64.
2080 */
2081DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
2082{
2083 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
2084}
2085
2086
2087#ifdef RT_ARCH_AMD64
2088/**
2089 * Atomically Exchange an unsigned 128-bit value, ordered.
2090 *
2091 * @returns Current *pu128.
2092 * @param pu128 Pointer to the 128-bit variable to update.
2093 * @param u128 The 128-bit value to assign to *pu128.
2094 *
2095 * @remark We cannot really assume that any hardware supports this. Nor do I have
2096 * GAS support for it. So, for the time being we'll BREAK the atomic
2097 * bit of this function and use two 64-bit exchanges instead.
2098 */
2099# if 0 /* see remark RT_INLINE_ASM_EXTERNAL */
2100DECLASM(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128);
2101# else
2102DECLINLINE(uint128_t) ASMAtomicXchgU128(volatile uint128_t *pu128, uint128_t u128)
2103{
2104 if (true)/*ASMCpuId_ECX(1) & RT_BIT(13))*/
2105 {
2106 /** @todo this is clumsy code */
2107 RTUINT128U u128Ret;
2108 u128Ret.u = u128;
2109 u128Ret.s.Lo = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Lo, u128Ret.s.Lo);
2110 u128Ret.s.Hi = ASMAtomicXchgU64(&((PRTUINT128U)(uintptr_t)pu128)->s.Hi, u128Ret.s.Hi);
2111 return u128Ret.u;
2112 }
2113#if 0 /* later? */
2114 else
2115 {
2116# if RT_INLINE_ASM_GNU_STYLE
2117 __asm__ __volatile__("1:\n\t"
2118 "lock; cmpxchg8b %1\n\t"
2119 "jnz 1b\n\t"
2120 : "=A" (u128),
2121 "=m" (*pu128)
2122 : "0" (*pu128),
2123 "b" ( (uint64_t)u128 ),
2124 "c" ( (uint64_t)(u128 >> 64) ));
2125# else
2126 __asm
2127 {
2128 mov rbx, dword ptr [u128]
2129 mov rcx, dword ptr [u128 + 8]
2130 mov rdi, pu128
2131 mov rax, dword ptr [rdi]
2132 mov rdx, dword ptr [rdi + 8]
2133 retry:
2134 lock cmpxchg16b [rdi]
2135 jnz retry
2136 mov dword ptr [u128], rax
2137 mov dword ptr [u128 + 8], rdx
2138 }
2139# endif
2140 }
2141 return u128;
2142#endif
2143}
2144# endif
2145#endif /* RT_ARCH_AMD64 */
2146
2147
2148/**
2149 * Atomically Exchange a value whose size might differ
2150 * between platforms or compilers, ordered.
2151 *
2152 * @param pu Pointer to the variable to update.
2153 * @param uNew The value to assign to *pu.
2154 */
2155#define ASMAtomicXchgSize(pu, uNew) \
2156 do { \
2157 switch (sizeof(*(pu))) { \
2158 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
2159 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2160 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2161 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2162 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2163 } \
2164 } while (0)
2165
2166
2167/**
2168 * Atomically Exchange a pointer value, ordered.
2169 *
2170 * @returns Current *ppv value
2171 * @param ppv Pointer to the pointer variable to update.
2172 * @param pv The pointer value to assign to *ppv.
2173 */
2174DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, void *pv)
2175{
2176#if ARCH_BITS == 32
2177 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2178#elif ARCH_BITS == 64
2179 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2180#else
2181# error "ARCH_BITS is bogus"
2182#endif
2183}
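
/*
 * Usage sketch (illustrative only, not part of the original header): swapping
 * in a new buffer pointer with ASMAtomicXchgPtr() and handing the previous one
 * back to the caller, who remains responsible for freeing it safely. The
 * helper name is made up for this example.
 */
#if 0 /* example, not compiled */
static void *rtSampleReplaceBuffer(void * volatile *ppvBuf, void *pvNew)
{
    return ASMAtomicXchgPtr(ppvBuf, pvNew);     /* returns the old pointer atomically */
}
#endif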
2184
2185
2186/**
2187 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
2188 *
2189 * @returns true if xchg was done.
2190 * @returns false if xchg wasn't done.
2191 *
2192 * @param pu32 Pointer to the value to update.
2193 * @param u32New The new value to assign to *pu32.
2194 * @param u32Old The old value to compare *pu32 with.
2195 */
2196#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2197DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
2198#else
2199DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
2200{
2201# if RT_INLINE_ASM_GNU_STYLE
2202 uint8_t u8Ret;
2203 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2204 "setz %1\n\t"
2205 : "=m" (*pu32),
2206 "=qm" (u8Ret),
2207 "=a" (u32Old)
2208 : "r" (u32New),
2209 "2" (u32Old));
2210 return (bool)u8Ret;
2211
2212# elif RT_INLINE_ASM_USES_INTRIN
2213 return _InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
2214
2215# else
2216 uint32_t u32Ret;
2217 __asm
2218 {
2219# ifdef RT_ARCH_AMD64
2220 mov rdx, [pu32]
2221# else
2222 mov edx, [pu32]
2223# endif
2224 mov eax, [u32Old]
2225 mov ecx, [u32New]
2226# ifdef RT_ARCH_AMD64
2227 lock cmpxchg [rdx], ecx
2228# else
2229 lock cmpxchg [edx], ecx
2230# endif
2231 setz al
2232 movzx eax, al
2233 mov [u32Ret], eax
2234 }
2235 return !!u32Ret;
2236# endif
2237}
2238#endif
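
/*
 * Usage sketch (illustrative only, not part of the original header): the usual
 * compare-and-exchange retry loop, here incrementing a counter without letting
 * it pass a ceiling. ASMAtomicCmpXchgU32() only stores the new value when
 * *pu32 still holds the value the new one was computed from. Names are made up
 * for this example.
 */
#if 0 /* example, not compiled */
static bool rtSampleSaturatingInc(volatile uint32_t *pu32, uint32_t u32Max)
{
    for (;;)
    {
        uint32_t const u32Old = *pu32;
        if (u32Old >= u32Max)
            return false;                               /* already at the ceiling */
        if (ASMAtomicCmpXchgU32(pu32, u32Old + 1, u32Old))
            return true;                                /* our update won the race */
        /* someone else changed *pu32; recompute and retry */
    }
}
#endif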
2239
2240
2241/**
2242 * Atomically Compare and Exchange a signed 32-bit value, ordered.
2243 *
2244 * @returns true if xchg was done.
2245 * @returns false if xchg wasn't done.
2246 *
2247 * @param pi32 Pointer to the value to update.
2248 * @param i32New The new value to assign to *pi32.
2249 * @param i32Old The old value to compare *pi32 with.
2250 */
2251DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
2252{
2253 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
2254}
2255
2256
2257/**
2258 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
2259 *
2260 * @returns true if xchg was done.
2261 * @returns false if xchg wasn't done.
2262 *
2263 * @param pu64 Pointer to the 64-bit variable to update.
2264 * @param u64New The 64-bit value to assign to *pu64.
2265 * @param u64Old The value to compare with.
2266 */
2267#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2268DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
2269#else
2270DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, uint64_t u64Old)
2271{
2272# if RT_INLINE_ASM_USES_INTRIN
2273 return _InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
2274
2275# elif defined(RT_ARCH_AMD64)
2276# if RT_INLINE_ASM_GNU_STYLE
2277 uint8_t u8Ret;
2278 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2279 "setz %1\n\t"
2280 : "=m" (*pu64),
2281 "=qm" (u8Ret),
2282 "=a" (u64Old)
2283 : "r" (u64New),
2284 "2" (u64Old));
2285 return (bool)u8Ret;
2286# else
2287 bool fRet;
2288 __asm
2289 {
2290 mov rdx, [pu64]
2291 mov rax, [u64Old]
2292 mov rcx, [u64New]
2293 lock cmpxchg [rdx], rcx
2294 setz al
2295 mov [fRet], al
2296 }
2297 return fRet;
2298# endif
2299# else /* !RT_ARCH_AMD64 */
2300 uint32_t u32Ret;
2301# if RT_INLINE_ASM_GNU_STYLE
2302# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2303 uint32_t u32 = (uint32_t)u64New;
2304 uint32_t u32Spill;
2305 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
2306 "lock; cmpxchg8b (%6)\n\t"
2307 "setz %%al\n\t"
2308 "xchgl %%ebx, %4\n\t"
2309 "movzbl %%al, %%eax\n\t"
2310 : "=a" (u32Ret),
2311 "=d" (u32Spill),
2312 "=m" (*pu64)
2313 : "A" (u64Old),
2314 "m" ( u32 ),
2315 "c" ( (uint32_t)(u64New >> 32) ),
2316 "S" (pu64) );
2317# else /* !PIC */
2318 uint32_t u32Spill;
2319 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
2320 "setz %%al\n\t"
2321 "movzbl %%al, %%eax\n\t"
2322 : "=a" (u32Ret),
2323 "=d" (u32Spill),
2324 "=m" (*pu64)
2325 : "A" (u64Old),
2326 "b" ( (uint32_t)u64New ),
2327 "c" ( (uint32_t)(u64New >> 32) ));
2328# endif
2329 return (bool)u32Ret;
2330# else
2331 __asm
2332 {
2333 mov ebx, dword ptr [u64New]
2334 mov ecx, dword ptr [u64New + 4]
2335 mov edi, [pu64]
2336 mov eax, dword ptr [u64Old]
2337 mov edx, dword ptr [u64Old + 4]
2338 lock cmpxchg8b [edi]
2339 setz al
2340 movzx eax, al
2341 mov dword ptr [u32Ret], eax
2342 }
2343 return !!u32Ret;
2344# endif
2345# endif /* !RT_ARCH_AMD64 */
2346}
2347#endif
2348
2349
2350/**
2351 * Atomically Compare and exchange a signed 64-bit value, ordered.
2352 *
2353 * @returns true if xchg was done.
2354 * @returns false if xchg wasn't done.
2355 *
2356 * @param pi64 Pointer to the 64-bit variable to update.
2357 * @param i64 The 64-bit value to assign to *pi64.
2358 * @param i64Old The value to compare with.
2359 */
2360DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
2361{
2362 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
2363}
2364
2365
2366/** @def ASMAtomicCmpXchgSize
2367 * Atomically Compare and Exchange a value whose size might differ
2368 * between platforms or compilers, ordered.
2369 *
2370 * @param pu Pointer to the value to update.
2371 * @param uNew The new value to assign to *pu.
2372 * @param uOld The old value to compare *pu with.
2373 * @param fRc Where to store the result.
2374 */
2375#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
2376 do { \
2377 switch (sizeof(*(pu))) { \
2378 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
2379 break; \
2380 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
2381 break; \
2382 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
2383 (fRc) = false; \
2384 break; \
2385 } \
2386 } while (0)
2387
2388
2389/**
2390 * Atomically Compare and Exchange a pointer value, ordered.
2391 *
2392 * @returns true if xchg was done.
2393 * @returns false if xchg wasn't done.
2394 *
2395 * @param ppv Pointer to the value to update.
2396 * @param pvNew The new value to assign to *ppv.
2397 * @param pvOld The old value to compare *ppv with.
2398 */
2399DECLINLINE(bool) ASMAtomicCmpXchgPtr(void * volatile *ppv, void *pvNew, void *pvOld)
2400{
2401#if ARCH_BITS == 32
2402 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
2403#elif ARCH_BITS == 64
2404 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
2405#else
2406# error "ARCH_BITS is bogus"
2407#endif
2408}
2409
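/* Illustrative usage sketch (hypothetical types and names, not part of the API):
 * a classic lock-free push onto a singly linked list built on ASMAtomicCmpXchgPtr.
 * The compare-exchange only succeeds if nobody else changed the head in the
 * meantime, so the loop simply retries on failure. Compiled out with #if 0. */
#if 0
typedef struct EXAMPLENODE
{
    struct EXAMPLENODE *pNext;
    uint32_t            uValue;
} EXAMPLENODE;

static void ExamplePushNode(EXAMPLENODE * volatile *ppHead, EXAMPLENODE *pNode)
{
    void *pvOld;
    do
    {
        pvOld = *(void * volatile *)ppHead;     /* racy snapshot, corrected by the CAS */
        pNode->pNext = (EXAMPLENODE *)pvOld;
    } while (!ASMAtomicCmpXchgPtr((void * volatile *)ppHead, pNode, pvOld));
}
#endif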
2410
2411/**
2412 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
2413 * passes back old value, ordered.
2414 *
2415 * @returns true if xchg was done.
2416 * @returns false if xchg wasn't done.
2417 *
2418 * @param pu32 Pointer to the value to update.
2419 * @param u32New The new value to assign to *pu32.
2420 * @param u32Old The old value to compare *pu32 with.
2421 * @param pu32Old Pointer to store the old value at.
2422 */
2423#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2424DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
2425#else
2426DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
2427{
2428# if RT_INLINE_ASM_GNU_STYLE
2429 uint8_t u8Ret;
2430 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
2431 "setz %1\n\t"
2432 : "=m" (*pu32),
2433 "=qm" (u8Ret),
2434 "=a" (*pu32Old)
2435 : "r" (u32New),
2436 "a" (u32Old));
2437 return (bool)u8Ret;
2438
2439# elif RT_INLINE_ASM_USES_INTRIN
2440 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
2441
2442# else
2443 uint32_t u32Ret;
2444 __asm
2445 {
2446# ifdef RT_ARCH_AMD64
2447 mov rdx, [pu32]
2448# else
2449 mov edx, [pu32]
2450# endif
2451 mov eax, [u32Old]
2452 mov ecx, [u32New]
2453# ifdef RT_ARCH_AMD64
2454 lock cmpxchg [rdx], ecx
2455 mov rdx, [pu32Old]
2456 mov [rdx], eax
2457# else
2458 lock cmpxchg [edx], ecx
2459 mov edx, [pu32Old]
2460 mov [edx], eax
2461# endif
2462 setz al
2463 movzx eax, al
2464 mov [u32Ret], eax
2465 }
2466 return !!u32Ret;
2467# endif
2468}
2469#endif
2470
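/* Illustrative usage sketch (hypothetical names, not part of the API): the Ex
 * variant hands back the value it found, so a retry loop does not need a separate
 * re-read between attempts. Here it is used to atomically raise a 32-bit maximum.
 * Compiled out with #if 0. */
#if 0
static void ExampleAtomicMaxU32(uint32_t volatile *pu32Max, uint32_t u32New)
{
    uint32_t u32Old = *pu32Max;                 /* initial snapshot */
    while (   u32New > u32Old
           && !ASMAtomicCmpXchgExU32(pu32Max, u32New, u32Old, &u32Old))
        /* u32Old now holds the current value; loop and try again */;
}
#endif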
2471
2472/**
2473 * Atomically Compare and Exchange a signed 32-bit value, additionally
2474 * passes back old value, ordered.
2475 *
2476 * @returns true if xchg was done.
2477 * @returns false if xchg wasn't done.
2478 *
2479 * @param pi32 Pointer to the value to update.
2480 * @param i32New The new value to assign to *pi32.
2481 * @param i32Old The old value to compare *pi32 with.
2482 * @param pi32Old Pointer to store the old value at.
2483 */
2484DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
2485{
2486 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
2487}
2488
2489
2490/**
2491 * Atomically Compare and exchange an unsigned 64-bit value, additionally
2492 * passing back old value, ordered.
2493 *
2494 * @returns true if xchg was done.
2495 * @returns false if xchg wasn't done.
2496 *
2497 * @param pu64 Pointer to the 64-bit variable to update.
2498 * @param u64New The 64-bit value to assign to *pu64.
2499 * @param u64Old The value to compare with.
2500 * @param pu64Old Pointer to store the old value at.
2501 */
2502#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2503DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
2504#else
2505DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
2506{
2507# if RT_INLINE_ASM_USES_INTRIN
2508 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
2509
2510# elif defined(RT_ARCH_AMD64)
2511# if RT_INLINE_ASM_GNU_STYLE
2512 uint8_t u8Ret;
2513 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
2514 "setz %1\n\t"
2515 : "=m" (*pu64),
2516 "=qm" (u8Ret),
2517 "=a" (*pu64Old)
2518 : "r" (u64New),
2519 "a" (u64Old));
2520 return (bool)u8Ret;
2521# else
2522 bool fRet;
2523 __asm
2524 {
2525        mov     rdx, [pu64]
2526 mov rax, [u64Old]
2527 mov rcx, [u64New]
2528 lock cmpxchg [rdx], rcx
2529 mov rdx, [pu64Old]
2530 mov [rdx], rax
2531 setz al
2532 mov [fRet], al
2533 }
2534 return fRet;
2535# endif
2536# else /* !RT_ARCH_AMD64 */
2537# if RT_INLINE_ASM_GNU_STYLE
2538 uint64_t u64Ret;
2539# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
2540 /* NB: this code uses a memory clobber description, because the clean
2541 * solution with an output value for *pu64 makes gcc run out of registers.
2542 * This will cause suboptimal code, and anyone with a better solution is
2543 * welcome to improve this. */
2544 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
2545 "lock; cmpxchg8b %3\n\t"
2546 "xchgl %%ebx, %1\n\t"
2547 : "=A" (u64Ret)
2548 : "DS" ((uint32_t)u64New),
2549 "c" ((uint32_t)(u64New >> 32)),
2550 "m" (*pu64),
2551 "0" (u64Old)
2552 : "memory" );
2553# else /* !PIC */
2554 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
2555 : "=A" (u64Ret),
2556 "=m" (*pu64)
2557 : "b" ((uint32_t)u64New),
2558 "c" ((uint32_t)(u64New >> 32)),
2559 "m" (*pu64),
2560 "0" (u64Old));
2561# endif
2562 *pu64Old = u64Ret;
2563 return u64Ret == u64Old;
2564# else
2565 uint32_t u32Ret;
2566 __asm
2567 {
2568 mov ebx, dword ptr [u64New]
2569 mov ecx, dword ptr [u64New + 4]
2570 mov edi, [pu64]
2571 mov eax, dword ptr [u64Old]
2572 mov edx, dword ptr [u64Old + 4]
2573 lock cmpxchg8b [edi]
2574 mov ebx, [pu64Old]
2575 mov [ebx], eax
2576 setz al
2577 movzx eax, al
2578 add ebx, 4
2579 mov [ebx], edx
2580 mov dword ptr [u32Ret], eax
2581 }
2582 return !!u32Ret;
2583# endif
2584# endif /* !RT_ARCH_AMD64 */
2585}
2586#endif
2587
2588
2589/**
2590 * Atomically Compare and exchange a signed 64-bit value, additionally
2591 * passing back old value, ordered.
2592 *
2593 * @returns true if xchg was done.
2594 * @returns false if xchg wasn't done.
2595 *
2596 * @param pi64 Pointer to the 64-bit variable to update.
2597 * @param i64 The 64-bit value to assign to *pi64.
2598 * @param i64Old The value to compare with.
2599 * @param pi64Old Pointer to store the old value at.
2600 */
2601DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
2602{
2603 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
2604}
2605
2606
2607/** @def ASMAtomicCmpXchgExSize
2608 * Atomically Compare and Exchange a value whose size might differ
2609 * between platforms or compilers. Additionally passes back old value.
2610 *
2611 * @param pu Pointer to the value to update.
2612 * @param uNew The new value to assign to *pu.
2613 * @param uOld The old value to compare *pu with.
2614 * @param fRc Where to store the result.
2615 * @param uOldVal Where to store the old value.
2616 */
2617#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, uOldVal) \
2618 do { \
2619 switch (sizeof(*(pu))) { \
2620 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)&(uOldVal)); \
2621 break; \
2622 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)&(uOldVal)); \
2623 break; \
2624 default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
2625 (fRc) = false; \
2626 (uOldVal) = 0; \
2627 break; \
2628 } \
2629 } while (0)
2630
2631
2632/**
2633 * Atomically Compare and Exchange a pointer value, additionally
2634 * passing back old value, ordered.
2635 *
2636 * @returns true if xchg was done.
2637 * @returns false if xchg wasn't done.
2638 *
2639 * @param ppv Pointer to the value to update.
2640 * @param pvNew The new value to assign to *ppv.
2641 * @param pvOld The old value to compare *ppv with.
2642 * @param ppvOld Pointer to store the old value at.
2643 */
2644DECLINLINE(bool) ASMAtomicCmpXchgExPtr(void * volatile *ppv, void *pvNew, void *pvOld, void **ppvOld)
2645{
2646#if ARCH_BITS == 32
2647 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
2648#elif ARCH_BITS == 64
2649 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
2650#else
2651# error "ARCH_BITS is bogus"
2652#endif
2653}
2654
2655
2656/**
2657 * Atomically exchanges and adds to a 32-bit value, ordered.
2658 *
2659 * @returns The old value.
2660 * @param pu32 Pointer to the value.
2661 * @param u32 Number to add.
2662 */
2663#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2664DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2665#else
2666DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2667{
2668# if RT_INLINE_ASM_USES_INTRIN
2669 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2670 return u32;
2671
2672# elif RT_INLINE_ASM_GNU_STYLE
2673 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2674 : "=r" (u32),
2675 "=m" (*pu32)
2676 : "0" (u32)
2677 : "memory");
2678 return u32;
2679# else
2680 __asm
2681 {
2682 mov eax, [u32]
2683# ifdef RT_ARCH_AMD64
2684 mov rdx, [pu32]
2685 lock xadd [rdx], eax
2686# else
2687 mov edx, [pu32]
2688 lock xadd [edx], eax
2689# endif
2690 mov [u32], eax
2691 }
2692 return u32;
2693# endif
2694}
2695#endif
2696
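/* Illustrative usage sketch (hypothetical names, not part of the API):
 * ASMAtomicAddU32 returns the value *before* the addition, i.e. it is a
 * fetch-and-add. That is exactly what is needed to hand out a private range of
 * sequence numbers to each caller. Compiled out with #if 0. */
#if 0
static uint32_t volatile g_uNextSeqExample = 0;

/** Reserves cCount consecutive sequence numbers and returns the first of them. */
static uint32_t ExampleGrabSeqRange(uint32_t cCount)
{
    return ASMAtomicAddU32(&g_uNextSeqExample, cCount);
}
#endif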
2697
2698/**
2699 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2700 *
2701 * @returns The old value.
2702 * @param pi32 Pointer to the value.
2703 * @param i32 Number to add.
2704 */
2705DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2706{
2707 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2708}
2709
2710
2711/**
2712 * Atomically increment a 32-bit value, ordered.
2713 *
2714 * @returns The new value.
2715 * @param pu32 Pointer to the value to increment.
2716 */
2717#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2718DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2719#else
2720DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2721{
2722 uint32_t u32;
2723# if RT_INLINE_ASM_USES_INTRIN
2724 u32 = _InterlockedIncrement((long *)pu32);
2725 return u32;
2726
2727# elif RT_INLINE_ASM_GNU_STYLE
2728 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2729 : "=r" (u32),
2730 "=m" (*pu32)
2731 : "0" (1)
2732 : "memory");
2733 return u32+1;
2734# else
2735 __asm
2736 {
2737 mov eax, 1
2738# ifdef RT_ARCH_AMD64
2739 mov rdx, [pu32]
2740 lock xadd [rdx], eax
2741# else
2742 mov edx, [pu32]
2743 lock xadd [edx], eax
2744# endif
2745 mov u32, eax
2746 }
2747 return u32+1;
2748# endif
2749}
2750#endif
2751
2752
2753/**
2754 * Atomically increment a signed 32-bit value, ordered.
2755 *
2756 * @returns The new value.
2757 * @param pi32 Pointer to the value to increment.
2758 */
2759DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2760{
2761 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2762}
2763
2764
2765/**
2766 * Atomically decrement an unsigned 32-bit value, ordered.
2767 *
2768 * @returns The new value.
2769 * @param pu32 Pointer to the value to decrement.
2770 */
2771#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2772DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
2773#else
2774DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
2775{
2776 uint32_t u32;
2777# if RT_INLINE_ASM_USES_INTRIN
2778 u32 = _InterlockedDecrement((long *)pu32);
2779 return u32;
2780
2781# elif RT_INLINE_ASM_GNU_STYLE
2782 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2783 : "=r" (u32),
2784 "=m" (*pu32)
2785 : "0" (-1)
2786 : "memory");
2787 return u32-1;
2788# else
2789 __asm
2790 {
2791 mov eax, -1
2792# ifdef RT_ARCH_AMD64
2793 mov rdx, [pu32]
2794 lock xadd [rdx], eax
2795# else
2796 mov edx, [pu32]
2797 lock xadd [edx], eax
2798# endif
2799 mov u32, eax
2800 }
2801 return u32-1;
2802# endif
2803}
2804#endif
2805
2806
2807/**
2808 * Atomically decrement a signed 32-bit value, ordered.
2809 *
2810 * @returns The new value.
2811 * @param pi32 Pointer to the value to decrement.
2812 */
2813DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
2814{
2815 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
2816}
2817
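/* Illustrative usage sketch (hypothetical structure and names, not part of the
 * API): the increment/decrement pair returns the *new* value, which is the
 * natural shape for reference counting - the thread that sees the count drop to
 * zero owns the destruction. Compiled out with #if 0. */
#if 0
typedef struct EXAMPLEOBJ
{
    uint32_t volatile cRefs;
    /* ... payload ... */
} EXAMPLEOBJ;

static void ExampleDestroy(EXAMPLEOBJ *pObj);   /* hypothetical destructor */

static void ExampleRetain(EXAMPLEOBJ *pObj)
{
    ASMAtomicIncU32(&pObj->cRefs);
}

static void ExampleRelease(EXAMPLEOBJ *pObj)
{
    if (ASMAtomicDecU32(&pObj->cRefs) == 0)
        ExampleDestroy(pObj);
}
#endif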
2818
2819/**
2820 * Atomically Or an unsigned 32-bit value, ordered.
2821 *
2822 * @param pu32 Pointer to the 32-bit variable to OR u32 with.
2823 * @param u32 The value to OR *pu32 with.
2824 */
2825#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2826DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
2827#else
2828DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
2829{
2830# if RT_INLINE_ASM_USES_INTRIN
2831 _InterlockedOr((long volatile *)pu32, (long)u32);
2832
2833# elif RT_INLINE_ASM_GNU_STYLE
2834 __asm__ __volatile__("lock; orl %1, %0\n\t"
2835 : "=m" (*pu32)
2836 : "ir" (u32));
2837# else
2838 __asm
2839 {
2840 mov eax, [u32]
2841# ifdef RT_ARCH_AMD64
2842 mov rdx, [pu32]
2843 lock or [rdx], eax
2844# else
2845 mov edx, [pu32]
2846 lock or [edx], eax
2847# endif
2848 }
2849# endif
2850}
2851#endif
2852
2853
2854/**
2855 * Atomically Or a signed 32-bit value, ordered.
2856 *
2857 * @param pi32 Pointer to the 32-bit variable to OR i32 with.
2858 * @param i32 The value to OR *pi32 with.
2859 */
2860DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
2861{
2862 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
2863}
2864
2865
2866/**
2867 * Atomically And an unsigned 32-bit value, ordered.
2868 *
2869 * @param pu32 Pointer to the 32-bit variable to AND u32 with.
2870 * @param u32 The value to AND *pu32 with.
2871 */
2872#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2873DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
2874#else
2875DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
2876{
2877# if RT_INLINE_ASM_USES_INTRIN
2878 _InterlockedAnd((long volatile *)pu32, u32);
2879
2880# elif RT_INLINE_ASM_GNU_STYLE
2881 __asm__ __volatile__("lock; andl %1, %0\n\t"
2882 : "=m" (*pu32)
2883 : "ir" (u32));
2884# else
2885 __asm
2886 {
2887 mov eax, [u32]
2888# ifdef RT_ARCH_AMD64
2889 mov rdx, [pu32]
2890 lock and [rdx], eax
2891# else
2892 mov edx, [pu32]
2893 lock and [edx], eax
2894# endif
2895 }
2896# endif
2897}
2898#endif
2899
2900
2901/**
2902 * Atomically And a signed 32-bit value, ordered.
2903 *
2904 * @param pi32 Pointer to the 32-bit variable to AND i32 with.
2905 * @param i32 The value to AND *pi32 with.
2906 */
2907DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
2908{
2909 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
2910}
2911
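/* Illustrative usage sketch (hypothetical flag values and names, not part of the
 * API): ASMAtomicOrU32 and ASMAtomicAndU32 are the usual way of setting and
 * clearing bits in a shared 32-bit flag word without a lock - OR with the mask
 * to set, AND with the inverted mask to clear. Compiled out with #if 0. */
#if 0
#define EXAMPLE_FLAG_READY      UINT32_C(0x00000001)
#define EXAMPLE_FLAG_SHUTDOWN   UINT32_C(0x00000002)

static uint32_t volatile g_fExampleFlags = 0;

static void ExampleSignalReady(void)
{
    ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_FLAG_READY);
}

static void ExampleClearReady(void)
{
    ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_FLAG_READY);
}
#endif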
2912
2913/**
2914 * Memory fence, waits for any pending writes and reads to complete.
2915 */
2916DECLINLINE(void) ASMMemoryFence(void)
2917{
2918 /** @todo use mfence? check if all cpus we care for support it. */
2919 uint32_t volatile u32;
2920 ASMAtomicXchgU32(&u32, 0);
2921}
2922
2923
2924/**
2925 * Write fence, waits for any pending writes to complete.
2926 */
2927DECLINLINE(void) ASMWriteFence(void)
2928{
2929 /** @todo use sfence? check if all cpus we care for support it. */
2930 ASMMemoryFence();
2931}
2932
2933
2934/**
2935 * Read fence, waits for any pending reads to complete.
2936 */
2937DECLINLINE(void) ASMReadFence(void)
2938{
2939 /** @todo use lfence? check if all cpus we care for support it. */
2940 ASMMemoryFence();
2941}
2942
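/* Illustrative usage sketch (hypothetical names, not part of the API): the classic
 * publish/consume pattern these fences are meant for - the producer writes the
 * payload, issues a write fence and only then sets the flag; the consumer checks
 * the flag, issues a read fence and only then reads the payload. Compiled out
 * with #if 0. */
#if 0
static uint32_t volatile g_u32PayloadExample    = 0;
static bool     volatile g_fPayloadReadyExample = false;

static void ExamplePublish(uint32_t u32Value)
{
    g_u32PayloadExample = u32Value;
    ASMWriteFence();                    /* make the payload visible before the flag */
    g_fPayloadReadyExample = true;
}

static bool ExampleTryConsume(uint32_t *pu32Value)
{
    if (!g_fPayloadReadyExample)
        return false;
    ASMReadFence();                     /* don't read the payload before the flag */
    *pu32Value = g_u32PayloadExample;
    return true;
}
#endif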
2943
2944/**
2945 * Atomically reads an unsigned 8-bit value, ordered.
2946 *
2947 * @returns Current *pu8 value
2948 * @param pu8 Pointer to the 8-bit variable to read.
2949 */
2950DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
2951{
2952 ASMMemoryFence();
2953 return *pu8; /* byte reads are atomic on x86 */
2954}
2955
2956
2957/**
2958 * Atomically reads an unsigned 8-bit value, unordered.
2959 *
2960 * @returns Current *pu8 value
2961 * @param pu8 Pointer to the 8-bit variable to read.
2962 */
2963DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
2964{
2965 return *pu8; /* byte reads are atomic on x86 */
2966}
2967
2968
2969/**
2970 * Atomically reads a signed 8-bit value, ordered.
2971 *
2972 * @returns Current *pi8 value
2973 * @param pi8 Pointer to the 8-bit variable to read.
2974 */
2975DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
2976{
2977 ASMMemoryFence();
2978 return *pi8; /* byte reads are atomic on x86 */
2979}
2980
2981
2982/**
2983 * Atomically reads a signed 8-bit value, unordered.
2984 *
2985 * @returns Current *pi8 value
2986 * @param pi8 Pointer to the 8-bit variable to read.
2987 */
2988DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
2989{
2990 return *pi8; /* byte reads are atomic on x86 */
2991}
2992
2993
2994/**
2995 * Atomically reads an unsigned 16-bit value, ordered.
2996 *
2997 * @returns Current *pu16 value
2998 * @param pu16 Pointer to the 16-bit variable to read.
2999 */
3000DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
3001{
3002 ASMMemoryFence();
3003 Assert(!((uintptr_t)pu16 & 1));
3004 return *pu16;
3005}
3006
3007
3008/**
3009 * Atomically reads an unsigned 16-bit value, unordered.
3010 *
3011 * @returns Current *pu16 value
3012 * @param pu16 Pointer to the 16-bit variable to read.
3013 */
3014DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
3015{
3016 Assert(!((uintptr_t)pu16 & 1));
3017 return *pu16;
3018}
3019
3020
3021/**
3022 * Atomically reads a signed 16-bit value, ordered.
3023 *
3024 * @returns Current *pi16 value
3025 * @param pi16 Pointer to the 16-bit variable to read.
3026 */
3027DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
3028{
3029 ASMMemoryFence();
3030 Assert(!((uintptr_t)pi16 & 1));
3031 return *pi16;
3032}
3033
3034
3035/**
3036 * Atomically reads a signed 16-bit value, unordered.
3037 *
3038 * @returns Current *pi16 value
3039 * @param pi16 Pointer to the 16-bit variable to read.
3040 */
3041DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
3042{
3043 Assert(!((uintptr_t)pi16 & 1));
3044 return *pi16;
3045}
3046
3047
3048/**
3049 * Atomically reads an unsigned 32-bit value, ordered.
3050 *
3051 * @returns Current *pu32 value
3052 * @param pu32 Pointer to the 32-bit variable to read.
3053 */
3054DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
3055{
3056 ASMMemoryFence();
3057 Assert(!((uintptr_t)pu32 & 3));
3058 return *pu32;
3059}
3060
3061
3062/**
3063 * Atomically reads an unsigned 32-bit value, unordered.
3064 *
3065 * @returns Current *pu32 value
3066 * @param pu32 Pointer to the 32-bit variable to read.
3067 */
3068DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
3069{
3070 Assert(!((uintptr_t)pu32 & 3));
3071 return *pu32;
3072}
3073
3074
3075/**
3076 * Atomically reads a signed 32-bit value, ordered.
3077 *
3078 * @returns Current *pi32 value
3079 * @param pi32 Pointer to the 32-bit variable to read.
3080 */
3081DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
3082{
3083 ASMMemoryFence();
3084 Assert(!((uintptr_t)pi32 & 3));
3085 return *pi32;
3086}
3087
3088
3089/**
3090 * Atomically reads a signed 32-bit value, unordered.
3091 *
3092 * @returns Current *pi32 value
3093 * @param pi32 Pointer to the 32-bit variable to read.
3094 */
3095DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
3096{
3097 Assert(!((uintptr_t)pi32 & 3));
3098 return *pi32;
3099}
3100
3101
3102/**
3103 * Atomically reads an unsigned 64-bit value, ordered.
3104 *
3105 * @returns Current *pu64 value
3106 * @param pu64 Pointer to the 64-bit variable to read.
3107 * The memory pointed to must be writable.
3108 * @remark This will fault if the memory is read-only!
3109 */
3110#if RT_INLINE_ASM_EXTERNAL
3111DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
3112#else
3113DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
3114{
3115 uint64_t u64;
3116# ifdef RT_ARCH_AMD64
3117# if RT_INLINE_ASM_GNU_STYLE
3118 Assert(!((uintptr_t)pu64 & 7));
3119 __asm__ __volatile__( "mfence\n\t"
3120 "movq %1, %0\n\t"
3121 : "=r" (u64)
3122 : "m" (*pu64));
3123# else
3124 __asm
3125 {
3126 mfence
3127 mov rdx, [pu64]
3128 mov rax, [rdx]
3129 mov [u64], rax
3130 }
3131# endif
3132# else /* !RT_ARCH_AMD64 */
3133# if RT_INLINE_ASM_GNU_STYLE
3134# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3135 uint32_t u32EBX = 0;
3136 Assert(!((uintptr_t)pu64 & 7));
3137 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3138 "lock; cmpxchg8b (%5)\n\t"
3139 "xchgl %%ebx, %3\n\t"
3140 : "=A" (u64),
3141 "=m" (*pu64)
3142 : "0" (0),
3143 "m" (u32EBX),
3144 "c" (0),
3145 "S" (pu64));
3146# else /* !PIC */
3147 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
3148 : "=A" (u64),
3149 "=m" (*pu64)
3150 : "0" (0),
3151 "b" (0),
3152 "c" (0));
3153# endif
3154# else
3155 Assert(!((uintptr_t)pu64 & 7));
3156 __asm
3157 {
3158 xor eax, eax
3159 xor edx, edx
3160 mov edi, pu64
3161 xor ecx, ecx
3162 xor ebx, ebx
3163 lock cmpxchg8b [edi]
3164 mov dword ptr [u64], eax
3165 mov dword ptr [u64 + 4], edx
3166 }
3167# endif
3168# endif /* !RT_ARCH_AMD64 */
3169 return u64;
3170}
3171#endif
3172
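/* Illustrative usage sketch (hypothetical names, not part of the API): on 32-bit
 * hosts a plain 64-bit load can tear, so a shared 64-bit counter should be
 * sampled with ASMAtomicReadU64. Note the remark above: the memory must be
 * writable because cmpxchg8b is used on 32-bit. Compiled out with #if 0. */
#if 0
static uint64_t volatile g_cTotalBytesExample = 0;

static uint64_t ExampleSampleCounter(void)
{
    return ASMAtomicReadU64(&g_cTotalBytesExample);
}
#endif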
3173
3174/**
3175 * Atomically reads an unsigned 64-bit value, unordered.
3176 *
3177 * @returns Current *pu64 value
3178 * @param pu64 Pointer to the 64-bit variable to read.
3179 * The memory pointed to must be writable.
3180 * @remark This will fault if the memory is read-only!
3181 */
3182#if RT_INLINE_ASM_EXTERNAL
3183DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
3184#else
3185DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
3186{
3187 uint64_t u64;
3188# ifdef RT_ARCH_AMD64
3189# if RT_INLINE_ASM_GNU_STYLE
3190 Assert(!((uintptr_t)pu64 & 7));
3191 __asm__ __volatile__("movq %1, %0\n\t"
3192 : "=r" (u64)
3193 : "m" (*pu64));
3194# else
3195 __asm
3196 {
3197 mov rdx, [pu64]
3198 mov rax, [rdx]
3199 mov [u64], rax
3200 }
3201# endif
3202# else /* !RT_ARCH_AMD64 */
3203# if RT_INLINE_ASM_GNU_STYLE
3204# if defined(PIC) || defined(RT_OS_DARWIN) /* darwin: 4.0.1 compiler option / bug? */
3205 uint32_t u32EBX = 0;
3206 Assert(!((uintptr_t)pu64 & 7));
3207 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
3208 "lock; cmpxchg8b (%5)\n\t"
3209 "xchgl %%ebx, %3\n\t"
3210 : "=A" (u64),
3211 "=m" (*pu64)
3212 : "0" (0),
3213 "m" (u32EBX),
3214 "c" (0),
3215 "S" (pu64));
3216# else /* !PIC */
3217 __asm__ __volatile__("cmpxchg8b %1\n\t"
3218 : "=A" (u64),
3219 "=m" (*pu64)
3220 : "0" (0),
3221 "b" (0),
3222 "c" (0));
3223# endif
3224# else
3225 Assert(!((uintptr_t)pu64 & 7));
3226 __asm
3227 {
3228 xor eax, eax
3229 xor edx, edx
3230 mov edi, pu64
3231 xor ecx, ecx
3232 xor ebx, ebx
3233 lock cmpxchg8b [edi]
3234 mov dword ptr [u64], eax
3235 mov dword ptr [u64 + 4], edx
3236 }
3237# endif
3238# endif /* !RT_ARCH_AMD64 */
3239 return u64;
3240}
3241#endif
3242
3243
3244/**
3245 * Atomically reads a signed 64-bit value, ordered.
3246 *
3247 * @returns Current *pi64 value
3248 * @param pi64 Pointer to the 64-bit variable to read.
3249 * The memory pointed to must be writable.
3250 * @remark This will fault if the memory is read-only!
3251 */
3252DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
3253{
3254 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
3255}
3256
3257
3258/**
3259 * Atomically reads a signed 64-bit value, unordered.
3260 *
3261 * @returns Current *pi64 value
3262 * @param pi64 Pointer to the 64-bit variable to read.
3263 * The memory pointed to must be writable.
3264 * @remark This will fault if the memory is read-only!
3265 */
3266DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
3267{
3268 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
3269}
3270
3271
3272/**
3273 * Atomically reads a pointer value, ordered.
3274 *
3275 * @returns Current *pv value
3276 * @param ppv Pointer to the pointer variable to read.
3277 */
3278DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
3279{
3280#if ARCH_BITS == 32
3281 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
3282#elif ARCH_BITS == 64
3283 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
3284#else
3285# error "ARCH_BITS is bogus"
3286#endif
3287}
3288
3289
3290/**
3291 * Atomically reads a pointer value, unordered.
3292 *
3293 * @returns Current *pv value
3294 * @param ppv Pointer to the pointer variable to read.
3295 */
3296DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
3297{
3298#if ARCH_BITS == 32
3299 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
3300#elif ARCH_BITS == 64
3301 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
3302#else
3303# error "ARCH_BITS is bogus"
3304#endif
3305}
3306
3307
3308/**
3309 * Atomically reads a boolean value, ordered.
3310 *
3311 * @returns Current *pf value
3312 * @param pf Pointer to the boolean variable to read.
3313 */
3314DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
3315{
3316 ASMMemoryFence();
3317 return *pf; /* byte reads are atomic on x86 */
3318}
3319
3320
3321/**
3322 * Atomically reads a boolean value, unordered.
3323 *
3324 * @returns Current *pf value
3325 * @param pf Pointer to the boolean variable to read.
3326 */
3327DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
3328{
3329 return *pf; /* byte reads are atomic on x86 */
3330}
3331
3332
3333/**
3334 * Atomically read a value whose size might differ
3335 * between platforms or compilers, ordered.
3336 *
3337 * @param pu Pointer to the variable to update.
3338 * @param puRes Where to store the result.
3339 */
3340#define ASMAtomicReadSize(pu, puRes) \
3341 do { \
3342 switch (sizeof(*(pu))) { \
3343 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3344 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
3345 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
3346 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
3347 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3348 } \
3349 } while (0)
3350
3351
3352/**
3353 * Atomically read a value whose size might differ
3354 * between platforms or compilers, unordered.
3355 *
3356 * @param pu Pointer to the variable to update.
3357 * @param puRes Where to store the result.
3358 */
3359#define ASMAtomicUoReadSize(pu, puRes) \
3360 do { \
3361 switch (sizeof(*(pu))) { \
3362 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
3363 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
3364 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
3365 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
3366 default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3367 } \
3368 } while (0)
3369
3370
3371/**
3372 * Atomically writes an unsigned 8-bit value, ordered.
3373 *
3374 * @param pu8 Pointer to the 8-bit variable.
3375 * @param u8 The 8-bit value to assign to *pu8.
3376 */
3377DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
3378{
3379 ASMAtomicXchgU8(pu8, u8);
3380}
3381
3382
3383/**
3384 * Atomically writes an unsigned 8-bit value, unordered.
3385 *
3386 * @param pu8 Pointer to the 8-bit variable.
3387 * @param u8 The 8-bit value to assign to *pu8.
3388 */
3389DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
3390{
3391 *pu8 = u8; /* byte writes are atomic on x86 */
3392}
3393
3394
3395/**
3396 * Atomically writes a signed 8-bit value, ordered.
3397 *
3398 * @param pi8 Pointer to the 8-bit variable.
3399 * @param i8 The 8-bit value to assign to *pi8.
3400 */
3401DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
3402{
3403 ASMAtomicXchgS8(pi8, i8);
3404}
3405
3406
3407/**
3408 * Atomically writes a signed 8-bit value, unordered.
3409 *
3410 * @param pi8 Pointer to the 8-bit variable.
3411 * @param i8 The 8-bit value to assign to *pi8.
3412 */
3413DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
3414{
3415 *pi8 = i8; /* byte writes are atomic on x86 */
3416}
3417
3418
3419/**
3420 * Atomically writes an unsigned 16-bit value, ordered.
3421 *
3422 * @param pu16 Pointer to the 16-bit variable.
3423 * @param u16 The 16-bit value to assign to *pu16.
3424 */
3425DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
3426{
3427 ASMAtomicXchgU16(pu16, u16);
3428}
3429
3430
3431/**
3432 * Atomically writes an unsigned 16-bit value, unordered.
3433 *
3434 * @param pu16 Pointer to the 16-bit variable.
3435 * @param u16 The 16-bit value to assign to *pu16.
3436 */
3437DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
3438{
3439 Assert(!((uintptr_t)pu16 & 1));
3440 *pu16 = u16;
3441}
3442
3443
3444/**
3445 * Atomically writes a signed 16-bit value, ordered.
3446 *
3447 * @param pi16 Pointer to the 16-bit variable.
3448 * @param i16 The 16-bit value to assign to *pi16.
3449 */
3450DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
3451{
3452 ASMAtomicXchgS16(pi16, i16);
3453}
3454
3455
3456/**
3457 * Atomically writes a signed 16-bit value, unordered.
3458 *
3459 * @param pi16 Pointer to the 16-bit variable.
3460 * @param i16 The 16-bit value to assign to *pi16.
3461 */
3462DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
3463{
3464 Assert(!((uintptr_t)pi16 & 1));
3465 *pi16 = i16;
3466}
3467
3468
3469/**
3470 * Atomically writes an unsigned 32-bit value, ordered.
3471 *
3472 * @param pu32 Pointer to the 32-bit variable.
3473 * @param u32 The 32-bit value to assign to *pu32.
3474 */
3475DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
3476{
3477 ASMAtomicXchgU32(pu32, u32);
3478}
3479
3480
3481/**
3482 * Atomically writes an unsigned 32-bit value, unordered.
3483 *
3484 * @param pu32 Pointer to the 32-bit variable.
3485 * @param u32 The 32-bit value to assign to *pu32.
3486 */
3487DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
3488{
3489 Assert(!((uintptr_t)pu32 & 3));
3490 *pu32 = u32;
3491}
3492
3493
3494/**
3495 * Atomically writes a signed 32-bit value, ordered.
3496 *
3497 * @param pi32 Pointer to the 32-bit variable.
3498 * @param i32 The 32-bit value to assign to *pi32.
3499 */
3500DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
3501{
3502 ASMAtomicXchgS32(pi32, i32);
3503}
3504
3505
3506/**
3507 * Atomically writes a signed 32-bit value, unordered.
3508 *
3509 * @param pi32 Pointer to the 32-bit variable.
3510 * @param i32 The 32-bit value to assign to *pi32.
3511 */
3512DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
3513{
3514 Assert(!((uintptr_t)pi32 & 3));
3515 *pi32 = i32;
3516}
3517
3518
3519/**
3520 * Atomically writes an unsigned 64-bit value, ordered.
3521 *
3522 * @param pu64 Pointer to the 64-bit variable.
3523 * @param u64 The 64-bit value to assign to *pu64.
3524 */
3525DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
3526{
3527 ASMAtomicXchgU64(pu64, u64);
3528}
3529
3530
3531/**
3532 * Atomically writes an unsigned 64-bit value, unordered.
3533 *
3534 * @param pu64 Pointer to the 64-bit variable.
3535 * @param u64 The 64-bit value to assign to *pu64.
3536 */
3537DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
3538{
3539 Assert(!((uintptr_t)pu64 & 7));
3540#if ARCH_BITS == 64
3541 *pu64 = u64;
3542#else
3543 ASMAtomicXchgU64(pu64, u64);
3544#endif
3545}
3546
3547
3548/**
3549 * Atomically writes a signed 64-bit value, ordered.
3550 *
3551 * @param pi64 Pointer to the 64-bit variable.
3552 * @param i64 The 64-bit value to assign to *pi64.
3553 */
3554DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
3555{
3556 ASMAtomicXchgS64(pi64, i64);
3557}
3558
3559
3560/**
3561 * Atomically writes a signed 64-bit value, unordered.
3562 *
3563 * @param pi64 Pointer to the 64-bit variable.
3564 * @param i64 The 64-bit value to assign to *pi64.
3565 */
3566DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
3567{
3568 Assert(!((uintptr_t)pi64 & 7));
3569#if ARCH_BITS == 64
3570 *pi64 = i64;
3571#else
3572 ASMAtomicXchgS64(pi64, i64);
3573#endif
3574}
3575
3576
3577/**
3578 * Atomically writes a boolean value, ordered.
3579 *
3580 * @param pf Pointer to the boolean variable.
3581 * @param f The boolean value to assign to *pf.
3582 */
3583DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
3584{
3585 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
3586}
3587
3588
3589/**
3590 * Atomically writes a boolean value, unordered.
3591 *
3592 * @param pf Pointer to the boolean variable.
3593 * @param f The boolean value to assign to *pf.
3594 */
3595DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
3596{
3597 *pf = f; /* byte writes are atomic on x86 */
3598}
3599
3600
3601/**
3602 * Atomically writes a pointer value, ordered.
3603 *
3605 * @param ppv Pointer to the pointer variable.
3606 * @param pv The pointer value to assign to *ppv.
3607 */
3608DECLINLINE(void) ASMAtomicWritePtr(void * volatile *ppv, void *pv)
3609{
3610#if ARCH_BITS == 32
3611 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3612#elif ARCH_BITS == 64
3613 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3614#else
3615# error "ARCH_BITS is bogus"
3616#endif
3617}
3618
3619
3620/**
3621 * Atomically writes a pointer value, unordered.
3622 *
3624 * @param ppv Pointer to the pointer variable.
3625 * @param pv The pointer value to assign to *ppv.
3626 */
3627DECLINLINE(void) ASMAtomicUoWritePtr(void * volatile *ppv, void *pv)
3628{
3629#if ARCH_BITS == 32
3630 ASMAtomicUoWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
3631#elif ARCH_BITS == 64
3632 ASMAtomicUoWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
3633#else
3634# error "ARCH_BITS is bogus"
3635#endif
3636}
3637
3638
3639/**
3640 * Atomically write a value whose size might differ
3641 * between platforms or compilers, ordered.
3642 *
3643 * @param pu Pointer to the variable to update.
3644 * @param uNew The value to assign to *pu.
3645 */
3646#define ASMAtomicWriteSize(pu, uNew) \
3647 do { \
3648 switch (sizeof(*(pu))) { \
3649 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3650 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3651 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3652 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3653 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3654 } \
3655 } while (0)
3656
3657/**
3658 * Atomically write a value whose size might differ
3659 * between platforms or compilers, unordered.
3660 *
3661 * @param pu Pointer to the variable to update.
3662 * @param uNew The value to assign to *pu.
3663 */
3664#define ASMAtomicUoWriteSize(pu, uNew) \
3665 do { \
3666 switch (sizeof(*(pu))) { \
3667 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
3668 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
3669 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
3670 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
3671 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3672 } \
3673 } while (0)
3674
3675
3676
3677
3678/**
3679 * Invalidate page.
3680 *
3681 * @param pv Address of the page to invalidate.
3682 */
3683#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3684DECLASM(void) ASMInvalidatePage(void *pv);
3685#else
3686DECLINLINE(void) ASMInvalidatePage(void *pv)
3687{
3688# if RT_INLINE_ASM_USES_INTRIN
3689 __invlpg(pv);
3690
3691# elif RT_INLINE_ASM_GNU_STYLE
3692 __asm__ __volatile__("invlpg %0\n\t"
3693 : : "m" (*(uint8_t *)pv));
3694# else
3695 __asm
3696 {
3697# ifdef RT_ARCH_AMD64
3698 mov rax, [pv]
3699 invlpg [rax]
3700# else
3701 mov eax, [pv]
3702 invlpg [eax]
3703# endif
3704 }
3705# endif
3706}
3707#endif
3708
3709
3710#if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3711# if PAGE_SIZE != 0x1000
3712# error "PAGE_SIZE is not 0x1000!"
3713# endif
3714#endif
3715
3716/**
3717 * Zeros a 4K memory page.
3718 *
3719 * @param pv Pointer to the memory block. This must be page aligned.
3720 */
3721#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3722DECLASM(void) ASMMemZeroPage(volatile void *pv);
3723# else
3724DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3725{
3726# if RT_INLINE_ASM_USES_INTRIN
3727# ifdef RT_ARCH_AMD64
3728 __stosq((unsigned __int64 *)pv, 0, /*PAGE_SIZE*/0x1000 / 8);
3729# else
3730 __stosd((unsigned long *)pv, 0, /*PAGE_SIZE*/0x1000 / 4);
3731# endif
3732
3733# elif RT_INLINE_ASM_GNU_STYLE
3734 RTUINTREG uDummy;
3735# ifdef RT_ARCH_AMD64
3736 __asm__ __volatile__ ("rep stosq"
3737 : "=D" (pv),
3738 "=c" (uDummy)
3739 : "0" (pv),
3740 "c" (0x1000 >> 3),
3741 "a" (0)
3742 : "memory");
3743# else
3744 __asm__ __volatile__ ("rep stosl"
3745 : "=D" (pv),
3746 "=c" (uDummy)
3747 : "0" (pv),
3748 "c" (0x1000 >> 2),
3749 "a" (0)
3750 : "memory");
3751# endif
3752# else
3753 __asm
3754 {
3755# ifdef RT_ARCH_AMD64
3756 xor rax, rax
3757 mov ecx, 0200h
3758 mov rdi, [pv]
3759 rep stosq
3760# else
3761 xor eax, eax
3762 mov ecx, 0400h
3763 mov edi, [pv]
3764 rep stosd
3765# endif
3766 }
3767# endif
3768}
3769# endif
3770
3771
3772/**
3773 * Zeros a memory block with a 32-bit aligned size.
3774 *
3775 * @param pv Pointer to the memory block.
3776 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3777 */
3778#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3779DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3780#else
3781DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3782{
3783# if RT_INLINE_ASM_USES_INTRIN
3784 __stosd((unsigned long *)pv, 0, cb >> 2);
3785
3786# elif RT_INLINE_ASM_GNU_STYLE
3787 __asm__ __volatile__ ("rep stosl"
3788 : "=D" (pv),
3789 "=c" (cb)
3790 : "0" (pv),
3791 "1" (cb >> 2),
3792 "a" (0)
3793 : "memory");
3794# else
3795 __asm
3796 {
3797 xor eax, eax
3798# ifdef RT_ARCH_AMD64
3799 mov rcx, [cb]
3800 shr rcx, 2
3801 mov rdi, [pv]
3802# else
3803 mov ecx, [cb]
3804 shr ecx, 2
3805 mov edi, [pv]
3806# endif
3807 rep stosd
3808 }
3809# endif
3810}
3811#endif
3812
3813
3814/**
3815 * Fills a memory block with a 32-bit aligned size.
3816 *
3817 * @param pv Pointer to the memory block.
3818 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3819 * @param u32 The value to fill with.
3820 */
3821#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3822DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3823#else
3824DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3825{
3826# if RT_INLINE_ASM_USES_INTRIN
3827     __stosd((unsigned long *)pv, u32, cb >> 2);
3828
3829# elif RT_INLINE_ASM_GNU_STYLE
3830 __asm__ __volatile__ ("rep stosl"
3831 : "=D" (pv),
3832 "=c" (cb)
3833 : "0" (pv),
3834 "1" (cb >> 2),
3835 "a" (u32)
3836 : "memory");
3837# else
3838 __asm
3839 {
3840# ifdef RT_ARCH_AMD64
3841 mov rcx, [cb]
3842 shr rcx, 2
3843 mov rdi, [pv]
3844# else
3845 mov ecx, [cb]
3846 shr ecx, 2
3847 mov edi, [pv]
3848# endif
3849 mov eax, [u32]
3850 rep stosd
3851 }
3852# endif
3853}
3854#endif
3855
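/* Illustrative usage sketch (hypothetical names, not part of the API): both
 * ASMMemZero32 and ASMMemFill32 require the byte count to be a multiple of four,
 * so size the buffer accordingly. Compiled out with #if 0. */
#if 0
static void ExampleInitPattern(void)
{
    uint32_t au32Buf[64];                                           /* 256 bytes, 32-bit aligned */
    ASMMemZero32(au32Buf, sizeof(au32Buf));                         /* all zeros */
    ASMMemFill32(au32Buf, sizeof(au32Buf), UINT32_C(0xdeadbeef));   /* then a fill pattern */
}
#endif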
3856
3857/**
3858 * Checks if a memory block is filled with the specified byte.
3859 *
3860 * This is a sort of inverted memchr.
3861 *
3862 * @returns Pointer to the byte which doesn't equal u8.
3863 * @returns NULL if all equal to u8.
3864 *
3865 * @param pv Pointer to the memory block.
3866 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3867 * @param u8 The value it's supposed to be filled with.
3868 */
3869#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3870DECLASM(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8);
3871#else
3872DECLINLINE(void *) ASMMemIsAll8(void const *pv, size_t cb, uint8_t u8)
3873{
3874/** @todo rewrite this in inline assembly? */
3875 uint8_t const *pb = (uint8_t const *)pv;
3876 for (; cb; cb--, pb++)
3877 if (RT_UNLIKELY(*pb != u8))
3878 return (void *)pb;
3879 return NULL;
3880}
3881#endif
3882
3883
3884/**
3885 * Checks if a memory block is filled with the specified 32-bit value.
3886 *
3887 * This is a sort of inverted memchr.
3888 *
3889 * @returns Pointer to the first value which doesn't equal u32.
3890 * @returns NULL if all equal to u32.
3891 *
3892 * @param pv Pointer to the memory block.
3893 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3894 * @param u32 The value it's supposed to be filled with.
3895 */
3896#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3897DECLASM(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32);
3898#else
3899DECLINLINE(uint32_t *) ASMMemIsAllU32(void const *pv, size_t cb, uint32_t u32)
3900{
3901/** @todo rewrite this in inline assembly? */
3902 uint32_t const *pu32 = (uint32_t const *)pv;
3903 for (; cb; cb -= 4, pu32++)
3904 if (RT_UNLIKELY(*pu32 != u32))
3905 return (uint32_t *)pu32;
3906 return NULL;
3907}
3908#endif
3909
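/* Illustrative usage sketch (hypothetical names, not part of the API): the
 * "is all" helpers return NULL when the block matches and a pointer to the first
 * offending element otherwise, which makes them convenient for sanity checks.
 * Compiled out with #if 0. */
#if 0
static bool ExampleIsBufferZeroed(const void *pv, size_t cb)
{
    /* cb must be a multiple of four, see the function documentation above. */
    return ASMMemIsAllU32(pv, cb, 0) == NULL;
}
#endif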
3910
3911/**
3912 * Multiplies two unsigned 32-bit values returning an unsigned 64-bit result.
3913 *
3914 * @returns u32F1 * u32F2.
3915 */
3916#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3917DECLASM(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2);
3918#else
3919DECLINLINE(uint64_t) ASMMult2xU32RetU64(uint32_t u32F1, uint32_t u32F2)
3920{
3921# ifdef RT_ARCH_AMD64
3922 return (uint64_t)u32F1 * u32F2;
3923# else /* !RT_ARCH_AMD64 */
3924 uint64_t u64;
3925# if RT_INLINE_ASM_GNU_STYLE
3926 __asm__ __volatile__("mull %%edx"
3927 : "=A" (u64)
3928 : "a" (u32F2), "d" (u32F1));
3929# else
3930 __asm
3931 {
3932 mov edx, [u32F1]
3933 mov eax, [u32F2]
3934 mul edx
3935 mov dword ptr [u64], eax
3936 mov dword ptr [u64 + 4], edx
3937 }
3938# endif
3939 return u64;
3940# endif /* !RT_ARCH_AMD64 */
3941}
3942#endif
3943
3944
3945/**
3946 * Multiplies two signed 32-bit values returning a signed 64-bit result.
3947 *
3948 * @returns i32F1 * i32F2.
3949 */
3950#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3951DECLASM(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2);
3952#else
3953DECLINLINE(int64_t) ASMMult2xS32RetS64(int32_t i32F1, int32_t i32F2)
3954{
3955# ifdef RT_ARCH_AMD64
3956 return (int64_t)i32F1 * i32F2;
3957# else /* !RT_ARCH_AMD64 */
3958 int64_t i64;
3959# if RT_INLINE_ASM_GNU_STYLE
3960 __asm__ __volatile__("imull %%edx"
3961 : "=A" (i64)
3962 : "a" (i32F2), "d" (i32F1));
3963# else
3964 __asm
3965 {
3966 mov edx, [i32F1]
3967 mov eax, [i32F2]
3968 imul edx
3969 mov dword ptr [i64], eax
3970 mov dword ptr [i64 + 4], edx
3971 }
3972# endif
3973 return i64;
3974# endif /* !RT_ARCH_AMD64 */
3975}
3976#endif
3977
3978
3979/**
3980 * Divides a 64-bit unsigned value by a 32-bit unsigned value, returning an unsigned 32-bit result.
3981 *
3982 * @returns u64 / u32.
3983 */
3984#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
3985DECLASM(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32);
3986#else
3987DECLINLINE(uint32_t) ASMDivU64ByU32RetU32(uint64_t u64, uint32_t u32)
3988{
3989# ifdef RT_ARCH_AMD64
3990 return (uint32_t)(u64 / u32);
3991# else /* !RT_ARCH_AMD64 */
3992# if RT_INLINE_ASM_GNU_STYLE
3993 RTUINTREG uDummy;
3994 __asm__ __volatile__("divl %3"
3995 : "=a" (u32), "=d"(uDummy)
3996 : "A" (u64), "r" (u32));
3997# else
3998 __asm
3999 {
4000 mov eax, dword ptr [u64]
4001 mov edx, dword ptr [u64 + 4]
4002 mov ecx, [u32]
4003 div ecx
4004 mov [u32], eax
4005 }
4006# endif
4007 return u32;
4008# endif /* !RT_ARCH_AMD64 */
4009}
4010#endif
4011
4012
4013/**
4014 * Divides a 64-bit signed value by a 32-bit signed value, returning a signed 32-bit result.
4015 *
4016 * @returns i64 / i32.
4017 */
4018#if RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)
4019DECLASM(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32);
4020#else
4021DECLINLINE(int32_t) ASMDivS64ByS32RetS32(int64_t i64, int32_t i32)
4022{
4023# ifdef RT_ARCH_AMD64
4024 return (int32_t)(i64 / i32);
4025# else /* !RT_ARCH_AMD64 */
4026# if RT_INLINE_ASM_GNU_STYLE
4027 RTUINTREG iDummy;
4028 __asm__ __volatile__("idivl %3"
4029 : "=a" (i32), "=d"(iDummy)
4030 : "A" (i64), "r" (i32));
4031# else
4032 __asm
4033 {
4034 mov eax, dword ptr [i64]
4035 mov edx, dword ptr [i64 + 4]
4036 mov ecx, [i32]
4037 idiv ecx
4038 mov [i32], eax
4039 }
4040# endif
4041 return i32;
4042# endif /* !RT_ARCH_AMD64 */
4043}
4044#endif
4045
4046
4047/**
4048 * Multiplies a 64-bit value by a 32-bit value and divides the result by a 32-bit value,
4049 * using a 96-bit intermediate result.
4050 * @note Don't use 64-bit C arithmetic here since some gcc compilers generate references to
4051 * __udivdi3 and __umoddi3 even if this inline function is not used.
4052 *
4053 * @returns (u64A * u32B) / u32C.
4054 * @param u64A The 64-bit value.
4055 * @param u32B The 32-bit value to multiply A by.
4056 * @param u32C The 32-bit value to divide A*B by.
4057 */
4058#if RT_INLINE_ASM_EXTERNAL || !defined(__GNUC__)
4059DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
4060#else
4061DECLINLINE(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C)
4062{
4063# if RT_INLINE_ASM_GNU_STYLE
4064# ifdef RT_ARCH_AMD64
4065 uint64_t u64Result, u64Spill;
4066 __asm__ __volatile__("mulq %2\n\t"
4067 "divq %3\n\t"
4068 : "=a" (u64Result),
4069 "=d" (u64Spill)
4070 : "r" ((uint64_t)u32B),
4071 "r" ((uint64_t)u32C),
4072 "0" (u64A),
4073 "1" (0));
4074 return u64Result;
4075# else
4076 uint32_t u32Dummy;
4077 uint64_t u64Result;
4078 __asm__ __volatile__("mull %%ecx \n\t" /* eax = u64Lo.lo = (u64A.lo * u32B).lo
4079 edx = u64Lo.hi = (u64A.lo * u32B).hi */
4080 "xchg %%eax,%%esi \n\t" /* esi = u64Lo.lo
4081 eax = u64A.hi */
4082 "xchg %%edx,%%edi \n\t" /* edi = u64Low.hi
4083 edx = u32C */
4084 "xchg %%edx,%%ecx \n\t" /* ecx = u32C
4085 edx = u32B */
4086 "mull %%edx \n\t" /* eax = u64Hi.lo = (u64A.hi * u32B).lo
4087 edx = u64Hi.hi = (u64A.hi * u32B).hi */
4088 "addl %%edi,%%eax \n\t" /* u64Hi.lo += u64Lo.hi */
4089 "adcl $0,%%edx \n\t" /* u64Hi.hi += carry */
4090 "divl %%ecx \n\t" /* eax = u64Hi / u32C
4091 edx = u64Hi % u32C */
4092 "movl %%eax,%%edi \n\t" /* edi = u64Result.hi = u64Hi / u32C */
4093 "movl %%esi,%%eax \n\t" /* eax = u64Lo.lo */
4094 "divl %%ecx \n\t" /* u64Result.lo */
4095 "movl %%edi,%%edx \n\t" /* u64Result.hi */
4096 : "=A"(u64Result), "=c"(u32Dummy),
4097 "=S"(u32Dummy), "=D"(u32Dummy)
4098 : "a"((uint32_t)u64A),
4099 "S"((uint32_t)(u64A >> 32)),
4100 "c"(u32B),
4101 "D"(u32C));
4102 return u64Result;
4103# endif
4104# else
4105 RTUINT64U u;
4106 uint64_t u64Lo = (uint64_t)(u64A & 0xffffffff) * u32B;
4107 uint64_t u64Hi = (uint64_t)(u64A >> 32) * u32B;
4108 u64Hi += (u64Lo >> 32);
4109 u.s.Hi = (uint32_t)(u64Hi / u32C);
4110 u.s.Lo = (uint32_t)((((u64Hi % u32C) << 32) + (u64Lo & 0xffffffff)) / u32C);
4111 return u.u;
4112# endif
4113}
4114#endif
4115
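/* Illustrative usage sketch (hypothetical names, not part of the API): the 96-bit
 * intermediate is what makes this safe for unit conversions where the plain
 * 64-bit product would overflow, e.g. scaling a tick count to nanoseconds:
 * cTicks * 1000000000 already overflows 64 bits for cTicks >= ~18.4e9.
 * Compiled out with #if 0. */
#if 0
static uint64_t ExampleTicksToNano(uint64_t cTicks, uint32_t uTicksPerSec)
{
    return ASMMultU64ByU32DivByU32(cTicks, 1000000000 /* ns per second */, uTicksPerSec);
}
#endif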
4116
4117/**
4118 * Probes a byte pointer for read access.
4119 *
4120 * While the function will fault if the byte is not read accessible,
4121 * the idea is to do this in a safe place like before acquiring locks
4122 * and such like.
4123 *
4124 * Also, this function guarantees that an eager compiler is not going
4125 * to optimize the probing away.
4126 *
4127 * @param pvByte Pointer to the byte.
4128 */
4129#if RT_INLINE_ASM_EXTERNAL
4130DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4131#else
4132DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4133{
4134 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4135 uint8_t u8;
4136# if RT_INLINE_ASM_GNU_STYLE
4137 __asm__ __volatile__("movb (%1), %0\n\t"
4138 : "=r" (u8)
4139 : "r" (pvByte));
4140# else
4141 __asm
4142 {
4143# ifdef RT_ARCH_AMD64
4144 mov rax, [pvByte]
4145 mov al, [rax]
4146# else
4147 mov eax, [pvByte]
4148 mov al, [eax]
4149# endif
4150 mov [u8], al
4151 }
4152# endif
4153 return u8;
4154}
4155#endif
4156
4157/**
4158 * Probes a buffer for read access page by page.
4159 *
4160 * While the function will fault if the buffer is not fully read
4161 * accessible, the idea is to do this in a safe place like before
4162 * acquiring locks and such like.
4163 *
4164 * Also, this function guarantees that an eager compiler is not going
4165 * to optimize the probing away.
4166 *
4167 * @param pvBuf Pointer to the buffer.
4168 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4169 */
4170DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4171{
4172 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4173 /* the first byte */
4174 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4175 ASMProbeReadByte(pu8);
4176
4177     /* the pages in between. */
4178 while (cbBuf > /*PAGE_SIZE*/0x1000)
4179 {
4180 ASMProbeReadByte(pu8);
4181 cbBuf -= /*PAGE_SIZE*/0x1000;
4182 pu8 += /*PAGE_SIZE*/0x1000;
4183 }
4184
4185 /* the last byte */
4186 ASMProbeReadByte(pu8 + cbBuf - 1);
4187}
4188
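/* Illustrative usage sketch (hypothetical names, not part of the API): probe a
 * caller supplied buffer up front, in a context where a page fault is harmless,
 * instead of faulting later while holding a spinlock or running in some other
 * restricted context. Compiled out with #if 0. */
#if 0
static bool ExampleValidateRequest(const void *pvReq, size_t cbReq)
{
    if (!pvReq || !cbReq)
        return false;
    ASMProbeReadBuffer(pvReq, cbReq);   /* faults here, not later under a lock */
    return true;
}
#endif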
4189
4190/** @def ASMBreakpoint
4191 * Debugger Breakpoint.
4192 * @remark In the gnu world we add a nop instruction after the int3 to
4193 * force gdb to remain at the int3 source line.
4194 * @remark The L4 kernel will try to make sense of the breakpoint, thus the jmp.
4195 * @internal
4196 */
4197#if RT_INLINE_ASM_GNU_STYLE
4198# ifndef __L4ENV__
4199# define ASMBreakpoint() do { __asm__ __volatile__ ("int3\n\tnop"); } while (0)
4200# else
4201# define ASMBreakpoint() do { __asm__ __volatile__ ("int3; jmp 1f; 1:"); } while (0)
4202# endif
4203#else
4204# define ASMBreakpoint() __debugbreak()
4205#endif
4206
4207
4208
4209/** @defgroup grp_inline_bits Bit Operations
4210 * @{
4211 */
4212
4213
4214/**
4215 * Sets a bit in a bitmap.
4216 *
4217 * @param pvBitmap Pointer to the bitmap.
4218 * @param iBit The bit to set.
4219 */
4220#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4221DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4222#else
4223DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4224{
4225# if RT_INLINE_ASM_USES_INTRIN
4226 _bittestandset((long *)pvBitmap, iBit);
4227
4228# elif RT_INLINE_ASM_GNU_STYLE
4229 __asm__ __volatile__ ("btsl %1, %0"
4230 : "=m" (*(volatile long *)pvBitmap)
4231 : "Ir" (iBit)
4232 : "memory");
4233# else
4234 __asm
4235 {
4236# ifdef RT_ARCH_AMD64
4237 mov rax, [pvBitmap]
4238 mov edx, [iBit]
4239 bts [rax], edx
4240# else
4241 mov eax, [pvBitmap]
4242 mov edx, [iBit]
4243 bts [eax], edx
4244# endif
4245 }
4246# endif
4247}
4248#endif
4249
4250
4251/**
4252 * Atomically sets a bit in a bitmap, ordered.
4253 *
4254 * @param pvBitmap Pointer to the bitmap.
4255 * @param iBit The bit to set.
4256 */
4257#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4258DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4259#else
4260DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4261{
4262# if RT_INLINE_ASM_USES_INTRIN
4263 _interlockedbittestandset((long *)pvBitmap, iBit);
4264# elif RT_INLINE_ASM_GNU_STYLE
4265 __asm__ __volatile__ ("lock; btsl %1, %0"
4266 : "=m" (*(volatile long *)pvBitmap)
4267 : "Ir" (iBit)
4268 : "memory");
4269# else
4270 __asm
4271 {
4272# ifdef RT_ARCH_AMD64
4273 mov rax, [pvBitmap]
4274 mov edx, [iBit]
4275 lock bts [rax], edx
4276# else
4277 mov eax, [pvBitmap]
4278 mov edx, [iBit]
4279 lock bts [eax], edx
4280# endif
4281 }
4282# endif
4283}
4284#endif
4285
4286
4287/**
4288 * Clears a bit in a bitmap.
4289 *
4290 * @param pvBitmap Pointer to the bitmap.
4291 * @param iBit The bit to clear.
4292 */
4293#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4294DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4295#else
4296DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4297{
4298# if RT_INLINE_ASM_USES_INTRIN
4299 _bittestandreset((long *)pvBitmap, iBit);
4300
4301# elif RT_INLINE_ASM_GNU_STYLE
4302 __asm__ __volatile__ ("btrl %1, %0"
4303 : "=m" (*(volatile long *)pvBitmap)
4304 : "Ir" (iBit)
4305 : "memory");
4306# else
4307 __asm
4308 {
4309# ifdef RT_ARCH_AMD64
4310 mov rax, [pvBitmap]
4311 mov edx, [iBit]
4312 btr [rax], edx
4313# else
4314 mov eax, [pvBitmap]
4315 mov edx, [iBit]
4316 btr [eax], edx
4317# endif
4318 }
4319# endif
4320}
4321#endif
4322
4323
4324/**
4325 * Atomically clears a bit in a bitmap, ordered.
4326 *
4327 * @param pvBitmap Pointer to the bitmap.
4328 * @param iBit The bit to clear.
4329 * @remark No memory barrier, take care on smp.
4330 */
4331#if RT_INLINE_ASM_EXTERNAL
4332DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4333#else
4334DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4335{
4336# if RT_INLINE_ASM_GNU_STYLE
4337 __asm__ __volatile__ ("lock; btrl %1, %0"
4338 : "=m" (*(volatile long *)pvBitmap)
4339 : "Ir" (iBit)
4340 : "memory");
4341# else
4342 __asm
4343 {
4344# ifdef RT_ARCH_AMD64
4345 mov rax, [pvBitmap]
4346 mov edx, [iBit]
4347 lock btr [rax], edx
4348# else
4349 mov eax, [pvBitmap]
4350 mov edx, [iBit]
4351 lock btr [eax], edx
4352# endif
4353 }
4354# endif
4355}
4356#endif
4357
4358
4359/**
4360 * Toggles a bit in a bitmap.
4361 *
4362 * @param pvBitmap Pointer to the bitmap.
4363 * @param iBit The bit to toggle.
4364 */
4365#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4366DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4367#else
4368DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4369{
4370# if RT_INLINE_ASM_USES_INTRIN
4371 _bittestandcomplement((long *)pvBitmap, iBit);
4372# elif RT_INLINE_ASM_GNU_STYLE
4373 __asm__ __volatile__ ("btcl %1, %0"
4374 : "=m" (*(volatile long *)pvBitmap)
4375 : "Ir" (iBit)
4376 : "memory");
4377# else
4378 __asm
4379 {
4380# ifdef RT_ARCH_AMD64
4381 mov rax, [pvBitmap]
4382 mov edx, [iBit]
4383 btc [rax], edx
4384# else
4385 mov eax, [pvBitmap]
4386 mov edx, [iBit]
4387 btc [eax], edx
4388# endif
4389 }
4390# endif
4391}
4392#endif
4393
4394
4395/**
4396 * Atomically toggles a bit in a bitmap, ordered.
4397 *
4398 * @param pvBitmap Pointer to the bitmap.
4399 * @param iBit The bit to toggle.
4400 */
4401#if RT_INLINE_ASM_EXTERNAL
4402DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4403#else
4404DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4405{
4406# if RT_INLINE_ASM_GNU_STYLE
4407 __asm__ __volatile__ ("lock; btcl %1, %0"
4408 : "=m" (*(volatile long *)pvBitmap)
4409 : "Ir" (iBit)
4410 : "memory");
4411# else
4412 __asm
4413 {
4414# ifdef RT_ARCH_AMD64
4415 mov rax, [pvBitmap]
4416 mov edx, [iBit]
4417 lock btc [rax], edx
4418# else
4419 mov eax, [pvBitmap]
4420 mov edx, [iBit]
4421 lock btc [eax], edx
4422# endif
4423 }
4424# endif
4425}
4426#endif
4427
4428
4429/**
4430 * Tests and sets a bit in a bitmap.
4431 *
4432 * @returns true if the bit was set.
4433 * @returns false if the bit was clear.
4434 * @param pvBitmap Pointer to the bitmap.
4435 * @param iBit The bit to test and set.
4436 */
4437#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4438DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4439#else
4440DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4441{
4442 union { bool f; uint32_t u32; uint8_t u8; } rc;
4443# if RT_INLINE_ASM_USES_INTRIN
4444 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4445
4446# elif RT_INLINE_ASM_GNU_STYLE
4447 __asm__ __volatile__ ("btsl %2, %1\n\t"
4448 "setc %b0\n\t"
4449 "andl $1, %0\n\t"
4450 : "=q" (rc.u32),
4451 "=m" (*(volatile long *)pvBitmap)
4452 : "Ir" (iBit)
4453 : "memory");
4454# else
4455 __asm
4456 {
4457 mov edx, [iBit]
4458# ifdef RT_ARCH_AMD64
4459 mov rax, [pvBitmap]
4460 bts [rax], edx
4461# else
4462 mov eax, [pvBitmap]
4463 bts [eax], edx
4464# endif
4465 setc al
4466 and eax, 1
4467 mov [rc.u32], eax
4468 }
4469# endif
4470 return rc.f;
4471}
4472#endif
4473
4474
4475/**
4476 * Atomically tests and sets a bit in a bitmap, ordered.
4477 *
4478 * @returns true if the bit was set.
4479 * @returns false if the bit was clear.
4480 * @param pvBitmap Pointer to the bitmap.
4481 * @param iBit The bit to test and set.
4482 */
4483#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4484DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4485#else
4486DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4487{
4488 union { bool f; uint32_t u32; uint8_t u8; } rc;
4489# if RT_INLINE_ASM_USES_INTRIN
4490 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4491# elif RT_INLINE_ASM_GNU_STYLE
4492 __asm__ __volatile__ ("lock; btsl %2, %1\n\t"
4493 "setc %b0\n\t"
4494 "andl $1, %0\n\t"
4495 : "=q" (rc.u32),
4496 "=m" (*(volatile long *)pvBitmap)
4497 : "Ir" (iBit)
4498 : "memory");
4499# else
4500 __asm
4501 {
4502 mov edx, [iBit]
4503# ifdef RT_ARCH_AMD64
4504 mov rax, [pvBitmap]
4505 lock bts [rax], edx
4506# else
4507 mov eax, [pvBitmap]
4508 lock bts [eax], edx
4509# endif
4510 setc al
4511 and eax, 1
4512 mov [rc.u32], eax
4513 }
4514# endif
4515 return rc.f;
4516}
4517#endif
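
/* Usage sketch (editorial addition): ASMAtomicBitTestAndSet returns the
 * previous value of the bit, which makes it suitable for claiming a slot
 * exactly once even when several CPUs race for it.  The bitmap and slot
 * count below are hypothetical.
 *
 *      static volatile uint32_t g_au32Used[2];              // 64 slots
 *
 *      int ExampleClaimSlot(void)                           // -1 if full
 *      {
 *          int i;
 *          for (i = 0; i < 64; i++)
 *              if (!ASMAtomicBitTestAndSet(&g_au32Used[0], i))
 *                  return i;                                // was clear, now ours
 *          return -1;
 *      }
 */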
4518
4519
4520/**
4521 * Tests and clears a bit in a bitmap.
4522 *
4523 * @returns true if the bit was set.
4524 * @returns false if the bit was clear.
4525 * @param pvBitmap Pointer to the bitmap.
4526 * @param iBit The bit to test and clear.
4527 */
4528#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4529DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4530#else
4531DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4532{
4533 union { bool f; uint32_t u32; uint8_t u8; } rc;
4534# if RT_INLINE_ASM_USES_INTRIN
4535 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4536
4537# elif RT_INLINE_ASM_GNU_STYLE
4538 __asm__ __volatile__ ("btrl %2, %1\n\t"
4539 "setc %b0\n\t"
4540 "andl $1, %0\n\t"
4541 : "=q" (rc.u32),
4542 "=m" (*(volatile long *)pvBitmap)
4543 : "Ir" (iBit)
4544 : "memory");
4545# else
4546 __asm
4547 {
4548 mov edx, [iBit]
4549# ifdef RT_ARCH_AMD64
4550 mov rax, [pvBitmap]
4551 btr [rax], edx
4552# else
4553 mov eax, [pvBitmap]
4554 btr [eax], edx
4555# endif
4556 setc al
4557 and eax, 1
4558 mov [rc.u32], eax
4559 }
4560# endif
4561 return rc.f;
4562}
4563#endif
4564
4565
4566/**
4567 * Atomically tests and clears a bit in a bitmap, ordered.
4568 *
4569 * @returns true if the bit was set.
4570 * @returns false if the bit was clear.
4571 * @param pvBitmap Pointer to the bitmap.
4572 * @param iBit The bit to test and clear.
4573 * @remark The lock prefix orders the bit operation itself; do not rely on this as a general memory barrier on SMP.
4574 */
4575#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4576DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4577#else
4578DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4579{
4580 union { bool f; uint32_t u32; uint8_t u8; } rc;
4581# if RT_INLINE_ASM_USES_INTRIN
4582 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4583
4584# elif RT_INLINE_ASM_GNU_STYLE
4585 __asm__ __volatile__ ("lock; btrl %2, %1\n\t"
4586 "setc %b0\n\t"
4587 "andl $1, %0\n\t"
4588 : "=q" (rc.u32),
4589 "=m" (*(volatile long *)pvBitmap)
4590 : "Ir" (iBit)
4591 : "memory");
4592# else
4593 __asm
4594 {
4595 mov edx, [iBit]
4596# ifdef RT_ARCH_AMD64
4597 mov rax, [pvBitmap]
4598 lock btr [rax], edx
4599# else
4600 mov eax, [pvBitmap]
4601 lock btr [eax], edx
4602# endif
4603 setc al
4604 and eax, 1
4605 mov [rc.u32], eax
4606 }
4607# endif
4608 return rc.f;
4609}
4610#endif
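
/* Usage sketch (editorial addition): test-and-clear is a natural way to
 * consume a "work pending" flag set by another thread; the flag word and the
 * bit index are hypothetical.
 *
 *      static volatile uint32_t g_fPending;
 *
 *      void ExampleConsumePending(void)
 *      {
 *          if (ASMAtomicBitTestAndClear(&g_fPending, 0))
 *          {
 *              // bit 0 was set and is now cleared by us; do the deferred work.
 *          }
 *      }
 */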
4611
4612
4613/**
4614 * Tests and toggles a bit in a bitmap.
4615 *
4616 * @returns true if the bit was set.
4617 * @returns false if the bit was clear.
4618 * @param pvBitmap Pointer to the bitmap.
4619 * @param iBit The bit to test and toggle.
4620 */
4621#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4622DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4623#else
4624DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4625{
4626 union { bool f; uint32_t u32; uint8_t u8; } rc;
4627# if RT_INLINE_ASM_USES_INTRIN
4628 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4629
4630# elif RT_INLINE_ASM_GNU_STYLE
4631 __asm__ __volatile__ ("btcl %2, %1\n\t"
4632 "setc %b0\n\t"
4633 "andl $1, %0\n\t"
4634 : "=q" (rc.u32),
4635 "=m" (*(volatile long *)pvBitmap)
4636 : "Ir" (iBit)
4637 : "memory");
4638# else
4639 __asm
4640 {
4641 mov edx, [iBit]
4642# ifdef RT_ARCH_AMD64
4643 mov rax, [pvBitmap]
4644 btc [rax], edx
4645# else
4646 mov eax, [pvBitmap]
4647 btc [eax], edx
4648# endif
4649 setc al
4650 and eax, 1
4651 mov [rc.u32], eax
4652 }
4653# endif
4654 return rc.f;
4655}
4656#endif
4657
4658
4659/**
4660 * Atomically tests and toggles a bit in a bitmap, ordered.
4661 *
4662 * @returns true if the bit was set.
4663 * @returns false if the bit was clear.
4664 * @param pvBitmap Pointer to the bitmap.
4665 * @param iBit The bit to test and toggle.
4666 */
4667#if RT_INLINE_ASM_EXTERNAL
4668DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4669#else
4670DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4671{
4672 union { bool f; uint32_t u32; uint8_t u8; } rc;
4673# if RT_INLINE_ASM_GNU_STYLE
4674 __asm__ __volatile__ ("lock; btcl %2, %1\n\t"
4675 "setc %b0\n\t"
4676 "andl $1, %0\n\t"
4677 : "=q" (rc.u32),
4678 "=m" (*(volatile long *)pvBitmap)
4679 : "Ir" (iBit)
4680 : "memory");
4681# else
4682 __asm
4683 {
4684 mov edx, [iBit]
4685# ifdef RT_ARCH_AMD64
4686 mov rax, [pvBitmap]
4687 lock btc [rax], edx
4688# else
4689 mov eax, [pvBitmap]
4690 lock btc [eax], edx
4691# endif
4692 setc al
4693 and eax, 1
4694 mov [rc.u32], eax
4695 }
4696# endif
4697 return rc.f;
4698}
4699#endif
4700
4701
4702/**
4703 * Tests if a bit in a bitmap is set.
4704 *
4705 * @returns true if the bit is set.
4706 * @returns false if the bit is clear.
4707 * @param pvBitmap Pointer to the bitmap.
4708 * @param iBit The bit to test.
4709 */
4710#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4711DECLASM(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit);
4712#else
4713DECLINLINE(bool) ASMBitTest(volatile void *pvBitmap, int32_t iBit)
4714{
4715 union { bool f; uint32_t u32; uint8_t u8; } rc;
4716# if RT_INLINE_ASM_USES_INTRIN
4717 rc.u32 = _bittest((long *)pvBitmap, iBit);
4718# elif RT_INLINE_ASM_GNU_STYLE
4719
4720 __asm__ __volatile__ ("btl %2, %1\n\t"
4721 "setc %b0\n\t"
4722 "andl $1, %0\n\t"
4723 : "=q" (rc.u32),
4724 "=m" (*(volatile long *)pvBitmap)
4725 : "Ir" (iBit)
4726 : "memory");
4727# else
4728 __asm
4729 {
4730 mov edx, [iBit]
4731# ifdef RT_ARCH_AMD64
4732 mov rax, [pvBitmap]
4733 bt [rax], edx
4734# else
4735 mov eax, [pvBitmap]
4736 bt [eax], edx
4737# endif
4738 setc al
4739 and eax, 1
4740 mov [rc.u32], eax
4741 }
4742# endif
4743 return rc.f;
4744}
4745#endif
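
/* Usage sketch (editorial addition): plain, non-atomic bit test; the bitmap
 * below is hypothetical.
 *
 *      uint32_t au32Map[1] = { 0x00000004 };                // only bit 2 set
 *      bool fBit2 = ASMBitTest(au32Map, 2);                 // true
 *      bool fBit3 = ASMBitTest(au32Map, 3);                 // false
 */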
4746
4747
4748/**
4749 * Clears a bit range within a bitmap.
4750 *
4751 * @param pvBitmap Pointer to the bitmap.
4752 * @param iBitStart The first bit to clear.
4753 * @param iBitEnd The first bit not to clear.
4754 */
4755DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4756{
4757 if (iBitStart < iBitEnd)
4758 {
4759 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4760 int iStart = iBitStart & ~31;
4761 int iEnd = iBitEnd & ~31;
4762 if (iStart == iEnd)
4763 *pu32 &= ((1 << (iBitStart & 31)) - 1) | ~((1 << (iBitEnd & 31)) - 1);
4764 else
4765 {
4766 /* bits in first dword. */
4767 if (iBitStart & 31)
4768 {
4769 *pu32 &= (1 << (iBitStart & 31)) - 1;
4770 pu32++;
4771 iBitStart = iStart + 32;
4772 }
4773
4774 /* whole dword. */
4775 if (iBitStart != iEnd)
4776 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4777
4778 /* bits in last dword. */
4779 if (iBitEnd & 31)
4780 {
4781 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4782 *pu32 &= ~((1 << (iBitEnd & 31)) - 1);
4783 }
4784 }
4785 }
4786}
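
/* Usage sketch (editorial addition): the range is half open, i.e. bits
 * iBitStart .. iBitEnd - 1 are cleared while bit iBitEnd itself is left
 * untouched.  The bitmap below is hypothetical.
 *
 *      uint32_t au32Map[4] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
 *      ASMBitClearRange(au32Map, 4, 40);
 *      // Bits 4..39 are now clear; bits 0..3 and 40..127 remain set.
 */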
4787
4788
4789/**
4790 * Finds the first clear bit in a bitmap.
4791 *
4792 * @returns Index of the first zero bit.
4793 * @returns -1 if no clear bit was found.
4794 * @param pvBitmap Pointer to the bitmap.
4795 * @param cBits The number of bits in the bitmap. Multiple of 32.
4796 */
4797#if RT_INLINE_ASM_EXTERNAL
4798DECLASM(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits);
4799#else
4800DECLINLINE(int) ASMBitFirstClear(volatile void *pvBitmap, uint32_t cBits)
4801{
4802 if (cBits)
4803 {
4804 int32_t iBit;
4805# if RT_INLINE_ASM_GNU_STYLE
4806 RTCCUINTREG uEAX, uECX, uEDI;
4807 cBits = RT_ALIGN_32(cBits, 32);
4808 __asm__ __volatile__("repe; scasl\n\t"
4809 "je 1f\n\t"
4810# ifdef RT_ARCH_AMD64
4811 "lea -4(%%rdi), %%rdi\n\t"
4812 "xorl (%%rdi), %%eax\n\t"
4813 "subq %5, %%rdi\n\t"
4814# else
4815 "lea -4(%%edi), %%edi\n\t"
4816 "xorl (%%edi), %%eax\n\t"
4817 "subl %5, %%edi\n\t"
4818# endif
4819 "shll $3, %%edi\n\t"
4820 "bsfl %%eax, %%edx\n\t"
4821 "addl %%edi, %%edx\n\t"
4822 "1:\t\n"
4823 : "=d" (iBit),
4824 "=&c" (uECX),
4825 "=&D" (uEDI),
4826 "=&a" (uEAX)
4827 : "0" (0xffffffff),
4828 "mr" (pvBitmap),
4829 "1" (cBits >> 5),
4830 "2" (pvBitmap),
4831 "3" (0xffffffff));
4832# else
4833 cBits = RT_ALIGN_32(cBits, 32);
4834 __asm
4835 {
4836# ifdef RT_ARCH_AMD64
4837 mov rdi, [pvBitmap]
4838 mov rbx, rdi
4839# else
4840 mov edi, [pvBitmap]
4841 mov ebx, edi
4842# endif
4843 mov edx, 0ffffffffh
4844 mov eax, edx
4845 mov ecx, [cBits]
4846 shr ecx, 5
4847 repe scasd
4848 je done
4849
4850# ifdef RT_ARCH_AMD64
4851 lea rdi, [rdi - 4]
4852 xor eax, [rdi]
4853 sub rdi, rbx
4854# else
4855 lea edi, [edi - 4]
4856 xor eax, [edi]
4857 sub edi, ebx
4858# endif
4859 shl edi, 3
4860 bsf edx, eax
4861 add edx, edi
4862 done:
4863 mov [iBit], edx
4864 }
4865# endif
4866 return iBit;
4867 }
4868 return -1;
4869}
4870#endif
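
/* Usage sketch (editorial addition): a simple free-slot search over an
 * allocation bitmap whose size is a multiple of 32 bits.  The bitmap is
 * hypothetical; ASMBitSet is declared earlier in this header.  Not atomic -
 * use ASMAtomicBitTestAndSet instead if other threads may race for slots.
 *
 *      uint32_t au32Alloc[8] = { 0 };                       // 256 slots, all free
 *      int iFree = ASMBitFirstClear(au32Alloc, 256);
 *      if (iFree >= 0)
 *          ASMBitSet(au32Alloc, iFree);                     // mark it as used
 */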
4871
4872
4873/**
4874 * Finds the next clear bit in a bitmap.
4875 *
4876 * @returns Index of the next clear bit.
4877 * @returns -1 if no clear bit was found.
4878 * @param pvBitmap Pointer to the bitmap.
4879 * @param cBits The number of bits in the bitmap. Multiple of 32.
4880 * @param iBitPrev The bit returned from the last search.
4881 * The search will start at iBitPrev + 1.
4882 */
4883#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4884DECLASM(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4885#else
4886DECLINLINE(int) ASMBitNextClear(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4887{
4888 int iBit = ++iBitPrev & 31;
4889 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
4890 cBits -= iBitPrev & ~31;
4891 if (iBit)
4892 {
4893 /* inspect the first dword. */
4894 uint32_t u32 = (~*(volatile uint32_t *)pvBitmap) >> iBit;
4895# if RT_INLINE_ASM_USES_INTRIN
4896 unsigned long ulBit = 0;
4897 if (_BitScanForward(&ulBit, u32))
4898 return ulBit + iBitPrev;
4899 iBit = -1;
4900# else
4901# if RT_INLINE_ASM_GNU_STYLE
4902 __asm__ __volatile__("bsf %1, %0\n\t"
4903 "jnz 1f\n\t"
4904 "movl $-1, %0\n\t"
4905 "1:\n\t"
4906 : "=r" (iBit)
4907 : "r" (u32));
4908# else
4909 __asm
4910 {
4911 mov edx, [u32]
4912 bsf eax, edx
4913 jnz done
4914 mov eax, 0ffffffffh
4915 done:
4916 mov [iBit], eax
4917 }
4918# endif
4919 if (iBit >= 0)
4920 return iBit + iBitPrev;
4921# endif
4922 /* Search the rest of the bitmap, if there is anything. */
4923 if (cBits > 32)
4924 {
4925 iBit = ASMBitFirstClear((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
4926 if (iBit >= 0)
4927 return iBit + (iBitPrev & ~31) + 32;
4928 }
4929 }
4930 else
4931 {
4932 /* Search the rest of the bitmap. */
4933 iBit = ASMBitFirstClear(pvBitmap, cBits);
4934 if (iBit >= 0)
4935 return iBit + (iBitPrev & ~31);
4936 }
4937 return iBit;
4938}
4939#endif
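
/* Usage sketch (editorial addition): the First/Next pair is intended for
 * iterating all clear bits; the bitmap below is hypothetical.
 *
 *      uint32_t au32Map[2] = { 0xfffffff0, 0xffffffff };    // bits 0..3 clear
 *      int iBit = ASMBitFirstClear(au32Map, 64);
 *      while (iBit >= 0)
 *      {
 *          // iBit takes the values 0, 1, 2 and 3 in this example.
 *          iBit = ASMBitNextClear(au32Map, 64, iBit);
 *      }
 */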
4940
4941
4942/**
4943 * Finds the first set bit in a bitmap.
4944 *
4945 * @returns Index of the first set bit.
4946 * @returns -1 if no set bit was found.
4947 * @param pvBitmap Pointer to the bitmap.
4948 * @param cBits The number of bits in the bitmap. Multiple of 32.
4949 */
4950#if RT_INLINE_ASM_EXTERNAL
4951DECLASM(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits);
4952#else
4953DECLINLINE(int) ASMBitFirstSet(volatile void *pvBitmap, uint32_t cBits)
4954{
4955 if (cBits)
4956 {
4957 int32_t iBit;
4958# if RT_INLINE_ASM_GNU_STYLE
4959 RTCCUINTREG uEAX, uECX, uEDI;
4960 cBits = RT_ALIGN_32(cBits, 32);
4961 __asm__ __volatile__("repe; scasl\n\t"
4962 "je 1f\n\t"
4963# ifdef RT_ARCH_AMD64
4964 "lea -4(%%rdi), %%rdi\n\t"
4965 "movl (%%rdi), %%eax\n\t"
4966 "subq %5, %%rdi\n\t"
4967# else
4968 "lea -4(%%edi), %%edi\n\t"
4969 "movl (%%edi), %%eax\n\t"
4970 "subl %5, %%edi\n\t"
4971# endif
4972 "shll $3, %%edi\n\t"
4973 "bsfl %%eax, %%edx\n\t"
4974 "addl %%edi, %%edx\n\t"
4975 "1:\t\n"
4976 : "=d" (iBit),
4977 "=&c" (uECX),
4978 "=&D" (uEDI),
4979 "=&a" (uEAX)
4980 : "0" (0xffffffff),
4981 "mr" (pvBitmap),
4982 "1" (cBits >> 5),
4983 "2" (pvBitmap),
4984 "3" (0));
4985# else
4986 cBits = RT_ALIGN_32(cBits, 32);
4987 __asm
4988 {
4989# ifdef RT_ARCH_AMD64
4990 mov rdi, [pvBitmap]
4991 mov rbx, rdi
4992# else
4993 mov edi, [pvBitmap]
4994 mov ebx, edi
4995# endif
4996 mov edx, 0ffffffffh
4997 xor eax, eax
4998 mov ecx, [cBits]
4999 shr ecx, 5
5000 repe scasd
5001 je done
5002# ifdef RT_ARCH_AMD64
5003 lea rdi, [rdi - 4]
5004 mov eax, [rdi]
5005 sub rdi, rbx
5006# else
5007 lea edi, [edi - 4]
5008 mov eax, [edi]
5009 sub edi, ebx
5010# endif
5011 shl edi, 3
5012 bsf edx, eax
5013 add edx, edi
5014 done:
5015 mov [iBit], edx
5016 }
5017# endif
5018 return iBit;
5019 }
5020 return -1;
5021}
5022#endif
5023
5024
5025/**
5026 * Finds the next set bit in a bitmap.
5027 *
5028 * @returns Index of the next set bit.
5029 * @returns -1 if no set bit was found.
5030 * @param pvBitmap Pointer to the bitmap.
5031 * @param cBits The number of bits in the bitmap. Multiple of 32.
5032 * @param iBitPrev The bit returned from the last search.
5033 * The search will start at iBitPrev + 1.
5034 */
5035#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5036DECLASM(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5037#else
5038DECLINLINE(int) ASMBitNextSet(volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5039{
5040 int iBit = ++iBitPrev & 31;
5041 pvBitmap = (volatile char *)pvBitmap + ((iBitPrev >> 5) << 2);
5042 cBits -= iBitPrev & ~31;
5043 if (iBit)
5044 {
5045 /* inspect the first dword. */
5046 uint32_t u32 = *(volatile uint32_t *)pvBitmap >> iBit;
5047# if RT_INLINE_ASM_USES_INTRIN
5048 unsigned long ulBit = 0;
5049 if (_BitScanForward(&ulBit, u32))
5050 return ulBit + iBitPrev;
5051 iBit = -1;
5052# else
5053# if RT_INLINE_ASM_GNU_STYLE
5054 __asm__ __volatile__("bsf %1, %0\n\t"
5055 "jnz 1f\n\t"
5056 "movl $-1, %0\n\t"
5057 "1:\n\t"
5058 : "=r" (iBit)
5059 : "r" (u32));
5060# else
5061 __asm
5062 {
5063 mov edx, u32
5064 bsf eax, edx
5065 jnz done
5066 mov eax, 0ffffffffh
5067 done:
5068 mov [iBit], eax
5069 }
5070# endif
5071 if (iBit >= 0)
5072 return iBit + iBitPrev;
5073# endif
5074 /* Search the rest of the bitmap, if there is anything. */
5075 if (cBits > 32)
5076 {
5077 iBit = ASMBitFirstSet((volatile char *)pvBitmap + sizeof(uint32_t), cBits - 32);
5078 if (iBit >= 0)
5079 return iBit + (iBitPrev & ~31) + 32;
5080 }
5081
5082 }
5083 else
5084 {
5085 /* Search the rest of the bitmap. */
5086 iBit = ASMBitFirstSet(pvBitmap, cBits);
5087 if (iBit >= 0)
5088 return iBit + (iBitPrev & ~31);
5089 }
5090 return iBit;
5091}
5092#endif
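
/* Usage sketch (editorial addition): mirrors the clear-bit iteration above,
 * walking every set bit in the (hypothetical) bitmap.
 *
 *      uint32_t au32Map[2] = { 0x00000005, 0x80000000 };    // bits 0, 2 and 63
 *      int iBit = ASMBitFirstSet(au32Map, 64);
 *      while (iBit >= 0)
 *      {
 *          // iBit takes the values 0, 2 and 63 in this example.
 *          iBit = ASMBitNextSet(au32Map, 64, iBit);
 *      }
 */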
5093
5094
5095/**
5096 * Finds the first bit which is set in the given 32-bit integer.
5097 * Bits are numbered from 1 (least significant) to 32.
5098 *
5099 * @returns index [1..32] of the first set bit.
5100 * @returns 0 if all bits are cleared.
5101 * @param u32 Integer to search for set bits.
5102 * @remark Similar to ffs() in BSD.
5103 */
5104DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5105{
5106# if RT_INLINE_ASM_USES_INTRIN
5107 unsigned long iBit;
5108 if (_BitScanForward(&iBit, u32))
5109 iBit++;
5110 else
5111 iBit = 0;
5112# elif RT_INLINE_ASM_GNU_STYLE
5113 uint32_t iBit;
5114 __asm__ __volatile__("bsf %1, %0\n\t"
5115 "jnz 1f\n\t"
5116 "xorl %0, %0\n\t"
5117 "jmp 2f\n"
5118 "1:\n\t"
5119 "incl %0\n"
5120 "2:\n\t"
5121 : "=r" (iBit)
5122 : "rm" (u32));
5123# else
5124 uint32_t iBit;
5125 _asm
5126 {
5127 bsf eax, [u32]
5128 jnz found
5129 xor eax, eax
5130 jmp done
5131 found:
5132 inc eax
5133 done:
5134 mov [iBit], eax
5135 }
5136# endif
5137 return iBit;
5138}
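
/* Usage sketch (editorial addition): note the 1-based result, matching the
 * BSD ffs() convention.
 *
 *      unsigned i1 = ASMBitFirstSetU32(0);                  // 0  (no bit set)
 *      unsigned i2 = ASMBitFirstSetU32(1);                  // 1  (bit 0)
 *      unsigned i3 = ASMBitFirstSetU32(0x80000000);         // 32 (bit 31)
 */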
5139
5140
5141/**
5142 * Finds the first bit which is set in the given 32-bit integer.
5143 * Bits are numbered from 1 (least significant) to 32.
5144 *
5145 * @returns index [1..32] of the first set bit.
5146 * @returns 0 if all bits are cleared.
5147 * @param i32 Integer to search for set bits.
5148 * @remark Similar to ffs() in BSD.
5149 */
5150DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5151{
5152 return ASMBitFirstSetU32((uint32_t)i32);
5153}
5154
5155
5156/**
5157 * Finds the last bit which is set in the given 32-bit integer.
5158 * Bits are numbered from 1 (least significant) to 32.
5159 *
5160 * @returns index [1..32] of the last set bit.
5161 * @returns 0 if all bits are cleared.
5162 * @param u32 Integer to search for set bits.
5163 * @remark Similar to fls() in BSD.
5164 */
5165DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5166{
5167# if RT_INLINE_ASM_USES_INTRIN
5168 unsigned long iBit;
5169 if (_BitScanReverse(&iBit, u32))
5170 iBit++;
5171 else
5172 iBit = 0;
5173# elif RT_INLINE_ASM_GNU_STYLE
5174 uint32_t iBit;
5175 __asm__ __volatile__("bsrl %1, %0\n\t"
5176 "jnz 1f\n\t"
5177 "xorl %0, %0\n\t"
5178 "jmp 2f\n"
5179 "1:\n\t"
5180 "incl %0\n"
5181 "2:\n\t"
5182 : "=r" (iBit)
5183 : "rm" (u32));
5184# else
5185 uint32_t iBit;
5186 _asm
5187 {
5188 bsr eax, [u32]
5189 jnz found
5190 xor eax, eax
5191 jmp done
5192 found:
5193 inc eax
5194 done:
5195 mov [iBit], eax
5196 }
5197# endif
5198 return iBit;
5199}
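
/* Usage sketch (editorial addition): the 1-based result gives a quick integer
 * log2 for non-zero input: floor(log2(u32)) == ASMBitLastSetU32(u32) - 1.
 *
 *      unsigned i1 = ASMBitLastSetU32(0);                   // 0  (no bit set)
 *      unsigned i2 = ASMBitLastSetU32(1);                   // 1  (bit 0)
 *      unsigned i3 = ASMBitLastSetU32(0x00000100);          // 9  (bit 8)
 */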
5200
5201
5202/**
5203 * Finds the last bit which is set in the given 32-bit integer.
5204 * Bits are numbered from 1 (least significant) to 32.
5205 *
5206 * @returns index [1..32] of the last set bit.
5207 * @returns 0 if all bits are cleared.
5208 * @param i32 Integer to search for set bits.
5209 * @remark Similar to fls() in BSD.
5210 */
5211DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5212{
5213 return ASMBitLastSetU32((uint32_t)i32);
5214}
5215
5216
5217/**
5218 * Reverse the byte order of the given 32-bit integer.
5219 * @param u32 The 32-bit integer to byte swap.
5220 */
5221DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5222{
5223#if RT_INLINE_ASM_USES_INTRIN
5224 u32 = _byteswap_ulong(u32);
5225#elif RT_INLINE_ASM_GNU_STYLE
5226 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5227#else
5228 _asm
5229 {
5230 mov eax, [u32]
5231 bswap eax
5232 mov [u32], eax
5233 }
5234#endif
5235 return u32;
5236}
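
/* Usage sketch (editorial addition): byte swapping converts between little
 * and big endian representations, e.g. when dealing with network byte order
 * on a little endian host.
 *
 *      uint32_t u32Host    = UINT32_C(0x12345678);
 *      uint32_t u32Swapped = ASMByteSwapU32(u32Host);       // 0x78563412
 */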
5237
5238/** @} */
5239
5240
5241/** @} */
5242#endif
5243