Index: /trunk/include/iprt/asm-watcom-x86-16.h
===================================================================
--- /trunk/include/iprt/asm-watcom-x86-16.h	(revision 58770)
+++ /trunk/include/iprt/asm-watcom-x86-16.h	(revision 58771)
@@ -537,4 +537,42 @@
     modify exact [ax dx] nomemory;
 
+#undef      ASMBitFirstSetU64
+#pragma aux ASMBitFirstSetU64 = \
+    "shl ecx, 16" \
+    "mov cx, dx" \
+    "bsf ecx, ecx" \
+    "jz  not_found_low" \
+    "mov ax, cx" \
+    "inc ax" \
+    "jmp done" \
+    /* Low dword (cx:dx) is zero: assemble the high dword ax:bx in eax. */ \
+    "not_found_low:" \
+    "shl eax, 16" \
+    "mov ax, bx" \
+    "bsf eax, eax" \
+    "jz  not_found_high" \
+    "add ax, 33" \
+    "jmp done" \
+    /* Neither dword has a bit set: return 0. */ \
+    "not_found_high:" \
+    "xor ax, ax" \
+    "done:" \
+    parm [dx cx bx ax] nomemory \
+    value [ax] \
+    modify exact [ax cx] nomemory;
+
+#undef      ASMBitFirstSetU16
+#pragma aux ASMBitFirstSetU16 = \
+    "bsf ax, ax"    /* ZF=1 when the input is zero */ \
+    "jnz found" \
+    "xor ax, ax"    /* no bit set: return 0 */ \
+    "jmp done" \
+    "found:" \
+    "inc ax"        /* 0-based bit index -> 1-based */ \
+    "done:" \
+    parm [ax] nomemory \
+    value [ax] \
+    modify exact [ax] nomemory;
+
 #undef      ASMBitLastSetU32
 #pragma aux ASMBitLastSetU32 = \
@@ -551,4 +589,42 @@
     value [ax] \
     modify exact [ax dx] nomemory;
+
+#undef      ASMBitLastSetU64
+#pragma aux ASMBitLastSetU64 = \
+    "shl eax, 16" \
+    "mov ax, bx" \
+    "bsr eax, eax" \
+    "jz  not_found_high" \
+    "add ax, 33" \
+    "jmp done" \
+    /* High dword (ax:bx) is zero: assemble the low dword cx:dx in ecx. */ \
+    "not_found_high:" \
+    "shl ecx, 16" \
+    "mov cx, dx" \
+    "bsr ecx, ecx" \
+    "jz  not_found_low" \
+    "mov ax, cx" \
+    "inc ax" \
+    "jmp done" \
+    /* Neither dword has a bit set: return 0. */ \
+    "not_found_low:" \
+    "xor ax, ax" \
+    "done:" \
+    parm [dx cx bx ax] nomemory \
+    value [ax] \
+    modify exact [ax cx] nomemory;
+
+#undef      ASMBitLastSetU16
+#pragma aux ASMBitLastSetU16 = \
+    "bsr ax, ax"    /* ZF=1 when the input is zero */ \
+    "jnz found" \
+    "xor ax, ax"    /* no bit set: return 0 */ \
+    "jmp done" \
+    "found:" \
+    "inc ax"        /* 0-based bit index -> 1-based */ \
+    "done:" \
+    parm [ax] nomemory \
+    value [ax] \
+    modify exact [ax] nomemory;
 
 #undef      ASMByteSwapU16
Index: /trunk/include/iprt/asm-watcom-x86-32.h
===================================================================
--- /trunk/include/iprt/asm-watcom-x86-32.h	(revision 58770)
+++ /trunk/include/iprt/asm-watcom-x86-32.h	(revision 58771)
@@ -468,4 +468,38 @@
     modify exact [eax] nomemory;
 
+#undef      ASMBitFirstSetU64
+#pragma aux ASMBitFirstSetU64 = \
+    "bsf eax, eax" \
+    "jnz found_low" \
+    /* Low dword is zero, scan the high dword in edx instead. */ \
+    "bsf eax, edx" \
+    "jnz found_high" \
+    "xor eax, eax" \
+    "jmp done" \
+    \
+    "found_high:" \
+    "add eax, 32" \
+    /* Fall through: the shared +1 makes the index 1-based. */ \
+    "found_low:" \
+    "inc eax" \
+    "done:" \
+    parm [eax edx] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+
+#undef      ASMBitFirstSetU16
+#pragma aux ASMBitFirstSetU16 = \
+    "movzx eax, ax"     /* clear the upper half before the 32-bit scan */ \
+    "bsf eax, eax" \
+    "jnz found" \
+    "xor eax, eax"      /* no bit set: return 0 */ \
+    "jmp done" \
+    "found:" \
+    "inc eax"           /* 0-based bit index -> 1-based */ \
+    "done:" \
+    parm [ax] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+
 #undef      ASMBitLastSetU32
 #pragma aux ASMBitLastSetU32 = \
@@ -478,4 +512,38 @@
     "done:" \
     parm [eax] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+
+#undef      ASMBitLastSetU64
+#pragma aux ASMBitLastSetU64 = \
+    "bsr edx, edx" \
+    "jz  not_found_high" \
+    "lea eax, [edx + 33]" \
+    "jmp done" \
+    /* High dword (edx) is zero: scan the low dword in eax. */ \
+    "not_found_high:" \
+    "bsr eax, eax" \
+    "jz  not_found_low" \
+    "inc eax" \
+    "jmp done" \
+    /* Neither dword has a bit set: return 0. */ \
+    "not_found_low:" \
+    "xor eax, eax" \
+    "done:" \
+    parm [eax edx] nomemory \
+    value [eax] \
+    modify exact [eax edx] nomemory;
+
+#undef      ASMBitLastSetU16
+#pragma aux ASMBitLastSetU16 = \
+    "movzx eax, ax"     /* clear the upper half before the 32-bit scan */ \
+    "bsr eax, eax" \
+    "jnz found" \
+    "xor eax, eax"      /* no bit set: return 0 */ \
+    "jmp done" \
+    "found:" \
+    "inc eax"           /* 0-based bit index -> 1-based */ \
+    "done:" \
+    parm [ax] nomemory \
     value [eax] \
     modify exact [eax] nomemory;
Index: /trunk/include/iprt/asm.h
===================================================================
--- /trunk/include/iprt/asm.h	(revision 58770)
+++ /trunk/include/iprt/asm.h	(revision 58771)
@@ -4797,5 +4797,5 @@
  * @returns 0 if all bits are cleared.
  * @param   u32     Integer to search for set bits.
- * @remark  Similar to ffs() in BSD.
+ * @remarks Similar to ffs() in BSD.
  */
 #if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
@@ -4856,4 +4856,79 @@
 
 /**
+ * Finds the first bit which is set in the given 64-bit integer.
+ *
+ * Bits are numbered from 1 (least significant) to 64.
+ *
+ * @returns index [1..64] of the first set bit.
+ * @returns 0 if all bits are cleared.
+ * @param   u64     Integer to search for set bits.
+ * @remarks Similar to ffs() in BSD.
+ */
+#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
+DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
+#else
+DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
+{
+# if RT_INLINE_ASM_USES_INTRIN
+    unsigned long iBit;
+#  if ARCH_BITS == 64
+    if (_BitScanForward64(&iBit, u64))
+        iBit++;
+    else
+        iBit = 0;
+#  else
+    if (_BitScanForward(&iBit, (uint32_t)u64))          /* low dword first */
+        iBit++;
+    else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
+        iBit += 33;                                     /* 0-based index in high dword -> [33..64] */
+    else
+        iBit = 0;
+#  endif
+# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
+    uint64_t iBit;
+    __asm__ __volatile__("bsfq %1, %0\n\t"
+                         "jnz  1f\n\t"
+                         "xorl %k0, %k0\n\t" /* %k0: 32-bit name of the 64-bit operand, as the 'l' suffix requires */
+                         "jmp  2f\n"
+                         "1:\n\t"
+                         "incl %k0\n"        /* index is < 64, so the 32-bit increment cannot overflow */
+                         "2:\n\t"
+                         : "=r" (iBit)
+                         : "rm" (u64));
+# else
+    unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
+    if (!iBit)
+    {
+        iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
+        if (iBit)
+            iBit += 32;                                 /* 1-based index in high dword -> [33..64] */
+    }
+# endif
+    return (unsigned)iBit;
+}
+#endif
+
+
+/**
+ * Finds the first bit which is set in the given 16-bit integer.
+ *
+ * Bits are numbered from 1 (least significant) to 16.
+ *
+ * @returns index [1..16] of the first set bit.
+ * @returns 0 if all bits are cleared.
+ * @param   u16     Integer to search for set bits.
+ * @remarks For 16-bit bs3kit code.
+ */
+#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
+DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
+#else
+DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
+{
+    return ASMBitFirstSetU32((uint32_t)u16); /* zero-extended, so the result stays within [0..16] */
+}
+#endif
+
+
+/**
  * Finds the last bit which is set in the given 32-bit integer.
  * Bits are numbered from 1 (least significant) to 32.
@@ -4918,4 +4993,78 @@
     return ASMBitLastSetU32((uint32_t)i32);
 }
+
+
+/**
+ * Finds the last bit which is set in the given 64-bit integer.
+ *
+ * Bits are numbered from 1 (least significant) to 64.
+ *
+ * @returns index [1..64] of the last set bit.
+ * @returns 0 if all bits are cleared.
+ * @param   u64     Integer to search for set bits.
+ * @remarks Similar to fls() in BSD.
+ */
+#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
+DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
+#else
+DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
+{
+# if RT_INLINE_ASM_USES_INTRIN
+    unsigned long iBit;
+#  if ARCH_BITS == 64
+    if (_BitScanReverse64(&iBit, u64))
+        iBit++;
+    else
+        iBit = 0;
+#  else
+    if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32))) /* high dword first */
+        iBit += 33;                                     /* 0-based index in high dword -> [33..64] */
+    else if (_BitScanReverse(&iBit, (uint32_t)u64))
+        iBit++;
+    else
+        iBit = 0;
+#  endif
+# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
+    uint64_t iBit;
+    __asm__ __volatile__("bsrq %1, %0\n\t"
+                         "jnz  1f\n\t"
+                         "xorl %k0, %k0\n\t" /* %k0: 32-bit name of the 64-bit operand, as the 'l' suffix requires */
+                         "jmp  2f\n"
+                         "1:\n\t"
+                         "incl %k0\n"        /* index is < 64, so the 32-bit increment cannot overflow */
+                         "2:\n\t"
+                         : "=r" (iBit)
+                         : "rm" (u64));
+# else
+    unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
+    if (iBit)
+        iBit += 32;                                     /* 1-based index in high dword -> [33..64] */
+    else
+        iBit = ASMBitLastSetU32((uint32_t)u64);
+# endif
+    return (unsigned)iBit;
+}
+#endif
+
+
+/**
+ * Finds the last bit which is set in the given 16-bit integer.
+ *
+ * Bits are numbered from 1 (least significant) to 16.
+ *
+ * @returns index [1..16] of the last set bit.
+ * @returns 0 if all bits are cleared.
+ * @param   u16     Integer to search for set bits.
+ * @remarks For 16-bit bs3kit code.
+ */
+#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
+DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
+#else
+DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
+{
+    return ASMBitLastSetU32((uint32_t)u16); /* zero-extended, so the result stays within [0..16] */
+}
+#endif
+
 
 /**
