Index: /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp
===================================================================
--- /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp	(revision 51883)
+++ /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp	(revision 51884)
@@ -202,9 +202,30 @@
  *
  * @param   pCtx                The SHA-256 context.
- * @param   pbBlock             The block.  Must be 32-bit aligned.
+ * @param   pbBlock             The block.  Must be arch-bit-width aligned.
  */
 DECLINLINE(void) rtSha256BlockInit(PRTSHA256CONTEXT pCtx, uint8_t const *pbBlock)
 {
 #ifdef RTSHA256_UNROLLED
+    /* Copy and byte-swap the block. Initializing the rest of the Ws is done
+       in the processing loop. */
+# ifdef RT_LITTLE_ENDIAN
+#  if ARCH_BITS == 64
+    uint64_t const *puSrc = (uint64_t const *)pbBlock;
+    uint64_t       *puW   = (uint64_t *)&pCtx->AltPrivate.auW[0];
+    Assert(!((uintptr_t)puSrc & 7));
+    Assert(!((uintptr_t)puW & 7));
+
+    /* b0 b1 b2 b3  b4 b5 b6 b7 --bswap--> b7 b6 b5 b4 b3 b2 b1 b0 --ror--> b3 b2 b1 b0  b7 b6 b5 b4; */
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
+
+#  else
     uint32_t const *puSrc = (uint32_t const *)pbBlock;
     uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
@@ -212,29 +233,27 @@
     Assert(!((uintptr_t)puW & 3));
 
-    /* Copy and byte-swap the block. Initializing the rest of the Ws are done
-       in the processing loop. */
-# ifdef RT_LITTLE_ENDIAN
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-    *puW++ = ASMByteSwapU32(*puSrc++);
-# else
-    memcpy(puW, puSrc, RTSHA256_BLOCK_SIZE);
-# endif
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+    *puW++ = ASMByteSwapU32(*puSrc++);
+#  endif
+# else  /* RT_BIG_ENDIAN */
+    memcpy(&pCtx->AltPrivate.auW[0], pbBlock, RTSHA256_BLOCK_SIZE);
+# endif /* RT_BIG_ENDIAN */
 
 #else  /* !RTSHA256_UNROLLED */
@@ -266,29 +285,45 @@
 {
 #ifdef RTSHA256_UNROLLED
-    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
-    Assert(!((uintptr_t)puW & 3));
-
     /* Do the byte swap if necessary. Initializing the rest of the Ws are done
        in the processing loop. */
 # ifdef RT_LITTLE_ENDIAN
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
-    *puW = ASMByteSwapU32(*puW); puW++;
+#  if ARCH_BITS == 64
+    uint64_t *puW = (uint64_t *)&pCtx->AltPrivate.auW[0];
+    Assert(!((uintptr_t)puW & 7));
+    /* b0 b1 b2 b3  b4 b5 b6 b7 --bswap--> b7 b6 b5 b4 b3 b2 b1 b0 --ror--> b3 b2 b1 b0  b7 b6 b5 b4; */
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
+
+#  else
+    uint32_t *puW = &pCtx->AltPrivate.auW[0];
+    Assert(!((uintptr_t)puW & 3));
+
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+    *puW = ASMByteSwapU32(*puW); puW++;
+#  endif
 # endif
 
@@ -444,5 +479,5 @@
     }
 
-    if (!((uintptr_t)pbBuf & 3))
+    if (!((uintptr_t)pbBuf & (sizeof(void *) - 1)))
     {
         /*
