Index: /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp
===================================================================
--- /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp	(revision 51884)
+++ /trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp	(revision 51885)
@@ -210,5 +210,15 @@
        in the processing loop. */
 # ifdef RT_LITTLE_ENDIAN
-#  if ARCH_BITS == 64
+#  if 0 /* Just an idea... very little gain as this isn't the expensive code. */
+    __m128i const  uBSwapConst = { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 };
+    __m128i const *puSrc = (__m128i const *)pbBlock;
+    __m128i       *puDst = (__m128i *)&pCtx->AltPrivate.auW[0];
+
+    _mm_storeu_si128(puDst, _mm_shuffle_epi8(_mm_loadu_si128(puSrc), uBSwapConst)); puDst++; puSrc++;
+    _mm_storeu_si128(puDst, _mm_shuffle_epi8(_mm_loadu_si128(puSrc), uBSwapConst)); puDst++; puSrc++;
+    _mm_storeu_si128(puDst, _mm_shuffle_epi8(_mm_loadu_si128(puSrc), uBSwapConst)); puDst++; puSrc++;
+    _mm_storeu_si128(puDst, _mm_shuffle_epi8(_mm_loadu_si128(puSrc), uBSwapConst)); puDst++; puSrc++;
+
+#  elif ARCH_BITS == 64
     uint64_t const *puSrc = (uint64_t const *)pbBlock;
     uint64_t       *puW   = (uint64_t *)&pCtx->AltPrivate.auW[0];
