VirtualBox

Changeset 51883 in vbox


Ignore:
Timestamp:
Jul 6, 2014 1:59:04 PM (10 years ago)
Author:
vboxsync
Message:

alt-sha512: Applied the optimizations from alt-sha256 and alt-sha1, gaining 15-20%.

Location:
trunk/src/VBox/Runtime/common
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/asm/asm-fake.cpp

    r44529 r51883  
    461461}
    462462
     463RTDECL(uint64_t) ASMByteSwapU64(uint64_t u64)
     464{
     465    return RT_MAKE_U64_FROM_U8(RT_BYTE8(u64), RT_BYTE7(u64), RT_BYTE6(u64), RT_BYTE5(u64),
     466                               RT_BYTE4(u64), RT_BYTE3(u64), RT_BYTE2(u64), RT_BYTE1(u64));
     467}
     468
  • trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp

    r51881 r51883  
    3333
    3434/** Enables the unrolled code. */
    35 #define RTSHA1_UNROLLED 1
     35#define RTSHA256_UNROLLED 1
    3636
    3737
     
    7171*   Global Variables                                                           *
    7272*******************************************************************************/
    73 #ifndef RTSHA1_UNROLLED
     73#ifndef RTSHA256_UNROLLED
    7474/** The K constants */
    7575static uint32_t const g_auKs[] =
     
    9292    UINT32_C(0x90befffa), UINT32_C(0xa4506ceb), UINT32_C(0xbef9a3f7), UINT32_C(0xc67178f2),
    9393};
    94 #endif /* !RTSHA1_UNROLLED */
     94#endif /* !RTSHA256_UNROLLED */
    9595
    9696
     
    206206DECLINLINE(void) rtSha256BlockInit(PRTSHA256CONTEXT pCtx, uint8_t const *pbBlock)
    207207{
    208 #ifdef RTSHA1_UNROLLED
     208#ifdef RTSHA256_UNROLLED
    209209    uint32_t const *puSrc = (uint32_t const *)pbBlock;
    210210    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
     
    235235    *puW++ = ASMByteSwapU32(*puSrc++);
    236236# else
    237     memcpy(puW, puSrc, RTSHA1_BLOCK_SIZE);
     237    memcpy(puW, puSrc, RTSHA256_BLOCK_SIZE);
    238238# endif
    239239
    240 #else  /* !RTSHA1_UNROLLED */
     240#else  /* !RTSHA256_UNROLLED */
    241241    uint32_t const *pu32Block = (uint32_t const *)pbBlock;
    242242    Assert(!((uintptr_t)pu32Block & 3));
     
    254254        pCtx->AltPrivate.auW[iWord] = u32;
    255255    }
    256 #endif /* !RTSHA1_UNROLLED */
     256#endif /* !RTSHA256_UNROLLED */
    257257}
    258258
     
    265265DECLINLINE(void) rtSha256BlockInitBuffered(PRTSHA256CONTEXT pCtx)
    266266{
    267 #ifdef RTSHA1_UNROLLED
     267#ifdef RTSHA256_UNROLLED
    268268    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
    269269    Assert(!((uintptr_t)puW & 3));
     
    293293# endif
    294294
    295 #else  /* !RTSHA1_UNROLLED */
     295#else  /* !RTSHA256_UNROLLED */
    296296    unsigned iWord;
    297297    for (iWord = 0; iWord < 16; iWord++)
     
    306306        pCtx->AltPrivate.auW[iWord] = u32;
    307307    }
    308 #endif /* !RTSHA1_UNROLLED */
     308#endif /* !RTSHA256_UNROLLED */
    309309}
    310310
     
    328328    uint32_t uH = pCtx->AltPrivate.auH[7];
    329329
    330 #ifdef RTSHA1_UNROLLED
     330#ifdef RTSHA256_UNROLLED
    331331    uint32_t *puW = &pCtx->AltPrivate.auW[0];
    332332# define RTSHA256_BODY(a_iWord, a_uK, a_uA, a_uB, a_uC, a_uD, a_uE, a_uF, a_uG, a_uH) \
     
    380380                   UINT32_C(0x90befffa), UINT32_C(0xa4506ceb), UINT32_C(0xbef9a3f7), UINT32_C(0xc67178f2), 56);
    381381
    382 #else  /* !RTSHA1_UNROLLED */
     382#else  /* !RTSHA256_UNROLLED */
    383383    for (unsigned iWord = 0; iWord < RT_ELEMENTS(pCtx->AltPrivate.auW); iWord++)
    384384    {
     
    401401        uA = uT1 + uT2;
    402402    }
    403 #endif /* !RTSHA1_UNROLLED */
     403#endif /* !RTSHA256_UNROLLED */
    404404
    405405    pCtx->AltPrivate.auH[0] += uA;
  • trunk/src/VBox/Runtime/common/checksum/alt-sha512.cpp

    r51861 r51883  
    3232#define RTSHA512_BLOCK_SIZE   128U
    3333
     34/** Enables the unrolled code. */
     35#define RTSHA512_UNROLLED 1
     36
    3437
    3538/*******************************************************************************
     
    6871*   Global Variables                                                           *
    6972*******************************************************************************/
     73#ifndef RTSHA512_UNROLLED
    7074/** The K constants. */
    7175static uint64_t const g_auKs[] =
     
    9296    UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817),
    9397};
     98#endif /* !RTSHA512_UNROLLED */
    9499
    95100
     
    114119DECL_FORCE_INLINE(uint64_t) rtSha512Ch(uint64_t uX, uint64_t uY, uint64_t uZ)
    115120{
     121#if 1
     122    /* Optimization that saves one operation and probably a temporary variable. */
     123    uint64_t uResult = uY;
     124    uResult ^= uZ;
     125    uResult &= uX;
     126    uResult ^= uZ;
     127    return uResult;
     128#else
     129    /* The original. */
    116130    uint64_t uResult = uX & uY;
    117131    uResult ^= ~uX & uZ;
    118132    return uResult;
     133#endif
    119134}
    120135
     
    123138DECL_FORCE_INLINE(uint64_t) rtSha512Maj(uint64_t uX, uint64_t uY, uint64_t uZ)
    124139{
     140#if 1
     141    /* Optimization that saves one operation and probably a temporary variable. */
     142    uint64_t uResult = uY;
     143    uResult ^= uZ;
     144    uResult &= uX;
     145    uResult ^= uY & uZ;
     146    return uResult;
     147#else
     148    /* The original. */
    125149    uint64_t uResult = uX & uY;
    126150    uResult ^= uX & uZ;
    127151    uResult ^= uY & uZ;
    128152    return uResult;
     153#endif
    129154}
    130155
     
    182207 *
    183208 * @param   pCtx                The SHA-512 context.
    184  * @param   pbBlock             The block.  Must be 32-bit aligned.
     209 * @param   pbBlock             The block.  Must be 64-bit aligned.
    185210 */
    186211DECLINLINE(void) rtSha512BlockInit(PRTSHA512CONTEXT pCtx, uint8_t const *pbBlock)
    187212{
     213#ifdef RTSHA512_UNROLLED
     214    uint64_t const *puSrc = (uint64_t const *)pbBlock;
     215    uint64_t       *puW   = &pCtx->AltPrivate.auW[0];
     216    Assert(!((uintptr_t)puSrc & 7));
     217    Assert(!((uintptr_t)puW & 7));
     218
     219    /* Copy and byte-swap the block. Initializing the rest of the Ws is done
     220       in the processing loop. */
     221# ifdef RT_LITTLE_ENDIAN
     222    *puW++ = ASMByteSwapU64(*puSrc++);
     223    *puW++ = ASMByteSwapU64(*puSrc++);
     224    *puW++ = ASMByteSwapU64(*puSrc++);
     225    *puW++ = ASMByteSwapU64(*puSrc++);
     226
     227    *puW++ = ASMByteSwapU64(*puSrc++);
     228    *puW++ = ASMByteSwapU64(*puSrc++);
     229    *puW++ = ASMByteSwapU64(*puSrc++);
     230    *puW++ = ASMByteSwapU64(*puSrc++);
     231
     232    *puW++ = ASMByteSwapU64(*puSrc++);
     233    *puW++ = ASMByteSwapU64(*puSrc++);
     234    *puW++ = ASMByteSwapU64(*puSrc++);
     235    *puW++ = ASMByteSwapU64(*puSrc++);
     236
     237    *puW++ = ASMByteSwapU64(*puSrc++);
     238    *puW++ = ASMByteSwapU64(*puSrc++);
     239    *puW++ = ASMByteSwapU64(*puSrc++);
     240    *puW++ = ASMByteSwapU64(*puSrc++);
     241# else
     242    memcpy(puW, puSrc, RTSHA512_BLOCK_SIZE);
     243# endif
     244
     245#else  /* !RTSHA512_UNROLLED */
     246
    188247    uint64_t const *pu32Block = (uint64_t const *)pbBlock;
    189248    Assert(!((uintptr_t)pu32Block & 3));
     
    201260        pCtx->AltPrivate.auW[iWord] = u64;
    202261    }
     262#endif /* !RTSHA512_UNROLLED */
    203263}
    204264
     
    211271DECLINLINE(void) rtSha512BlockInitBuffered(PRTSHA512CONTEXT pCtx)
    212272{
     273#ifdef RTSHA512_UNROLLED
     274    uint64_t *puW = &pCtx->AltPrivate.auW[0];
     275    Assert(!((uintptr_t)puW & 7));
     276
     277    /* Do the byte swap if necessary. Initializing the rest of the Ws is done
     278       in the processing loop. */
     279# ifdef RT_LITTLE_ENDIAN
     280    *puW = ASMByteSwapU64(*puW); puW++;
     281    *puW = ASMByteSwapU64(*puW); puW++;
     282    *puW = ASMByteSwapU64(*puW); puW++;
     283    *puW = ASMByteSwapU64(*puW); puW++;
     284
     285    *puW = ASMByteSwapU64(*puW); puW++;
     286    *puW = ASMByteSwapU64(*puW); puW++;
     287    *puW = ASMByteSwapU64(*puW); puW++;
     288    *puW = ASMByteSwapU64(*puW); puW++;
     289
     290    *puW = ASMByteSwapU64(*puW); puW++;
     291    *puW = ASMByteSwapU64(*puW); puW++;
     292    *puW = ASMByteSwapU64(*puW); puW++;
     293    *puW = ASMByteSwapU64(*puW); puW++;
     294
     295    *puW = ASMByteSwapU64(*puW); puW++;
     296    *puW = ASMByteSwapU64(*puW); puW++;
     297    *puW = ASMByteSwapU64(*puW); puW++;
     298    *puW = ASMByteSwapU64(*puW); puW++;
     299# endif
     300
     301#else  /* !RTSHA512_UNROLLED */
     302
    213303    unsigned iWord;
    214304    for (iWord = 0; iWord < 16; iWord++)
     
    223313        pCtx->AltPrivate.auW[iWord] = u64;
    224314    }
     315#endif /* !RTSHA512_UNROLLED */
    225316}
    226317
     
    244335    uint64_t uH = pCtx->AltPrivate.auH[7];
    245336
     337#ifdef RTSHA512_UNROLLED
     338    uint64_t *puW = &pCtx->AltPrivate.auW[0];
     339# define RTSHA512_BODY(a_iWord, a_uK, a_uA, a_uB, a_uC, a_uD, a_uE, a_uF, a_uG, a_uH) \
     340        do { \
     341            if ((a_iWord) < 16) \
     342                a_uH += *puW++; \
     343            else \
     344            { \
     345                uint64_t u64 = puW[-16]; \
     346                u64 += rtSha512SmallSigma0(puW[-15]); \
     347                u64 += puW[-7]; \
     348                u64 += rtSha512SmallSigma1(puW[-2]); \
     349                if (a_iWord < 80-2) *puW++ = u64; else puW++; \
     350                a_uH += u64; \
     351            } \
     352            \
     353            a_uH += rtSha512CapitalSigma1(a_uE); \
     354            a_uH += a_uK; \
     355            a_uH += rtSha512Ch(a_uE, a_uF, a_uG); \
     356            a_uD += a_uH; \
     357            \
     358            a_uH += rtSha512CapitalSigma0(a_uA); \
     359            a_uH += rtSha512Maj(a_uA, a_uB, a_uC); \
     360        } while (0)
     361# define RTSHA512_EIGHT(a_uK0, a_uK1, a_uK2, a_uK3, a_uK4, a_uK5, a_uK6, a_uK7, a_iFirst) \
     362        do { \
     363            RTSHA512_BODY(a_iFirst + 0, a_uK0, uA, uB, uC, uD, uE, uF, uG, uH); \
     364            RTSHA512_BODY(a_iFirst + 1, a_uK1, uH, uA, uB, uC, uD, uE, uF, uG); \
     365            RTSHA512_BODY(a_iFirst + 2, a_uK2, uG, uH, uA, uB, uC, uD, uE, uF); \
     366            RTSHA512_BODY(a_iFirst + 3, a_uK3, uF, uG, uH, uA, uB, uC, uD, uE); \
     367            RTSHA512_BODY(a_iFirst + 4, a_uK4, uE, uF, uG, uH, uA, uB, uC, uD); \
     368            RTSHA512_BODY(a_iFirst + 5, a_uK5, uD, uE, uF, uG, uH, uA, uB, uC); \
     369            RTSHA512_BODY(a_iFirst + 6, a_uK6, uC, uD, uE, uF, uG, uH, uA, uB); \
     370            RTSHA512_BODY(a_iFirst + 7, a_uK7, uB, uC, uD, uE, uF, uG, uH, uA); \
     371        } while (0)
     372    RTSHA512_EIGHT(UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc),
     373                   UINT64_C(0x3956c25bf348b538), UINT64_C(0x59f111f1b605d019), UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118),
     374                   0);
     375    RTSHA512_EIGHT(UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2),
     376                   UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694),
     377                   8);
     378    RTSHA512_EIGHT(UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65),
     379                   UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5),
     380                   16);
     381    RTSHA512_EIGHT(UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4),
     382                   UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70),
     383                   24);
     384    RTSHA512_EIGHT(UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df),
     385                   UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b),
     386                   32);
     387    RTSHA512_EIGHT(UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30),
     388                   UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8),
     389                   40);
     390    RTSHA512_EIGHT(UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8),
     391                   UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3),
     392                   48);
     393    RTSHA512_EIGHT(UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec),
     394                   UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b),
     395                   56);
     396    RTSHA512_EIGHT(UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), UINT64_C(0xeada7dd6cde0eb1e), UINT64_C(0xf57d4f7fee6ed178),
     397                   UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b),
     398                   64);
     399    RTSHA512_EIGHT(UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c),
     400                   UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817),
     401                   72);
     402#else
    246403    for (unsigned iWord = 0; iWord < RT_ELEMENTS(pCtx->AltPrivate.auW); iWord++)
    247404    {
     
    264421        uA = uT1 + uT2;
    265422    }
     423#endif
    266424
    267425    pCtx->AltPrivate.auH[0] += uA;
     
    308466    }
    309467
    310     if (!((uintptr_t)pbBuf & 3))
     468    if (!((uintptr_t)pbBuf & 7))
    311469    {
    312470        /*
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette