VirtualBox

Changeset 51878 in vbox


Ignore:
Timestamp:
Jul 5, 2014 8:23:47 PM (10 years ago)
Author:
vboxsync
Message:

alt-sha1.cpp: Unrolled the block processing code, getting a ~25% speed increase on windows/amd64.

Location:
trunk/src/VBox/Runtime
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/checksum/alt-sha1.cpp

    r51861 r51878  
    128128
    129129
     130/** Function 4.1, Ch(x,y,z). */
     131DECL_FORCE_INLINE(uint32_t) rtSha1Ch(uint32_t uX, uint32_t uY, uint32_t uZ)
     132{
     133    uint32_t uResult = uX & uY;
     134    uResult ^= ~uX & uZ;
     135    return uResult;
     136}
     137
     138
     139/** Function 4.1, Parity(x,y,z). */
     140DECL_FORCE_INLINE(uint32_t) rtSha1Parity(uint32_t uX, uint32_t uY, uint32_t uZ)
     141{
     142    uint32_t uResult = uX;
     143    uResult ^= uY;
     144    uResult ^= uZ;
     145    return uResult;
     146}
     147
     148
     149/** Function 4.1, Maj(x,y,z). */
     150DECL_FORCE_INLINE(uint32_t) rtSha1Maj(uint32_t uX, uint32_t uY, uint32_t uZ)
     151{
     152    uint32_t uResult = (uX & uY);
     153    uResult |= (uX & uZ);
     154    uResult |= (uY & uZ);
     155    return uResult;
     156}
     157
     158
    130159/**
    131160 * Process the current block.
     
    143172    uint32_t uE = pCtx->AltPrivate.auH[4];
    144173
    145 #if 1
     174#if 1 /* Fully unrolled version. */
     175    register uint32_t const *puW = &pCtx->AltPrivate.auW[0];
     176# define SHA1_BODY(a_uW, a_uK, a_fnFt, a_uA, a_uB, a_uC, a_uD, a_uE) \
     177        do { \
     178            a_uE += a_uW; \
     179            a_uE += (a_uK); \
     180            a_uE += ASMRotateLeftU32(a_uA, 5); \
     181            a_uE += a_fnFt(a_uB, a_uC, a_uD); \
     182            a_uB = ASMRotateLeftU32(a_uB, 30); \
     183        } while (0)
     184# define FIVE_ITERATIONS(a_iStart, a_uK, a_fnFt) \
     185    do { \
     186        SHA1_BODY(/*puW[a_iStart + 0]*/ *puW++, a_uK, a_fnFt, uA, uB, uC, uD, uE); \
     187        SHA1_BODY(/*puW[a_iStart + 1]*/ *puW++, a_uK, a_fnFt, uE, uA, uB, uC, uD); \
     188        SHA1_BODY(/*puW[a_iStart + 2]*/ *puW++, a_uK, a_fnFt, uD, uE, uA, uB, uC); \
     189        SHA1_BODY(/*puW[a_iStart + 3]*/ *puW++, a_uK, a_fnFt, uC, uD, uE, uA, uB); \
     190        SHA1_BODY(/*puW[a_iStart + 4]*/ *puW++, a_uK, a_fnFt, uB, uC, uD, uE, uA); \
     191    } while (0)
     192# if 0 /* Variation that reduces the code size by a factor of 4 without much loss in performance. */
     193#  define TWENTY_ITERATIONS(a_iFirst, a_uK, a_fnFt) \
     194    do { unsigned i = 4; while (i-- > 0) FIVE_ITERATIONS(a_iFirst + (3 - i) * 5, a_uK, a_fnFt); } while (0)
     195    /*for (unsigned i = a_iFirst; i < (a_iFirst + 20); i += 5) FIVE_ITERATIONS(i, a_uK, a_fnFt);*/
     196# else
     197#  define TWENTY_ITERATIONS(a_iFirst, a_uK, a_fnFt) \
     198    do { \
     199        FIVE_ITERATIONS(a_iFirst +  0, a_uK, a_fnFt); \
     200        FIVE_ITERATIONS(a_iFirst +  5, a_uK, a_fnFt); \
     201        FIVE_ITERATIONS(a_iFirst + 10, a_uK, a_fnFt); \
     202        FIVE_ITERATIONS(a_iFirst + 15, a_uK, a_fnFt); \
     203    } while (0)
     204# endif
     205    TWENTY_ITERATIONS( 0, UINT32_C(0x5a827999), rtSha1Ch);
     206    TWENTY_ITERATIONS(20, UINT32_C(0x6ed9eba1), rtSha1Parity);
     207    TWENTY_ITERATIONS(40, UINT32_C(0x8f1bbcdc), rtSha1Maj);
     208    TWENTY_ITERATIONS(60, UINT32_C(0xca62c1d6), rtSha1Parity);
     209
     210#elif 0 /* Version avoiding the constant selection. */
    146211    unsigned iWord = 0;
    147212# define TWENTY_ITERATIONS(a_iWordStop, a_uK, a_uExprBCD) \
     
    160225            uA = uTemp; \
    161226        } do { } while (0)
    162     TWENTY_ITERATIONS(20, UINT32_C(0x5a827999), (uB & uC) | (~uB & uD));
    163     TWENTY_ITERATIONS(40, UINT32_C(0x6ed9eba1), uB ^ uC ^ uD);
    164     TWENTY_ITERATIONS(60, UINT32_C(0x8f1bbcdc), (uB & uC) | (uB & uD) | (uC & uD));
    165     TWENTY_ITERATIONS(80, UINT32_C(0xca62c1d6), uB ^ uC ^ uD);
    166 #else
     227    TWENTY_ITERATIONS(20, UINT32_C(0x5a827999), rtSha1Ch(uB, uC, uD));
     228    TWENTY_ITERATIONS(40, UINT32_C(0x6ed9eba1), rtSha1Parity(uB, uC, uD));
     229    TWENTY_ITERATIONS(60, UINT32_C(0x8f1bbcdc), rtSha1Maj(uB, uC, uD));
     230    TWENTY_ITERATIONS(80, UINT32_C(0xca62c1d6), rtSha1Parity(uB, uC, uD));
     231
     232#else /* Dead simple implementation. */
    167233    for (unsigned iWord = 0; iWord < RT_ELEMENTS(pCtx->AltPrivate.auW); iWord++)
    168234    {
  • trunk/src/VBox/Runtime/testcase/tstRTDigest-2.cpp

    r51856 r51878  
    3636#include <iprt/err.h>
    3737#include <iprt/test.h>
     38#include <iprt/thread.h>
    3839#include <iprt/string.h>
    3940
     
    103104 * @param   cTests          The number of tests in the table.
    104105 * @param   pszDigestName   The name of the digest.
     106 * @param   enmDigestType   The digest enum type value.
    105107 */
    106 static void testGeneric(const char *pszDigestObjId, TESTRTDIGEST const *paTests, size_t cTests, const char *pszDigestName)
     108static void testGeneric(const char *pszDigestObjId, TESTRTDIGEST const *paTests, size_t cTests, const char *pszDigestName,
     109                        RTDIGESTTYPE enmDigestType)
    107110{
    108111    /*
     
    154157     */
    155158    RTTESTI_CHECK_RC_RETV(RTCrDigestCreateByObjIdString(&hDigest, pszDigestObjId), VINF_SUCCESS);
    156     uint32_t cChunks  = 64;
     159
     160    /* Warmup. */
     161    uint32_t cChunks  = enmDigestType == RTDIGESTTYPE_MD2 ? 12 : 128;
    157162    uint32_t cLeft    = cChunks;
    158163    int      rc       = VINF_SUCCESS;
    159 
     164    RTThreadYield();
    160165    uint64_t uStartTS = RTTimeNanoTS();
    161166    while (cLeft-- > 0)
    162167        rc |= RTCrDigestUpdate(hDigest, g_abRandom72KB, sizeof(g_abRandom72KB));
    163     rc |= RTCrDigestFinal(hDigest, NULL, 0);
     168    uint64_t cNsPerChunk = (RTTimeNanoTS() - uStartTS) / cChunks;
     169    if (!cNsPerChunk)
     170        cNsPerChunk = 16000000 / cChunks; /* Time resolution kludge: 16ms. */
     171    RTTESTI_CHECK_RETV(rc == VINF_SUCCESS);
     172
     173    /* Do it for real for about 2 seconds. */
     174    RTTESTI_CHECK_RC(RTCrDigestReset(hDigest), VINF_SUCCESS);
     175    cChunks = _2G32 / cNsPerChunk;
     176    cLeft   = cChunks;
     177    RTThreadYield();
     178    uStartTS = RTTimeNanoTS();
     179    while (cLeft-- > 0)
     180        rc |= RTCrDigestUpdate(hDigest, g_abRandom72KB, sizeof(g_abRandom72KB));
    164181    uint64_t cNsElapsed = RTTimeNanoTS() - uStartTS;
    165182    RTTESTI_CHECK(rc == VINF_SUCCESS);
    166183
    167     /* If it was too quick, redo with more chunks. */
    168     if (rc == VINF_SUCCESS && cNsElapsed < 100000000 /* 100 ms */)
    169     {
    170         cChunks  = 1024;
    171         cLeft    = cChunks;
    172         RTTESTI_CHECK_RC(RTCrDigestReset(hDigest), VINF_SUCCESS);
    173 
    174         uStartTS = RTTimeNanoTS();
    175         while (cLeft-- > 0)
    176             rc |= RTCrDigestUpdate(hDigest, g_abRandom72KB, sizeof(g_abRandom72KB));
    177         rc |= RTCrDigestFinal(hDigest, NULL, 0);
    178         cNsElapsed = RTTimeNanoTS() - uStartTS;
    179         RTTESTI_CHECK(rc == VINF_SUCCESS);
    180     }
    181 
    182184    RTTestIValueF((uint64_t)cChunks * sizeof(g_abRandom72KB) / _1K / (0.000000001 * cNsElapsed), RTTESTUNIT_KILOBYTES_PER_SEC,
    183185                  "%s throughput", pszDigestName);
     186    RTTESTI_CHECK_RC(RTCrDigestRelease(hDigest), 0);
    184187}
    185188
     
    380383        { &g_abRandom72KB[0x20c9],  9991, "bbba194efa81238e5b613e20e937144e", "MD2 8393 bytes @9991" },
    381384    };
    382     testGeneric("1.2.840.113549.2.2", s_abTests, RT_ELEMENTS(s_abTests), "MD2");
     385    testGeneric("1.2.840.113549.2.2", s_abTests, RT_ELEMENTS(s_abTests), "MD2", RTDIGESTTYPE_MD2);
    383386}
    384387
     
    579582        { &g_abRandom72KB[0x20c9], 9991, "6461339c6615d23c704298a313e07cf5", "MD5 8393 bytes @9991" },
    580583    };
    581     testGeneric("1.2.840.113549.2.5", s_abTests, RT_ELEMENTS(s_abTests), "MD5");
     584    testGeneric("1.2.840.113549.2.5", s_abTests, RT_ELEMENTS(s_abTests), "MD5", RTDIGESTTYPE_MD5);
    582585}
    583586
     
    758761        { &g_abRandom72KB[0x20c9],  9991, "62001184bacacce3774566d916055d425a85eba5", "SHA-1 8393 bytes @9991" },
    759762    };
    760     testGeneric("1.3.14.3.2.26", s_abTests, RT_ELEMENTS(s_abTests), "SHA-1");
     763    testGeneric("1.3.14.3.2.26", s_abTests, RT_ELEMENTS(s_abTests), "SHA-1", RTDIGESTTYPE_SHA1);
    761764}
    762765
     
    926929        { &g_abRandom72KB[0x20c9],  9991, "8bd4c6142e36f15385769ebdeb855dcdf542f72d067315472a52ff626946310e", "SHA-256 8393 bytes @9991" },
    927930    };
    928     testGeneric("2.16.840.1.101.3.4.2.1", s_abTests, RT_ELEMENTS(s_abTests), "SHA-256");
     931    testGeneric("2.16.840.1.101.3.4.2.1", s_abTests, RT_ELEMENTS(s_abTests), "SHA-256", RTDIGESTTYPE_SHA256);
    929932}
    930933
     
    964967          "75388b16512776cc5dba5da1fd890150b0c6455cb4f58b1952522525", "SHA-224 abcdbc..." },
    965968    };
    966     testGeneric("2.16.840.1.101.3.4.2.4", s_abTests, RT_ELEMENTS(s_abTests), "SHA-224");
     969    testGeneric("2.16.840.1.101.3.4.2.4", s_abTests, RT_ELEMENTS(s_abTests), "SHA-224", RTDIGESTTYPE_SHA224);
    967970}
    968971
     
    11321135        { &g_abRandom72KB[0x20c9],  9991, "d6ac7c68664df2e34dc6be233b33f8dad196348350b70a4c2c5a78eb54d6e297c819771313d798de7552b7a3cb85370aab25087e189f3be8560d49406ebb6280", "SHA-512 8393 bytes @9991" },
    11331136    };
    1134     testGeneric("2.16.840.1.101.3.4.2.3", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512");
     1137    testGeneric("2.16.840.1.101.3.4.2.3", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512", RTDIGESTTYPE_SHA512);
    11351138}
    11361139
     
    11701173          "23fec5bb94d60b23308192640b0c453335d664734fe40e7268674af9", "SHA-512/256 abcdef..." },
    11711174    };
    1172     testGeneric("2.16.840.1.101.3.4.2.5", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512/224");
     1175    testGeneric("2.16.840.1.101.3.4.2.5", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512/224", RTDIGESTTYPE_SHA512T224);
    11731176}
    11741177#endif /* IPRT_WITHOUT_SHA512T224 */
     
    12081211          "3928e184fb8690f840da3988121d31be65cb9d3ef83ee6146feac861e19b563a", "SHA-512/256 abcdef..." },
    12091212    };
    1210     testGeneric("2.16.840.1.101.3.4.2.6", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512/256");
     1213    testGeneric("2.16.840.1.101.3.4.2.6", s_abTests, RT_ELEMENTS(s_abTests), "SHA-512/256", RTDIGESTTYPE_SHA512T256);
    12111214}
    12121215#endif /* !IPRT_WITHOUT_SHA512T256 */
     
    12451248          "09330c33f71147e83d192fc782cd1b4753111b173b3b05d22fa08086e3b0f712fcc7c71a557e2db966c3e9fa91746039", "SHA-384 abcdef..." },
    12461249    };
    1247     testGeneric("2.16.840.1.101.3.4.2.2", s_abTests, RT_ELEMENTS(s_abTests), "SHA-384");
     1250    testGeneric("2.16.840.1.101.3.4.2.2", s_abTests, RT_ELEMENTS(s_abTests), "SHA-384", RTDIGESTTYPE_SHA384);
    12481251}
    12491252
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette