VirtualBox

Changeset 67334 in vbox for trunk


Ignore:
Timestamp:
Jun 12, 2017 9:48:11 AM (7 years ago)
Author:
vboxsync
Message:

IPRT: Added RTStrToUtf16BigEx and RTStrToUtf16Big for turning UTF-8 into UTF-16BE (big endian).

Location:
trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/mangling.h

    r67284 r67334  
    19521952# define RTStrToUniEx                                   RT_MANGLER(RTStrToUniEx)
    19531953# define RTStrToUpper                                   RT_MANGLER(RTStrToUpper)
     1954# define RTStrToUtf16BigExTag                           RT_MANGLER(RTStrToUtf16BigExTag)
     1955# define RTStrToUtf16BigTag                             RT_MANGLER(RTStrToUtf16BigTag)
    19541956# define RTStrToUtf16ExTag                              RT_MANGLER(RTStrToUtf16ExTag)
    19551957# define RTStrToUtf16Tag                                RT_MANGLER(RTStrToUtf16Tag)
  • trunk/include/iprt/string.h

    r66882 r67334  
    830830 * tag).
    831831 *
     832 * This differs from RTStrToUtf16BigTag in that it produces a string in
     833 * host (native) byte order rather than always big-endian.
     834 *
    832835 * @returns iprt status code.
    833836 * @param   pszString       UTF-8 string to convert.
     
    837840 */
    838841RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag);
     842
     843/**
     844 * Translate a UTF-8 string into a UTF-16BE allocating the result buffer
     845 * (default tag).
     846 *
     847 * This differs from RTStrToUtf16Tag in that it always produces a
     848 * big-endian string.
     849 *
     850 * @returns iprt status code.
     851 * @param   pszString       UTF-8 string to convert.
     852 * @param   ppwszString     Receives pointer to the allocated UTF-16BE string.
     853 *                          The returned string must be freed using RTUtf16Free().
     854 */
     855#define RTStrToUtf16Big(pszString, ppwszString)  RTStrToUtf16BigTag((pszString), (ppwszString), RTSTR_TAG)
     856
     857/**
     858 * Translate a UTF-8 string into a UTF-16BE allocating the result buffer (custom
     859 * tag).
     860 *
     861 * @returns iprt status code.
     862 * @param   pszString       UTF-8 string to convert.
     863 * @param   ppwszString     Receives pointer to the allocated UTF-16BE string.
     864 *                          The returned string must be freed using RTUtf16Free().
     865 * @param   pszTag          Allocation tag used for statistics and such.
     866 */
     867RTDECL(int) RTStrToUtf16BigTag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag);
    839868
    840869/**
     
    887916 * @param   pszTag          Allocation tag used for statistics and such.
    888917 */
    889 RTDECL(int)  RTStrToUtf16ExTag(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
     918RTDECL(int)  RTStrToUtf16ExTag(const char *pszString, size_t cchString,
     919                               PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
     920
     921
     922/**
     923 * Translates pszString from UTF-8 to UTF-16BE, allocating the result buffer if requested.
     924 *
     925 * This differs from RTStrToUtf16Ex in that it always produces a
     926 * big-endian string.
     927 *
     928 * @returns iprt status code.
     929 * @param   pszString       UTF-8 string to convert.
     930 * @param   cchString       The maximum size in chars (the type) to convert. The conversion stops
     931 *                          when it reaches cchString or the string terminator ('\\0').
     932 *                          Use RTSTR_MAX to translate the entire string.
     933 * @param   ppwsz           If cwc is non-zero, this must either be pointing to pointer to
     934 *                          a buffer of the specified size, or pointer to a NULL pointer.
     935 *                          If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
     936 *                          will be allocated to hold the translated string.
     937 *                          If a buffer was requested it must be freed using RTUtf16Free().
     938 * @param   cwc             The buffer size in RTUTF16s. This includes the terminator.
     939 * @param   pcwc            Where to store the length of the translated string,
     940 *                          excluding the terminator. (Optional)
     941 *
     942 *                          This may be set under some error conditions,
     943 *                          however, only for VERR_BUFFER_OVERFLOW and
     944 *                          VERR_NO_STR_MEMORY will it contain a valid string
     945 *                          length that can be used to resize the buffer.
     946 */
     947#define RTStrToUtf16BigEx(pszString, cchString, ppwsz, cwc, pcwc) \
     948    RTStrToUtf16BigExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG)
     949
     950/**
     951 * Translates pszString from UTF-8 to UTF-16BE, allocating the result buffer if
     952 * requested (custom tag).
     953 *
     954 * This differs from RTStrToUtf16ExTag in that it always produces a
     955 * big-endian string.
     956 *
     957 * @returns iprt status code.
     958 * @param   pszString       UTF-8 string to convert.
     959 * @param   cchString       The maximum size in chars (the type) to convert. The conversion stops
     960 *                          when it reaches cchString or the string terminator ('\\0').
     961 *                          Use RTSTR_MAX to translate the entire string.
     962 * @param   ppwsz           If cwc is non-zero, this must either be pointing to pointer to
     963 *                          a buffer of the specified size, or pointer to a NULL pointer.
     964 *                          If *ppwsz is NULL or cwc is zero a buffer of at least cwc items
     965 *                          will be allocated to hold the translated string.
     966 *                          If a buffer was requested it must be freed using RTUtf16Free().
     967 * @param   cwc             The buffer size in RTUTF16s. This includes the terminator.
     968 * @param   pcwc            Where to store the length of the translated string,
     969 *                          excluding the terminator. (Optional)
     970 *
     971 *                          This may be set under some error conditions,
     972 *                          however, only for VERR_BUFFER_OVERFLOW and
     973 *                          VERR_NO_STR_MEMORY will it contain a valid string
     974 *                          length that can be used to resize the buffer.
     975 * @param   pszTag          Allocation tag used for statistics and such.
     976 */
     977RTDECL(int)  RTStrToUtf16BigExTag(const char *pszString, size_t cchString,
     978                                  PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag);
    890979
    891980
  • trunk/src/VBox/Runtime/common/fs/iso9660vfs.cpp

    r67326 r67334  
    688688
    689689/**
    690  * RTStrToUtf16Ex returning big-endian UTF-16.
    691  */
    692 static int rtFsIso9660_StrToUtf16BigEndian(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc)
    693 {
    694     int rc = RTStrToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc);
    695 #ifndef RT_BIG_ENDIAN
    696     if (RT_SUCCESS(rc))
    697     {
    698         PRTUTF16 pwc = *ppwsz;
    699         RTUTF16  wc;
    700         while ((wc = *pwc))
    701             *pwc++ = RT_H2BE_U16(wc);
    702     }
    703 #endif
    704     return rc;
    705 }
    706 
    707 
    708 /**
    709690 * Looks up the shared structure for a child.
    710691 *
     
    774755    {
    775756        PRTUTF16 pwszEntry = uBuf.wszEntry;
    776         rc = rtFsIso9660_StrToUtf16BigEndian(pszEntry, RTSTR_MAX, &pwszEntry, RT_ELEMENTS(uBuf.wszEntry), &cwcEntry);
     757        rc = RTStrToUtf16BigEx(pszEntry, RTSTR_MAX, &pwszEntry, RT_ELEMENTS(uBuf.wszEntry), &cwcEntry);
    777758        if (RT_FAILURE(rc))
    778759            return rc;
  • trunk/src/VBox/Runtime/common/string/utf-8.cpp

    r65642 r67334  
    3333
    3434#include <iprt/uni.h>
     35#include <iprt/asm.h>
    3536#include <iprt/alloc.h>
    3637#include <iprt/assert.h>
     
    829830 * @param   pwsz    Where to store the UTF-16 string.
    830831 * @param   cwc     The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0').
     832 *
     833 * @note    rtUtf8RecodeAsUtf16Big is a duplicate with RT_H2BE_U16 applied.
    831834 */
    832835static int rtUtf8RecodeAsUtf16(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc)
     
    907910
    908911
     912/**
     913 * Recodes a valid UTF-8 string as UTF-16BE.
     914 *
     915 * Since we know the input is valid, we do *not* perform encoding or length checks.
     916 *
     917 * @returns iprt status code.
     918 * @param   psz     The UTF-8 string to recode. This is a valid encoding.
     919 * @param   cch     The number of chars (the type char, so bytes if you like) to process of the UTF-8 string.
     920 *                  The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'.
     921 * @param   pwsz    Where to store the UTF-16BE string.
     922 * @param   cwc     The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0').
     923 *
     924 * @note    This is a copy of rtUtf8RecodeAsUtf16 with RT_H2BE_U16 applied.
     925 */
     926static int rtUtf8RecodeAsUtf16Big(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc)
     927{
     928    int                     rc   = VINF_SUCCESS;
     929    const unsigned char    *puch = (const unsigned char *)psz;
     930    PRTUTF16                pwc  = pwsz;
     931    while (cch > 0)
     932    {
     933        /* read the next char and check for terminator. */
     934        const unsigned char uch = *puch;
     935        if (uch)
     936        { /* we only break once, so consider this the likely branch. */ }
     937        else
     938            break;
     939
     940        /* check for output overflow */
     941        if (RT_LIKELY(cwc >= 1))
     942        { /* likely */ }
     943        else
     944        {
     945            rc = VERR_BUFFER_OVERFLOW;
     946            break;
     947        }
     948        cwc--;
     949
     950        /* decode and recode the code point */
     951        if (!(uch & RT_BIT(7)))
     952        {
     953            *pwc++ = RT_H2BE_U16((RTUTF16)uch);
     954            puch++;
     955            cch--;
     956        }
     957        else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) == (RT_BIT(7) | RT_BIT(6)))
     958        {
     959            uint16_t uc = (puch[1] & 0x3f)
     960                    | ((uint16_t)(uch     & 0x1f) << 6);
     961            *pwc++ = RT_H2BE_U16(uc);
     962            puch += 2;
     963            cch -= 2;
     964        }
     965        else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5)))
     966        {
     967            uint16_t uc = (puch[2] & 0x3f)
     968                    | ((uint16_t)(puch[1] & 0x3f) << 6)
     969                    | ((uint16_t)(uch     & 0x0f) << 12);
     970            *pwc++ = RT_H2BE_U16(uc);
     971            puch += 3;
     972            cch -= 3;
     973        }
     974        else
     975        {
     976            /* generate surrogate pair */
     977            Assert((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4)));
     978            RTUNICP uc =           (puch[3] & 0x3f)
     979                       | ((RTUNICP)(puch[2] & 0x3f) << 6)
     980                       | ((RTUNICP)(puch[1] & 0x3f) << 12)
     981                       | ((RTUNICP)(uch     & 0x07) << 18);
     982            if (RT_UNLIKELY(cwc < 1))
     983            {
     984                rc = VERR_BUFFER_OVERFLOW;
     985                break;
     986            }
     987            cwc--;
     988
     989            uc -= 0x10000;
     990            *pwc++ = RT_H2BE_U16(0xd800 | (uc >> 10));
     991            *pwc++ = RT_H2BE_U16(0xdc00 | (uc & 0x3ff));
     992            puch += 4;
     993            cch -= 4;
     994        }
     995    }
     996
     997    /* done */
     998    *pwc = '\0';
     999    return rc;
     1000}
     1001
     1002
    9091003RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag)
    9101004{
     
    9461040}
    9471041RT_EXPORT_SYMBOL(RTStrToUtf16Tag);
     1042
     1043
     1044RTDECL(int) RTStrToUtf16BigTag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag)
     1045{
     1046    /*
     1047     * Validate input.
     1048     */
     1049    Assert(VALID_PTR(ppwszString));
     1050    Assert(VALID_PTR(pszString));
     1051    *ppwszString = NULL;
     1052
     1053    /*
     1054     * Validate the UTF-8 input and calculate the length of the UTF-16 string.
     1055     */
     1056    size_t cwc;
     1057    int rc = rtUtf8CalcUtf16Length(pszString, &cwc);
     1058    if (RT_SUCCESS(rc))
     1059    {
     1060        /*
     1061         * Allocate buffer.
     1062         */
     1063        PRTUTF16 pwsz = (PRTUTF16)RTMemAllocTag((cwc + 1) * sizeof(RTUTF16), pszTag);
     1064        if (pwsz)
     1065        {
     1066            /*
     1067             * Encode the UTF-16 string.
     1068             */
     1069            rc = rtUtf8RecodeAsUtf16Big(pszString, RTSTR_MAX, pwsz, cwc);
     1070            if (RT_SUCCESS(rc))
     1071            {
     1072                *ppwszString = pwsz;
     1073                return rc;
     1074            }
     1075            RTMemFree(pwsz);
     1076        }
     1077        else
     1078            rc = VERR_NO_UTF16_MEMORY;
     1079    }
     1080    return rc;
     1081}
     1082RT_EXPORT_SYMBOL(RTStrToUtf16TagBig);
    9481083
    9491084
     
    10111146}
    10121147RT_EXPORT_SYMBOL(RTStrToUtf16ExTag);
     1148
     1149
     1150RTDECL(int)  RTStrToUtf16BigExTag(const char *pszString, size_t cchString,
     1151                                  PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag)
     1152{
     1153    /*
     1154     * Validate input.
     1155     */
     1156    Assert(VALID_PTR(pszString));
     1157    Assert(VALID_PTR(ppwsz));
     1158    Assert(!pcwc || VALID_PTR(pcwc));
     1159
     1160    /*
     1161     * Validate the UTF-8 input and calculate the length of the UTF-16 string.
     1162     */
     1163    size_t cwcResult;
     1164    int rc;
     1165    if (cchString != RTSTR_MAX)
     1166        rc = rtUtf8CalcUtf16LengthN(pszString, cchString, &cwcResult);
     1167    else
     1168        rc = rtUtf8CalcUtf16Length(pszString, &cwcResult);
     1169    if (RT_SUCCESS(rc))
     1170    {
     1171        if (pcwc)
     1172            *pcwc = cwcResult;
     1173
     1174        /*
     1175         * Check buffer size / Allocate buffer.
     1176         */
     1177        bool fShouldFree;
     1178        PRTUTF16 pwszResult;
     1179        if (cwc > 0 && *ppwsz)
     1180        {
     1181            fShouldFree = false;
     1182            if (cwc <= cwcResult)
     1183                return VERR_BUFFER_OVERFLOW;
     1184            pwszResult = *ppwsz;
     1185        }
     1186        else
     1187        {
     1188            *ppwsz = NULL;
     1189            fShouldFree = true;
     1190            cwc = RT_MAX(cwcResult + 1, cwc);
     1191            pwszResult = (PRTUTF16)RTMemAllocTag(cwc * sizeof(RTUTF16), pszTag);
     1192        }
     1193        if (pwszResult)
     1194        {
     1195            /*
     1196             * Encode the UTF-16BE string.
     1197             */
     1198            rc = rtUtf8RecodeAsUtf16Big(pszString, cchString, pwszResult, cwc - 1);
     1199            if (RT_SUCCESS(rc))
     1200            {
     1201                *ppwsz = pwszResult;
     1202                return rc;
     1203            }
     1204            if (fShouldFree)
     1205                RTMemFree(pwszResult);
     1206        }
     1207        else
     1208            rc = VERR_NO_UTF16_MEMORY;
     1209    }
     1210    return rc;
     1211}
     1212RT_EXPORT_SYMBOL(RTStrToUtf16BigExTag);
    10131213
    10141214
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette