- Timestamp:
- Jun 12, 2017 9:48:11 AM (7 years ago)
- Location:
- trunk
- Files:
-
- 4 edited
-
include/iprt/mangling.h (modified) (1 diff)
-
include/iprt/string.h (modified) (3 diffs)
-
src/VBox/Runtime/common/fs/iso9660vfs.cpp (modified) (2 diffs)
-
src/VBox/Runtime/common/string/utf-8.cpp (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/mangling.h
r67284 r67334 1952 1952 # define RTStrToUniEx RT_MANGLER(RTStrToUniEx) 1953 1953 # define RTStrToUpper RT_MANGLER(RTStrToUpper) 1954 # define RTStrToUtf16BigExTag RT_MANGLER(RTStrToUtf16BigExTag) 1955 # define RTStrToUtf16BigTag RT_MANGLER(RTStrToUtf16BigTag) 1954 1956 # define RTStrToUtf16ExTag RT_MANGLER(RTStrToUtf16ExTag) 1955 1957 # define RTStrToUtf16Tag RT_MANGLER(RTStrToUtf16Tag) -
trunk/include/iprt/string.h
r66882 r67334 830 830 * tag). 831 831 * 832 * Unlike RTStrToUtf16BigTag, this produces the string in host byte order 833 * rather than big-endian. 834 * 832 835 * @returns iprt status code. 833 836 * @param pszString UTF-8 string to convert. … … 837 840 */ 838 841 RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag); 842 843 /** 844 * Translate a UTF-8 string into UTF-16BE, allocating the result buffer 845 * (default tag). 846 * 847 * This differs from RTStrToUtf16 in that it always produces a 848 * big-endian string. 849 * 850 * @returns iprt status code. 851 * @param pszString UTF-8 string to convert. 852 * @param ppwszString Receives pointer to the allocated UTF-16BE string. 853 * The returned string must be freed using RTUtf16Free(). 854 */ 855 #define RTStrToUtf16Big(pszString, ppwszString) RTStrToUtf16BigTag((pszString), (ppwszString), RTSTR_TAG) 856 857 /** 858 * Translate a UTF-8 string into UTF-16BE, allocating the result buffer (custom 859 * tag). 860 * 861 * @returns iprt status code. 862 * @param pszString UTF-8 string to convert. 863 * @param ppwszString Receives pointer to the allocated UTF-16BE string. 864 * The returned string must be freed using RTUtf16Free(). 865 * @param pszTag Allocation tag used for statistics and such. 866 */ 867 RTDECL(int) RTStrToUtf16BigTag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag); 839 868 840 869 /** … … 887 916 * @param pszTag Allocation tag used for statistics and such. 888 917 */ 889 RTDECL(int) RTStrToUtf16ExTag(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 918 RTDECL(int) RTStrToUtf16ExTag(const char *pszString, size_t cchString, 919 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 920 921 922 /** 923 * Translates pszString from UTF-8 to UTF-16BE, allocating the result buffer if requested. 924 * 925 * This differs from RTStrToUtf16Ex in that it always produces a 926 * big-endian string. 
927 * 928 * @returns iprt status code. 929 * @param pszString UTF-8 string to convert. 930 * @param cchString The maximum size in chars (the type) to convert. The conversion stops 931 * when it reaches cchString or the string terminator ('\\0'). 932 * Use RTSTR_MAX to translate the entire string. 933 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to 934 * a buffer of the specified size, or pointer to a NULL pointer. 935 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items 936 * will be allocated to hold the translated string. 937 * If a buffer was requested it must be freed using RTUtf16Free(). 938 * @param cwc The buffer size in RTUTF16s. This includes the terminator. 939 * @param pcwc Where to store the length of the translated string, 940 * excluding the terminator. (Optional) 941 * 942 * This may be set under some error conditions, 943 * however, only for VERR_BUFFER_OVERFLOW and 944 * VERR_NO_UTF16_MEMORY will it contain a valid string 945 * length that can be used to resize the buffer. 946 */ 947 #define RTStrToUtf16BigEx(pszString, cchString, ppwsz, cwc, pcwc) \ 948 RTStrToUtf16BigExTag((pszString), (cchString), (ppwsz), (cwc), (pcwc), RTSTR_TAG) 949 950 /** 951 * Translates pszString from UTF-8 to UTF-16BE, allocating the result buffer if 952 * requested (custom tag). 953 * 954 * This differs from RTStrToUtf16ExTag in that it always produces a 955 * big-endian string. 956 * 957 * @returns iprt status code. 958 * @param pszString UTF-8 string to convert. 959 * @param cchString The maximum size in chars (the type) to convert. The conversion stops 960 * when it reaches cchString or the string terminator ('\\0'). 961 * Use RTSTR_MAX to translate the entire string. 962 * @param ppwsz If cwc is non-zero, this must either be pointing to pointer to 963 * a buffer of the specified size, or pointer to a NULL pointer. 
964 * If *ppwsz is NULL or cwc is zero a buffer of at least cwc items 965 * will be allocated to hold the translated string. 966 * If a buffer was requested it must be freed using RTUtf16Free(). 967 * @param cwc The buffer size in RTUTF16s. This includes the terminator. 968 * @param pcwc Where to store the length of the translated string, 969 * excluding the terminator. (Optional) 970 * 971 * This may be set under some error conditions, 972 * however, only for VERR_BUFFER_OVERFLOW and 973 * VERR_NO_UTF16_MEMORY will it contain a valid string 974 * length that can be used to resize the buffer. 975 * @param pszTag Allocation tag used for statistics and such. 976 */ 977 RTDECL(int) RTStrToUtf16BigExTag(const char *pszString, size_t cchString, 978 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag); 890 979 891 980 -
trunk/src/VBox/Runtime/common/fs/iso9660vfs.cpp
r67326 r67334 688 688 689 689 /** 690 * RTStrToUtf16Ex returning big-endian UTF-16.691 */692 static int rtFsIso9660_StrToUtf16BigEndian(const char *pszString, size_t cchString, PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc)693 {694 int rc = RTStrToUtf16Ex(pszString, cchString, ppwsz, cwc, pcwc);695 #ifndef RT_BIG_ENDIAN696 if (RT_SUCCESS(rc))697 {698 PRTUTF16 pwc = *ppwsz;699 RTUTF16 wc;700 while ((wc = *pwc))701 *pwc++ = RT_H2BE_U16(wc);702 }703 #endif704 return rc;705 }706 707 708 /**709 690 * Looks up the shared structure for a child. 710 691 * … … 774 755 { 775 756 PRTUTF16 pwszEntry = uBuf.wszEntry; 776 rc = rtFsIso9660_StrToUtf16BigEndian(pszEntry, RTSTR_MAX, &pwszEntry, RT_ELEMENTS(uBuf.wszEntry), &cwcEntry);757 rc = RTStrToUtf16BigEx(pszEntry, RTSTR_MAX, &pwszEntry, RT_ELEMENTS(uBuf.wszEntry), &cwcEntry); 777 758 if (RT_FAILURE(rc)) 778 759 return rc; -
trunk/src/VBox/Runtime/common/string/utf-8.cpp
r65642 r67334 33 33 34 34 #include <iprt/uni.h> 35 #include <iprt/asm.h> 35 36 #include <iprt/alloc.h> 36 37 #include <iprt/assert.h> … … 829 830 * @param pwsz Where to store the UTF-16 string. 830 831 * @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0'). 832 * 833 * @note rtUtf8RecodeAsUtf16Big is a duplicate with RT_H2BE_U16 applied. 831 834 */ 832 835 static int rtUtf8RecodeAsUtf16(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc) … … 907 910 908 911 912 /** 913 * Recodes a valid UTF-8 string as UTF-16BE. 914 * 915 * Since we know the input is valid, we do *not* perform encoding or length checks. 916 * 917 * @returns iprt status code. 918 * @param psz The UTF-8 string to recode. This is a valid encoding. 919 * @param cch The number of chars (the type char, so bytes if you like) to process of the UTF-8 string. 920 * The recoding will stop when cch or '\\0' is reached. Pass RTSTR_MAX to process up to '\\0'. 921 * @param pwsz Where to store the UTF-16BE string. 922 * @param cwc The number of RTUTF16 items the pwsz buffer can hold, excluding the terminator ('\\0'). 923 * 924 * @note This is a copy of rtUtf8RecodeAsUtf16 with RT_H2BE_U16 applied. 925 */ 926 static int rtUtf8RecodeAsUtf16Big(const char *psz, size_t cch, PRTUTF16 pwsz, size_t cwc) 927 { 928 int rc = VINF_SUCCESS; 929 const unsigned char *puch = (const unsigned char *)psz; 930 PRTUTF16 pwc = pwsz; 931 while (cch > 0) 932 { 933 /* read the next char and check for terminator. */ 934 const unsigned char uch = *puch; 935 if (uch) 936 { /* we only break once, so consider this the likely branch. 
*/ } 937 else 938 break; 939 940 /* check for output overflow */ 941 if (RT_LIKELY(cwc >= 1)) 942 { /* likely */ } 943 else 944 { 945 rc = VERR_BUFFER_OVERFLOW; 946 break; 947 } 948 cwc--; 949 950 /* decode and recode the code point */ 951 if (!(uch & RT_BIT(7))) 952 { 953 *pwc++ = RT_H2BE_U16((RTUTF16)uch); 954 puch++; 955 cch--; 956 } 957 else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) == (RT_BIT(7) | RT_BIT(6))) 958 { 959 uint16_t uc = (puch[1] & 0x3f) 960 | ((uint16_t)(uch & 0x1f) << 6); 961 *pwc++ = RT_H2BE_U16(uc); 962 puch += 2; 963 cch -= 2; 964 } 965 else if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) 966 { 967 uint16_t uc = (puch[2] & 0x3f) 968 | ((uint16_t)(puch[1] & 0x3f) << 6) 969 | ((uint16_t)(uch & 0x0f) << 12); 970 *pwc++ = RT_H2BE_U16(uc); 971 puch += 3; 972 cch -= 3; 973 } 974 else 975 { 976 /* generate surrogate pair */ 977 Assert((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4) | RT_BIT(3))) == (RT_BIT(7) | RT_BIT(6) | RT_BIT(5) | RT_BIT(4))); 978 RTUNICP uc = (puch[3] & 0x3f) 979 | ((RTUNICP)(puch[2] & 0x3f) << 6) 980 | ((RTUNICP)(puch[1] & 0x3f) << 12) 981 | ((RTUNICP)(uch & 0x07) << 18); 982 if (RT_UNLIKELY(cwc < 1)) 983 { 984 rc = VERR_BUFFER_OVERFLOW; 985 break; 986 } 987 cwc--; 988 989 uc -= 0x10000; 990 *pwc++ = RT_H2BE_U16(0xd800 | (uc >> 10)); 991 *pwc++ = RT_H2BE_U16(0xdc00 | (uc & 0x3ff)); 992 puch += 4; 993 cch -= 4; 994 } 995 } 996 997 /* done */ 998 *pwc = '\0'; 999 return rc; 1000 } 1001 1002 909 1003 RTDECL(int) RTStrToUtf16Tag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag) 910 1004 { … … 946 1040 } 947 1041 RT_EXPORT_SYMBOL(RTStrToUtf16Tag); 1042 1043 1044 RTDECL(int) RTStrToUtf16BigTag(const char *pszString, PRTUTF16 *ppwszString, const char *pszTag) 1045 { 1046 /* 1047 * Validate input. 
1048 */ 1049 Assert(VALID_PTR(ppwszString)); 1050 Assert(VALID_PTR(pszString)); 1051 *ppwszString = NULL; 1052 1053 /* 1054 * Validate the UTF-8 input and calculate the length of the UTF-16 string. 1055 */ 1056 size_t cwc; 1057 int rc = rtUtf8CalcUtf16Length(pszString, &cwc); 1058 if (RT_SUCCESS(rc)) 1059 { 1060 /* 1061 * Allocate buffer. 1062 */ 1063 PRTUTF16 pwsz = (PRTUTF16)RTMemAllocTag((cwc + 1) * sizeof(RTUTF16), pszTag); 1064 if (pwsz) 1065 { 1066 /* 1067 * Encode the UTF-16BE string. 1068 */ 1069 rc = rtUtf8RecodeAsUtf16Big(pszString, RTSTR_MAX, pwsz, cwc); 1070 if (RT_SUCCESS(rc)) 1071 { 1072 *ppwszString = pwsz; 1073 return rc; 1074 } 1075 RTMemFree(pwsz); 1076 } 1077 else 1078 rc = VERR_NO_UTF16_MEMORY; 1079 } 1080 return rc; 1081 } 1082 RT_EXPORT_SYMBOL(RTStrToUtf16BigTag); 948 1083 949 1084 … … 1011 1146 } 1012 1147 RT_EXPORT_SYMBOL(RTStrToUtf16ExTag); 1148 1149 1150 RTDECL(int) RTStrToUtf16BigExTag(const char *pszString, size_t cchString, 1151 PRTUTF16 *ppwsz, size_t cwc, size_t *pcwc, const char *pszTag) 1152 { 1153 /* 1154 * Validate input. 1155 */ 1156 Assert(VALID_PTR(pszString)); 1157 Assert(VALID_PTR(ppwsz)); 1158 Assert(!pcwc || VALID_PTR(pcwc)); 1159 1160 /* 1161 * Validate the UTF-8 input and calculate the length of the UTF-16 string. 1162 */ 1163 size_t cwcResult; 1164 int rc; 1165 if (cchString != RTSTR_MAX) 1166 rc = rtUtf8CalcUtf16LengthN(pszString, cchString, &cwcResult); 1167 else 1168 rc = rtUtf8CalcUtf16Length(pszString, &cwcResult); 1169 if (RT_SUCCESS(rc)) 1170 { 1171 if (pcwc) 1172 *pcwc = cwcResult; 1173 1174 /* 1175 * Check buffer size / Allocate buffer. 
1176 */ 1177 bool fShouldFree; 1178 PRTUTF16 pwszResult; 1179 if (cwc > 0 && *ppwsz) 1180 { 1181 fShouldFree = false; 1182 if (cwc <= cwcResult) 1183 return VERR_BUFFER_OVERFLOW; 1184 pwszResult = *ppwsz; 1185 } 1186 else 1187 { 1188 *ppwsz = NULL; 1189 fShouldFree = true; 1190 cwc = RT_MAX(cwcResult + 1, cwc); 1191 pwszResult = (PRTUTF16)RTMemAllocTag(cwc * sizeof(RTUTF16), pszTag); 1192 } 1193 if (pwszResult) 1194 { 1195 /* 1196 * Encode the UTF-16BE string. 1197 */ 1198 rc = rtUtf8RecodeAsUtf16Big(pszString, cchString, pwszResult, cwc - 1); 1199 if (RT_SUCCESS(rc)) 1200 { 1201 *ppwsz = pwszResult; 1202 return rc; 1203 } 1204 if (fShouldFree) 1205 RTMemFree(pwszResult); 1206 } 1207 else 1208 rc = VERR_NO_UTF16_MEMORY; 1209 } 1210 return rc; 1211 } 1212 RT_EXPORT_SYMBOL(RTStrToUtf16BigExTag); 1013 1213 1014 1214
Note:
See TracChangeset
for help on using the changeset viewer.

