1 | /** @file
|
---|
2 | UCS2 to UTF8 manipulation library.
|
---|
3 |
|
---|
4 | Copyright (c) 2018 - 2019, Intel Corporation. All rights reserved.<BR>
|
---|
5 | (C) Copyright 2020 Hewlett Packard Enterprise Development LP<BR>
|
---|
6 |
|
---|
7 | SPDX-License-Identifier: BSD-2-Clause-Patent
|
---|
8 |
|
---|
9 | **/
|
---|
10 | #include <Uefi.h>
|
---|
11 | #include <Library/BaseLib.h>
|
---|
12 | #include <Library/BaseMemoryLib.h>
|
---|
13 | #include <Library/BaseUcs2Utf8Lib.h>
|
---|
14 | #include <Library/DebugLib.h>
|
---|
15 | #include <Library/MemoryAllocationLib.h>
|
---|
16 |
|
---|
17 | /**
|
---|
18 | Since each UCS2 character can be represented by 1-3 UTF8 encoded characters,
|
---|
19 | this function is used to retrieve the UTF8 encoding size for a UCS2 character.
|
---|
20 |
|
---|
21 | @param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
---|
22 |
|
---|
23 | @retval Return the size of UTF8 encoding string or 0 if it is not for
|
---|
24 | UCS2 format.
|
---|
25 |
|
---|
26 | **/
|
---|
27 | UINT8
|
---|
28 | GetUTF8SizeForUCS2 (
|
---|
29 | IN CHAR8 *Utf8Buffer
|
---|
30 | )
|
---|
31 | {
|
---|
32 | CHAR8 TempChar;
|
---|
33 | UINT8 Utf8Size;
|
---|
34 |
|
---|
35 | ASSERT (Utf8Buffer != NULL);
|
---|
36 |
|
---|
37 | TempChar = *Utf8Buffer;
|
---|
38 | if ((TempChar & 0xF0) == 0xF0) {
|
---|
39 | //
|
---|
40 | // This format is not for UCS2.
|
---|
41 | //
|
---|
42 | return 0;
|
---|
43 | }
|
---|
44 |
|
---|
45 | Utf8Size = 1;
|
---|
46 | if ((TempChar & 0x80) == 0x80) {
|
---|
47 | if ((TempChar & 0xC0) == 0xC0) {
|
---|
48 | Utf8Size++;
|
---|
49 | if ((TempChar & 0xE0) == 0xE0) {
|
---|
50 | Utf8Size++;
|
---|
51 | }
|
---|
52 | }
|
---|
53 | }
|
---|
54 |
|
---|
55 | return Utf8Size;
|
---|
56 | }
|
---|
57 |
|
---|
58 | /**
|
---|
59 | Since each UCS2 character can be represented by the format: \uXXXX, this function
|
---|
60 | is used to retrieve the UCS2 character from a Unicode format.
|
---|
61 | Call MUST make sure there are at least 6 Bytes in the input UTF8 buffer.
|
---|
62 |
|
---|
63 | @param[in] Utf8Buffer The buffer for UTF8 encoded data.
|
---|
64 | @param[out] Ucs2Char The converted UCS2 character.
|
---|
65 |
|
---|
66 | @retval EFI_INVALID_PARAMETER Non-Ascii characters found in the hexadecimal
|
---|
67 | digits string, and can't be converted to a UCS2
|
---|
68 | character.
|
---|
69 | @retval EFI_SUCCESS The UCS2 character has been retrieved.
|
---|
70 |
|
---|
71 | **/
|
---|
72 | EFI_STATUS
|
---|
73 | GetUCS2CharByFormat (
|
---|
74 | IN CHAR8 *Utf8Buffer,
|
---|
75 | OUT CHAR16 *Ucs2Char
|
---|
76 | )
|
---|
77 | {
|
---|
78 | UINT8 Num1;
|
---|
79 | UINT8 Num2;
|
---|
80 | UINT8 Index;
|
---|
81 | CHAR8 Ucs2CharFormat[UNICODE_FORMAT_CHAR_SIZE]; /// two Hexadecimal digits Ascii string, like "3F"
|
---|
82 |
|
---|
83 | for (Index = 0; Index < 4; Index++) {
|
---|
84 | if ((*(Utf8Buffer + 2 + Index) & 0x80) != 0x00) {
|
---|
85 | return EFI_INVALID_PARAMETER;
|
---|
86 | }
|
---|
87 | }
|
---|
88 |
|
---|
89 | ZeroMem (Ucs2CharFormat, UNICODE_FORMAT_CHAR_SIZE);
|
---|
90 |
|
---|
91 | //
|
---|
92 | // Get the First Number, Offset is 2
|
---|
93 | //
|
---|
94 | CopyMem (Ucs2CharFormat, Utf8Buffer + 2, UNICODE_FORMAT_CHAR_LEN);
|
---|
95 | Num1 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
---|
96 |
|
---|
97 | //
|
---|
98 | // Get the Second Number, Offset is 4
|
---|
99 | //
|
---|
100 | CopyMem (Ucs2CharFormat, Utf8Buffer + 4, UNICODE_FORMAT_CHAR_LEN);
|
---|
101 | Num2 = (UINT8)AsciiStrHexToUintn (Ucs2CharFormat);
|
---|
102 |
|
---|
103 | //
|
---|
104 | // Ucs2Char is Little-Endian
|
---|
105 | //
|
---|
106 | *((CHAR8 *)Ucs2Char) = Num2;
|
---|
107 | *(((CHAR8 *)Ucs2Char) + 1) = Num1;
|
---|
108 |
|
---|
109 | return EFI_SUCCESS;
|
---|
110 | }
|
---|
111 |
|
---|
112 | /**
|
---|
113 | Convert a UCS2 character to UTF8 encoding string.
|
---|
114 |
|
---|
115 | @param[in] Ucs2Char The provided UCS2 character.
|
---|
116 | @param[out] Utf8Buffer The converted UTF8 encoded data.
|
---|
117 |
|
---|
118 | @retval Return the size of UTF8 encoding data for this UCS2 character.
|
---|
119 |
|
---|
120 | **/
|
---|
121 | UINT8
|
---|
122 | UCS2CharToUTF8 (
|
---|
123 | IN CHAR16 Ucs2Char,
|
---|
124 | OUT CHAR8 *Utf8Buffer
|
---|
125 | )
|
---|
126 | {
|
---|
127 | UINT16 Ucs2Number;
|
---|
128 |
|
---|
129 | ASSERT (Utf8Buffer != NULL);
|
---|
130 |
|
---|
131 | Ucs2Number = (UINT16)Ucs2Char;
|
---|
132 | if (Ucs2Number <= 0x007F) {
|
---|
133 | //
|
---|
134 | // UTF8 format: 0xxxxxxx
|
---|
135 | //
|
---|
136 | *Utf8Buffer = Ucs2Char & 0x7F;
|
---|
137 | return 1;
|
---|
138 | } else if ((Ucs2Number >= 0x0080) && (Ucs2Number <= 0x07FF)) {
|
---|
139 | //
|
---|
140 | // UTF8 format: 110xxxxx 10xxxxxx
|
---|
141 | //
|
---|
142 | *(Utf8Buffer + 1) = (Ucs2Char & 0x3F) | 0x80;
|
---|
143 | *Utf8Buffer = ((Ucs2Char >> 6) & 0x1F) | 0xC0;
|
---|
144 | return 2;
|
---|
145 | } else {
|
---|
146 | /// Ucs2Number >= 0x0800 && Ucs2Number <= 0xFFFF
|
---|
147 |
|
---|
148 | //
|
---|
149 | // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
---|
150 | //
|
---|
151 | *(Utf8Buffer + 2) = (Ucs2Char & 0x3F) | 0x80;
|
---|
152 | *(Utf8Buffer + 1) = ((Ucs2Char >> 6) & 0x3F) | 0x80;
|
---|
153 | *Utf8Buffer = ((Ucs2Char >> 12) & 0x0F) | 0xE0;
|
---|
154 | return 3;
|
---|
155 | }
|
---|
156 | }
|
---|
157 |
|
---|
158 | /**
|
---|
159 | Convert a UTF8 encoded data to a UCS2 character.
|
---|
160 |
|
---|
161 | @param[in] Utf8Buffer The provided UTF8 encoded data.
|
---|
162 | @param[out] Ucs2Char The converted UCS2 character.
|
---|
163 |
|
---|
164 | @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid or
|
---|
165 | not for UCS2 character.
|
---|
166 | @retval EFI_SUCCESS The converted UCS2 character.
|
---|
167 |
|
---|
168 | **/
|
---|
169 | EFI_STATUS
|
---|
170 | UTF8ToUCS2Char (
|
---|
171 | IN CHAR8 *Utf8Buffer,
|
---|
172 | OUT CHAR16 *Ucs2Char
|
---|
173 | )
|
---|
174 | {
|
---|
175 | UINT8 Utf8Size;
|
---|
176 | CHAR8 *Ucs2Buffer;
|
---|
177 | CHAR8 TempChar1;
|
---|
178 | CHAR8 TempChar2;
|
---|
179 | CHAR8 TempChar3;
|
---|
180 |
|
---|
181 | ASSERT (Utf8Buffer != NULL && Ucs2Char != NULL);
|
---|
182 | ZeroMem (Ucs2Char, sizeof (CHAR16));
|
---|
183 | Ucs2Buffer = (CHAR8 *)Ucs2Char;
|
---|
184 |
|
---|
185 | Utf8Size = GetUTF8SizeForUCS2 (Utf8Buffer);
|
---|
186 | switch (Utf8Size) {
|
---|
187 | case 1:
|
---|
188 |
|
---|
189 | //
|
---|
190 | // UTF8 format: 0xxxxxxx
|
---|
191 | //
|
---|
192 | TempChar1 = *Utf8Buffer;
|
---|
193 | if ((TempChar1 & 0x80) != 0x00) {
|
---|
194 | return EFI_INVALID_PARAMETER;
|
---|
195 | }
|
---|
196 |
|
---|
197 | *Ucs2Buffer = TempChar1;
|
---|
198 | *(Ucs2Buffer + 1) = 0;
|
---|
199 | break;
|
---|
200 |
|
---|
201 | case 2:
|
---|
202 |
|
---|
203 | //
|
---|
204 | // UTF8 format: 110xxxxx 10xxxxxx
|
---|
205 | //
|
---|
206 | TempChar1 = *Utf8Buffer;
|
---|
207 | if ((TempChar1 & 0xE0) != 0xC0) {
|
---|
208 | return EFI_INVALID_PARAMETER;
|
---|
209 | }
|
---|
210 |
|
---|
211 | TempChar2 = *(Utf8Buffer + 1);
|
---|
212 | if ((TempChar2 & 0xC0) != 0x80) {
|
---|
213 | return EFI_INVALID_PARAMETER;
|
---|
214 | }
|
---|
215 |
|
---|
216 | *Ucs2Buffer = (TempChar1 << 6) + (TempChar2 & 0x3F);
|
---|
217 | *(Ucs2Buffer + 1) = (TempChar1 >> 2) & 0x07;
|
---|
218 | break;
|
---|
219 |
|
---|
220 | case 3:
|
---|
221 |
|
---|
222 | //
|
---|
223 | // UTF8 format: 1110xxxx 10xxxxxx 10xxxxxx
|
---|
224 | //
|
---|
225 | TempChar1 = *Utf8Buffer;
|
---|
226 | if ((TempChar1 & 0xF0) != 0xE0) {
|
---|
227 | return EFI_INVALID_PARAMETER;
|
---|
228 | }
|
---|
229 |
|
---|
230 | TempChar2 = *(Utf8Buffer + 1);
|
---|
231 | if ((TempChar2 & 0xC0) != 0x80) {
|
---|
232 | return EFI_INVALID_PARAMETER;
|
---|
233 | }
|
---|
234 |
|
---|
235 | TempChar3 = *(Utf8Buffer + 2);
|
---|
236 | if ((TempChar3 & 0xC0) != 0x80) {
|
---|
237 | return EFI_INVALID_PARAMETER;
|
---|
238 | }
|
---|
239 |
|
---|
240 | *Ucs2Buffer = (TempChar2 << 6) + (TempChar3 & 0x3F);
|
---|
241 | *(Ucs2Buffer + 1) = (TempChar1 << 4) + ((TempChar2 >> 2) & 0x0F);
|
---|
242 |
|
---|
243 | break;
|
---|
244 |
|
---|
245 | default:
|
---|
246 |
|
---|
247 | return EFI_INVALID_PARAMETER;
|
---|
248 | }
|
---|
249 |
|
---|
250 | return EFI_SUCCESS;
|
---|
251 | }
|
---|
252 |
|
---|
253 | /**
|
---|
254 | Convert a UCS2 string to a UTF8 encoded string.
|
---|
255 |
|
---|
256 | @param[in] Ucs2Str The provided UCS2 string.
|
---|
257 | @param[out] Utf8StrAddr The converted UTF8 string address. Caller
|
---|
258 | is responsible for Free this string.
|
---|
259 |
|
---|
260 | @retval EFI_INVALID_PARAMETER One or more parameters are invalid.
|
---|
261 | @retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
---|
262 | @retval EFI_SUCCESS The UTF8 encoded string has been converted.
|
---|
263 |
|
---|
264 | **/
|
---|
265 | EFI_STATUS
|
---|
266 | UCS2StrToUTF8 (
|
---|
267 | IN CHAR16 *Ucs2Str,
|
---|
268 | OUT CHAR8 **Utf8StrAddr
|
---|
269 | )
|
---|
270 | {
|
---|
271 | UINTN Ucs2StrIndex;
|
---|
272 | UINTN Ucs2StrLength;
|
---|
273 | CHAR8 *Utf8Str;
|
---|
274 | UINTN Utf8StrLength;
|
---|
275 | UINTN Utf8StrIndex;
|
---|
276 | CHAR8 Utf8Buffer[UTF8_BUFFER_FOR_UCS2_MAX_SIZE];
|
---|
277 | UINT8 Utf8BufferSize;
|
---|
278 |
|
---|
279 | if ((Ucs2Str == NULL) || (Utf8StrAddr == NULL)) {
|
---|
280 | return EFI_INVALID_PARAMETER;
|
---|
281 | }
|
---|
282 |
|
---|
283 | Ucs2StrLength = StrLen (Ucs2Str);
|
---|
284 | Utf8StrLength = 0;
|
---|
285 |
|
---|
286 | for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
---|
287 | ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
---|
288 | Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
---|
289 | Utf8StrLength += Utf8BufferSize;
|
---|
290 | }
|
---|
291 |
|
---|
292 | Utf8Str = AllocateZeroPool (Utf8StrLength + 1);
|
---|
293 | if (Utf8Str == NULL) {
|
---|
294 | return EFI_OUT_OF_RESOURCES;
|
---|
295 | }
|
---|
296 |
|
---|
297 | Utf8StrIndex = 0;
|
---|
298 | for (Ucs2StrIndex = 0; Ucs2StrIndex < Ucs2StrLength; Ucs2StrIndex++) {
|
---|
299 | ZeroMem (Utf8Buffer, sizeof (Utf8Buffer));
|
---|
300 | Utf8BufferSize = UCS2CharToUTF8 (Ucs2Str[Ucs2StrIndex], Utf8Buffer);
|
---|
301 |
|
---|
302 | CopyMem (Utf8Str + Utf8StrIndex, Utf8Buffer, Utf8BufferSize);
|
---|
303 | Utf8StrIndex += Utf8BufferSize;
|
---|
304 | }
|
---|
305 |
|
---|
306 | Utf8Str[Utf8StrIndex] = '\0';
|
---|
307 | *Utf8StrAddr = Utf8Str;
|
---|
308 |
|
---|
309 | return EFI_SUCCESS;
|
---|
310 | }
|
---|
311 |
|
---|
312 | /**
|
---|
313 | Convert a UTF8 encoded string to a UCS2 string.
|
---|
314 |
|
---|
315 | @param[in] Utf8Str The provided UTF8 encoded string.
|
---|
316 | @param[out] Ucs2StrAddr The converted UCS2 string address. Caller
|
---|
317 | is responsible for Free this string.
|
---|
318 |
|
---|
319 | @retval EFI_INVALID_PARAMETER The UTF8 encoded string is not valid to
|
---|
320 | convert to UCS2 string.
|
---|
321 | One or more parameters are invalid.
|
---|
322 | @retval EFI_OUT_OF_RESOURCES System runs out of resources.
|
---|
323 | @retval EFI_SUCCESS The UCS2 string has been converted.
|
---|
324 |
|
---|
325 | **/
|
---|
326 | EFI_STATUS
|
---|
327 | UTF8StrToUCS2 (
|
---|
328 | IN CHAR8 *Utf8Str,
|
---|
329 | OUT CHAR16 **Ucs2StrAddr
|
---|
330 | )
|
---|
331 | {
|
---|
332 | EFI_STATUS Status;
|
---|
333 | UINTN Utf8StrIndex;
|
---|
334 | UINTN Utf8StrLength;
|
---|
335 | UINTN Ucs2StrIndex;
|
---|
336 | UINT8 Utf8BufferSize;
|
---|
337 | CHAR16 *Ucs2StrTemp;
|
---|
338 |
|
---|
339 | if ((Utf8Str == NULL) || (Ucs2StrAddr == NULL)) {
|
---|
340 | return EFI_INVALID_PARAMETER;
|
---|
341 | }
|
---|
342 |
|
---|
343 | //
|
---|
344 | // It is not an Ascii string, calculate string length.
|
---|
345 | //
|
---|
346 | Utf8StrLength = 0;
|
---|
347 | while (*(Utf8Str + Utf8StrLength) != '\0') {
|
---|
348 | Utf8StrLength++;
|
---|
349 | }
|
---|
350 |
|
---|
351 | //
|
---|
352 | // UCS2 string shall not be longer than the UTF8 string.
|
---|
353 | //
|
---|
354 | Ucs2StrTemp = AllocateZeroPool ((Utf8StrLength + 1) * sizeof (CHAR16));
|
---|
355 | if (Ucs2StrTemp == NULL) {
|
---|
356 | return EFI_OUT_OF_RESOURCES;
|
---|
357 | }
|
---|
358 |
|
---|
359 | Utf8StrIndex = 0;
|
---|
360 | Ucs2StrIndex = 0;
|
---|
361 | while (Utf8Str[Utf8StrIndex] != '\0') {
|
---|
362 | if ((CompareMem (Utf8Str + Utf8StrIndex, "\\u", 2) == 0) &&
|
---|
363 | (Utf8StrLength - Utf8StrIndex >= UNICODE_FORMAT_LEN))
|
---|
364 | {
|
---|
365 | Status = GetUCS2CharByFormat (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
---|
366 | if (!EFI_ERROR (Status)) {
|
---|
367 | Utf8StrIndex += UNICODE_FORMAT_LEN;
|
---|
368 | Ucs2StrIndex++;
|
---|
369 | } else {
|
---|
370 | StrCpyS (Ucs2StrTemp + Ucs2StrIndex, 3, L"\\u");
|
---|
371 |
|
---|
372 | Ucs2StrIndex += 2;
|
---|
373 | Utf8StrIndex += 2;
|
---|
374 | }
|
---|
375 | } else {
|
---|
376 | Utf8BufferSize = GetUTF8SizeForUCS2 (Utf8Str + Utf8StrIndex);
|
---|
377 | if ((Utf8BufferSize == 0) || (Utf8StrLength - Utf8StrIndex < Utf8BufferSize)) {
|
---|
378 | FreePool (Ucs2StrTemp);
|
---|
379 | return EFI_INVALID_PARAMETER;
|
---|
380 | }
|
---|
381 |
|
---|
382 | Status = UTF8ToUCS2Char (Utf8Str + Utf8StrIndex, Ucs2StrTemp + Ucs2StrIndex);
|
---|
383 | if (EFI_ERROR (Status)) {
|
---|
384 | FreePool (Ucs2StrTemp);
|
---|
385 | return EFI_INVALID_PARAMETER;
|
---|
386 | }
|
---|
387 |
|
---|
388 | Ucs2StrIndex++;
|
---|
389 | Utf8StrIndex += Utf8BufferSize;
|
---|
390 | }
|
---|
391 | }
|
---|
392 |
|
---|
393 | *Ucs2StrAddr = AllocateZeroPool ((Ucs2StrIndex + 1) * sizeof (CHAR16));
|
---|
394 | if (*Ucs2StrAddr == NULL) {
|
---|
395 | FreePool (Ucs2StrTemp);
|
---|
396 | return EFI_OUT_OF_RESOURCES;
|
---|
397 | }
|
---|
398 |
|
---|
399 | StrCpyS (*Ucs2StrAddr, Ucs2StrIndex + 1, Ucs2StrTemp);
|
---|
400 | *(*Ucs2StrAddr + Ucs2StrIndex) = L'\0';
|
---|
401 | FreePool (Ucs2StrTemp);
|
---|
402 |
|
---|
403 | return EFI_SUCCESS;
|
---|
404 | }
|
---|