VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/utf-16-case.cpp

Last change on this file was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
  • Property svn:mergeinfo set to (toggle deleted branches)
    /branches/VBox-3.0/src/VBox/Runtime/common/string/utf-16.cpp58652,​70973
    /branches/VBox-3.2/src/VBox/Runtime/common/string/utf-16.cpp66309,​66318
    /branches/VBox-4.0/src/VBox/Runtime/common/string/utf-16.cpp70873
    /branches/VBox-4.1/src/VBox/Runtime/common/string/utf-16.cpp74233,​78414,​78691,​81841,​82127,​85941,​85944-85947,​85949-85950,​85953,​86701,​86728,​87009
    /branches/VBox-4.2/src/VBox/Runtime/common/string/utf-16.cpp86229-86230,​86234,​86529,​91503-91504,​91506-91508,​91510,​91514-91515,​91521
    /branches/VBox-4.3/src/VBox/Runtime/common/string/utf-16.cpp91223
    /branches/VBox-4.3/trunk/src/VBox/Runtime/common/string/utf-16.cpp91223
    /branches/andy/draganddrop/src/VBox/Runtime/common/string/utf-16.cpp90781-91268
    /branches/andy/guestctrl20/src/VBox/Runtime/common/string/utf-16.cpp78916,​78930
    /branches/bird/hardenedwindows/src/VBox/Runtime/common/string/utf-16-case.cpp92961-94610
    /branches/dsen/gui/src/VBox/Runtime/common/string/utf-16.cpp79076-79078,​79089,​79109-79110,​79112-79113,​79127-79130,​79134,​79141,​79151,​79155,​79157-79159,​79193,​79197
    /branches/dsen/gui2/src/VBox/Runtime/common/string/utf-16.cpp79224,​79228,​79233,​79235,​79258,​79262-79263,​79273,​79341,​79345,​79354,​79357,​79387-79388,​79559-79569,​79572-79573,​79578,​79581-79582,​79590-79591,​79598-79599,​79602-79603,​79605-79606,​79632,​79635,​79637,​79644
    /branches/dsen/gui3/src/VBox/Runtime/common/string/utf-16.cpp79645-79692
File size: 14.1 KB
Line 
1/* $Id: utf-16-case.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * IPRT - UTF-16, Case Sensitivity.
4 */
5
6/*
7 * Copyright (C) 2006-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <iprt/utf16.h>
42#include "internal/iprt.h"
43
44#include <iprt/uni.h>
45#include <iprt/alloc.h>
46#include <iprt/asm.h>
47#include <iprt/assert.h>
48#include <iprt/errcore.h>
49#include "internal/string.h"
50
51
52RTDECL(int) RTUtf16ICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2)
53{
54 if (pwsz1 == pwsz2)
55 return 0;
56 if (!pwsz1)
57 return -1;
58 if (!pwsz2)
59 return 1;
60
61 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
62 for (;;)
63 {
64 RTUTF16 wc1 = *pwsz1;
65 RTUTF16 wc2 = *pwsz2;
66 int iDiff = wc1 - wc2;
67 if (iDiff)
68 {
69 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
70 if ( wc1 < 0xd800
71 || wc2 < 0xd800
72 || wc1 > 0xdfff
73 || wc2 > 0xdfff)
74 {
75 /* simple UCS-2 char */
76 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
77 if (iDiff)
78 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
79 }
80 else
81 {
82 /* a damned pair */
83 RTUNICP uc1;
84 RTUNICP uc2;
85 if (wc1 >= 0xdc00)
86 {
87 if (pwsz1Start == pwsz1)
88 return iDiff;
89 uc1 = pwsz1[-1];
90 if (uc1 < 0xd800 || uc1 >= 0xdc00)
91 return iDiff;
92 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
93 uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff));
94 }
95 else
96 {
97 uc1 = *++pwsz1;
98 if (uc1 < 0xdc00 || uc1 >= 0xe000)
99 return iDiff;
100 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
101 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff));
102 }
103 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
104 if (iDiff)
105 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
106 }
107 if (iDiff)
108 return iDiff;
109 }
110 if (!wc1)
111 return 0;
112 pwsz1++;
113 pwsz2++;
114 }
115}
116RT_EXPORT_SYMBOL(RTUtf16ICmp);
117
118
119RTDECL(int) RTUtf16BigICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2)
120{
121 if (pwsz1 == pwsz2)
122 return 0;
123 if (!pwsz1)
124 return -1;
125 if (!pwsz2)
126 return 1;
127
128 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
129 for (;;)
130 {
131 RTUTF16 wc1 = *pwsz1;
132 RTUTF16 wc2 = *pwsz2;
133 int iDiff = wc1 - wc2;
134 if (iDiff)
135 {
136 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
137 wc1 = RT_BE2H_U16(wc1);
138 wc2 = RT_BE2H_U16(wc2);
139 if ( wc1 < 0xd800
140 || wc2 < 0xd800
141 || wc1 > 0xdfff
142 || wc2 > 0xdfff)
143 {
144 /* simple UCS-2 char */
145 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
146 if (iDiff)
147 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
148 }
149 else
150 {
151 /* a damned pair */
152 RTUNICP uc1;
153 RTUNICP uc2;
154 if (wc1 >= 0xdc00)
155 {
156 if (pwsz1Start == pwsz1)
157 return iDiff;
158 uc1 = RT_BE2H_U16(pwsz1[-1]);
159 if (uc1 < 0xd800 || uc1 >= 0xdc00)
160 return iDiff;
161 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
162 uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff));
163 }
164 else
165 {
166 RTUTF16 wcTmp = *++pwsz1;
167 uc1 = RT_BE2H_U16(wcTmp);
168 if (uc1 < 0xdc00 || uc1 >= 0xe000)
169 return iDiff;
170 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
171 wcTmp = *++pwsz2;
172 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff));
173 }
174 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
175 if (iDiff)
176 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
177 }
178 if (iDiff)
179 return iDiff;
180 }
181 if (!wc1)
182 return 0;
183 pwsz1++;
184 pwsz2++;
185 }
186}
187RT_EXPORT_SYMBOL(RTUtf16BigICmp);
188
189
190RTDECL(int) RTUtf16ICmpUtf8(PCRTUTF16 pwsz1, const char *psz2)
191{
192 /*
193 * NULL and empty strings are all the same.
194 */
195 if (!pwsz1)
196 return !psz2 || !*psz2 ? 0 : -1;
197 if (!psz2)
198 return !*pwsz1 ? 0 : 1;
199
200 /*
201 * Compare with a UTF-8 string by enumerating them char by char.
202 */
203 for (;;)
204 {
205 RTUNICP uc1;
206 int rc = RTUtf16GetCpEx(&pwsz1, &uc1);
207 AssertRCReturn(rc, 1);
208
209 RTUNICP uc2;
210 rc = RTStrGetCpEx(&psz2, &uc2);
211 AssertRCReturn(rc, -1);
212 if (uc1 == uc2)
213 {
214 if (uc1)
215 continue;
216 return 0;
217 }
218
219 if (RTUniCpToUpper(uc1) == RTUniCpToUpper(uc2))
220 continue;
221 if (RTUniCpToLower(uc1) == RTUniCpToLower(uc2))
222 continue;
223 return uc1 < uc2 ? -1 : 1;
224 }
225}
226RT_EXPORT_SYMBOL(RTUtf16CmpIUtf8);
227
228
229RTDECL(int) RTUtf16NICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax)
230{
231 if (pwsz1 == pwsz2)
232 return 0;
233 if (!pwsz1)
234 return -1;
235 if (!pwsz2)
236 return 1;
237
238 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
239 while (cwcMax-- > 0)
240 {
241 RTUTF16 wc1 = *pwsz1;
242 RTUTF16 wc2 = *pwsz2;
243 int iDiff = wc1 - wc2;
244 if (iDiff)
245 {
246 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
247 if ( wc1 < 0xd800
248 || wc2 < 0xd800
249 || wc1 > 0xdfff
250 || wc2 > 0xdfff)
251 {
252 /* simple UCS-2 char */
253 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
254 if (iDiff)
255 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
256 }
257 else
258 {
259 /* a damned pair */
260 RTUNICP uc1;
261 RTUNICP uc2;
262 if (wc1 >= 0xdc00)
263 {
264 if (pwsz1Start == pwsz1)
265 return iDiff;
266 uc1 = pwsz1[-1];
267 if (uc1 < 0xd800 || uc1 >= 0xdc00)
268 return iDiff;
269 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
270 uc2 = 0x10000 + (((pwsz2[-1] & 0x3ff) << 10) | (wc2 & 0x3ff));
271 }
272 else if (cwcMax-- > 0)
273 {
274 uc1 = *++pwsz1;
275 if (uc1 < 0xdc00 || uc1 >= 0xe000)
276 return iDiff;
277 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
278 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (*++pwsz2 & 0x3ff));
279 }
280 else
281 {
282 iDiff = wc1 - wc2;
283 return iDiff;
284 }
285 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
286 if (iDiff)
287 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
288 }
289 if (iDiff)
290 return iDiff;
291 }
292 if (!wc1)
293 return 0;
294 pwsz1++;
295 pwsz2++;
296 }
297 return 0;
298}
299RT_EXPORT_SYMBOL(RTUtf16NICmp);
300
301
302RTDECL(int) RTUtf16BigNICmp(PCRTUTF16 pwsz1, PCRTUTF16 pwsz2, size_t cwcMax)
303{
304 if (pwsz1 == pwsz2)
305 return 0;
306 if (!pwsz1)
307 return -1;
308 if (!pwsz2)
309 return 1;
310
311 PCRTUTF16 pwsz1Start = pwsz1; /* keep it around in case we have to backtrack on a surrogate pair */
312 while (cwcMax-- > 0)
313 {
314 RTUTF16 wc1 = *pwsz1;
315 RTUTF16 wc2 = *pwsz2;
316 int iDiff = wc1 - wc2;
317 if (iDiff)
318 {
319 /* unless they are *both* surrogate pairs, there is no chance they'll be identical. */
320 wc1 = RT_BE2H_U16(wc1);
321 wc2 = RT_BE2H_U16(wc2);
322 if ( wc1 < 0xd800
323 || wc2 < 0xd800
324 || wc1 > 0xdfff
325 || wc2 > 0xdfff)
326 {
327 /* simple UCS-2 char */
328 iDiff = RTUniCpToUpper(wc1) - RTUniCpToUpper(wc2);
329 if (iDiff)
330 iDiff = RTUniCpToLower(wc1) - RTUniCpToLower(wc2);
331 }
332 else
333 {
334 /* a damned pair */
335 RTUNICP uc1;
336 RTUNICP uc2;
337 if (wc1 >= 0xdc00)
338 {
339 if (pwsz1Start == pwsz1)
340 return iDiff;
341 uc1 = RT_BE2H_U16(pwsz1[-1]);
342 if (uc1 < 0xd800 || uc1 >= 0xdc00)
343 return iDiff;
344 uc1 = 0x10000 + (((uc1 & 0x3ff) << 10) | (wc1 & 0x3ff));
345 uc2 = 0x10000 + (((RT_BE2H_U16(pwsz2[-1]) & 0x3ff) << 10) | (wc2 & 0x3ff));
346 }
347 else if (cwcMax > 0)
348 {
349 RTUTF16 wcTmp = *++pwsz1;
350 uc1 = RT_BE2H_U16(wcTmp);
351 if (uc1 < 0xdc00 || uc1 >= 0xe000)
352 return iDiff;
353 uc1 = 0x10000 + (((wc1 & 0x3ff) << 10) | (uc1 & 0x3ff));
354 wcTmp = *++pwsz2;
355 uc2 = 0x10000 + (((wc2 & 0x3ff) << 10) | (RT_BE2H_U16(wcTmp) & 0x3ff));
356 }
357 else
358 {
359 iDiff = wc1 - wc2;
360 return iDiff;
361 }
362 iDiff = RTUniCpToUpper(uc1) - RTUniCpToUpper(uc2);
363 if (iDiff)
364 iDiff = RTUniCpToLower(uc1) - RTUniCpToLower(uc2); /* serious paranoia! */
365 }
366 if (iDiff)
367 return iDiff;
368 }
369 if (!wc1)
370 return 0;
371 pwsz1++;
372 pwsz2++;
373 }
374 return 0;
375}
376RT_EXPORT_SYMBOL(RTUtf16BigNICmp);
377
378
379RTDECL(PRTUTF16) RTUtf16ToLower(PRTUTF16 pwsz)
380{
381 PRTUTF16 pwc = pwsz;
382 for (;;)
383 {
384 RTUTF16 wc = *pwc;
385 if (!wc)
386 break;
387 if (wc < 0xd800 || wc >= 0xdc00)
388 {
389 RTUNICP ucFolded = RTUniCpToLower(wc);
390 if (ucFolded < 0x10000)
391 *pwc++ = RTUniCpToLower(wc);
392 }
393 else
394 {
395 /* surrogate */
396 RTUTF16 wc2 = pwc[1];
397 if (wc2 >= 0xdc00 && wc2 <= 0xdfff)
398 {
399 RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff));
400 RTUNICP ucFolded = RTUniCpToLower(uc);
401 if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */
402 {
403 uc -= 0x10000;
404 *pwc++ = 0xd800 | (uc >> 10);
405 *pwc++ = 0xdc00 | (uc & 0x3ff);
406 }
407 }
408 else /* invalid encoding. */
409 pwc++;
410 }
411 }
412 return pwsz;
413}
414RT_EXPORT_SYMBOL(RTUtf16ToLower);
415
416
417RTDECL(PRTUTF16) RTUtf16ToUpper(PRTUTF16 pwsz)
418{
419 PRTUTF16 pwc = pwsz;
420 for (;;)
421 {
422 RTUTF16 wc = *pwc;
423 if (!wc)
424 break;
425 if (wc < 0xd800 || wc >= 0xdc00)
426 *pwc++ = RTUniCpToUpper(wc);
427 else
428 {
429 /* surrogate */
430 RTUTF16 wc2 = pwc[1];
431 if (wc2 >= 0xdc00 && wc2 <= 0xdfff)
432 {
433 RTUNICP uc = 0x10000 + (((wc & 0x3ff) << 10) | (wc2 & 0x3ff));
434 RTUNICP ucFolded = RTUniCpToUpper(uc);
435 if (uc != ucFolded && ucFolded >= 0x10000) /* we don't support shrinking the string */
436 {
437 uc -= 0x10000;
438 *pwc++ = 0xd800 | (uc >> 10);
439 *pwc++ = 0xdc00 | (uc & 0x3ff);
440 }
441 }
442 else /* invalid encoding. */
443 pwc++;
444 }
445 }
446 return pwsz;
447}
448RT_EXPORT_SYMBOL(RTUtf16ToUpper);
449
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use