Index: /trunk/include/iprt/string.h
===================================================================
--- /trunk/include/iprt/string.h	(revision 31417)
+++ /trunk/include/iprt/string.h	(revision 31418)
@@ -31,5 +31,4 @@
 #include <iprt/assert.h>
 #include <iprt/stdarg.h>
-#include <iprt/uni.h> /* for RTUNICP_INVALID */
 #include <iprt/err.h> /* for VINF_SUCCESS */
 #if defined(RT_OS_LINUX) && defined(__KERNEL__)
@@ -68,4 +67,14 @@
 #endif
 
+
+/** @def RT_USE_RTC_3629
+ * When defined the UTF-8 range will stop at 0x10ffff.  If not defined, the
+ * range stops at 0x7fffffff.
+ * @remarks Must be defined both when building and using the IPRT.  */
+#ifdef DOXYGEN_RUNNING
+# define RT_USE_RTC_3629
+#endif
+
+
 /**
  * Byte zero the specified object.
@@ -100,4 +109,5 @@
  */
 #define RT_BZERO(pv, cb)    do { memset((pv), 0, cb); } while (0)
+
 
 
@@ -823,7 +833,8 @@
  *
  * This function will validate the string, and incorrectly encoded UTF-8
- * strings will be rejected. The primary purpose of this function is to
- * help allocate buffers for RTStrToLatin1Ex of the correct size. For most
- * other purposes RTStrCalcLatin1LenEx() should be used.
+ * strings as well as strings with code points outside the Latin-1 range will be
+ * rejected.  The primary purpose of this function is to help allocate buffers
+ * for RTStrToLatin1Ex of the correct size.  For most other purposes
+ * RTStrCalcLatin1LenEx() should be used.
  *
  * @returns Number of Latin-1 characters.
@@ -837,9 +848,11 @@
  *
  * This function will validate the string, and incorrectly encoded UTF-8
- * strings will be rejected.
+ * strings as well as strings with code points outside the Latin-1 range will be
+ * rejected.
  *
  * @returns iprt status code.
  * @param   psz         The string.
- * @param   cch         The max string length. Use RTSTR_MAX to process the entire string.
+ * @param   cch         The max string length. Use RTSTR_MAX to process the
+ *                      entire string.
  * @param   pcch        Where to store the string length. Optional.
  *                      This is undefined on failure.
@@ -875,13 +888,17 @@
  * @returns iprt status code.
  * @param   pszString       UTF-8 string to convert.
- * @param   cchString       The maximum size in chars (the type) to convert. The conversion stop
- *                          when it reaches cchString or the string terminator ('\\0').
- *                          Use RTSTR_MAX to translate the entire string.
- * @param   ppsz            If cch is non-zero, this must either be pointing to pointer to
- *                          a buffer of the specified size, or pointer to a NULL pointer.
- *                          If *ppsz is NULL or cch is zero a buffer of at least cch items
- *                          will be allocated to hold the translated string.
- *                          If a buffer was requested it must be freed using RTStrFree().
- * @param   cch             The buffer size in bytes. This includes the terminator.
+ * @param   cchString       The maximum size in chars (the type) to convert.
+ *                          The conversion stops when it reaches cchString or
+ *                          the string terminator ('\\0'). Use RTSTR_MAX to
+ *                          translate the entire string.
+ * @param   ppsz            If cch is non-zero, this must either be pointing to
+ *                          pointer to a buffer of the specified size, or
+ *                          pointer to a NULL pointer.  If *ppsz is NULL or cch
+ *                          is zero a buffer of at least cch items will be
+ *                          allocated to hold the translated string. If a
+ *                          buffer was requested it must be freed using
+ *                          RTStrFree().
+ * @param   cch             The buffer size in bytes. This includes the
+ *                          terminator.
  * @param   pcch            Where to store the length of the translated string,
  *                          excluding the terminator. (Optional)
@@ -901,13 +918,17 @@
  * @returns iprt status code.
  * @param   pszString       UTF-8 string to convert.
- * @param   cchString       The maximum size in chars (the type) to convert. The conversion stop
- *                          when it reaches cchString or the string terminator ('\\0').
- *                          Use RTSTR_MAX to translate the entire string.
- * @param   ppsz            If cch is non-zero, this must either be pointing to pointer to
- *                          a buffer of the specified size, or pointer to a NULL pointer.
- *                          If *ppsz is NULL or cch is zero a buffer of at least cch items
- *                          will be allocated to hold the translated string.
- *                          If a buffer was requested it must be freed using RTStrFree().
- * @param   cch             The buffer size in bytes. This includes the terminator.
+ * @param   cchString       The maximum size in chars (the type) to convert.
+ *                          The conversion stops when it reaches cchString or
+ *                          the string terminator ('\\0'). Use RTSTR_MAX to
+ *                          translate the entire string.
+ * @param   ppsz            If cch is non-zero, this must either be pointing to
+ *                          pointer to a buffer of the specified size, or
+ *                          pointer to a NULL pointer.  If *ppsz is NULL or cch
+ *                          is zero a buffer of at least cch items will be
+ *                          allocated to hold the translated string. If a
+ *                          buffer was requested it must be freed using
+ *                          RTStrFree().
+ * @param   cch             The buffer size in bytes.  This includes the
+ *                          terminator.
  * @param   pcch            Where to store the length of the translated string,
  *                          excluding the terminator. (Optional)
@@ -952,12 +973,15 @@
  * @returns iprt status code.
  * @param   pszString       The Latin-1 string to convert.
- * @param   cchString       The number of Latin-1 characters to translate from pszString.
- *                          The translation will stop when reaching cchString or the terminator ('\\0').
- *                          Use RTSTR_MAX to translate the entire string.
- * @param   ppsz            If cch is non-zero, this must either be pointing to a pointer to
- *                          a buffer of the specified size, or pointer to a NULL pointer.
- *                          If *ppsz is NULL or cch is zero a buffer of at least cch chars
- *                          will be allocated to hold the translated string.
- *                          If a buffer was requested it must be freed using RTStrFree().
+ * @param   cchString       The number of Latin-1 characters to translate from
+ *                          pszString. The translation will stop when reaching
+ *                          cchString or the terminator ('\\0').  Use RTSTR_MAX
+ *                          to translate the entire string.
+ * @param   ppsz            If cch is non-zero, this must either be pointing to
+ *                          a pointer to a buffer of the specified size, or
+ *                          pointer to a NULL pointer.  If *ppsz is NULL or cch
+ *                          is zero a buffer of at least cch chars will be
+ *                          allocated to hold the translated string. If a
+ *                          buffer was requested it must be freed using
+ *                          RTStrFree().
  * @param   cch             The buffer size in chars (the type). This includes the terminator.
  * @param   pcch            Where to store the length of the translated string,
@@ -978,13 +1002,17 @@
  * @returns iprt status code.
  * @param   pszString       The Latin1 string to convert.
- * @param   cchString       The number of Latin1 characters to translate from pwszString.
- *                          The translation will stop when reaching cchString or the terminator ('\\0').
- *                          Use RTSTR_MAX to translate the entire string.
- * @param   ppsz            If cch is non-zero, this must either be pointing to a pointer to
- *                          a buffer of the specified size, or pointer to a NULL pointer.
- *                          If *ppsz is NULL or cch is zero a buffer of at least cch chars
- *                          will be allocated to hold the translated string.
- *                          If a buffer was requested it must be freed using RTStrFree().
- * @param   cch             The buffer size in chars (the type). This includes the terminator.
+ * @param   cchString       The number of Latin1 characters to translate from
+ *                          pszString.  The translation will stop when
+ *                          reaching cchString or the terminator ('\\0').  Use
+ *                          RTSTR_MAX to translate the entire string.
+ * @param   ppsz            If cch is non-zero, this must either be pointing to
+ *                          a pointer to a buffer of the specified size, or
+ *                          pointer to a NULL pointer.  If *ppsz is NULL or cch
+ *                          is zero a buffer of at least cch chars will be
+ *                          allocated to hold the translated string.  If a
+ *                          buffer was requested it must be freed using
+ *                          RTStrFree().
+ * @param   cch             The buffer size in chars (the type).  This includes
+ *                          the terminator.
  * @param   pcch            Where to store the length of the translated string,
  *                          excluding the terminator. (Optional)
@@ -1005,5 +1033,5 @@
  * RTLatin1ToUtf8Ex() should be used.
  *
- * @returns Number of char (bytes).
+ * @returns Number of chars (bytes).
  * @returns 0 if the string was incorrectly encoded.
  * @param   psz        The Latin-1 string.
@@ -1017,5 +1045,5 @@
  * @param   psz         The string.
  * @param   cch         The max string length. Use RTSTR_MAX to process the entire string.
- * @param   pcch        Where to store the string length (in bytes). Optional.
+ * @param   pcch        Where to store the string length (in bytes).  Optional.
  *                      This is undefined on failure.
  */
@@ -1159,20 +1187,32 @@
 
 /**
- * Get the UTF-8 size in characters of a given Unicode code point.  The code
- * point is expected to be a valid Unicode one, but not necessarily in the
- * range supported by UTF-8.
- *
- * @returns the size in characters, or zero if there is no UTF-8 encoding
+ * Get the UTF-8 size in characters of a given Unicode code point.
+ *
+ * The code point is expected to be a valid Unicode one, but not necessarily in
+ * the range supported by UTF-8.
+ *
+ * @returns The number of chars (bytes) required to encode the code point, or
+ *          zero if there is no UTF-8 encoding.
+ * @param   CodePoint       The unicode code point.
  */
 DECLINLINE(size_t) RTStrCpSize(RTUNICP CodePoint)
 {
-    if (CodePoint < 0x80)
+    if (CodePoint < 0x00000080)
         return 1;
-    if (CodePoint < 0x800)
+    if (CodePoint < 0x00000800)
         return 2;
-    if (CodePoint < 0x10000)
+    if (CodePoint < 0x00010000)
         return 3;
-    if (CodePoint < 0x11000)
+#ifdef RT_USE_RTC_3629
+    if (CodePoint < 0x00110000) /* Restricted range: last code point is 0x10ffff. */
         return 4;
+#else
+    if (CodePoint < 0x00200000)
+        return 4;
+    if (CodePoint < 0x04000000)
+        return 5;
+    if (CodePoint <= 0x7fffffff) /* Inclusive: 0x7fffffff still fits in 6 bytes. */
+        return 6;
+#endif
     return 0;
 }
@@ -1293,7 +1333,8 @@
 
 /**
- * Get the Latin-1 size in characters of a given Unicode code point.  The code
- * point is expected to be a valid Unicode one, but not necessarily in the
- * range supported by Latin-1.
+ * Get the Latin-1 size in characters of a given Unicode code point.
+ *
+ * The code point is expected to be a valid Unicode one, but not necessarily in
+ * the range supported by Latin-1.
  *
  * @returns the size in characters, or zero if there is no Latin-1 encoding
Index: /trunk/include/iprt/types.h
===================================================================
--- /trunk/include/iprt/types.h	(revision 31417)
+++ /trunk/include/iprt/types.h	(revision 31418)
@@ -1575,9 +1575,15 @@
  * Unicode Code Point.
  */
-typedef uint32_t        RTUNICP;
+typedef uint32_t            RTUNICP;
 /** Pointer to an Unicode Code Point. */
-typedef RTUNICP        *PRTUNICP;
+typedef RTUNICP            *PRTUNICP;
 /** Pointer to an Unicode Code Point. */
-typedef const RTUNICP  *PCRTUNICP;
+typedef const RTUNICP      *PCRTUNICP;
+/** Max value a RTUNICP type can hold. */
+#define RTUNICP_MAX         ( ~(RTUNICP)0 )
+/** Invalid code point.
+ * This is returned when encountering invalid encodings or invalid
+ * unicode code points. */
+#define RTUNICP_INVALID     ( UINT32_C(0xfffffffe) )
 
 
Index: /trunk/include/iprt/uni.h
===================================================================
--- /trunk/include/iprt/uni.h	(revision 31417)
+++ /trunk/include/iprt/uni.h	(revision 31418)
@@ -45,13 +45,4 @@
 
 RT_C_DECLS_BEGIN
-
-
-/** Max value a RTUNICP type can hold. */
-#define RTUNICP_MAX         ( ~(RTUNICP)0 )
-
-/** Invalid code point.
- * This is returned when encountered invalid encodings or invalid
- * unicode code points. */
-#define RTUNICP_INVALID     ( 0xfffffffe )
 
 
Index: /trunk/src/VBox/Runtime/common/string/utf-8.cpp
===================================================================
--- /trunk/src/VBox/Runtime/common/string/utf-8.cpp	(revision 31417)
+++ /trunk/src/VBox/Runtime/common/string/utf-8.cpp	(revision 31418)
@@ -64,4 +64,5 @@
         {
             /* figure sequence length and validate the first byte */
+/** @todo RT_USE_RTC_3629 */
             unsigned cb;
             if ((uch & (RT_BIT(7) | RT_BIT(6) | RT_BIT(5))) == (RT_BIT(7) | RT_BIT(6)))
@@ -805,5 +806,5 @@
 {
     size_t  cch = 0;
-    while (true)
+    for (;;)
     {
         RTUNICP Cp;
@@ -814,8 +815,5 @@
         if (RT_FAILURE(rc))
             return rc;
-        cchCp = RTStrCpSize(Cp);
-        if (cchCp == 0)
-            return VERR_NO_TRANSLATION;
-        cch += cchCp;
+        cch += RTStrCpSize(Cp); /* cannot fail */
     }
 
@@ -838,7 +836,6 @@
 static int rtLatin1RecodeAsUtf8(const char *pszIn, size_t cchIn, char *psz, size_t cch)
 {
-    int   rc  = VINF_SUCCESS;
-
-    while (true)
+    int     rc = VINF_SUCCESS;
+    for (;;)
     {
         RTUNICP Cp;
@@ -854,6 +851,6 @@
             break;
         }
+        cch -= cchCp;
         psz = RTStrPutCp(psz, Cp);
-        cch -= cchCp;
     }
 
@@ -985,5 +982,6 @@
 
 /**
- * Calculates the Latin-1 length of a string, validating the encoding while doing so.
+ * Calculates the Latin-1 length of a string, validating the encoding while
+ * doing so.
  *
  * @returns IPRT status code.
@@ -996,5 +994,5 @@
 {
     size_t  cch = 0;
-    while (true)
+    for (;;)
     {
         RTUNICP Cp;
@@ -1033,5 +1031,5 @@
     int   rc  = VINF_SUCCESS;
 
-    while (true)
+    for (;;)
     {
         RTUNICP Cp;
@@ -1047,6 +1045,6 @@
             break;
         }
+        cch -= cchCp;
         psz = RTLatin1PutCp(psz, Cp);
-        cch -= cchCp;
     }
 
@@ -1224,4 +1222,5 @@
     {
         /* figure the length and validate the first octet. */
+/** @todo RT_USE_RTC_3629 */
         unsigned cb;
         if (!(uch & RT_BIT(5)))
@@ -1375,4 +1374,5 @@
     {
         /* figure the length and validate the first octet. */
+/** @todo RT_USE_RTC_3629 */
         unsigned cb;
         if (!(uch & RT_BIT(5)))
@@ -1500,4 +1500,5 @@
     else if (uc < 0x00010000)
     {
+/** @todo RT_USE_RTC_3629 */
         if (   uc < 0x0000d8000
              || (   uc > 0x0000dfff
@@ -1514,4 +1515,5 @@
         }
     }
+/** @todo RT_USE_RTC_3629 */
     else if (uc < 0x00200000)
     {
