VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/string/base64.cpp

Last change on this file was 98103, checked in by vboxsync, 16 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.2 KB
Line 
1/* $Id: base64.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * IPRT - Base64, MIME content transfer encoding.
4 *
5 * @note The base64-utf16.cpp file must be diffable with this one.
6 * Fixes typically apply to both files.
7 */
8
9/*
10 * Copyright (C) 2009-2023 Oracle and/or its affiliates.
11 *
12 * This file is part of VirtualBox base platform packages, as
13 * available from https://www.virtualbox.org.
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation, in version 3 of the
18 * License.
19 *
20 * This program is distributed in the hope that it will be useful, but
21 * WITHOUT ANY WARRANTY; without even the implied warranty of
22 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 * General Public License for more details.
24 *
25 * You should have received a copy of the GNU General Public License
26 * along with this program; if not, see <https://www.gnu.org/licenses>.
27 *
28 * The contents of this file may alternatively be used under the terms
29 * of the Common Development and Distribution License Version 1.0
30 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
31 * in the VirtualBox distribution, in which case the provisions of the
32 * CDDL are applicable instead of those of the GPL.
33 *
34 * You may elect to license modified versions of this file under the
35 * terms and conditions of either the GPL or the CDDL or both.
36 *
37 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
38 */
39
40
41/*********************************************************************************************************************************
42* Header Files *
43*********************************************************************************************************************************/
44#include <iprt/base64.h>
45#include "internal/iprt.h"
46
47#include <iprt/assert.h>
48#include <iprt/err.h>
49#include <iprt/ctype.h>
50#include <iprt/string.h>
51#ifdef RT_STRICT
52# include <iprt/asm.h>
53#endif
54
55#include "base64.h"
56
57
58/*********************************************************************************************************************************
59* Global Variables *
60*********************************************************************************************************************************/
61/** Base64 character to value lookup table. (RFC 2045)
 *
 * Indexed by the character code (0..255).  The 64 alphabet characters map to
 * their 6-bit values (0..63); every other entry holds a marker above 63:
 *      - 0xfe for the NUL character (index 0),
 *      - 0xc0 for TAB, LF, VT, FF, CR (0x09..0x0d) and space (0x20),
 *      - 0xe0 for the '=' padding character (0x3d),
 *      - 0xff for everything else.
 *
 * NOTE(review): the markers are presumably the BASE64_NULL, BASE64_SPACE,
 * BASE64_PAD and BASE64_INVALID constants from base64.h - confirm there.
 *
 * ASSUMES ASCII / UTF-8. */
63DECL_HIDDEN_CONST(const uint8_t) g_au8rtBase64CharToVal[256] =
64{
65 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xff, 0xff, /* 0x00..0x0f */
66 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x10..0x1f */
67 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 62, 0xff, 0xff, 0xff, 63, /* 0x20..0x2f */
68 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, /* 0x30..0x3f */
69 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 0x40..0x4f */
70 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x50..0x5f */
71 0xff, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, /* 0x60..0x6f */
72 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x70..0x7f */
73 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x80..0x8f */
74 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x90..0x9f */
75 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xa0..0xaf */
76 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xb0..0xbf */
77 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xc0..0xcf */
78 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xd0..0xdf */
79 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0xe0..0xef */
80 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xf0..0xff */
81};
82
83/** Value to Base64 character. (RFC 2045)
 * Indexed by the 6-bit value (0..63); the 65th element holds the string
 * terminator of the literal. */
84DECL_HIDDEN_CONST(const char) g_szrtBase64ValToChar[64+1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
85
86/** The end-of-line lengths (indexed by style flag value).
 * The encoder looks entries up with (fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK)
 * and asserts that each length equals the number of non-zero characters in
 * the matching row of g_aachrtBase64EolStyles. */
87DECL_HIDDEN_CONST(const size_t) g_acchrtBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1] =
88{
89 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ RTBASE64_EOL_SIZE,
90 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ 0,
91 /*[RTBASE64_FLAGS_EOL_LF ]:*/ 1,
92 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ 2
93};
94
95/** The end-of-line characters (zero, one or two).
 * Indexed the same way as g_acchrtBase64EolStyles, i.e. by
 * (fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK).  Unused positions hold '\0'; the
 * native row picks LF when RTBASE64_EOL_SIZE is 1 and CR+LF otherwise. */
96DECL_HIDDEN_CONST(const char) g_aachrtBase64EolStyles[RTBASE64_FLAGS_EOL_STYLE_MASK + 1][2] =
97{
98 /*[RTBASE64_FLAGS_EOL_NATIVE ]:*/ { RTBASE64_EOL_SIZE == 1 ? '\n' : '\r', RTBASE64_EOL_SIZE == 1 ? '\0' : '\n', },
99 /*[RTBASE64_FLAGS_NO_LINE_BREAKS]:*/ { '\0', '\0' },
100 /*[RTBASE64_FLAGS_EOL_LF ]:*/ { '\n', '\0' },
101 /*[RTBASE64_FLAGS_EOL_CRLF ]:*/ { '\r', '\n' },
102};
103
104
105
106#ifdef RT_STRICT
107/**
108 * Perform table sanity checks on the first call.
109 */
110DECLHIDDEN(void) rtBase64Sanity(void)
111{
112 static bool s_fSane = false;
113 if (RT_UNLIKELY(!s_fSane))
114 {
115 for (unsigned i = 0; i < 64; i++)
116 {
117 unsigned ch = g_szrtBase64ValToChar[i];
118 Assert(ch);
119 Assert(g_au8rtBase64CharToVal[ch] == i);
120 }
121
122 for (unsigned i = 0; i < 256; i++)
123 {
124 uint8_t u8 = g_au8rtBase64CharToVal[i];
125 Assert( ( u8 == BASE64_INVALID
126 && !RT_C_IS_ALNUM(i)
127 && !RT_C_IS_SPACE(i))
128 || ( u8 == BASE64_PAD
129 && i == '=')
130 || ( u8 == BASE64_SPACE
131 && RT_C_IS_SPACE(i))
132 || ( u8 < 64
133 && (unsigned)g_szrtBase64ValToChar[u8] == i)
134 || ( u8 == BASE64_NULL
135 && i == 0) );
136 }
137 ASMAtomicWriteBool(&s_fSane, true);
138 }
139}
140#endif /* RT_STRICT */
141
142
143
144/** Fetched the next character in the string and translates it. */
145DECL_FORCE_INLINE(uint8_t) rtBase64TranslateNext(const char *pszString, size_t cchStringMax)
146{
147 AssertCompile(sizeof(unsigned char) == sizeof(uint8_t));
148 if (cchStringMax > 0)
149 return g_au8rtBase64CharToVal[(unsigned char)*pszString];
150 return BASE64_NULL;
151}
152
153
154/*
155 * Mostly the same as RTBase64DecodedUtf16SizeEx, except for the simpler
156 * character type. Fixes must be applied to both copies of the code.
157 */
158RTDECL(ssize_t) RTBase64DecodedSizeEx(const char *pszString, size_t cchStringMax, char **ppszEnd)
159{
160#ifdef RT_STRICT
161 rtBase64Sanity();
162#endif
163
164 /*
165 * Walk the string until a non-encoded or non-space character is encountered.
166 */
167 uint32_t c6Bits = 0;
168 uint8_t u8;
169
170 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
171 {
172 if (u8 < 64)
173 c6Bits++;
174 else if (RT_UNLIKELY(u8 != BASE64_SPACE))
175 break;
176
177 /* advance */
178 pszString++;
179 cchStringMax--;
180 }
181
182 /*
183 * Padding can only be found at the end and there is
184 * only 1 or 2 padding chars. Deal with it first.
185 */
186 unsigned cbPad = 0;
187 if (u8 == BASE64_PAD)
188 {
189 cbPad = 1;
190 c6Bits++;
191 pszString++;
192 cchStringMax--;
193 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
194 {
195 if (u8 != BASE64_SPACE)
196 {
197 if (u8 != BASE64_PAD)
198 break;
199 c6Bits++;
200 cbPad++;
201 }
202 pszString++;
203 cchStringMax--;
204 }
205 if (cbPad >= 3)
206 return -1;
207 }
208
209 /*
210 * Invalid char and no where to indicate where the
211 * Base64 text ends? Return failure.
212 */
213 if ( u8 == BASE64_INVALID
214 && !ppszEnd)
215 return -1;
216
217 /*
218 * Recalc 6-bit to 8-bit and adjust for padding.
219 */
220 if (ppszEnd)
221 *ppszEnd = (char *)pszString;
222 return rtBase64DecodedSizeRecalc(c6Bits, cbPad);
223}
224RT_EXPORT_SYMBOL(RTBase64DecodedSizeEx);
225
226
227RTDECL(ssize_t) RTBase64DecodedSize(const char *pszString, char **ppszEnd)
228{
229 return RTBase64DecodedSizeEx(pszString, RTSTR_MAX, ppszEnd);
230}
231RT_EXPORT_SYMBOL(RTBase64DecodedSize);
232
233
234RTDECL(int) RTBase64DecodeEx(const char *pszString, size_t cchStringMax, void *pvData, size_t cbData,
235 size_t *pcbActual, char **ppszEnd)
236{
237#ifdef RT_STRICT
238 rtBase64Sanity();
239#endif
240
241 /*
242 * Process input in groups of 4 input / 3 output chars.
243 */
244 uint8_t u8Trio[3] = { 0, 0, 0 }; /* shuts up gcc */
245 uint8_t *pbData = (uint8_t *)pvData;
246 uint8_t u8;
247 unsigned c6Bits = 0;
248
249 for (;;)
250 {
251 /* The first 6-bit group. */
252 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
253 pszString++, cchStringMax--;
254 if (u8 >= 64)
255 {
256 c6Bits = 0;
257 break;
258 }
259 u8Trio[0] = u8 << 2;
260 pszString++;
261 cchStringMax--;
262
263 /* The second 6-bit group. */
264 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
265 pszString++, cchStringMax--;
266 if (u8 >= 64)
267 {
268 c6Bits = 1;
269 break;
270 }
271 u8Trio[0] |= u8 >> 4;
272 u8Trio[1] = u8 << 4;
273 pszString++;
274 cchStringMax--;
275
276 /* The third 6-bit group. */
277 u8 = BASE64_INVALID;
278 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
279 pszString++, cchStringMax--;
280 if (u8 >= 64)
281 {
282 c6Bits = 2;
283 break;
284 }
285 u8Trio[1] |= u8 >> 2;
286 u8Trio[2] = u8 << 6;
287 pszString++;
288 cchStringMax--;
289
290 /* The fourth 6-bit group. */
291 u8 = BASE64_INVALID;
292 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) == BASE64_SPACE)
293 pszString++, cchStringMax--;
294 if (u8 >= 64)
295 {
296 c6Bits = 3;
297 break;
298 }
299 u8Trio[2] |= u8;
300 pszString++;
301 cchStringMax--;
302
303 /* flush the trio */
304 if (cbData < 3)
305 return VERR_BUFFER_OVERFLOW;
306 cbData -= 3;
307 pbData[0] = u8Trio[0];
308 pbData[1] = u8Trio[1];
309 pbData[2] = u8Trio[2];
310 pbData += 3;
311 }
312
313 /*
314 * Padding can only be found at the end and there is
315 * only 1 or 2 padding chars. Deal with it first.
316 */
317 unsigned cbPad = 0;
318 if (u8 == BASE64_PAD)
319 {
320 cbPad = 1;
321 pszString++;
322 cchStringMax--;
323 while ((u8 = rtBase64TranslateNext(pszString, cchStringMax)) != BASE64_NULL)
324 {
325 if (u8 != BASE64_SPACE)
326 {
327 if (u8 != BASE64_PAD)
328 break;
329 cbPad++;
330 }
331 pszString++;
332 cchStringMax--;
333 }
334 if (cbPad >= 3)
335 return VERR_INVALID_BASE64_ENCODING;
336 }
337
338 /*
339 * Invalid char and no where to indicate where the
340 * Base64 text ends? Return failure.
341 */
342 if ( u8 == BASE64_INVALID
343 && !ppszEnd)
344 return VERR_INVALID_BASE64_ENCODING;
345
346 /*
347 * Check padding vs. pending sextets, if anything left to do finish it off.
348 */
349 if (c6Bits || cbPad)
350 {
351 if (c6Bits + cbPad != 4)
352 return VERR_INVALID_BASE64_ENCODING;
353
354 switch (c6Bits)
355 {
356 case 1:
357 u8Trio[1] = u8Trio[2] = 0;
358 break;
359 case 2:
360 u8Trio[2] = 0;
361 break;
362 case 3:
363 default:
364 break;
365 }
366 switch (3 - cbPad)
367 {
368 case 1:
369 if (cbData < 1)
370 return VERR_BUFFER_OVERFLOW;
371 cbData--;
372 pbData[0] = u8Trio[0];
373 pbData++;
374 break;
375
376 case 2:
377 if (cbData < 2)
378 return VERR_BUFFER_OVERFLOW;
379 cbData -= 2;
380 pbData[0] = u8Trio[0];
381 pbData[1] = u8Trio[1];
382 pbData += 2;
383 break;
384
385 default:
386 break;
387 }
388 }
389
390 /*
391 * Set optional return values and return successfully.
392 */
393 if (ppszEnd)
394 *ppszEnd = (char *)pszString;
395 if (pcbActual)
396 *pcbActual = pbData - (uint8_t *)pvData;
397 return VINF_SUCCESS;
398}
399RT_EXPORT_SYMBOL(RTBase64DecodeEx);
400
401
402RTDECL(int) RTBase64Decode(const char *pszString, void *pvData, size_t cbData, size_t *pcbActual, char **ppszEnd)
403{
404 return RTBase64DecodeEx(pszString, RTSTR_MAX, pvData, cbData, pcbActual, ppszEnd);
405}
406RT_EXPORT_SYMBOL(RTBase64Decode);
407
408
409RTDECL(size_t) RTBase64EncodedLength(size_t cbData)
410{
411 return RTBase64EncodedLengthEx(cbData, 0);
412}
413RT_EXPORT_SYMBOL(RTBase64EncodedLength);
414
415
416RTDECL(size_t) RTBase64EncodedLengthEx(size_t cbData, uint32_t fFlags)
417{
418 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
419
420 if (cbData * 8 / 8 != cbData)
421 {
422 AssertReturn(sizeof(size_t) == sizeof(uint64_t), ~(size_t)0);
423 uint64_t cch = cbData * (uint64_t)8;
424 while (cch % 24)
425 cch += 8;
426 cch /= 6;
427 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
428 return cch;
429 }
430
431 size_t cch = cbData * 8;
432 while (cch % 24)
433 cch += 8;
434 cch /= 6;
435 cch += ((cch - 1) / RTBASE64_LINE_LEN) * cchEol;
436 return cch;
437}
438RT_EXPORT_SYMBOL(RTBase64EncodedLengthEx);
439
440
441RTDECL(int) RTBase64Encode(const void *pvData, size_t cbData, char *pszBuf, size_t cbBuf, size_t *pcchActual)
442{
443 return RTBase64EncodeEx(pvData, cbData, 0, pszBuf, cbBuf, pcchActual);
444}
445RT_EXPORT_SYMBOL(RTBase64Encode);
446
447
448/*
449 * Please note that RTBase64EncodeUtf16Ex contains an almost exact copy of
450 * this code, just using different output character type and variable prefixes.
451 * So, all fixes must be applied to both versions of the code.
452 */
453RTDECL(int) RTBase64EncodeEx(const void *pvData, size_t cbData, uint32_t fFlags,
454 char *pszBuf, size_t cbBuf, size_t *pcchActual)
455{
456 /* Expand the EOL style flags: */
457 size_t const cchEol = g_acchrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK];
458 char const chEol0 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][0];
459 char const chEol1 = g_aachrtBase64EolStyles[fFlags & RTBASE64_FLAGS_EOL_STYLE_MASK][1];
460 Assert(cchEol == (chEol0 != '\0' ? 1U : 0U) + (chEol1 != '\0' ? 1U : 0U));
461
462 /*
463 * Process whole "trios" of input data.
464 */
465 uint8_t u8A;
466 uint8_t u8B;
467 uint8_t u8C;
468 size_t cbLineFeed = cchEol ? cbBuf - RTBASE64_LINE_LEN : ~(size_t)0;
469 const uint8_t *pbSrc = (const uint8_t *)pvData;
470 char *pchDst = pszBuf;
471 while (cbData >= 3)
472 {
473 if (cbBuf < 4 + 1)
474 return VERR_BUFFER_OVERFLOW;
475
476 /* encode */
477 u8A = pbSrc[0];
478 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
479 u8B = pbSrc[1];
480 pchDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
481 u8C = pbSrc[2];
482 pchDst[2] = g_szrtBase64ValToChar[((u8B << 2) & 0x3f) | (u8C >> 6)];
483 pchDst[3] = g_szrtBase64ValToChar[u8C & 0x3f];
484
485 /* advance */
486 cbBuf -= 4;
487 pchDst += 4;
488 cbData -= 3;
489 pbSrc += 3;
490
491 /* deal out end-of-line */
492 if (cbBuf == cbLineFeed && cbData && cchEol)
493 {
494 if (cbBuf < cchEol + 1)
495 return VERR_BUFFER_OVERFLOW;
496 cbBuf -= cchEol;
497 *pchDst++ = chEol0;
498 if (chEol1)
499 *pchDst++ = chEol1;
500 cbLineFeed = cbBuf - RTBASE64_LINE_LEN;
501 }
502 }
503
504 /*
505 * Deal with the odd bytes and string termination.
506 */
507 if (cbData)
508 {
509 if (cbBuf < 4 + 1)
510 return VERR_BUFFER_OVERFLOW;
511 switch (cbData)
512 {
513 case 1:
514 u8A = pbSrc[0];
515 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
516 pchDst[1] = g_szrtBase64ValToChar[(u8A << 4) & 0x3f];
517 pchDst[2] = '=';
518 pchDst[3] = '=';
519 break;
520 case 2:
521 u8A = pbSrc[0];
522 pchDst[0] = g_szrtBase64ValToChar[u8A >> 2];
523 u8B = pbSrc[1];
524 pchDst[1] = g_szrtBase64ValToChar[((u8A << 4) & 0x3f) | (u8B >> 4)];
525 pchDst[2] = g_szrtBase64ValToChar[(u8B << 2) & 0x3f];
526 pchDst[3] = '=';
527 break;
528 }
529 pchDst += 4;
530 }
531
532 *pchDst = '\0';
533
534 if (pcchActual)
535 *pcchActual = pchDst - pszBuf;
536 return VINF_SUCCESS;
537}
538RT_EXPORT_SYMBOL(RTBase64EncodeEx);
539
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
Contact | Privacy policy | Terms of Use