VirtualBox

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp@ 103795

Last change on this file since 103795 was 98103, checked in by vboxsync, 20 months ago

Copyright year updates by scm.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 20.5 KB
Line 
1/* $Id: QMTranslatorImpl.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2/** @file
3 * VirtualBox API translation handling class
4 */
5
6/*
7 * Copyright (C) 2014-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#include <vector>
29#include <set>
30#include <algorithm>
31#include <iprt/sanitized/iterator>
32#include <iprt/errcore.h>
33#include <iprt/file.h>
34#include <iprt/asm.h>
35#include <iprt/string.h>
36#include <iprt/strcache.h>
37#include <VBox/com/string.h>
38#include <VBox/log.h>
39#include <QMTranslator.h>
40
/** QM file magic number, i.e. the byte sequence checkMagic() expects at the
 *  very start of the data. */
static const uint8_t g_abMagic[] =
{
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};
/** Size of the magic number in bytes. */
static const size_t g_cbMagic = sizeof(g_abMagic);
48
/**
 * Exception type used internally by the QM parser to report malformed input.
 *
 * The message pointer is stored as-is and never copied or freed, so it must
 * outlive the exception object.  Every throw site in this file passes a
 * string literal.
 */
class QMException : public std::exception
{
    /** The message returned by what(); not owned by this object. */
    const char *m_str;
public:
    /**
     * @param   str     The failure description.  Explicit so that a plain
     *                  C string is never silently converted into an exception.
     */
    explicit QMException(const char *str) : m_str(str) {}

    /** @returns the message pointer given to the constructor. */
    virtual const char *what() const throw() { return m_str; }
};
57
58/* Bytes stream. Used by the parser to iterate through the data */
59class QMBytesStream
60{
61 size_t m_cbSize;
62 const uint8_t * const m_dataStart;
63 const uint8_t *m_iter;
64 const uint8_t *m_end;
65
66public:
67
68 QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
69 : m_cbSize(dataStart ? cbSize : 0)
70 , m_dataStart(dataStart)
71 , m_iter(dataStart)
72 {
73 setEnd();
74 }
75
76 /** Sets end pointer.
77 * Used in message reader to detect the end of message block */
78 inline void setEnd(size_t pos = 0)
79 {
80 m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
81 }
82
83 inline uint8_t read8()
84 {
85 checkSize(1);
86 return *m_iter++;
87 }
88
89 inline uint32_t read32()
90 {
91 checkSize(4);
92 uint32_t result = *reinterpret_cast<const uint32_t *>(m_iter);
93 m_iter += 4;
94 return RT_BE2H_U32(result);
95 }
96
97 /** Reads string in UTF16 and converts it into a UTF8 string */
98 inline com::Utf8Str readUtf16String()
99 {
100 uint32_t size = read32();
101 checkSize(size);
102 if (size & 1)
103 throw QMException("Incorrect string size");
104
105 /* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes
106 to encode, so reserve twice the size plus a terminator for the result. */
107 com::Utf8Str result;
108 result.reserve(size * 2 + 1);
109 char *pszStr = result.mutableRaw();
110 int vrc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL);
111 if (RT_SUCCESS(vrc))
112 result.jolt();
113 else
114 throw QMException("Translation from UTF-16 to UTF-8 failed");
115
116 m_iter += size;
117 return result;
118 }
119
120 /**
121 * Reads a string, forcing UTF-8 encoding.
122 */
123 inline com::Utf8Str readString()
124 {
125 uint32_t size = read32();
126 checkSize(size);
127
128 com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
129 if (size > 0)
130 {
131 RTStrPurgeEncoding(result.mutableRaw());
132 result.jolt();
133 }
134
135 m_iter += size;
136 return result;
137 }
138
139 /**
140 * Reads memory block
141 * Returns number of bytes read
142 */
143 inline uint32_t read(char *bBuf, uint32_t cbSize)
144 {
145 if (!bBuf || !cbSize)
146 return 0;
147 cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter));
148 memcpy(bBuf, m_iter, cbSize);
149 m_iter += cbSize;
150 return cbSize;
151 }
152
153 /** Checks the magic number.
154 * Should be called when in the beginning of the data
155 * @throws exception on mismatch */
156 inline void checkMagic()
157 {
158 checkSize(g_cbMagic);
159 if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0))
160 m_iter += g_cbMagic;
161 else
162 throw QMException("Wrong magic number");
163 }
164
165 /** Has we reached the end pointer? */
166 inline bool hasFinished()
167 {
168 return m_iter == m_end;
169 }
170
171 /** Returns current stream position */
172 inline size_t tellPos()
173 {
174 return (size_t)(m_iter - m_dataStart);
175 }
176
177 /** Moves current pointer to a desired position */
178 inline void seek(uint32_t offSkip)
179 {
180 size_t cbLeft = (size_t)(m_end - m_iter);
181 if (cbLeft >= offSkip)
182 m_iter += offSkip;
183 else
184 m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */
185 }
186
187 /** Checks whether stream has enough data to read size bytes */
188 inline void checkSize(size_t size)
189 {
190 if (RT_LIKELY((size_t)(m_end - m_iter) >= size))
191 return;
192 throw QMException("Incorrect item size");
193 }
194};
195
196/* Internal QMTranslator implementation */
197class QMTranslator_Impl
198{
199 /** Used while parsing */
200 struct QMMessageParse
201 {
202 /* Everything is in UTF-8 */
203 std::vector<com::Utf8Str> astrTranslations;
204 com::Utf8Str strContext;
205 com::Utf8Str strComment;
206 com::Utf8Str strSource;
207
208 QMMessageParse() {}
209 };
210
211 struct QMMessage
212 {
213 const char *pszContext;
214 const char *pszSource;
215 const char *pszComment;
216 std::vector<const char *> vecTranslations;
217 uint32_t hash;
218
219 QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
220 {}
221
222 QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
223 : pszContext(addStr(hStrCache, rSrc.strContext))
224 , pszSource(addStr(hStrCache, rSrc.strSource))
225 , pszComment(addStr(hStrCache, rSrc.strComment))
226 , hash(RTStrHash1(pszSource))
227 {
228 for (size_t i = 0; i < rSrc.astrTranslations.size(); i++)
229 vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i]));
230 }
231
232 /** Helper. */
233 static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
234 {
235 if (rSrc.isNotEmpty())
236 {
237 const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
238 if (RT_LIKELY(psz))
239 return psz;
240 throw std::bad_alloc();
241 }
242 return NULL;
243 }
244
245 };
246
247 struct HashOffset
248 {
249 uint32_t hash;
250 uint32_t offset;
251
252 HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
253
254 bool operator<(const HashOffset &obj) const
255 {
256 return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
257 }
258
259 };
260
261 typedef std::set<HashOffset> QMHashSet;
262 typedef QMHashSet::const_iterator QMHashSetConstIter;
263 typedef std::vector<QMMessage> QMMessageArray;
264 typedef std::vector<uint8_t> QMByteArray;
265
266 QMHashSet m_hashSet;
267 QMMessageArray m_messageArray;
268 QMByteArray m_pluralRules;
269
270public:
271
272 QMTranslator_Impl() {}
273
274 enum PluralOpCodes
275 {
276 Pl_Eq = 0x01,
277 Pl_Lt = 0x02,
278 Pl_Leq = 0x03,
279 Pl_Between = 0x04,
280
281 Pl_OpMask = 0x07,
282
283 Pl_Not = 0x08,
284 Pl_Mod10 = 0x10,
285 Pl_Mod100 = 0x20,
286 Pl_Lead1000 = 0x40,
287
288 Pl_And = 0xFD,
289 Pl_Or = 0xFE,
290 Pl_NewRule = 0xFF,
291
292 Pl_LMask = 0x80,
293 };
294
295 /*
296 * Rules format:
297 * <O><2>[<3>][<&&><O><2>[<3>]]...[<||><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
298 * where:
299 * <O> - OpCode
300 * <2> - Second operand
301 * <3> - Third operand
302 * <&&> - 'And' operation
303 * <||> - 'Or' operation
304 * <New> - Start of rule for next plural form
305 * Rules are ordered by plural forms, i.e:
306 * <rule for first form (i.e. single)><New><rule for next form>...
307 */
308 bool checkPlural(const QMByteArray &aRules) const
309 {
310 if (aRules.empty())
311 return true;
312
313 uint32_t iPos = 0;
314 do {
315 uint8_t bOpCode = aRules[iPos];
316
317 /* Invalid place of And/Or/NewRule */
318 if (bOpCode & Pl_LMask)
319 return false;
320
321 /* 2nd operand */
322 iPos++;
323
324 /* 2nd operand missing */
325 if (iPos == aRules.size())
326 return false;
327
328 /* Invalid OpCode */
329 if ((bOpCode & Pl_OpMask) == 0)
330 return false;
331
332 if ((bOpCode & Pl_OpMask) == Pl_Between)
333 {
334 /* 3rd operand */
335 iPos++;
336
337 /* 3rd operand missing */
338 if (iPos == aRules.size())
339 return false;
340 }
341
342 /* And/Or/NewRule */
343 iPos++;
344
345 /* All rules checked */
346 if (iPos == aRules.size())
347 return true;
348
349 } while ( ( (aRules[iPos] == Pl_And)
350 || (aRules[iPos] == Pl_Or)
351 || (aRules[iPos] == Pl_NewRule))
352 && ++iPos != aRules.size());
353
354 return false;
355 }
356
357 size_t plural(size_t aNum) const
358 {
359 if (aNum == ~(size_t)0 || m_pluralRules.empty())
360 return 0;
361
362 size_t uPluralNumber = 0;
363 uint32_t iPos = 0;
364
365 /* Rules loop */
366 for (;;)
367 {
368 bool fOr = false;
369 /* 'Or' loop */
370 for (;;)
371 {
372 bool fAnd = true;
373 /* 'And' loop */
374 for (;;)
375 {
376 int iOpCode = m_pluralRules[iPos++];
377 size_t iOpLeft = aNum;
378 if (iOpCode & Pl_Mod10)
379 iOpLeft %= 10;
380 else if (iOpCode & Pl_Mod100)
381 iOpLeft %= 100;
382 else if (iOpCode & Pl_Lead1000)
383 {
384 while (iOpLeft >= 1000)
385 iOpLeft /= 1000;
386 }
387 size_t iOpRight = m_pluralRules[iPos++];
388 int iOp = iOpCode & Pl_OpMask;
389 size_t iOpRight1 = 0;
390 if (iOp == Pl_Between)
391 iOpRight1 = m_pluralRules[iPos++];
392
393 bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight)
394 || (iOp == Pl_Lt && iOpLeft < iOpRight)
395 || (iOp == Pl_Leq && iOpLeft <= iOpRight)
396 || (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
397 if (iOpCode & Pl_Not)
398 fResult = !fResult;
399
400 fAnd = fAnd && fResult;
401 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_And)
402 break;
403 iPos++;
404 }
405 fOr = fOr || fAnd;
406 if (iPos == m_pluralRules.size() || m_pluralRules[iPos] != Pl_Or)
407 break;
408 iPos++;
409 }
410 if (fOr)
411 return uPluralNumber;
412
413 /* Qt returns last plural number if none of rules are match. */
414 uPluralNumber++;
415
416 if (iPos >= m_pluralRules.size())
417 return uPluralNumber;
418
419 iPos++; // Skip Pl_NewRule
420 }
421 }
422
423 const char *translate(const char *pszContext,
424 const char *pszSource,
425 const char *pszDisamb,
426 const size_t aNum,
427 const char **ppszSafeSource) const RT_NOEXCEPT
428 {
429 QMHashSetConstIter lowerIter, upperIter;
430
431 /* As turned out, comments (pszDisamb) are not kept always in result qm file
432 * Therefore, exclude them from the hash */
433 uint32_t hash = RTStrHash1(pszSource);
434 lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
435 upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
436
437 /*
438 * Check different combinations with and without context and
439 * disambiguation. This can help us to find the translation even
440 * if context or disambiguation are not know or properly defined.
441 */
442 const char *apszCtx[] = {pszContext, pszContext, NULL, NULL};
443 const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL};
444 AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
445
446 for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
447 {
448 for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
449 {
450 const QMMessage &message = m_messageArray[iter->offset];
451 if ( RTStrCmp(message.pszSource, pszSource) == 0
452 && (!apszCtx[i] || !*apszCtx[i] || RTStrCmp(message.pszContext, apszCtx[i]) == 0)
453 && (!apszDisabm[i] || !*apszDisabm[i] || RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
454 {
455 *ppszSafeSource = message.pszSource;
456 const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations;
457 size_t const idxPlural = plural(aNum);
458 return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)];
459 }
460 }
461 }
462
463 *ppszSafeSource = NULL;
464 return pszSource;
465 }
466
467 void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
468 {
469 /* Load into local variables. If we failed during the load,
470 * it would allow us to keep the object in a valid (previous) state. */
471 QMHashSet hashSet;
472 QMMessageArray messageArray;
473 QMByteArray pluralRules;
474
475 stream.checkMagic();
476
477 while (!stream.hasFinished())
478 {
479 uint32_t sectionCode = stream.read8();
480 uint32_t sLen = stream.read32();
481
482 /* Hashes and Context sections are ignored. They contain hash tables
483 * to speed-up search which is not useful since we recalculate all hashes
484 * and don't perform context search by hash */
485 switch (sectionCode)
486 {
487 case Messages:
488 parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
489 break;
490 case Hashes:
491 /* Only get size information to speed-up vector filling
492 * if Hashes section goes in the file before Message section */
493 if (messageArray.empty())
494 messageArray.reserve(sLen >> 3);
495 stream.seek(sLen);
496 break;
497 case NumerusRules:
498 {
499 pluralRules.resize(sLen);
500 uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen);
501 if (cbSize < sLen)
502 throw QMException("Incorrect section size");
503 if (!checkPlural(pluralRules))
504 pluralRules.erase(pluralRules.begin(), pluralRules.end());
505 break;
506 }
507 case Contexts:
508 case Dependencies:
509 case Language:
510 stream.seek(sLen);
511 break;
512 default:
513 throw QMException("Unkown section");
514 }
515 }
516
517 /* Store the data into member variables.
518 * The following functions never generate exceptions */
519 m_hashSet.swap(hashSet);
520 m_messageArray.swap(messageArray);
521 m_pluralRules.swap(pluralRules);
522 }
523
524private:
525
526 /* Some QM stuff */
527 enum SectionType
528 {
529 Contexts = 0x2f,
530 Hashes = 0x42,
531 Messages = 0x69,
532 NumerusRules = 0x88,
533 Dependencies = 0x96,
534 Language = 0xa7
535 };
536
537 enum MessageType
538 {
539 End = 1,
540 SourceText16 = 2,
541 Translation = 3,
542 Context16 = 4,
543 Obsolete1 = 5, /**< was Hash */
544 SourceText = 6,
545 Context = 7,
546 Comment = 8
547 };
548
549 /* Read messages from the stream. */
550 static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
551 QMMessageArray * const messageArray, size_t cbSize)
552 {
553 stream.setEnd(stream.tellPos() + cbSize);
554 uint32_t cMessage = 0;
555 while (!stream.hasFinished())
556 {
557 /* Process the record. Skip anything that doesn't have a source
558 string or any valid translations. Using C++ strings for temporary
559 storage here, as we don't want to pollute the cache we bogus strings
560 in case of duplicate sub-records or invalid records. */
561 QMMessageParse ParsedMsg;
562 parseMessageRecord(stream, &ParsedMsg);
563 if ( ParsedMsg.astrTranslations.size() > 0
564 && ParsedMsg.strSource.isNotEmpty())
565 {
566 /* Copy the strings over into the string cache and a hashed QMMessage,
567 before adding it to the result. */
568 QMMessage HashedMsg(hStrCache, ParsedMsg);
569 hashSet->insert(HashOffset(HashedMsg.hash, cMessage++));
570 messageArray->push_back(HashedMsg);
571
572 }
573 /*else: wtf? */
574 }
575 stream.setEnd();
576 }
577
578 /* Parse one message from the stream */
579 static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
580 {
581 while (!stream.hasFinished())
582 {
583 uint8_t type = stream.read8();
584 switch (type)
585 {
586 case End:
587 return;
588 /* Ignored as obsolete */
589 case Context16:
590 case SourceText16:
591 stream.seek(stream.read32());
592 break;
593 case Translation:
594 message->astrTranslations.push_back(stream.readUtf16String());
595 break;
596
597 case SourceText:
598 message->strSource = stream.readString();
599 break;
600
601 case Context:
602 message->strContext = stream.readString();
603 break;
604
605 case Comment:
606 message->strComment = stream.readString();
607 break;
608
609 default:
610 /* Ignore unknown/obsolete block */
611 LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
612 break;
613 }
614 }
615 }
616};
617
618/* Inteface functions implementation */
619QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {}
620
621QMTranslator::~QMTranslator() { delete m_impl; }
622
623const char *QMTranslator::translate(const char *pszContext, const char *pszSource, const char **ppszSafeSource,
624 const char *pszDisamb /*= NULL*/, const size_t aNum /*= ~(size_t)0*/) const RT_NOEXCEPT
625
626{
627 return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource);
628}
629
630int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
631{
632 /* To free safely the file in case of exception */
633 struct FileLoader
634 {
635 uint8_t *data;
636 size_t cbSize;
637 int vrc;
638 FileLoader(const char *pszFname)
639 {
640 vrc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
641 }
642
643 ~FileLoader()
644 {
645 if (isSuccess())
646 RTFileReadAllFree(data, cbSize);
647 }
648 bool isSuccess() { return RT_SUCCESS(vrc); }
649 };
650
651 try
652 {
653 FileLoader loader(pszFilename);
654 if (loader.isSuccess())
655 {
656 QMBytesStream stream(loader.data, loader.cbSize);
657 m_impl->load(stream, hStrCache);
658 }
659 return loader.vrc;
660 }
661 catch(std::exception &e)
662 {
663 LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
664 return VERR_INTERNAL_ERROR;
665 }
666 catch(...)
667 {
668 LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
669 return VERR_GENERAL_FAILURE;
670 }
671}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle
ContactPrivacy/Do Not Sell My InfoTerms of Use