Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

QMTranslatorImpl.cpp

Last change on this file was 98103, checked in by vboxsync, 16 months ago
Copyright year updates by scm.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 20.5 KB

Line
1	/* $Id: QMTranslatorImpl.cpp 98103 2023-01-17 14:15:46Z vboxsync $ */
2	/** @file
3	* VirtualBox API translation handling class
4	*/
5
6	/*
7	* Copyright (C) 2014-2023 Oracle and/or its affiliates.
8	*
9	* This file is part of VirtualBox base platform packages, as
10	* available from https://www.virtualbox.org.
11	*
12	* This program is free software; you can redistribute it and/or
13	* modify it under the terms of the GNU General Public License
14	* as published by the Free Software Foundation, in version 3 of the
15	* License.
16	*
17	* This program is distributed in the hope that it will be useful, but
18	* WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	* General Public License for more details.
21	*
22	* You should have received a copy of the GNU General Public License
23	* along with this program; if not, see <https://www.gnu.org/licenses>.
24	*
25	* SPDX-License-Identifier: GPL-3.0-only
26	*/
27
28	#include <vector>
29	#include <set>
30	#include <algorithm>
31	#include <iprt/sanitized/iterator>
32	#include <iprt/errcore.h>
33	#include <iprt/file.h>
34	#include <iprt/asm.h>
35	#include <iprt/string.h>
36	#include <iprt/strcache.h>
37	#include <VBox/com/string.h>
38	#include <VBox/log.h>
39	#include <QMTranslator.h>
40
/** Magic number every QM file starts with. */
static const uint8_t g_abMagic[] =
{
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};
/** Size of the QM file magic number in bytes. */
static const size_t g_cbMagic = sizeof(g_abMagic);
48
/**
 * Exception type used internally by the QM parser to report malformed input.
 *
 * Only the pointer to the message is stored (no copy is made), so the string
 * passed in must outlive the exception object.  The throw sites visible in
 * this file all pass string literals.
 */
class QMException : public std::exception
{
    /** Message returned by what(); not owned by this object. */
    const char *m_str;
public:
    /** @param str  Message text; must stay valid for the exception's lifetime. */
    explicit QMException(const char *str) : m_str(str) {}

    /** @returns The message pointer given to the constructor. */
    virtual const char *what() const throw() { return m_str; }
};
57
58	/* Bytes stream. Used by the parser to iterate through the data */
59	class QMBytesStream
60	{
61	size_t m_cbSize;
62	const uint8_t * const m_dataStart;
63	const uint8_t *m_iter;
64	const uint8_t *m_end;
65
66	public:
67
68	QMBytesStream(const uint8_t *const dataStart, size_t cbSize)
69	: m_cbSize(dataStart ? cbSize : 0)
70	, m_dataStart(dataStart)
71	, m_iter(dataStart)
72	{
73	setEnd();
74	}
75
76	/** Sets end pointer.
77	* Used in message reader to detect the end of message block */
78	inline void setEnd(size_t pos = 0)
79	{
80	m_end = m_dataStart + (pos && pos < m_cbSize ? pos : m_cbSize);
81	}
82
83	inline uint8_t read8()
84	{
85	checkSize(1);
86	return *m_iter++;
87	}
88
89	inline uint32_t read32()
90	{
91	checkSize(4);
92	uint32_t result = reinterpret_cast<const uint32_t >(m_iter);
93	m_iter += 4;
94	return RT_BE2H_U32(result);
95	}
96
97	/** Reads string in UTF16 and converts it into a UTF8 string */
98	inline com::Utf8Str readUtf16String()
99	{
100	uint32_t size = read32();
101	checkSize(size);
102	if (size & 1)
103	throw QMException("Incorrect string size");
104
105	/* UTF-16 can encode up to codepoint U+10ffff, which UTF-8 needs 4 bytes
106	to encode, so reserve twice the size plus a terminator for the result. */
107	com::Utf8Str result;
108	result.reserve(size * 2 + 1);
109	char *pszStr = result.mutableRaw();
110	int vrc = RTUtf16BigToUtf8Ex((PCRTUTF16)m_iter, size >> 1, &pszStr, result.capacity(), NULL);
111	if (RT_SUCCESS(vrc))
112	result.jolt();
113	else
114	throw QMException("Translation from UTF-16 to UTF-8 failed");
115
116	m_iter += size;
117	return result;
118	}
119
120	/**
121	* Reads a string, forcing UTF-8 encoding.
122	*/
123	inline com::Utf8Str readString()
124	{
125	uint32_t size = read32();
126	checkSize(size);
127
128	com::Utf8Str result(reinterpret_cast<const char *>(m_iter), size);
129	if (size > 0)
130	{
131	RTStrPurgeEncoding(result.mutableRaw());
132	result.jolt();
133	}
134
135	m_iter += size;
136	return result;
137	}
138
139	/**
140	* Reads memory block
141	* Returns number of bytes read
142	*/
143	inline uint32_t read(char *bBuf, uint32_t cbSize)
144	{
145	if (!bBuf \|\| !cbSize)
146	return 0;
147	cbSize = RT_MIN(cbSize, (uint32_t)(m_end - m_iter));
148	memcpy(bBuf, m_iter, cbSize);
149	m_iter += cbSize;
150	return cbSize;
151	}
152
153	/** Checks the magic number.
154	* Should be called when in the beginning of the data
155	* @throws exception on mismatch */
156	inline void checkMagic()
157	{
158	checkSize(g_cbMagic);
159	if (RT_LIKELY(memcmp(&(*m_iter), g_abMagic, g_cbMagic) == 0))
160	m_iter += g_cbMagic;
161	else
162	throw QMException("Wrong magic number");
163	}
164
165	/** Has we reached the end pointer? */
166	inline bool hasFinished()
167	{
168	return m_iter == m_end;
169	}
170
171	/** Returns current stream position */
172	inline size_t tellPos()
173	{
174	return (size_t)(m_iter - m_dataStart);
175	}
176
177	/** Moves current pointer to a desired position */
178	inline void seek(uint32_t offSkip)
179	{
180	size_t cbLeft = (size_t)(m_end - m_iter);
181	if (cbLeft >= offSkip)
182	m_iter += offSkip;
183	else
184	m_iter = m_end; /** @todo r=bird: Or throw exception via checkSize? */
185	}
186
187	/** Checks whether stream has enough data to read size bytes */
188	inline void checkSize(size_t size)
189	{
190	if (RT_LIKELY((size_t)(m_end - m_iter) >= size))
191	return;
192	throw QMException("Incorrect item size");
193	}
194	};
195
196	/* Internal QMTranslator implementation */
197	class QMTranslator_Impl
198	{
199	/** Used while parsing */
200	struct QMMessageParse
201	{
202	/* Everything is in UTF-8 */
203	std::vector<com::Utf8Str> astrTranslations;
204	com::Utf8Str strContext;
205	com::Utf8Str strComment;
206	com::Utf8Str strSource;
207
208	QMMessageParse() {}
209	};
210
211	struct QMMessage
212	{
213	const char *pszContext;
214	const char *pszSource;
215	const char *pszComment;
216	std::vector<const char *> vecTranslations;
217	uint32_t hash;
218
219	QMMessage() : pszContext(NULL), pszSource(NULL), pszComment(NULL), hash(0)
220	{}
221
222	QMMessage(RTSTRCACHE hStrCache, const QMMessageParse &rSrc)
223	: pszContext(addStr(hStrCache, rSrc.strContext))
224	, pszSource(addStr(hStrCache, rSrc.strSource))
225	, pszComment(addStr(hStrCache, rSrc.strComment))
226	, hash(RTStrHash1(pszSource))
227	{
228	for (size_t i = 0; i < rSrc.astrTranslations.size(); i++)
229	vecTranslations.push_back(addStr(hStrCache, rSrc.astrTranslations[i]));
230	}
231
232	/** Helper. */
233	static const char *addStr(RTSTRCACHE hStrCache, const com::Utf8Str &rSrc)
234	{
235	if (rSrc.isNotEmpty())
236	{
237	const char *psz = RTStrCacheEnterN(hStrCache, rSrc.c_str(), rSrc.length());
238	if (RT_LIKELY(psz))
239	return psz;
240	throw std::bad_alloc();
241	}
242	return NULL;
243	}
244
245	};
246
247	struct HashOffset
248	{
249	uint32_t hash;
250	uint32_t offset;
251
252	HashOffset(uint32_t a_hash = 0, uint32_t a_offs = 0) : hash(a_hash), offset(a_offs) {}
253
254	bool operator<(const HashOffset &obj) const
255	{
256	return (hash != obj.hash ? hash < obj.hash : offset < obj.offset);
257	}
258
259	};
260
261	typedef std::set<HashOffset> QMHashSet;
262	typedef QMHashSet::const_iterator QMHashSetConstIter;
263	typedef std::vector<QMMessage> QMMessageArray;
264	typedef std::vector<uint8_t> QMByteArray;
265
266	QMHashSet m_hashSet;
267	QMMessageArray m_messageArray;
268	QMByteArray m_pluralRules;
269
270	public:
271
272	QMTranslator_Impl() {}
273
274	enum PluralOpCodes
275	{
276	Pl_Eq = 0x01,
277	Pl_Lt = 0x02,
278	Pl_Leq = 0x03,
279	Pl_Between = 0x04,
280
281	Pl_OpMask = 0x07,
282
283	Pl_Not = 0x08,
284	Pl_Mod10 = 0x10,
285	Pl_Mod100 = 0x20,
286	Pl_Lead1000 = 0x40,
287
288	Pl_And = 0xFD,
289	Pl_Or = 0xFE,
290	Pl_NewRule = 0xFF,
291
292	Pl_LMask = 0x80,
293	};
294
295	/*
296	* Rules format:
297	* <O><2>[<3>][<&&><O><2>[<3>]]...[<\|\|><O><2>[<3>][<&&><O><2>[<3>]]...]...[<New><O>...]...
298	* where:
299	* <O> - OpCode
300	* <2> - Second operand
301	* <3> - Third operand
302	* <&&> - 'And' operation
303	* <\|\|> - 'Or' operation
304	* <New> - Start of rule for next plural form
305	* Rules are ordered by plural forms, i.e:
306	* <rule for first form (i.e. single)><New><rule for next form>...
307	*/
308	bool checkPlural(const QMByteArray &aRules) const
309	{
310	if (aRules.empty())
311	return true;
312
313	uint32_t iPos = 0;
314	do {
315	uint8_t bOpCode = aRules[iPos];
316
317	/* Invalid place of And/Or/NewRule */
318	if (bOpCode & Pl_LMask)
319	return false;
320
321	/* 2nd operand */
322	iPos++;
323
324	/* 2nd operand missing */
325	if (iPos == aRules.size())
326	return false;
327
328	/* Invalid OpCode */
329	if ((bOpCode & Pl_OpMask) == 0)
330	return false;
331
332	if ((bOpCode & Pl_OpMask) == Pl_Between)
333	{
334	/* 3rd operand */
335	iPos++;
336
337	/* 3rd operand missing */
338	if (iPos == aRules.size())
339	return false;
340	}
341
342	/* And/Or/NewRule */
343	iPos++;
344
345	/* All rules checked */
346	if (iPos == aRules.size())
347	return true;
348
349	} while ( ( (aRules[iPos] == Pl_And)
350	\|\| (aRules[iPos] == Pl_Or)
351	\|\| (aRules[iPos] == Pl_NewRule))
352	&& ++iPos != aRules.size());
353
354	return false;
355	}
356
357	size_t plural(size_t aNum) const
358	{
359	if (aNum == ~(size_t)0 \|\| m_pluralRules.empty())
360	return 0;
361
362	size_t uPluralNumber = 0;
363	uint32_t iPos = 0;
364
365	/* Rules loop */
366	for (;;)
367	{
368	bool fOr = false;
369	/* 'Or' loop */
370	for (;;)
371	{
372	bool fAnd = true;
373	/* 'And' loop */
374	for (;;)
375	{
376	int iOpCode = m_pluralRules[iPos++];
377	size_t iOpLeft = aNum;
378	if (iOpCode & Pl_Mod10)
379	iOpLeft %= 10;
380	else if (iOpCode & Pl_Mod100)
381	iOpLeft %= 100;
382	else if (iOpCode & Pl_Lead1000)
383	{
384	while (iOpLeft >= 1000)
385	iOpLeft /= 1000;
386	}
387	size_t iOpRight = m_pluralRules[iPos++];
388	int iOp = iOpCode & Pl_OpMask;
389	size_t iOpRight1 = 0;
390	if (iOp == Pl_Between)
391	iOpRight1 = m_pluralRules[iPos++];
392
393	bool fResult = (iOp == Pl_Eq && iOpLeft == iOpRight)
394	\|\| (iOp == Pl_Lt && iOpLeft < iOpRight)
395	\|\| (iOp == Pl_Leq && iOpLeft <= iOpRight)
396	\|\| (iOp == Pl_Between && iOpLeft >= iOpRight && iOpLeft <= iOpRight1);
397	if (iOpCode & Pl_Not)
398	fResult = !fResult;
399
400	fAnd = fAnd && fResult;
401	if (iPos == m_pluralRules.size() \|\| m_pluralRules[iPos] != Pl_And)
402	break;
403	iPos++;
404	}
405	fOr = fOr \|\| fAnd;
406	if (iPos == m_pluralRules.size() \|\| m_pluralRules[iPos] != Pl_Or)
407	break;
408	iPos++;
409	}
410	if (fOr)
411	return uPluralNumber;
412
413	/* Qt returns last plural number if none of rules are match. */
414	uPluralNumber++;
415
416	if (iPos >= m_pluralRules.size())
417	return uPluralNumber;
418
419	iPos++; // Skip Pl_NewRule
420	}
421	}
422
423	const char translate(const char pszContext,
424	const char *pszSource,
425	const char *pszDisamb,
426	const size_t aNum,
427	const char **ppszSafeSource) const RT_NOEXCEPT
428	{
429	QMHashSetConstIter lowerIter, upperIter;
430
431	/* As turned out, comments (pszDisamb) are not kept always in result qm file
432	* Therefore, exclude them from the hash */
433	uint32_t hash = RTStrHash1(pszSource);
434	lowerIter = m_hashSet.lower_bound(HashOffset(hash, 0));
435	upperIter = m_hashSet.upper_bound(HashOffset(hash, UINT32_MAX));
436
437	/*
438	* Check different combinations with and without context and
439	* disambiguation. This can help us to find the translation even
440	* if context or disambiguation are not know or properly defined.
441	*/
442	const char *apszCtx[] = {pszContext, pszContext, NULL, NULL};
443	const char *apszDisabm[] = {pszDisamb, NULL, pszDisamb, NULL};
444	AssertCompile(RT_ELEMENTS(apszCtx) == RT_ELEMENTS(apszDisabm));
445
446	for (size_t i = 0; i < RT_ELEMENTS(apszCtx); ++i)
447	{
448	for (QMHashSetConstIter iter = lowerIter; iter != upperIter; ++iter)
449	{
450	const QMMessage &message = m_messageArray[iter->offset];
451	if ( RTStrCmp(message.pszSource, pszSource) == 0
452	&& (!apszCtx[i] \|\| !*apszCtx[i] \|\| RTStrCmp(message.pszContext, apszCtx[i]) == 0)
453	&& (!apszDisabm[i] \|\| !*apszDisabm[i] \|\| RTStrCmp(message.pszComment, apszDisabm[i]) == 0 ))
454	{
455	*ppszSafeSource = message.pszSource;
456	const std::vector<const char *> &vecTranslations = m_messageArray[iter->offset].vecTranslations;
457	size_t const idxPlural = plural(aNum);
458	return vecTranslations[RT_MIN(idxPlural, vecTranslations.size() - 1)];
459	}
460	}
461	}
462
463	*ppszSafeSource = NULL;
464	return pszSource;
465	}
466
467	void load(QMBytesStream &stream, RTSTRCACHE hStrCache)
468	{
469	/* Load into local variables. If we failed during the load,
470	* it would allow us to keep the object in a valid (previous) state. */
471	QMHashSet hashSet;
472	QMMessageArray messageArray;
473	QMByteArray pluralRules;
474
475	stream.checkMagic();
476
477	while (!stream.hasFinished())
478	{
479	uint32_t sectionCode = stream.read8();
480	uint32_t sLen = stream.read32();
481
482	/* Hashes and Context sections are ignored. They contain hash tables
483	* to speed-up search which is not useful since we recalculate all hashes
484	* and don't perform context search by hash */
485	switch (sectionCode)
486	{
487	case Messages:
488	parseMessages(stream, hStrCache, &hashSet, &messageArray, sLen);
489	break;
490	case Hashes:
491	/* Only get size information to speed-up vector filling
492	* if Hashes section goes in the file before Message section */
493	if (messageArray.empty())
494	messageArray.reserve(sLen >> 3);
495	stream.seek(sLen);
496	break;
497	case NumerusRules:
498	{
499	pluralRules.resize(sLen);
500	uint32_t cbSize = stream.read((char *)&pluralRules[0], sLen);
501	if (cbSize < sLen)
502	throw QMException("Incorrect section size");
503	if (!checkPlural(pluralRules))
504	pluralRules.erase(pluralRules.begin(), pluralRules.end());
505	break;
506	}
507	case Contexts:
508	case Dependencies:
509	case Language:
510	stream.seek(sLen);
511	break;
512	default:
513	throw QMException("Unkown section");
514	}
515	}
516
517	/* Store the data into member variables.
518	* The following functions never generate exceptions */
519	m_hashSet.swap(hashSet);
520	m_messageArray.swap(messageArray);
521	m_pluralRules.swap(pluralRules);
522	}
523
524	private:
525
526	/* Some QM stuff */
527	enum SectionType
528	{
529	Contexts = 0x2f,
530	Hashes = 0x42,
531	Messages = 0x69,
532	NumerusRules = 0x88,
533	Dependencies = 0x96,
534	Language = 0xa7
535	};
536
537	enum MessageType
538	{
539	End = 1,
540	SourceText16 = 2,
541	Translation = 3,
542	Context16 = 4,
543	Obsolete1 = 5, /*< was Hash /
544	SourceText = 6,
545	Context = 7,
546	Comment = 8
547	};
548
549	/* Read messages from the stream. */
550	static void parseMessages(QMBytesStream &stream, RTSTRCACHE hStrCache, QMHashSet * const hashSet,
551	QMMessageArray * const messageArray, size_t cbSize)
552	{
553	stream.setEnd(stream.tellPos() + cbSize);
554	uint32_t cMessage = 0;
555	while (!stream.hasFinished())
556	{
557	/* Process the record. Skip anything that doesn't have a source
558	string or any valid translations. Using C++ strings for temporary
559	storage here, as we don't want to pollute the cache we bogus strings
560	in case of duplicate sub-records or invalid records. */
561	QMMessageParse ParsedMsg;
562	parseMessageRecord(stream, &ParsedMsg);
563	if ( ParsedMsg.astrTranslations.size() > 0
564	&& ParsedMsg.strSource.isNotEmpty())
565	{
566	/* Copy the strings over into the string cache and a hashed QMMessage,
567	before adding it to the result. */
568	QMMessage HashedMsg(hStrCache, ParsedMsg);
569	hashSet->insert(HashOffset(HashedMsg.hash, cMessage++));
570	messageArray->push_back(HashedMsg);
571
572	}
573	/else: wtf? /
574	}
575	stream.setEnd();
576	}
577
578	/* Parse one message from the stream */
579	static void parseMessageRecord(QMBytesStream &stream, QMMessageParse * const message)
580	{
581	while (!stream.hasFinished())
582	{
583	uint8_t type = stream.read8();
584	switch (type)
585	{
586	case End:
587	return;
588	/* Ignored as obsolete */
589	case Context16:
590	case SourceText16:
591	stream.seek(stream.read32());
592	break;
593	case Translation:
594	message->astrTranslations.push_back(stream.readUtf16String());
595	break;
596
597	case SourceText:
598	message->strSource = stream.readString();
599	break;
600
601	case Context:
602	message->strContext = stream.readString();
603	break;
604
605	case Comment:
606	message->strComment = stream.readString();
607	break;
608
609	default:
610	/* Ignore unknown/obsolete block */
611	LogRel(("QMTranslator::parseMessageRecord(): Unknown/obsolete message block %x\n", type));
612	break;
613	}
614	}
615	}
616	};
617
618	/* Interface functions implementation */
619	QMTranslator::QMTranslator() : m_impl(new QMTranslator_Impl) {}
620
621	QMTranslator::~QMTranslator() { delete m_impl; }
622
623	const char QMTranslator::translate(const char pszContext, const char pszSource, const char *ppszSafeSource,
624	const char pszDisamb /= NULL/, const size_t aNum /= ~(size_t)0*/) const RT_NOEXCEPT
625
626	{
627	return m_impl->translate(pszContext, pszSource, pszDisamb, aNum, ppszSafeSource);
628	}
629
630	int QMTranslator::load(const char *pszFilename, RTSTRCACHE hStrCache) RT_NOEXCEPT
631	{
632	/* To free safely the file in case of exception */
633	struct FileLoader
634	{
635	uint8_t *data;
636	size_t cbSize;
637	int vrc;
638	FileLoader(const char *pszFname)
639	{
640	vrc = RTFileReadAll(pszFname, (void**) &data, &cbSize);
641	}
642
643	~FileLoader()
644	{
645	if (isSuccess())
646	RTFileReadAllFree(data, cbSize);
647	}
648	bool isSuccess() { return RT_SUCCESS(vrc); }
649	};
650
651	try
652	{
653	FileLoader loader(pszFilename);
654	if (loader.isSuccess())
655	{
656	QMBytesStream stream(loader.data, loader.cbSize);
657	m_impl->load(stream, hStrCache);
658	}
659	return loader.vrc;
660	}
661	catch(std::exception &e)
662	{
663	LogRel(("QMTranslator::load() failed to load file '%s', reason: %s\n", pszFilename, e.what()));
664	return VERR_INTERNAL_ERROR;
665	}
666	catch(...)
667	{
668	LogRel(("QMTranslator::load() failed to load file '%s'\n", pszFilename));
669	return VERR_GENERAL_FAILURE;
670	}
671	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vbox/trunk/src/VBox/Main/src-all/QMTranslatorImpl.cpp

Download in other formats: