VirtualBox

source: vbox/trunk/src/bldprogs/scmparser.cpp@ 78203

Last change on this file since 78203 was 76553, checked in by vboxsync, 5 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 36.1 KB
Line 
/* $Id: scmparser.cpp 76553 2019-01-01 01:45:53Z vboxsync $ */
/** @file
 * IPRT Testcase / Tool - Source Code Massager, Code Parsers.
 */

/*
 * Copyright (C) 2010-2019 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
22#include <iprt/assert.h>
23#include <iprt/ctype.h>
24#include <iprt/dir.h>
25#include <iprt/env.h>
26#include <iprt/file.h>
27#include <iprt/errcore.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/mem.h>
31#include <iprt/message.h>
32#include <iprt/param.h>
33#include <iprt/path.h>
34#include <iprt/process.h>
35#include <iprt/stream.h>
36#include <iprt/string.h>
37
38#include "scm.h"


/*********************************************************************************************************************************
*   Structures and Typedefs                                                                                                      *
*********************************************************************************************************************************/
/**
 * Comment prefix tester callback.
 *
 * @returns Length of the comment prefix found at @a pchLine, 0 if there is
 *          none.
 * @param   pchLine     Where in the line to look (length given, not
 *                      necessarily zero terminated).
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     false to test for the plain comment prefix; true to
 *                      test for the longer form each implementation uses
 *                      when classifying doc comments.
 */
typedef size_t (*PFNISCOMMENT)(const char *pchLine, size_t cchLine, bool fSecond);


/**
 * Callback for checking if C++ line comment.
 *
 * @returns 2 for a leading "//" when @a fSecond is false, 3 for a leading
 *          "///" when @a fSecond is true, otherwise 0.
 * @param   pchLine     Where in the line to look.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the three slash form.
 */
static size_t isCppLineComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (   cchLine >= 2
        && pchLine[0] == '/'
        && pchLine[1] == '/')
    {
        if (!fSecond)
            return 2;
        if (cchLine >= 3 && pchLine[2] == '/')
            return 3;
    }
    return 0;
}


/**
 * Callback for checking if hash comment.
 *
 * @returns 1 for a leading '#' when @a fSecond is false, 2 for a leading
 *          "##" when @a fSecond is true, otherwise 0.
 * @param   pchLine     Where in the line to look.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the doubled form.
 */
static size_t isHashComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine >= 1 && *pchLine == '#')
    {
        if (!fSecond)
            return 1;
        if (cchLine >= 2 && pchLine[1] == '#')
            return 2;
    }
    return 0;
}


/**
 * Callback for checking if semicolon comment.
 *
 * @returns 1 for a leading ';' when @a fSecond is false, 2 for a leading
 *          ";;" when @a fSecond is true, otherwise 0.
 * @param   pchLine     Where in the line to look.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the doubled form.
 */
static size_t isSemicolonComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine >= 1 && *pchLine == ';')
    {
        if (!fSecond)
            return 1;
        if (cchLine >= 2 && pchLine[1] == ';')
            return 2;
    }
    return 0;
}


/**
 * Macro for checking for a batch file comment prefix.
 *
 * Evaluates to true when the three characters at @a a_off spell "rem" in any
 * letter case and are followed by either the end of the line or a space
 * character.  Note that the arguments are evaluated multiple times.
 *
 * @param   a_pch   The line buffer.
 * @param   a_off   Offset into the line where the prefix is expected.
 * @param   a_cch   The line length.
 */
#define IS_REM(a_pch, a_off, a_cch) \
    (   (a_off) + 3 <= (a_cch) \
     && ((a_pch)[(a_off)    ] == 'R' || (a_pch)[(a_off)    ] == 'r') \
     && ((a_pch)[(a_off) + 1] == 'E' || (a_pch)[(a_off) + 1] == 'e') \
     && ((a_pch)[(a_off) + 2] == 'M' || (a_pch)[(a_off) + 2] == 'm') \
     && ((a_off) + 3 == (a_cch) || RT_C_IS_SPACE((a_pch)[(a_off) + 3])) )


106/**
107 * Callback for checking if batch comment.
108 */
109static size_t isBatchComment(const char *pchLine, size_t cchLine, bool fSecond)
110{
111 if (!fSecond)
112 {
113 if (IS_REM(pchLine, 0, cchLine))
114 return 3;
115 }
116 else
117 {
118 /* Check for the 2nd in "rem rem" lines. */
119 if ( cchLine >= 4
120 && RT_C_IS_SPACE(*pchLine)
121 && IS_REM(pchLine, 1, cchLine))
122 return 4;
123 }
124 return 0;
125}
126
/**
 * Callback for checking if SQL comment.
 *
 * @returns 2 for a leading "--" when @a fSecond is false, 3 for a leading
 *          "---" when @a fSecond is true, otherwise 0.
 * @param   pchLine     Where in the line to look.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the three dash form.
 */
static size_t isSqlComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (   cchLine >= 2
        && pchLine[0] == '-'
        && pchLine[1] == '-')
    {
        if (!fSecond)
            return 2;
        if (   cchLine >= 3
            && pchLine[2] == '-')
            return 3;
    }
    return 0;
}

/**
 * Callback for checking if tick comment.
 *
 * @returns 1 for a leading single quote when @a fSecond is false, 2 for two
 *          leading single quotes when @a fSecond is true, otherwise 0.
 * @param   pchLine     Where in the line to look.
 * @param   cchLine     Number of characters available at @a pchLine.
 * @param   fSecond     Whether to require the doubled form.
 */
static size_t isTickComment(const char *pchLine, size_t cchLine, bool fSecond)
{
    if (cchLine >= 1 && *pchLine == '\'')
    {
        if (!fSecond)
            return 1;
        if (cchLine >= 2 && pchLine[1] == '\'')
            return 2;
    }
    return 0;
}


161/**
162 * Common worker for enumeratePythonComments and enumerateSimpleLineComments.
163 *
164 * @returns IPRT status code.
165 * @param pIn The input stream.
166 * @param pfnIsComment Comment tester function.
167 * @param pfnCallback The callback.
168 * @param pvUser The user argument for the callback.
169 * @param ppchLine Pointer to the line variable.
170 * @param pcchLine Pointer to the line length variable.
171 * @param penmEol Pointer to the line ending type variable.
172 * @param piLine Pointer to the line number variable.
173 * @param poff Pointer to the line offset variable. On input this
174 * is positioned at the start of the comment.
175 */
176static int handleLineComment(PSCMSTREAM pIn, PFNISCOMMENT pfnIsComment,
177 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser,
178 const char **ppchLine, size_t *pcchLine, PSCMEOL penmEol,
179 uint32_t *piLine, size_t *poff)
180{
181 /* Unpack input/output variables. */
182 uint32_t iLine = *piLine;
183 const char *pchLine = *ppchLine;
184 size_t cchLine = *pcchLine;
185 size_t off = *poff;
186 SCMEOL enmEol = *penmEol;
187
188 /*
189 * Take down the basic info about the comment.
190 */
191 SCMCOMMENTINFO Info;
192 Info.iLineStart = iLine;
193 Info.iLineEnd = iLine;
194 Info.offStart = (uint32_t)off;
195 Info.offEnd = (uint32_t)cchLine;
196
197 size_t cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false);
198 Assert(cchSkip > 0);
199 off += cchSkip;
200
201 /* Determin comment type. */
202 Info.enmType = kScmCommentType_Line;
203 char ch;
204 cchSkip = 1;
205 if ( off < cchLine
206 && ( (ch = pchLine[off]) == '!'
207 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, true)) > 0) )
208 {
209 unsigned ch2;
210 if ( off + cchSkip == cchLine
211 || RT_C_IS_SPACE(ch2 = pchLine[off + cchSkip]) )
212 {
213 Info.enmType = ch != '!' ? kScmCommentType_Line_JavaDoc : kScmCommentType_Line_Qt;
214 off += cchSkip;
215 }
216 else if ( ch2 == '<'
217 && ( off + cchSkip + 1 == cchLine
218 || RT_C_IS_SPACE(pchLine[off + cchSkip + 1]) ))
219 {
220 Info.enmType = ch == '!' ? kScmCommentType_Line_JavaDoc_After : kScmCommentType_Line_Qt_After;
221 off += cchSkip + 1;
222 }
223 }
224
225 /*
226 * Copy body of the first line. Like for C, we ignore a single space in the first comment line.
227 */
228 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
229 off++;
230 size_t cchBody = cchLine;
231 while (cchBody > off && RT_C_IS_SPACE(pchLine[cchBody - 1]))
232 cchBody--;
233 cchBody -= off;
234 size_t cbBodyAlloc = RT_MAX(_1K, RT_ALIGN_Z(cchBody + 64, 128));
235 char *pszBody = (char *)RTMemAlloc(cbBodyAlloc);
236 if (!pszBody)
237 return VERR_NO_MEMORY;
238 memcpy(pszBody, &pchLine[off], cchBody);
239 pszBody[cchBody] = '\0';
240
241 Info.cBlankLinesBefore = cchBody == 0;
242
243 /*
244 * Look for more comment lines and append them to the body.
245 */
246 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
247 {
248 iLine++;
249
250 /* Skip leading spaces. */
251 off = 0;
252 while (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
253 off++;
254
255 /* Check if it's a comment. */
256 if ( off >= cchLine
257 || (cchSkip = pfnIsComment(&pchLine[off], cchLine - off, false)) == 0)
258 break;
259 off += cchSkip;
260
261 /* Split on doxygen comment start (if not already in one). */
262 if ( Info.enmType == kScmCommentType_Line
263 && off + 1 < cchLine
264 && ( pfnIsComment(&pchLine[off], cchLine - off, true) > 0
265 || ( pchLine[off + 1] == '!'
266 && ( off + 2 == cchLine
267 || pchLine[off + 2] != '!') ) ) )
268 {
269 off -= cchSkip;
270 break;
271 }
272
273 /* Append the body w/o trailing spaces and some leading ones. */
274 if (off < cchLine && RT_C_IS_SPACE(pchLine[off]))
275 off++;
276 while (off < cchLine && off < Info.offStart + 3 && RT_C_IS_SPACE(pchLine[off]))
277 off++;
278 size_t cchAppend = cchLine;
279 while (cchAppend > off && RT_C_IS_SPACE(pchLine[cchAppend - 1]))
280 cchAppend--;
281 cchAppend -= off;
282
283 size_t cchNewBody = cchBody + 1 + cchAppend;
284 if (cchNewBody >= cbBodyAlloc)
285 {
286 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
287 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
288 if (pvNew)
289 pszBody = (char *)pvNew;
290 else
291 {
292 RTMemFree(pszBody);
293 return VERR_NO_MEMORY;
294 }
295 }
296
297 if ( cchBody > 0
298 || cchAppend > 0)
299 {
300 if (cchBody > 0)
301 pszBody[cchBody++] = '\n';
302 memcpy(&pszBody[cchBody], &pchLine[off], cchAppend);
303 cchBody += cchAppend;
304 pszBody[cchBody] = '\0';
305 }
306 else
307 Info.cBlankLinesBefore++;
308
309 /* Advance. */
310 Info.offEnd = (uint32_t)cchLine;
311 Info.iLineEnd = iLine;
312 }
313
314 /*
315 * Strip trailing empty lines in the body.
316 */
317 Info.cBlankLinesAfter = 0;
318 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
319 {
320 Info.cBlankLinesAfter++;
321 pszBody[--cchBody] = '\0';
322 }
323
324 /*
325 * Do the callback and return.
326 */
327 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
328
329 RTMemFree(pszBody);
330
331 *piLine = iLine;
332 *ppchLine = pchLine;
333 *pcchLine = cchLine;
334 *poff = off;
335 *penmEol = enmEol;
336 return rc;
337}
338
339
340
341/**
342 * Common string litteral handler.
343 *
344 * @returns new pchLine value.
345 * @param pIn The input string.
346 * @param chType The quotation type.
347 * @param pchLine The current line.
348 * @param ppchLine Pointer to the line variable.
349 * @param pcchLine Pointer to the line length variable.
350 * @param penmEol Pointer to the line ending type variable.
351 * @param piLine Pointer to the line number variable.
352 * @param poff Pointer to the line offset variable.
353 */
354static const char *handleStringLitteral(PSCMSTREAM pIn, char chType, const char *pchLine, size_t *pcchLine, PSCMEOL penmEol,
355 uint32_t *piLine, size_t *poff)
356{
357 size_t off = *poff;
358 for (;;)
359 {
360 bool fEnd = false;
361 bool fEscaped = false;
362 size_t const cchLine = *pcchLine;
363 while (off < cchLine)
364 {
365 char ch = pchLine[off++];
366 if (!fEscaped)
367 {
368 if (ch != chType)
369 {
370 if (ch != '\\')
371 { /* likely */ }
372 else
373 fEscaped = true;
374 }
375 else
376 {
377 fEnd = true;
378 break;
379 }
380 }
381 else
382 fEscaped = false;
383 }
384 if (fEnd)
385 break;
386
387 /* next line */
388 pchLine = ScmStreamGetLine(pIn, pcchLine, penmEol);
389 if (!pchLine)
390 break;
391 *piLine += 1;
392 off = 0;
393 }
394
395 *poff = off;
396 return pchLine;
397}
398
399
400/**
401 * Deals with comments in C and C++ code.
402 *
403 * @returns VBox status code / callback return code.
404 * @param pIn The stream to parse.
405 * @param pfnCallback The callback.
406 * @param pvUser The user parameter for the callback.
407 */
408static int enumerateCStyleComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
409{
410 AssertCompile('\'' < '/');
411 AssertCompile('"' < '/');
412
413 int rcRet = VINF_SUCCESS;
414 uint32_t iLine = 0;
415 SCMEOL enmEol;
416 size_t cchLine;
417 const char *pchLine;
418 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
419 {
420 size_t off = 0;
421 while (off < cchLine)
422 {
423 unsigned ch = pchLine[off++];
424 if (ch > (unsigned)'/')
425 { /* not interesting */ }
426 else if (ch == '/')
427 {
428 if (off < cchLine)
429 {
430 ch = pchLine[off++];
431 if (ch == '*')
432 {
433 /*
434 * Multiline comment. Find the end.
435 *
436 * Note! This is very similar to the python doc string handling further down.
437 */
438 SCMCOMMENTINFO Info;
439 Info.iLineStart = iLine;
440 Info.offStart = (uint32_t)off - 2;
441 Info.iLineEnd = UINT32_MAX;
442 Info.offEnd = UINT32_MAX;
443 Info.cBlankLinesBefore = 0;
444
445 /* Determin comment type (same as for line-comments). */
446 Info.enmType = kScmCommentType_MultiLine;
447 if ( off < cchLine
448 && ( (ch = pchLine[off]) == '*'
449 || ch == '!') )
450 {
451 unsigned ch2;
452 if ( off + 1 == cchLine
453 || RT_C_IS_SPACE(ch2 = pchLine[off + 1]) )
454 {
455 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc : kScmCommentType_MultiLine_Qt;
456 off += 1;
457 }
458 else if ( ch2 == '<'
459 && ( off + 2 == cchLine
460 || RT_C_IS_SPACE(pchLine[off + 2]) ))
461 {
462 Info.enmType = ch == '*' ? kScmCommentType_MultiLine_JavaDoc_After
463 : kScmCommentType_MultiLine_Qt_After;
464 off += 2;
465 }
466 }
467
468 /*
469 * Copy the body and find the end of the multiline comment.
470 */
471 size_t cbBodyAlloc = 0;
472 size_t cchBody = 0;
473 char *pszBody = NULL;
474 for (;;)
475 {
476 /* Parse the line up to the end-of-comment or end-of-line. */
477 size_t offLineStart = off;
478 size_t offLastNonBlank = off;
479 size_t offFirstNonBlank = ~(size_t)0;
480 while (off < cchLine)
481 {
482 ch = pchLine[off++];
483 if (ch != '*' || off >= cchLine || pchLine[off] != '/')
484 {
485 if (RT_C_IS_BLANK(ch))
486 {/* kind of likely */}
487 else
488 {
489 offLastNonBlank = off - 1;
490 if (offFirstNonBlank != ~(size_t)0)
491 {/* likely */}
492 else if ( ch != '*' /* ignore continuation-asterisks */
493 || off > Info.offStart + 1 + 1
494 || off > cchLine
495 || ( off < cchLine
496 && !RT_C_IS_SPACE(pchLine[off]))
497 || pszBody == NULL)
498 offFirstNonBlank = off - 1;
499 }
500 }
501 else
502 {
503 Info.offEnd = (uint32_t)++off;
504 Info.iLineEnd = iLine;
505 break;
506 }
507 }
508
509 /* Append line content to the comment body string. */
510 size_t cchAppend;
511 if (offFirstNonBlank == ~(size_t)0)
512 cchAppend = 0; /* empty line */
513 else
514 {
515 if (pszBody)
516 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
517 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
518 offLineStart++;
519 cchAppend = offLastNonBlank + 1 - offLineStart;
520 Assert(cchAppend <= cchLine);
521 }
522
523 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
524 if (cchNewBody >= cbBodyAlloc)
525 {
526 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
527 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
528 if (pvNew)
529 pszBody = (char *)pvNew;
530 else
531 {
532 RTMemFree(pszBody);
533 return VERR_NO_MEMORY;
534 }
535 }
536
537 if (cchBody > 0) /* no leading blank lines */
538 pszBody[cchBody++] = '\n';
539 else if (cchAppend == 0)
540 Info.cBlankLinesBefore++;
541 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
542 cchBody += cchAppend;
543 pszBody[cchBody] = '\0';
544
545 /* Advance to the next line, if we haven't yet seen the end of this comment. */
546 if (Info.iLineEnd != UINT32_MAX)
547 break;
548 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
549 if (!pchLine)
550 {
551 Info.offEnd = (uint32_t)cchLine;
552 Info.iLineEnd = iLine;
553 break;
554 }
555 iLine++;
556 off = 0;
557 }
558
559 /* Strip trailing empty lines in the body. */
560 Info.cBlankLinesAfter = 0;
561 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
562 {
563 Info.cBlankLinesAfter++;
564 pszBody[--cchBody] = '\0';
565 }
566
567 /* Do the callback. */
568 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
569 RTMemFree(pszBody);
570 if (RT_FAILURE(rc))
571 return rc;
572 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
573 rcRet = rc;
574 }
575 else if (ch == '/')
576 {
577 /*
578 * Line comment. Join the other line comment guys.
579 */
580 off -= 2;
581 int rc = handleLineComment(pIn, isCppLineComment, pfnCallback, pvUser,
582 &pchLine, &cchLine, &enmEol, &iLine, &off);
583 if (RT_FAILURE(rc))
584 return rc;
585 if (rcRet == VINF_SUCCESS)
586 rcRet = rc;
587 }
588
589 if (!pchLine)
590 break;
591 }
592 }
593 else if (ch == '"')
594 {
595 /*
596 * String litterals may include sequences that looks like comments. So,
597 * they needs special handling to avoid confusion.
598 */
599 pchLine = handleStringLitteral(pIn, '"', pchLine, &cchLine, &enmEol, &iLine, &off);
600 }
601 /* else: We don't have to deal with character litterals as these shouldn't
602 include comment-like sequences. */
603 } /* for each character in the line */
604
605 iLine++;
606 } /* for each line in the stream */
607
608 int rcStream = ScmStreamGetStatus(pIn);
609 if (RT_SUCCESS(rcStream))
610 return rcRet;
611 return rcStream;
612}
613
614
615/**
616 * Deals with comments in Python code.
617 *
618 * @returns VBox status code / callback return code.
619 * @param pIn The stream to parse.
620 * @param pfnCallback The callback.
621 * @param pvUser The user parameter for the callback.
622 */
623static int enumeratePythonComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
624{
625 AssertCompile('#' < '\'');
626 AssertCompile('"' < '\'');
627
628 int rcRet = VINF_SUCCESS;
629 uint32_t iLine = 0;
630 SCMEOL enmEol;
631 size_t cchLine;
632 const char *pchLine;
633 SCMCOMMENTINFO Info;
634 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
635 {
636 size_t off = 0;
637 while (off < cchLine)
638 {
639 char ch = pchLine[off++];
640 if ((unsigned char)ch > (unsigned char)'\'')
641 { /* not interesting */ }
642 else if (ch == '#')
643 {
644 /*
645 * Line comment. Join paths with the others.
646 */
647 off -= 1;
648 int rc = handleLineComment(pIn, isHashComment, pfnCallback, pvUser,
649 &pchLine, &cchLine, &enmEol, &iLine, &off);
650 if (RT_FAILURE(rc))
651 return rc;
652 if (rcRet == VINF_SUCCESS)
653 rcRet = rc;
654
655 if (!pchLine)
656 break;
657 }
658 else if (ch == '"' || ch == '\'')
659 {
660 /*
661 * String litterals may be doc strings and they may legally include hashes.
662 */
663 const char chType = ch;
664 if ( off + 1 >= cchLine
665 || pchLine[off] != chType
666 || pchLine[off + 1] != chType)
667 pchLine = handleStringLitteral(pIn, chType, pchLine, &cchLine, &enmEol, &iLine, &off);
668 else
669 {
670 /*
671 * Doc string (/ long string).
672 *
673 * Note! This is very similar to the multiline C comment handling above.
674 */
675 Info.iLineStart = iLine;
676 Info.offStart = (uint32_t)off - 1;
677 Info.iLineEnd = UINT32_MAX;
678 Info.offEnd = UINT32_MAX;
679 Info.cBlankLinesBefore = 0;
680 Info.enmType = kScmCommentType_DocString;
681
682 off += 2;
683
684 /* Copy the body and find the end of the doc string comment. */
685 size_t cbBodyAlloc = 0;
686 size_t cchBody = 0;
687 char *pszBody = NULL;
688 for (;;)
689 {
690 /* Parse the line up to the end-of-comment or end-of-line. */
691 size_t offLineStart = off;
692 size_t offLastNonBlank = off;
693 size_t offFirstNonBlank = ~(size_t)0;
694 bool fEscaped = false;
695 while (off < cchLine)
696 {
697 ch = pchLine[off++];
698 if (!fEscaped)
699 {
700 if ( off + 1 >= cchLine
701 || ch != chType
702 || pchLine[off] != chType
703 || pchLine[off + 1] != chType)
704 {
705 if (RT_C_IS_BLANK(ch))
706 {/* kind of likely */}
707 else
708 {
709 offLastNonBlank = off - 1;
710 if (offFirstNonBlank != ~(size_t)0)
711 {/* likely */}
712 else if ( ch != '*' /* ignore continuation-asterisks */
713 || off > Info.offStart + 1 + 1
714 || off > cchLine
715 || ( off < cchLine
716 && !RT_C_IS_SPACE(pchLine[off]))
717 || pszBody == NULL)
718 offFirstNonBlank = off - 1;
719
720 if (ch != '\\')
721 {/* likely */ }
722 else
723 fEscaped = true;
724 }
725 }
726 else
727 {
728 off += 2;
729 Info.offEnd = (uint32_t)off;
730 Info.iLineEnd = iLine;
731 break;
732 }
733 }
734 else
735 fEscaped = false;
736 }
737
738 /* Append line content to the comment body string. */
739 size_t cchAppend;
740 if (offFirstNonBlank == ~(size_t)0)
741 cchAppend = 0; /* empty line */
742 else
743 {
744 if (pszBody)
745 offLineStart = RT_MIN(Info.offStart + 3, offFirstNonBlank);
746 else if (offFirstNonBlank > Info.offStart + 2) /* Skip one leading blank at the start of the comment. */
747 offLineStart++;
748 cchAppend = offLastNonBlank + 1 - offLineStart;
749 Assert(cchAppend <= cchLine);
750 }
751
752 size_t cchNewBody = cchBody + (cchBody > 0) + cchAppend;
753 if (cchNewBody >= cbBodyAlloc)
754 {
755 cbBodyAlloc = RT_MAX(cbBodyAlloc ? cbBodyAlloc * 2 : _1K, RT_ALIGN_Z(cchNewBody + 64, 128));
756 void *pvNew = RTMemRealloc(pszBody, cbBodyAlloc);
757 if (pvNew)
758 pszBody = (char *)pvNew;
759 else
760 {
761 RTMemFree(pszBody);
762 return VERR_NO_MEMORY;
763 }
764 }
765
766 if (cchBody > 0) /* no leading blank lines */
767 pszBody[cchBody++] = '\n';
768 else if (cchAppend == 0)
769 Info.cBlankLinesBefore++;
770 memcpy(&pszBody[cchBody], &pchLine[offLineStart], cchAppend);
771 cchBody += cchAppend;
772 pszBody[cchBody] = '\0';
773
774 /* Advance to the next line, if we haven't yet seen the end of this comment. */
775 if (Info.iLineEnd != UINT32_MAX)
776 break;
777 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
778 if (!pchLine)
779 {
780 Info.offEnd = (uint32_t)cchLine;
781 Info.iLineEnd = iLine;
782 break;
783 }
784 iLine++;
785 off = 0;
786 }
787
788 /* Strip trailing empty lines in the body. */
789 Info.cBlankLinesAfter = 0;
790 while (cchBody >= 1 && pszBody[cchBody - 1] == '\n')
791 {
792 Info.cBlankLinesAfter++;
793 pszBody[--cchBody] = '\0';
794 }
795
796 /* Do the callback. */
797 int rc = pfnCallback(&Info, pszBody, cchBody, pvUser);
798 RTMemFree(pszBody);
799 if (RT_FAILURE(rc))
800 return rc;
801 if (rc > VINF_SUCCESS && rcRet == VINF_SUCCESS)
802 rcRet = rc;
803 }
804
805 if (!pchLine)
806 break;
807 }
808 /* else: We don't have to deal with character litterals as these shouldn't
809 include comment-like sequences. */
810 } /* for each character in the line */
811
812 iLine++;
813 } /* for each line in the stream */
814
815 int rcStream = ScmStreamGetStatus(pIn);
816 if (RT_SUCCESS(rcStream))
817 return rcRet;
818 return rcStream;
819}
820
821
822/**
823 * Deals with comments in DOS batch files.
824 *
825 * @returns VBox status code / callback return code.
826 * @param pIn The stream to parse.
827 * @param pfnCallback The callback.
828 * @param pvUser The user parameter for the callback.
829 */
830static int enumerateBatchComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
831{
832 int rcRet = VINF_SUCCESS;
833 uint32_t iLine = 0;
834 SCMEOL enmEol;
835 size_t cchLine;
836 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
837 while (pchLine != NULL)
838 {
839 /*
840 * Skip leading blanks and check for 'rem'.
841 * At the moment we do not parse '::lable-comments'.
842 */
843 size_t off = 0;
844 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
845 off++;
846 if (!IS_REM(pchLine, off, cchLine))
847 {
848 iLine++;
849 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
850 }
851 else
852 {
853 int rc = handleLineComment(pIn, isBatchComment, pfnCallback, pvUser,
854 &pchLine, &cchLine, &enmEol, &iLine, &off);
855 if (RT_FAILURE(rc))
856 return rc;
857 if (rcRet == VINF_SUCCESS)
858 rcRet = rc;
859 }
860 }
861
862 int rcStream = ScmStreamGetStatus(pIn);
863 if (RT_SUCCESS(rcStream))
864 return rcRet;
865 return rcStream;
866}
867
868
869/**
870 * Deals with comments in SQL files.
871 *
872 * @returns VBox status code / callback return code.
873 * @param pIn The stream to parse.
874 * @param pfnCallback The callback.
875 * @param pvUser The user parameter for the callback.
876 */
877static int enumerateSqlComments(PSCMSTREAM pIn, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
878{
879 int rcRet = VINF_SUCCESS;
880 uint32_t iLine = 0;
881 SCMEOL enmEol;
882 size_t cchLine;
883 const char *pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
884 while (pchLine != NULL)
885 {
886 /*
887 * Skip leading blanks and check for '--'.
888 */
889 size_t off = 0;
890 while (off + 3 < cchLine && RT_C_IS_SPACE(pchLine[off]))
891 off++;
892 if ( cchLine < 2
893 || pchLine[0] != '-'
894 || pchLine[1] != '-')
895 {
896 iLine++;
897 pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol);
898 }
899 else
900 {
901 int rc = handleLineComment(pIn, isSqlComment, pfnCallback, pvUser,
902 &pchLine, &cchLine, &enmEol, &iLine, &off);
903 if (RT_FAILURE(rc))
904 return rc;
905 if (rcRet == VINF_SUCCESS)
906 rcRet = rc;
907 }
908 }
909
910 int rcStream = ScmStreamGetStatus(pIn);
911 if (RT_SUCCESS(rcStream))
912 return rcRet;
913 return rcStream;
914}
915
916
917/**
918 * Deals with simple line comments.
919 *
920 * @returns VBox status code / callback return code.
921 * @param pIn The stream to parse.
922 * @param chStart The start of comment character.
923 * @param pfnIsComment Comment tester function.
924 * @param pfnCallback The callback.
925 * @param pvUser The user parameter for the callback.
926 */
927static int enumerateSimpleLineComments(PSCMSTREAM pIn, char chStart, PFNISCOMMENT pfnIsComment,
928 PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
929{
930 int rcRet = VINF_SUCCESS;
931 uint32_t iLine = 0;
932 SCMEOL enmEol;
933 size_t cchLine;
934 const char *pchLine;
935 while ((pchLine = ScmStreamGetLine(pIn, &cchLine, &enmEol)) != NULL)
936 {
937 size_t off = 0;
938 while (off < cchLine)
939 {
940 char ch = pchLine[off++];
941 if (ch != chStart)
942 { /* not interesting */ }
943 else
944 {
945 off -= 1;
946 int rc = handleLineComment(pIn, pfnIsComment, pfnCallback, pvUser,
947 &pchLine, &cchLine, &enmEol, &iLine, &off);
948 if (RT_FAILURE(rc))
949 return rc;
950 if (rcRet == VINF_SUCCESS)
951 rcRet = rc;
952
953 if (!pchLine)
954 break;
955 }
956 } /* for each character in the line */
957
958 iLine++;
959 } /* for each line in the stream */
960
961 int rcStream = ScmStreamGetStatus(pIn);
962 if (RT_SUCCESS(rcStream))
963 return rcRet;
964 return rcStream;
965}
966
967
968/**
969 * Enumerates the comments in the given stream, calling @a pfnCallback for each.
970 *
971 * @returns IPRT status code.
972 * @param pIn The stream to parse.
973 * @param enmCommentStyle The comment style of the source stream.
974 * @param pfnCallback The function to call.
975 * @param pvUser User argument to the callback.
976 */
977int ScmEnumerateComments(PSCMSTREAM pIn, SCMCOMMENTSTYLE enmCommentStyle, PFNSCMCOMMENTENUMERATOR pfnCallback, void *pvUser)
978{
979 switch (enmCommentStyle)
980 {
981 case kScmCommentStyle_C:
982 return enumerateCStyleComments(pIn, pfnCallback, pvUser);
983
984 case kScmCommentStyle_Python:
985 return enumeratePythonComments(pIn, pfnCallback, pvUser);
986
987 case kScmCommentStyle_Semicolon:
988 return enumerateSimpleLineComments(pIn, ';', isSemicolonComment, pfnCallback, pvUser);
989
990 case kScmCommentStyle_Hash:
991 return enumerateSimpleLineComments(pIn, '#', isHashComment, pfnCallback, pvUser);
992
993 case kScmCommentStyle_Rem_Upper:
994 case kScmCommentStyle_Rem_Lower:
995 case kScmCommentStyle_Rem_Camel:
996 return enumerateBatchComments(pIn, pfnCallback, pvUser);
997
998 case kScmCommentStyle_Sql:
999 return enumerateSqlComments(pIn, pfnCallback, pvUser);
1000
1001 case kScmCommentStyle_Tick:
1002 return enumerateSimpleLineComments(pIn, '\'', isTickComment, pfnCallback, pvUser);
1003
1004 default:
1005 AssertFailedReturn(VERR_INVALID_PARAMETER);
1006 }
1007}
1008
Note: See TracBrowser for help on using the repository browser.

© 2023 Oracle
ContactPrivacy policyTerms of Use