VirtualBox

source: kBuild/trunk/src/grep/src/grep.c@ 3610

Last change on this file since 3610 was 3548, checked in by bird, 3 years ago

grep: Use get_crt_codepage(). Don't default to the UTF-8 manifest for older VCC versions as the CRT won't do the right thing.

  • Property svn:eol-style set to native
File size: 96.0 KB
Line 
1/* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19/* Written July 1992 by Mike Haertel. */
20
21#include <config.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <wchar.h>
25#include <inttypes.h>
26#include <stdarg.h>
27#include <stdint.h>
28#include <stdio.h>
29#include "system.h"
30
31#include "argmatch.h"
32#include "c-ctype.h"
33#include "c-stack.h"
34#include "closeout.h"
35#include "colorize.h"
36#include "die.h"
37#include "error.h"
38#include "exclude.h"
39#include "exitfail.h"
40#include "fcntl-safer.h"
41#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
42# include "nt/fts-nt.h" /* Use NT optimized FTS implementation. */
43#else
44#include "fts_.h"
45#endif
46#include "getopt.h"
47#include "getprogname.h"
48#include "grep.h"
49#include "hash.h"
50#include "intprops.h"
51#include "propername.h"
52#include "safe-read.h"
53#include "search.h"
54#include "c-strcase.h"
55#include "version-etc.h"
56#include "xalloc.h"
57#include "xbinary-io.h"
58#include "xstrtol.h"
59
60#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
61# include "console.h"
62#endif
63
64enum { SEP_CHAR_SELECTED = ':' };
65enum { SEP_CHAR_REJECTED = '-' };
66static char const SEP_STR_GROUP[] = "--";
67
68/* When stdout is connected to a regular file, save its stat
69 information here, so that we can automatically skip it, thus
70 avoiding a potential (racy) infinite loop. */
71static struct stat out_stat;
72
73/* if non-zero, display usage information and exit */
74static int show_help;
75
76/* Print the version on standard output and exit. */
77static bool show_version;
78
79/* Suppress diagnostics for nonexistent or unreadable files. */
80static bool suppress_errors;
81
82/* If nonzero, use color markers. */
83static int color_option;
84
85/* Show only the part of a line matching the expression. */
86static bool only_matching;
87
88/* If nonzero, make sure first content char in a line is on a tab stop. */
89static bool align_tabs;
90
91/* Print width of line numbers and byte offsets. Nonzero if ALIGN_TABS. */
92static int offset_width;
93
94/* An entry in the PATLOC array saying where patterns came from. */
95struct patloc
96 {
97 /* Line number of the pattern in PATTERN_ARRAY. Line numbers
98 start at 0, and each pattern is terminated by '\n'. */
99 ptrdiff_t lineno;
100
101 /* Input location of the pattern. The FILENAME "-" represents
102 standard input, and "" represents the command line. FILELINE is
103 origin-1 for files and is irrelevant for the command line. */
104 char const *filename;
105 ptrdiff_t fileline;
106 };
107
108/* The array of pattern locations. The concatenation of all patterns
109 is stored in a single array, KEYS. Given the invocation
110 'grep -f <(seq 5) -f <(seq 6) -f <(seq 3)', there will initially be
111 28 bytes in KEYS. After duplicate patterns are removed, KEYS
112 will have 12 bytes and PATLOC will be {0,x,1}, {10,y,1}
113 where x, y and z are just place-holders for shell-generated names,
114 and z is omitted since it contains only duplicates. Sometimes
115 removing duplicates will grow PATLOC, since each run of
116 removed patterns not at a file start or end requires another
117 PATLOC entry for the first non-removed pattern. */
118static struct patloc *patloc;
119static size_t patlocs_allocated, patlocs_used;
120
121/* Pointer to the array of patterns, each terminated by newline. */
122static char *pattern_array;
123
124/* The number of unique patterns seen so far. */
125static size_t n_patterns;
126
127/* Hash table of patterns seen so far. */
128static Hash_table *pattern_table;
129
130/* Hash and compare newline-terminated patterns for textual equality.
131 Patterns are represented by origin-1 offsets into PATTERN_ARRAY,
132 cast to void *. The origin-1 is so that the first pattern offset
133 does not appear to be a null pointer when cast to void *. */
134static size_t _GL_ATTRIBUTE_PURE
135hash_pattern (void const *pat, size_t n_buckets)
136{
137 size_t h = 0;
138 intptr_t pat_offset = (intptr_t) pat - 1;
139 unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
140 for ( ; *s != '\n'; s++)
141 h = h * 33 ^ *s;
142 return h % n_buckets;
143}
144static bool _GL_ATTRIBUTE_PURE
145compare_patterns (void const *a, void const *b)
146{
147 intptr_t a_offset = (intptr_t) a - 1;
148 intptr_t b_offset = (intptr_t) b - 1;
149 char const *p = pattern_array + a_offset;
150 char const *q = pattern_array + b_offset;
151 for (; *p == *q; p++, q++)
152 if (*p == '\n')
153 return true;
154 return false;
155}
156
/* Update KEYS to remove duplicate patterns, and return the number of
   bytes in the resulting KEYS.  KEYS contains a sequence of patterns
   each terminated by '\n'.  The first DUPFREE_SIZE bytes are a
   sequence of patterns with no duplicates; SIZE is the total number
   of bytes in KEYS.  If some patterns past the first DUPFREE_SIZE
   bytes are not duplicates, update PATLOCS accordingly.

   FILENAME is stored in every PATLOC entry created here.  The
   surviving patterns are compacted in place, so the returned size
   is never larger than SIZE.  */
static ptrdiff_t
update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
                 char const *filename)
{
  /* DST is where the next surviving pattern is written.  */
  char *dst = keys + dupfree_size;
  /* Origin-1 line number, within this batch, of the pattern at SRC.  */
  ptrdiff_t fileline = 1;
  /* Insertion result for the previous pattern; 0 initially, so the
     first surviving pattern always gets a PATLOC entry.  */
  int prev_inserted = 0;

  char const *srclim = keys + size;
  ptrdiff_t patsize;
  for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
    {
      /* Each pattern is newline-terminated, so the search for '\n'
         needs no length bound.  */
      char const *patend = rawmemchr (src, '\n');
      patsize = patend + 1 - src;
      /* Copy the candidate down to DST first: the hash table
         identifies a pattern by its final offset in KEYS.  The
         regions may overlap, hence memmove.  */
      memmove (dst, src, patsize);

      /* Origin-1 offset, so the first pattern is not a null pointer.  */
      intptr_t dst_offset_1 = dst - keys + 1;
      int inserted = hash_insert_if_absent (pattern_table,
                                            (void *) dst_offset_1, NULL);
      if (inserted)
        {
          /* A negative result is treated as memory exhaustion.  */
          if (inserted < 0)
            xalloc_die ();
          /* Keep the pattern by advancing DST past it; a duplicate
             is instead overwritten by the next candidate.  */
          dst += patsize;

          /* Add a PATLOCS entry unless this input line is simply the
             next one in the same file.  */
          if (!prev_inserted)
            {
              if (patlocs_used == patlocs_allocated)
                patloc = x2nrealloc (patloc, &patlocs_allocated,
                                     sizeof *patloc);
              patloc[patlocs_used++]
                = (struct patloc) { .lineno = n_patterns,
                                    .filename = filename,
                                    .fileline = fileline };
            }
          n_patterns++;
        }

      prev_inserted = inserted;
      fileline++;
    }

  return dst - keys;
}
209
210/* Map LINENO, the origin-0 line number of one of the input patterns,
211 to the name of the file from which it came. Return "-" if it was
212 read from stdin, "" if it was specified on the command line.
213 Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
214 or to an unspecified value if PATTERN came from the command line. */
215char const * _GL_ATTRIBUTE_PURE
216pattern_file_name (size_t lineno, size_t *new_lineno)
217{
218 ptrdiff_t i;
219 for (i = 1; i < patlocs_used; i++)
220 if (lineno < patloc[i].lineno)
221 break;
222 *new_lineno = lineno - patloc[i - 1].lineno + patloc[i - 1].fileline;
223 return patloc[i - 1].filename;
224}
225
#if HAVE_ASAN
/* Start address and length of the sole poisoned region, remembered
   so that it can be unpoisoned later, just before each following
   read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the remembered region, if one has been recorded.  */
static void
clear_asan_poison (void)
{
  if (!poison_buf)
    return;
  __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Remember the SIZE-byte region starting at ADDR, then poison it.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_len = size;
  poison_buf = addr;

  __asan_poison_memory_region (poison_buf, poison_len);
}
#else
/* Without ASAN both operations do nothing.  */
static void
clear_asan_poison (void)
{
}

static void
asan_poison (void const volatile *addr, size_t size)
{
}
#endif
251
252/* The group separator used when context is requested. */
253static const char *group_separator = SEP_STR_GROUP;
254
255/* The context and logic for choosing default --color screen attributes
256 (foreground and background colors, etc.) are the following.
257 -- There are eight basic colors available, each with its own
258 nominal luminosity to the human eye and foreground/background
259 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
260 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
261 yellow [89 %, 33/43], and white [100 %, 37/47]).
262 -- Sometimes, white as a background is actually implemented using
263 a shade of light gray, so that a foreground white can be visible
264 on top of it (but most often not).
265 -- Sometimes, black as a foreground is actually implemented using
266 a shade of dark gray, so that it can be visible on top of a
267 background black (but most often not).
268 -- Sometimes, more colors are available, as extensions.
269 -- Other attributes can be selected/deselected (bold [1/22],
270 underline [4/24], standout/inverse [7/27], blink [5/25], and
271 invisible/hidden [8/28]). They are sometimes implemented by
272 using colors instead of what their names imply; e.g., bold is
273 often achieved by using brighter colors. In practice, only bold
274 is really available to us, underline sometimes being mapped by
275 the terminal to some strange color choice, and standout best
276 being left for use by downstream programs such as less(1).
277 -- We cannot assume that any of the extensions or special features
278 are available for the purpose of choosing defaults for everyone.
279 -- The most prevalent default terminal backgrounds are pure black
280 and pure white, and are not necessarily the same shades of
281 those as if they were selected explicitly with SGR sequences.
282 Some terminals use dark or light pictures as default background,
283 but those are covered over by an explicit selection of background
284 color with an SGR sequence; their users will appreciate their
285 background pictures not be covered like this, if possible.
286 -- Some uses of color attributes are to make some output items
287 more understated (e.g., context lines); this cannot be achieved
288 by changing the background color.
289 -- For these reasons, the grep color defaults should strive not
290 to change the background color from its default, unless it's
291 for a short item that should be highlighted, not understated.
292 -- The grep foreground color defaults (without an explicitly set
293 background) should provide enough contrast to be readable on any
294 terminal with either a black (dark) or white (light) background.
295 This only leaves red, magenta, green, and cyan (and their bold
296 counterparts) and possibly bold blue. */
297/* The color strings used for matched text.
298 The user can overwrite them using the deprecated
299 environment variable GREP_COLOR or the new GREP_COLORS. */
300static const char *selected_match_color = "01;31"; /* bold red */
301static const char *context_match_color = "01;31"; /* bold red */
302
303/* Other colors. Defaults look damn good. */
304static const char *filename_color = "35"; /* magenta */
305static const char *line_num_color = "32"; /* green */
306static const char *byte_num_color = "32"; /* green */
307static const char *sep_color = "36"; /* cyan */
308static const char *selected_line_color = ""; /* default color pair */
309static const char *context_line_color = ""; /* default color pair */
310
311/* Select Graphic Rendition (SGR, "\33[...m") strings. */
312/* Also Erase in Line (EL) to Right ("\33[K") by default. */
313/* Why have EL to Right after SGR?
314 -- The behavior of line-wrapping when at the bottom of the
315 terminal screen and at the end of the current line is often
316 such that a new line is introduced, entirely cleared with
317 the current background color which may be different from the
318 default one (see the boolean back_color_erase terminfo(5)
319 capability), thus scrolling the display by one line.
320 The end of this new line will stay in this background color
321 even after reverting to the default background color with
322 "\33[m", unless it is explicitly cleared again with "\33[K"
323 (which is the behavior the user would instinctively expect
324 from the whole thing). There may be some unavoidable
325 background-color flicker at the end of this new line because
326 of this (when timing with the monitor's redraw is just right).
327 -- The behavior of HT (tab, "\t") is usually the same as that of
328 Cursor Forward Tabulation (CHT) with a default parameter
329 of 1 ("\33[I"), i.e., it performs pure movement to the next
330 tab stop, without any clearing of either content or screen
331 attributes (including background color); try
332 printf 'asdfqwerzxcv\rASDF\tZXCV\n'
333 in a bash(1) shell to demonstrate this. This is not what the
334 user would instinctively expect of HT (but is ok for CHT).
335 The instinctive behavior would include clearing the terminal
336 cells that are skipped over by HT with blank cells in the
337 current screen attributes, including background color;
338 the boolean dest_tabs_magic_smso terminfo(5) capability
339 indicates this saner behavior for HT, but only some rare
340 terminals have it (although it also indicates a special
341 glitch with standout mode in the Teleray terminal for which
342 it was initially introduced). The remedy is to add "\33[K"
343 after each SGR sequence, be it START (to fix the behavior
344 of any HT after that before another SGR) or END (to fix the
345 behavior of an HT in default background color that would
346 follow a line-wrapping at the bottom of the screen in another
347 background color, and to complement doing it after START).
348 Piping grep's output through a pager such as less(1) avoids
349 any HT problems since the pager performs tab expansion.
350
351 Generic disadvantages of this remedy are:
352 -- Some very rare terminals might support SGR but not EL (nobody
353 will use "grep --color" on a terminal that does not support
354 SGR in the first place).
355 -- Having these extra control sequences might somewhat complicate
356 the task of any program trying to parse "grep --color"
357 output in order to extract structuring information from it.
358 A specific disadvantage to doing it after SGR START is:
359 -- Even more possible background color flicker (when timing
360 with the monitor's redraw is just right), even when not at the
361 bottom of the screen.
362 There are no additional disadvantages specific to doing it after
363 SGR END.
364
365 It would be impractical for GNU grep to become a full-fledged
366 terminal program linked against ncurses or the like, so it will
367 not detect terminfo(5) capabilities. */
368static const char *sgr_start = "\33[%sm\33[K";
369static const char *sgr_end = "\33[m\33[K";
370
371/* SGR utility functions. */
372static void
373pr_sgr_start (char const *s)
374{
375 if (*s)
376 print_start_colorize (sgr_start, s);
377}
378static void
379pr_sgr_end (char const *s)
380{
381 if (*s)
382 print_end_colorize (sgr_end);
383}
384static void
385pr_sgr_start_if (char const *s)
386{
387 if (color_option)
388 pr_sgr_start (s);
389}
390static void
391pr_sgr_end_if (char const *s)
392{
393 if (color_option)
394 pr_sgr_end (s);
395}
396
/* One entry of the GREP_COLORS capability table (see color_dict).  */
struct color_cap
  {
    /* Capability name, e.g. "mt" or "fn"; NULL in the table's
       terminating entry.  */
    const char *name;
    /* Color-string variable associated with the capability, or NULL
       when the capability has no color string of its own.  */
    const char **var;
    /* Optional callback associated with the capability, or NULL.  */
    void (*fct) (void);
  };
403
404static void
405color_cap_mt_fct (void)
406{
407 /* Our caller just set selected_match_color. */
408 context_match_color = selected_match_color;
409}
410
411static void
412color_cap_rv_fct (void)
413{
414 /* By this point, it was 1 (or already -1). */
415 color_option = -1; /* That's still != 0. */
416}
417
418static void
419color_cap_ne_fct (void)
420{
421 sgr_start = "\33[%sm";
422 sgr_end = "\33[m";
423}
424
425/* For GREP_COLORS. */
426static const struct color_cap color_dict[] =
427 {
428 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
429 { "ms", &selected_match_color, NULL }, /* selected matched text */
430 { "mc", &context_match_color, NULL }, /* context matched text */
431 { "fn", &filename_color, NULL }, /* filename */
432 { "ln", &line_num_color, NULL }, /* line number */
433 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
434 { "se", &sep_color, NULL }, /* separator */
435 { "sl", &selected_line_color, NULL }, /* selected lines */
436 { "cx", &context_line_color, NULL }, /* context lines */
437 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
438 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
439 { NULL, NULL, NULL }
440 };
441
442/* Saved errno value from failed output functions on stdout. */
443static int stdout_errno;
444
445#ifdef KMK_GREP
446# ifdef KBUILD_OS_WINDOWS
447# include <assert.h>
448static void fwrite_errno (void const *, size_t, size_t);
449static int g_fStdOutIsConsole = -1; /* TRUE or FALSE; -1 if not initialize. */
450#endif
451
/* Try to switch to the code page named by pszCodepage, leaving the
   rest of the locale at its default.  A failure only produces a
   warning.  Does nothing on non-Windows hosts.  */
static void kmk_grep_set_codepage (const char *pszCodepage)
{
# ifdef KBUILD_OS_WINDOWS
  char szWithDot[256];
  const char *pszLocale = pszCodepage;

  /* The locale string must start with a dot; add one if missing.  */
  if (*pszCodepage != '.')
    {
      snprintf (szWithDot, sizeof(szWithDot), ".%s", pszCodepage);
      pszLocale = szWithDot;
    }

  if (!setlocale (LC_ALL, pszLocale))
    error (0, errno, _("warning: setlocale (LC_ALL, \"%s\") failed"),
           pszLocale);

  /* Force the console state to be re-detected on the next write.  */
  g_fStdOutIsConsole = -1;
# endif
}
471#endif /* KMK_GREP */
472
473static void
474putchar_errno (int c)
475{
476#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
477 char ch = (char)c;
478 fwrite_errno (&ch, 1, 1);
479#else
480 if (putchar (c) < 0)
481 stdout_errno = errno;
482#endif
483}
484
485static void
486fputs_errno (char const *s)
487{
488#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
489 fwrite_errno (s, 1, strlen (s));
490#else
491 if (fputs (s, stdout) < 0)
492 stdout_errno = errno;
493#endif
494}
495
496static void _GL_ATTRIBUTE_FORMAT_PRINTF_STANDARD (1, 2)
497printf_errno (char const *format, ...)
498{
499 va_list ap;
500 va_start (ap, format);
501#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
502 char szBuf[1024]; /* Only really used for a PRIuMAX number and maybe a newline. */
503 int cch = vsnprintf (szBuf, sizeof (szBuf), format, ap);
504 assert (cch < sizeof(szBuf));
505 fwrite_errno (szBuf, 1, cch);
506#else
507 if (vfprintf (stdout, format, ap) < 0)
508 stdout_errno = errno;
509#endif
510 va_end (ap);
511}
512
513static void
514fwrite_errno (void const *ptr, size_t size, size_t nmemb)
515{
516#if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
517 /*
518 * This trick reduces the runtime of 'grep -r GNU .' in the grep source dir
519 * from just above 11 seconds to around 0.8 seconds.
520 *
521 * The trouble with the microsoft CRTs (both the old and the new UCRT), is
522 * that we end up writing one char at the time when writing to the console,
523 * which is a total performance killer. write_double_translated_ansi_nolock()
524 * and write_requires_double_translation_nolock() in lowio/write.cpp in the
525 * UCRT sources have further details.
526 */
527 static HANDLE s_hStdOut = INVALID_HANDLE_VALUE;
528 if (g_fStdOutIsConsole != -1)
529 { /* likely*/ }
530 else
531 {
532 DWORD fModeIgnored;
533 s_hStdOut = (HANDLE)_get_osfhandle (fileno (stdout));
534 g_fStdOutIsConsole = GetConsoleMode (s_hStdOut, &fModeIgnored)
535 ? TRUE : FALSE;
536 if (getenv ("KMK_GREP_CONSOLE_DEBUG"))
537 fprintf (stderr, "kmk_grep: hStdOut=%p %sconsole codepage=%u ansi=%u\n",
538 s_hStdOut, g_fStdOutIsConsole ? "" : "!",
539 get_crt_codepage (), get_ansi_codepage ());
540 }
541 if (g_fStdOutIsConsole == TRUE && size && nmemb)
542 {
543 size_t const cbToWrite = size * nmemb;
544 if ( cbToWrite < (size_t)INT_MAX / 4
545 && cbToWrite >= size
546 && cbToWrite >= nmemb)
547 {
548 /* ASSUME that one input byte won't be translated to more than one
549 surrogate pair, or two compound UTF-16 codepoints. */
550 wchar_t awcBuf[1024];
551 wchar_t *pawcFree = NULL;
552 wchar_t *pawcBuf;
553 size_t cwcBuf = cbToWrite * 2 + 16;
554 if (cwcBuf < sizeof(awcBuf) / sizeof(wchar_t))
555 {
556 cwcBuf = sizeof(awcBuf) / sizeof(wchar_t);
557 pawcBuf = awcBuf;
558 }
559 else
560 pawcFree = pawcBuf = (wchar_t *)malloc(cwcBuf * sizeof(wchar_t));
561 if (pawcBuf)
562 {
563 int cwcToWrite = MultiByteToWideChar(get_crt_codepage(),
564 0 /*dwFlags*/,
565 ptr, (int)cbToWrite,
566 pawcBuf, (int)(cwcBuf - 1));
567 if (cwcToWrite > 0)
568 {
569 pawcBuf[cwcToWrite] = '\0';
570
571 /* Let the CRT do the rest. At least the Visual C++ 2010 CRT
572 sources indicates _cputws will do the right thing. */
573 fflush(stdout);
574 int rc = _cputws(pawcBuf);
575 if (pawcFree)
576 free(pawcFree);
577 if (rc != 0)
578 stdout_errno = errno;
579 return;
580 }
581 free(pawcFree);
582 }
583 }
584 }
585#endif
586 if (fwrite (ptr, size, nmemb, stdout) != nmemb)
587 stdout_errno = errno;
588}
589
590static void
591fflush_errno (void)
592{
593 if (fflush (stdout) != 0)
594 stdout_errno = errno;
595}
596
597static struct exclude *excluded_patterns[2];
598static struct exclude *excluded_directory_patterns[2];
599/* Short options. */
600static char const short_options[] =
601"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
602
603/* Non-boolean long options that have no corresponding short equivalents. */
604enum
605{
606 BINARY_FILES_OPTION = CHAR_MAX + 1,
607 COLOR_OPTION,
608 EXCLUDE_DIRECTORY_OPTION,
609 EXCLUDE_OPTION,
610 EXCLUDE_FROM_OPTION,
611 GROUP_SEPARATOR_OPTION,
612 INCLUDE_OPTION,
613 LINE_BUFFERED_OPTION,
614 LABEL_OPTION,
615#ifdef KMK_GREP
616 UTF8_OPTION,
617 CODEPAGE_OPTION,
618#endif
619 NO_IGNORE_CASE_OPTION
620};
621
622/* Long options equivalences. */
623static struct option const long_options[] =
624{
625 {"basic-regexp", no_argument, NULL, 'G'},
626 {"extended-regexp", no_argument, NULL, 'E'},
627 {"fixed-regexp", no_argument, NULL, 'F'},
628 {"fixed-strings", no_argument, NULL, 'F'},
629 {"perl-regexp", no_argument, NULL, 'P'},
630 {"after-context", required_argument, NULL, 'A'},
631 {"before-context", required_argument, NULL, 'B'},
632 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
633 {"byte-offset", no_argument, NULL, 'b'},
634 {"context", required_argument, NULL, 'C'},
635 {"color", optional_argument, NULL, COLOR_OPTION},
636 {"colour", optional_argument, NULL, COLOR_OPTION},
637 {"count", no_argument, NULL, 'c'},
638 {"devices", required_argument, NULL, 'D'},
639 {"directories", required_argument, NULL, 'd'},
640 {"exclude", required_argument, NULL, EXCLUDE_OPTION},
641 {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
642 {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
643 {"file", required_argument, NULL, 'f'},
644 {"files-with-matches", no_argument, NULL, 'l'},
645 {"files-without-match", no_argument, NULL, 'L'},
646 {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
647 {"help", no_argument, &show_help, 1},
648 {"include", required_argument, NULL, INCLUDE_OPTION},
649 {"ignore-case", no_argument, NULL, 'i'},
650 {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
651 {"initial-tab", no_argument, NULL, 'T'},
652 {"label", required_argument, NULL, LABEL_OPTION},
653 {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
654 {"line-number", no_argument, NULL, 'n'},
655 {"line-regexp", no_argument, NULL, 'x'},
656 {"max-count", required_argument, NULL, 'm'},
657
658 {"no-filename", no_argument, NULL, 'h'},
659 {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
660 {"no-messages", no_argument, NULL, 's'},
661 {"null", no_argument, NULL, 'Z'},
662 {"null-data", no_argument, NULL, 'z'},
663 {"only-matching", no_argument, NULL, 'o'},
664 {"quiet", no_argument, NULL, 'q'},
665 {"recursive", no_argument, NULL, 'r'},
666 {"dereference-recursive", no_argument, NULL, 'R'},
667 {"regexp", required_argument, NULL, 'e'},
668 {"invert-match", no_argument, NULL, 'v'},
669 {"silent", no_argument, NULL, 'q'},
670 {"text", no_argument, NULL, 'a'},
671 {"binary", no_argument, NULL, 'U'},
672 {"unix-byte-offsets", no_argument, NULL, 'u'},
673 {"version", no_argument, NULL, 'V'},
674 {"with-filename", no_argument, NULL, 'H'},
675 {"word-regexp", no_argument, NULL, 'w'},
676#ifdef KMK_GREP
677 {"utf8", no_argument, NULL, UTF8_OPTION},
678 {"cp", required_argument, NULL, CODEPAGE_OPTION},
679 {"codepage", required_argument, NULL, CODEPAGE_OPTION},
680#endif
681 {0, 0, 0, 0}
682};
683
684/* Define flags declared in grep.h. */
685bool match_icase;
686bool match_words;
687bool match_lines;
688char eolbyte;
689
690/* For error messages. */
691/* The input file name, or (if standard input) null or a --label argument. */
692static char const *filename;
693/* Omit leading "./" from file names in diagnostics. */
694static bool omit_dot_slash;
695static bool errseen;
696
697/* True if output from the current input file has been suppressed
698 because an output line had an encoding error. */
699static bool encoding_error_output;
700
701enum directories_type
702 {
703 READ_DIRECTORIES = 2,
704 RECURSE_DIRECTORIES,
705 SKIP_DIRECTORIES
706 };
707
708/* How to handle directories. */
709static char const *const directories_args[] =
710{
711 "read", "recurse", "skip", NULL
712};
713static enum directories_type const directories_types[] =
714{
715 READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
716};
717ARGMATCH_VERIFY (directories_args, directories_types);
718
719static enum directories_type directories = READ_DIRECTORIES;
720
721enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
722static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
723
724/* How to handle devices. */
725static enum
726 {
727 READ_COMMAND_LINE_DEVICES,
728 READ_DEVICES,
729 SKIP_DEVICES
730 } devices = READ_COMMAND_LINE_DEVICES;
731
732static bool grepfile (int, char const *, bool, bool);
733static bool grepdesc (int, bool);
734
/* Return true if the file mode M denotes a character device, block
   device, socket or FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
740
741static bool
742skip_devices (bool command_line)
743{
744 return (devices == SKIP_DEVICES
745 || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
746}
747
/* Return true if ST->st_size is defined.  Assume the file is not a
   symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  if (S_TYPEISSHM (st))
    return true;
  return S_TYPEISTMO (st) != 0;
}
755
756/* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them.
757 Do not rely on these finding data or holes if they equal SEEK_SET. */
758#ifndef SEEK_DATA
759enum { SEEK_DATA = SEEK_SET };
760#endif
761#ifndef SEEK_HOLE
762enum { SEEK_HOLE = SEEK_SET };
763#endif
764
765/* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input. */
766static bool seek_failed;
767static bool seek_data_failed;
768
769/* Functions we'll use to search. */
770typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
771typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
772 char const *);
773static execute_fp_t execute;
774static void *compiled_pattern;
775
776char const *
777input_filename (void)
778{
779 if (!filename)
780 filename = _("(standard input)");
781 return filename;
782}
783
784/* Unless requested, diagnose an error about the input file. */
785static void
786suppressible_error (int errnum)
787{
788 if (! suppress_errors)
789 error (0, errnum, "%s", input_filename ());
790 errseen = true;
791}
792
793/* If there has already been a write error, don't bother closing
794 standard output, as that might elicit a duplicate diagnostic. */
795static void
796clean_up_stdout (void)
797{
798 if (! stdout_errno)
799 close_stdout ();
800}
801
/* A cast to TYPE of VAL.  Use this when TYPE is a pointer type, VAL
   is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
   the alignment and would otherwise complain about the cast.

   The version test selects GCC 4.6 or newer.  That variant evaluates
   VAL once and wraps the cast in push/ignore/pop diagnostic pragmas;
   other compilers get a plain cast.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) val_ = val;                        \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) val_;                                        \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
816
817/* An unsigned type suitable for fast matching. */
818typedef uintmax_t uword;
819static uword const uword_max = UINTMAX_MAX;
820
821struct localeinfo localeinfo;
822
823/* A mask to test for unibyte characters, with the pattern repeated to
824 fill a uword. For a multibyte character encoding where
825 all bytes are unibyte characters, this is 0. For UTF-8, this is
826 0x808080.... For encodings where unibyte characters have no discerned
827 pattern, this is all 1s. The unsigned char C is a unibyte
828 character if C & UNIBYTE_MASK is zero. If the uword W is the
829 concatenation of bytes, the bytes are all unibyte characters
830 if W & UNIBYTE_MASK is zero. */
831static uword unibyte_mask;
832
833static void
834initialize_unibyte_mask (void)
835{
836 /* For each encoding error I that MASK does not already match,
837 accumulate I's most significant 1 bit by ORing it into MASK.
838 Although any 1 bit of I could be used, in practice high-order
839 bits work better. */
840 unsigned char mask = 0;
841 int ms1b = 1;
842 for (int i = 1; i <= UCHAR_MAX; i++)
843 if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
844 {
845 while (ms1b * 2 <= i)
846 ms1b *= 2;
847 mask |= ms1b;
848 }
849
850 /* Now MASK will detect any encoding-error byte, although it may
851 cry wolf and it may not be optimal. Build a uword-length mask by
852 repeating MASK. */
853 unibyte_mask = uword_max / UCHAR_MAX * mask;
854}
855
/* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
   that is not easy, and return a pointer to the first non-easy byte.
   The easy bytes all have UNIBYTE_MASK off.  */
static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes (char const *buf)
{
  /* Search a byte at a time until the pointer is aligned, then a
     uword at a time until a match is found, then a byte at a time to
     identify the exact byte.  The uword search may go slightly past
     the buffer end, but that's benign.  */
  char const *p;
  uword const *s;
  /* Phase 1: single bytes up to the next uword-aligned address.  */
  for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
    if (to_uchar (*p) & unibyte_mask)
      return p;
  /* Phase 2: whole uwords; stop at the first word with any mask bit
     set.  Termination relies on the caller's sentinel.  */
  for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
    continue;
  /* Phase 3: locate the exact byte inside that word.  */
  for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
    continue;
  return p;
}
877
/* Return true if BUF, of size SIZE, has an encoding error.
   BUF must be followed by at least sizeof (uword) bytes,
   the first of which may be modified.  */
static bool
buf_has_encoding_errors (char *buf, size_t size)
{
  /* A zero mask means no byte can be flagged, so there is nothing
     to check.  */
  if (! unibyte_mask)
    return false;

  mbstate_t mbs = { 0 };
  size_t clen;

  /* Plant a sentinel with all bits set just past the data, so that
     skip_easy_bytes always stops by BUF + SIZE.  */
  buf[size] = -1;
  for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
    {
      /* P is at a byte flagged by the mask; measure the character
         that starts here.  */
      clen = mbrlen (p, buf + size - p, &mbs);
      /* (size_t) -1 and (size_t) -2 are mbrlen's results for an
         invalid and an incomplete sequence respectively.  */
      if ((size_t) -2 <= clen)
        return true;
    }

  return false;
}
900
901
/* Return true if the SIZE bytes at BUF include a null byte.
   BUF must be followed by at least one more byte, which this
   function overwrites.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* Terminate the data so strlen stops at BUF + SIZE at the latest;
     a shorter length means a null byte lies inside the data.  */
  buf[size] = '\0';
  size_t first_nul = strlen (buf);
  return first_nul != size;
}
911
912/* Return true if a file is known to contain null bytes.
913 SIZE bytes have already been read from the file
914 with descriptor FD and status ST. */
915static bool
916file_must_have_nulls (size_t size, int fd, struct stat const *st)
917{
918 /* If the file has holes, it must contain a null byte somewhere. */
919 if (SEEK_HOLE != SEEK_SET && !seek_failed
920 && usable_st_size (st) && size < st->st_size)
921 {
922 off_t cur = size;
923 if (O_BINARY || fd == STDIN_FILENO)
924 {
925 cur = lseek (fd, 0, SEEK_CUR);
926 if (cur < 0)
927 return false;
928 }
929
930 /* Look for a hole after the current location. */
931 off_t hole_start = lseek (fd, cur, SEEK_HOLE);
932 if (0 <= hole_start)
933 {
934 if (lseek (fd, cur, SEEK_SET) < 0)
935 suppressible_error (errno);
936 if (hole_start < st->st_size)
937 return true;
938 }
939 }
940
941 return false;
942}
943
944/* Convert STR to a nonnegative integer, storing the result in *OUT.
945 STR must be a valid context length argument; report an error if it
946 isn't. Silently ceiling *OUT at the maximum value, as that is
947 practically equivalent to infinity for grep's purposes. */
948static void
949context_length_arg (char const *str, intmax_t *out)
950{
951 switch (xstrtoimax (str, 0, 10, out, ""))
952 {
953 case LONGINT_OK:
954 case LONGINT_OVERFLOW:
955 if (0 <= *out)
956 break;
957 FALLTHROUGH;
958 default:
959 die (EXIT_TROUBLE, 0, "%s: %s", str,
960 _("invalid context length argument"));
961 }
962}
963
964/* Return the add_exclude options suitable for excluding a file name.
965 If COMMAND_LINE, it is a command-line file name. */
966static int
967exclude_options (bool command_line)
968{
969 return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
970}
971
972/* Return true if the file with NAME should be skipped.
973 If COMMAND_LINE, it is a command-line argument.
974 If IS_DIR, it is a directory. */
975static bool
976skipped_file (char const *name, bool command_line, bool is_dir)
977{
978 struct exclude **pats;
979 if (! is_dir)
980 pats = excluded_patterns;
981 else if (directories == SKIP_DIRECTORIES)
982 return true;
983 else if (command_line && omit_dot_slash)
984 return false;
985 else
986 pats = excluded_directory_patterns;
987 return pats[command_line] && excluded_file_name (pats[command_line], name);
988}
989
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size, unless a read yields a partial page.

   The variables below are file-scope state shared by reset, fillbuf
   and the scanning and printing routines that follow.  */

static char *buffer;            /* Base of buffer. */
static size_t bufalloc;         /* Allocated buffer size, counting slop. */
static int bufdesc;             /* File descriptor. */
static char *bufbeg;            /* Beginning of user-visible stuff. */
static char *buflim;            /* Limit of user-visible stuff. */
static size_t pagesize;         /* alignment of memory pages */
static off_t bufoffset;         /* Read offset. */
static off_t after_last_match;  /* File offset just after the last matching
                                   line that would have been output if we
                                   were outputting characters. */
static bool skip_nuls;          /* Skip '\0' in data. */
static bool skip_empty_lines;   /* Skip empty lines in data. */
static uintmax_t totalnl;       /* Total newline count before lastnl. */

/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };
1010
/* Round VAL up to the nearest multiple of ALIGNMENT, leaving it
   unchanged when it already is one.  VAL may be an integer or a
   pointer.  Both arguments are expanded more than once, so they must
   be free of side effects.  */
#define ALIGN_TO(val, alignment) \
  ((val) + ((uintptr_t) (val) % (alignment) != 0 \
            ? (alignment) - (uintptr_t) (val) % (alignment) \
            : 0))
1017
1018/* Add two numbers that count input bytes or lines, and report an
1019 error if the addition overflows. */
1020static uintmax_t
1021add_count (uintmax_t a, uintmax_t b)
1022{
1023 uintmax_t sum = a + b;
1024 if (sum < a)
1025 die (EXIT_TROUBLE, 0, _("input is too large to count"));
1026 return sum;
1027}
1028
/* Return true if every one of the SIZE bytes at BUF is zero.
   An empty buffer (SIZE == 0) counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  size_t i = 0;
  while (i < size && buf[i] == '\0')
    i++;
  return i == size;
}
1038
1039/* Reset the buffer for a new file, returning false if we should skip it.
1040 Initialize on the first time through. */
1041static bool
1042reset (int fd, struct stat const *st)
1043{
1044 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
1045 bufbeg[-1] = eolbyte;
1046 bufdesc = fd;
1047 bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
1048 seek_failed = bufoffset < 0;
1049
1050 /* Assume SEEK_DATA fails if SEEK_CUR does. */
1051 seek_data_failed = seek_failed;
1052
1053 if (seek_failed)
1054 {
1055 if (errno != ESPIPE)
1056 {
1057 suppressible_error (errno);
1058 return false;
1059 }
1060 bufoffset = 0;
1061 }
1062 return true;
1063}
1064
/* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
   points just after the end.  Return false if there's an error.

   SAVE is the number of bytes immediately before 'buflim' that must
   be preserved; they end up starting at 'bufbeg'.  ST is the status
   of the file being read; its size, when usable, limits how far the
   buffer is grown.  On a read error the buffer gains no new bytes.  */
static bool
fillbuf (size_t save, struct stat const *st)
{
  size_t fillsize;
  bool cc = true;
  char *readbuf;
  size_t readsize;

  /* Read in place if at least a page is free between 'buflim' and the
     uword reserved at the very end of the allocation.  */
  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
    readbuf = buflim;
  else
    {
      size_t minsize = save + pagesize;
      size_t newsize;
      size_t newalloc;
      char *newbuf;

      /* Grow newsize until it is at least as great as minsize.  */
      for (newsize = bufalloc - pagesize - sizeof (uword);
           newsize < minsize;
           newsize *= 2)
        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
          xalloc_die ();

      /* Try not to allocate more memory than the file size indicates,
         as that might cause unnecessary memory exhaustion if the file
         is large.  However, do not use the original file size as a
         heuristic if we've already read past the file end, as most
         likely the file is growing.  */
      if (usable_st_size (st))
        {
          off_t to_be_read = st->st_size - bufoffset;
          off_t maxsize_off = save + to_be_read;
          /* The chained comparisons reject a negative remainder,
             overflow of the off_t sum, and values that do not survive
             conversion to size_t.  */
          if (0 <= to_be_read && to_be_read <= maxsize_off
              && maxsize_off == (size_t) maxsize_off
              && minsize <= (size_t) maxsize_off
              && (size_t) maxsize_off < newsize)
            newsize = maxsize_off;
        }

      /* Add enough room so that the buffer is aligned and has room
         for byte sentinels fore and aft, and so that a uword can
         be read aft.  */
      newalloc = newsize + pagesize + sizeof (uword);

      /* Allocate only when the current buffer is too small; otherwise
         the saved bytes are just slid within the existing buffer.  */
      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
      readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
      size_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
      memmove (readbuf - moved, buflim - moved, moved);
      if (newbuf != buffer)
        {
          free (buffer);
          buffer = newbuf;
        }
    }

  bufbeg = readbuf - save;

  clear_asan_poison ();

  /* Read as many whole pages as fit before the reserved trailing
     uword.  */
  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
  readsize -= readsize % pagesize;

  while (true)
    {
      fillsize = safe_read (bufdesc, readbuf, readsize);
      if (fillsize == SAFE_READ_ERROR)
        {
          fillsize = 0;
          cc = false;
        }
      bufoffset += fillsize;

      /* Keep the data unless skip_nuls is set and the read was nonempty
         and consisted entirely of zero bytes.  */
      if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
        break;
      /* Each discarded zero byte is added to the line count.  */
      totalnl = add_count (totalnl, fillsize);

      if (SEEK_DATA != SEEK_SET && !seek_data_failed)
        {
          /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
          off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
          if (data_start < 0 && errno == ENXIO
              && usable_st_size (st) && bufoffset < st->st_size)
            data_start = lseek (bufdesc, 0, SEEK_END);

          if (data_start < 0)
            seek_data_failed = true;
          else
            {
              /* The bytes jumped over are counted like the zero bytes
                 that were actually read.  */
              totalnl = add_count (totalnl, data_start - bufoffset);
              bufoffset = data_start;
            }
        }
    }

  buflim = readbuf + fillsize;

  /* Initialize the following word, because skip_easy_bytes and some
     matchers read (but do not use) those bytes.  This avoids false
     positive reports of these bytes being used uninitialized.  */
  memset (buflim, 0, sizeof (uword));

  /* Mark the part of the buffer not filled by the read or set by
     the above memset call as ASAN-poisoned.  */
  asan_poison (buflim + sizeof (uword),
               bufalloc - (buflim - buffer) - sizeof (uword));

  return cc;
}
1178
/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,          /* A match in binary data is reported with
                                   a "binary file matches" diagnostic. */
  TEXT_BINARY_FILES,            /* Skip the null-byte and encoding-error
                                   checks. */
  WITHOUT_MATCH_BINARY_FILES    /* A file found to contain nulls yields
                                   no matches. */
} binary_files;         /* How to handle binary files.  */

/* Options for output as a list of matching/non-matching files */
static enum
{
  LISTFILES_NONE,               /* Do not list file names. */
  LISTFILES_MATCHING,           /* List files with selected lines. */
  LISTFILES_NONMATCHING,        /* List files with no selected lines. */
} list_files;

/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

static int filename_mask;       /* If zero, output nulls after filenames.  */
static bool out_quiet;          /* Suppress all normal output. */
static bool out_invert;         /* Print nonmatching stuff. */
static bool out_line;           /* Print line numbers. */
static bool out_byte;           /* Print byte offsets. */
static intmax_t out_before;     /* Lines of leading context. */
static intmax_t out_after;      /* Lines of trailing context. */
static bool count_matches;      /* Count matching lines. */
static intmax_t max_count;      /* Max number of selected
                                   lines from an input file.  */
static bool line_buffered;      /* Use line buffering.  */
static char *label = NULL;      /* Fake filename for stdin */


/* Internal variables to keep track of byte count, context, etc. */
static uintmax_t totalcc;       /* Total character count before bufbeg. */
static char const *lastnl;      /* Pointer after last newline counted. */
static char *lastout;           /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static intmax_t outleft;        /* Maximum number of selected lines
                                   that may still be output. */
static intmax_t pending;        /* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;      /* Stop scanning file on first match.  */
static bool exit_on_match;      /* Exit on first match.  */
static bool dev_null_output;    /* Stdout is known to be /dev/null.  */
static bool binary;             /* Use binary rather than text I/O.  */
1227
1228static void
1229nlscan (char const *lim)
1230{
1231 size_t newlines = 0;
1232 for (char const *beg = lastnl; beg < lim; beg++)
1233 {
1234 beg = memchr (beg, eolbyte, lim - beg);
1235 if (!beg)
1236 break;
1237 newlines++;
1238 }
1239 totalnl = add_count (totalnl, newlines);
1240 lastnl = lim;
1241}
1242
1243/* Print the current filename. */
1244static void
1245print_filename (void)
1246{
1247 pr_sgr_start_if (filename_color);
1248 fputs_errno (input_filename ());
1249 pr_sgr_end_if (filename_color);
1250}
1251
1252/* Print a character separator. */
1253static void
1254print_sep (char sep)
1255{
1256 pr_sgr_start_if (sep_color);
1257 putchar_errno (sep);
1258 pr_sgr_end_if (sep_color);
1259}
1260
1261/* Print a line number or a byte offset. */
1262static void
1263print_offset (uintmax_t pos, const char *color)
1264{
1265 pr_sgr_start_if (color);
1266 printf_errno ("%*"PRIuMAX, offset_width, pos);
1267 pr_sgr_end_if (color);
1268}
1269
1270/* Print a whole line head (filename, line, byte). The output data
1271 starts at BEG and contains LEN bytes; it is followed by at least
1272 sizeof (uword) bytes, the first of which may be temporarily modified.
1273 The output data comes from what is perhaps a larger input line that
1274 goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
1275 the separator on output.
1276
1277 Return true unless the line was suppressed due to an encoding error. */
1278
1279static bool
1280print_line_head (char *beg, size_t len, char const *lim, char sep)
1281{
1282 if (binary_files != TEXT_BINARY_FILES)
1283 {
1284 char ch = beg[len];
1285 bool encoding_errors = buf_has_encoding_errors (beg, len);
1286 beg[len] = ch;
1287 if (encoding_errors)
1288 {
1289 encoding_error_output = true;
1290 return false;
1291 }
1292 }
1293
1294 if (out_file)
1295 {
1296 print_filename ();
1297 if (filename_mask)
1298 print_sep (sep);
1299 else
1300 putchar_errno (0);
1301 }
1302
1303 if (out_line)
1304 {
1305 if (lastnl < lim)
1306 {
1307 nlscan (beg);
1308 totalnl = add_count (totalnl, 1);
1309 lastnl = lim;
1310 }
1311 print_offset (totalnl, line_num_color);
1312 print_sep (sep);
1313 }
1314
1315 if (out_byte)
1316 {
1317 uintmax_t pos = add_count (totalcc, beg - bufbeg);
1318 print_offset (pos, byte_num_color);
1319 print_sep (sep);
1320 }
1321
1322 if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1323 putchar_errno ('\t');
1324
1325 return true;
1326}
1327
/* Print the matched parts of the line BEG..LIM by re-running the
   matcher to locate each match in turn.  With only_matching, every
   nonempty match goes on its own output line preceded by a line head.
   Otherwise the text before each match is written after a
   pr_sgr_start (LINE_COLOR) call and the match itself is bracketed by
   the MATCH_COLOR start and end calls.

   Return a pointer to the first byte of the line not yet output (LIM
   when only_matching is set), or NULL if print_line_head suppressed
   the output.  */
static char *
print_line_middle (char *beg, char *lim,
                   const char *line_color, const char *match_color)
{
  size_t match_size;
  size_t match_offset;
  char *cur;
  char *mid = NULL;     /* Where the unprinted text starts if empty
                           matches have been stepped over.  */
  char *b;

  /* The search always covers the whole line BEG..LIM, with CUR passed
     as the position to start matching from.  */
  for (cur = beg;
       (cur < lim
        && ((match_offset = execute (compiled_pattern, beg, lim - beg,
                                     &match_size, cur)) != (size_t) -1));
       cur = b + match_size)
    {
      b = beg + match_offset;

      /* Avoid matching the empty line at the end of the buffer. */
      if (b == lim)
        break;

      /* Avoid hanging on grep --color "" foo */
      if (match_size == 0)
        {
          /* Make minimal progress; there may be further non-empty matches.  */
          /* XXX - Could really advance by one whole multi-octet character.  */
          match_size = 1;
          if (!mid)
            mid = cur;
        }
      else
        {
          /* This function is called on a matching line only,
             but is it selected or rejected/context?  */
          if (only_matching)
            {
              char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
              if (! print_line_head (b, match_size, lim, sep))
                return NULL;
            }
          else
            {
              pr_sgr_start (line_color);
              /* Include any text skipped over by earlier empty
                 matches.  */
              if (mid)
                {
                  cur = mid;
                  mid = NULL;
                }
              fwrite_errno (cur, 1, b - cur);
            }

          pr_sgr_start_if (match_color);
          fwrite_errno (b, 1, match_size);
          pr_sgr_end_if (match_color);
          if (only_matching)
            putchar_errno (eolbyte);
        }
    }

  /* With only_matching nothing more of the line is to be printed;
     otherwise resume from the start of any text skipped over by
     trailing empty matches.  */
  if (only_matching)
    cur = lim;
  else if (mid)
    cur = mid;

  return cur;
}
1395
1396static char *
1397print_line_tail (char *beg, const char *lim, const char *line_color)
1398{
1399 size_t eol_size;
1400 size_t tail_size;
1401
1402 eol_size = (lim > beg && lim[-1] == eolbyte);
1403 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1404 tail_size = lim - eol_size - beg;
1405
1406 if (tail_size > 0)
1407 {
1408 pr_sgr_start (line_color);
1409 fwrite_errno (beg, 1, tail_size);
1410 beg += tail_size;
1411 pr_sgr_end (line_color);
1412 }
1413
1414 return beg;
1415}
1416
/* Output the line BEG..LIM, where LIM points just past the line's
   terminator.  SEP is the separator to use in the line head and also
   tells whether the line is selected (SEP_CHAR_SELECTED) or context.
   Dies with EXIT_TROUBLE on a write error.  On success LASTOUT is set
   to LIM; if the line is suppressed (print_line_head or
   print_line_middle reports failure) the function returns early and
   LASTOUT is left unchanged.  */
static void
prline (char *beg, char *lim, char sep)
{
  bool matching;
  const char *line_color;
  const char *match_color;

  /* With only_matching the head is printed per match by
     print_line_middle instead.  The length passed excludes the
     line's final byte.  */
  if (!only_matching)
    if (! print_line_head (beg, lim - beg - 1, lim, sep))
      return;

  /* A selected line matches the pattern unless the sense is
     inverted.  */
  matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;

  if (color_option)
    {
      line_color = (((sep == SEP_CHAR_SELECTED)
                     ^ (out_invert && (color_option < 0)))
                    ? selected_line_color  : context_line_color);
      match_color = (sep == SEP_CHAR_SELECTED
                     ? selected_match_color : context_match_color);
    }
  else
    line_color = match_color = NULL; /* Shouldn't be used.  */

  if ((only_matching && matching)
      || (color_option && (*line_color || *match_color)))
    {
      /* We already know that non-matching lines have no match (to colorize). */
      if (matching && (only_matching || *match_color))
        {
          beg = print_line_middle (beg, lim, line_color, match_color);
          if (! beg)
            return;
        }

      if (!only_matching && *line_color)
        {
          /* This code is exercised at least when grep is invoked like this:
             echo k| GREP_COLORS='sl=01;32' src/grep k --color=always  */
          beg = print_line_tail (beg, lim, line_color);
        }
    }

  /* Write whatever the helpers above left unprinted; with no
     colouring this is the whole line.  */
  if (!only_matching && lim > beg)
    fwrite_errno (beg, 1, lim - beg);

  if (line_buffered)
    fflush_errno ();

  if (stdout_errno)
    die (EXIT_TROUBLE, stdout_errno, _("write error"));

  lastout = lim;
}
1471
1472/* Print pending lines of trailing context prior to LIM. */
1473static void
1474prpending (char const *lim)
1475{
1476 if (!lastout)
1477 lastout = bufbeg;
1478 for (; 0 < pending && lastout < lim; pending--)
1479 {
1480 char *nl = rawmemchr (lastout, eolbyte);
1481 prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1482 }
1483}
1484
/* Output the lines between BEG and LIM.  Deal with context.

   Unless out_quiet is set, pending trailing context from an earlier
   call is printed first, then up to out_before lines of leading
   context and, where applicable, a group separator.  With out_invert
   the range may hold several lines, of which at most OUTLEFT are
   taken; otherwise it is a single line.  Afterwards AFTER_LAST_MATCH
   and PENDING are updated and OUTLEFT is reduced by the number of
   lines taken.  */
static void
prtext (char *beg, char *lim)
{
  static bool used;     /* Avoid printing SEP_STR_GROUP before any output.  */
  char eol = eolbyte;

  if (!out_quiet && pending > 0)
    prpending (beg);

  char *p = beg;

  if (!out_quiet)
    {
      /* Deal with leading context.  */
      char const *bp = lastout ? lastout : bufbeg;
      intmax_t i;
      /* Step P back one line at a time, but never before BP.  */
      for (i = 0; i < out_before; ++i)
        if (p > bp)
          do
            --p;
          while (p[-1] != eol);

      /* Print the group separator unless the output is adjacent to
         the previous output in the file.  */
      if ((0 <= out_before || 0 <= out_after) && used
          && p != lastout && group_separator)
        {
          pr_sgr_start_if (sep_color);
          fputs_errno (group_separator);
          pr_sgr_end_if (sep_color);
          putchar_errno ('\n');
        }

      /* Print the leading context lines found above.  */
      while (p < beg)
        {
          char *nl = rawmemchr (p, eol);
          nl++;
          prline (p, nl, SEP_CHAR_REJECTED);
          p = nl;
        }
    }

  intmax_t n;
  if (out_invert)
    {
      /* One or more lines are output.  */
      for (n = 0; p < lim && n < outleft; n++)
        {
          char *nl = rawmemchr (p, eol);
          nl++;
          if (!out_quiet)
            prline (p, nl, SEP_CHAR_SELECTED);
          p = nl;
        }
    }
  else
    {
      /* Just one line is output.  */
      if (!out_quiet)
        prline (beg, lim, SEP_CHAR_SELECTED);
      n = 1;
      p = lim;
    }

  /* Convert P, a position in the buffer, to an offset in the file.  */
  after_last_match = bufoffset - (buflim - p);
  pending = out_quiet ? 0 : MAX (0, out_after);
  used = true;
  outleft -= n;
}
1555
/* Overwrite every NUL byte in the range P up to (not including) LIM
   with EOL.  This avoids running out of memory when binary input
   contains a long sequence of zeros, which would otherwise be
   considered to be part of a long line.  *LIM must be writable: it is
   used as a temporary sentinel and is left set to EOL.  If EOL is
   itself NUL the function does nothing.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (eol == '\0')
    return;

  for (;;)
    {
      /* Let strlen find the next NUL, bounded by a temporary
         terminator at LIM.  */
      *lim = '\0';
      p += strlen (p);
      *lim = eol;

      if (p == lim)
        return;

      /* P is at a NUL inside the range: replace the whole run.  The
         EOL now stored at LIM stops the run at the boundary.  */
      do
        {
          *p = eol;
          p++;
        }
      while (*p == '\0');
    }
}
1576
/* Scan the specified portion of the buffer, BEG up to LIM, matching
   lines (or between matching lines if OUT_INVERT is true).  Return a
   count of lines printed, computed as the decrease in OUTLEFT.  This
   function does not itself replace NUL bytes; its caller grep calls
   zap_nuls on the data before scanning.  May exit the program when
   EXIT_ON_MATCH is set.  */
static intmax_t
grepbuf (char *beg, char const *lim)
{
  intmax_t outleft0 = outleft;
  char *endp;

  for (char *p = beg; p < lim; p = endp)
    {
      size_t match_size;
      size_t match_offset = execute (compiled_pattern, p, lim - p,
                                     &match_size, NULL);
      if (match_offset == (size_t) -1)
        {
          if (!out_invert)
            break;
          /* No further match: when inverting, everything up to LIM
             is nonmatching and so selected.  */
          match_offset = lim - p;
          match_size = 0;
        }
      char *b = p + match_offset;
      endp = b + match_size;
      /* Avoid matching the empty line at the end of the buffer. */
      if (!out_invert && b == lim)
        break;
      if (!out_invert || p < b)
        {
          /* Normally print the match B..ENDP; when inverting, print
             the nonmatching text P..B that precedes it.  */
          char *prbeg = out_invert ? p : b;
          char *prend = out_invert ? b : endp;
          prtext (prbeg, prend);
          if (!outleft || done_on_match)
            {
              if (exit_on_match)
                exit (errseen ? exit_failure : EXIT_SUCCESS);
              break;
            }
        }
    }

  return outleft0 - outleft;
}
1619
/* Search a given (non-directory) file.  Return a count of lines printed.
   Set *INEOF to true if end-of-file reached.

   FD is the open descriptor and ST its status.  The file is read
   through fillbuf one buffer at a time; complete lines are handed to
   grepbuf, while an incomplete last line (the "residue") and any
   lines needed as leading context are carried over into the next
   buffer.  Return 0 if the file is skipped or the first read fails.  */
static intmax_t
grep (int fd, struct stat const *st, bool *ineof)
{
  intmax_t nlines, i;
  size_t residue, save;
  char oldc;
  char *beg;
  char *lim;
  char eol = eolbyte;
  char nul_zapper = '\0';
  /* Saved so that the binary-file handling below can be undone at
     finish_grep.  */
  bool done_on_match_0 = done_on_match;
  bool out_quiet_0 = out_quiet;

  /* The value of NLINES when nulls were first deduced in the input;
     this is not necessarily the same as the number of matching lines
     before the first null.  -1 if no input nulls have been deduced.  */
  intmax_t nlines_first_null = -1;

  if (! reset (fd, st))
    return 0;

  /* Start the per-file counters and output state afresh.  */
  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  outleft = max_count;
  after_last_match = 0;
  pending = 0;
  skip_nuls = skip_empty_lines && !eol;
  encoding_error_output = false;

  nlines = 0;
  residue = 0;
  save = 0;

  if (! fillbuf (save, st))
    {
      suppressible_error (errno);
      return 0;
    }

  offset_width = 0;
  if (align_tabs)
    {
      /* Width is log of maximum number.  Line numbers are origin-1.  */
      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
      num += out_line && num < UINTMAX_MAX;
      do
        offset_width++;
      while ((num /= 10) != 0);
    }

  for (bool firsttime = true; ; firsttime = false)
    {
      /* Until nulls have been deduced once, check each new buffer for
         them (and, the first time only, check the file for holes).  */
      if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
          && (buf_has_nulls (bufbeg, buflim - bufbeg)
              || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
        {
          if (binary_files == WITHOUT_MATCH_BINARY_FILES)
            return 0;
          /* Unless counting, stop at the first match and print no
             lines from here on.  */
          if (!count_matches)
            done_on_match = out_quiet = true;
          nlines_first_null = nlines;
          nul_zapper = eol;
          skip_nuls = skip_empty_lines;
        }

      lastnl = bufbeg;
      if (lastout)
        lastout = bufbeg;

      beg = bufbeg + save;

      /* no more data to scan (eof) except for maybe a residue -> break */
      if (beg == buflim)
        {
          *ineof = true;
          break;
        }

      zap_nuls (beg, buflim, nul_zapper);

      /* Determine new residue (the length of an incomplete line at the end of
         the buffer, 0 means there is no incomplete last line).  */
      /* Temporarily plant EOL just before BEG so that the backward
         search below cannot fail.  */
      oldc = beg[-1];
      beg[-1] = eol;
      /* FIXME: use rawmemrchr if/when it exists, since we have ensured
         that this use of memrchr is guaranteed never to return NULL.  */
      lim = memrchr (beg - 1, eol, buflim - beg + 1);
      ++lim;
      beg[-1] = oldc;
      /* If the new data holds no terminator, it only extends the old
         residue.  */
      if (lim == beg)
        lim = beg - residue;
      beg -= residue;
      residue = buflim - lim;

      if (beg < lim)
        {
          if (outleft)
            nlines += grepbuf (beg, lim);
          if (pending)
            prpending (lim);
          /* Stop once nothing more can be output, or on a match made
             after nulls were deduced when stopping on match.  */
          if ((!outleft && !pending)
              || (done_on_match && MAX (0, nlines_first_null) < nlines))
            goto finish_grep;
        }

      /* The last OUT_BEFORE lines at the end of the buffer will be needed as
         leading context if there is a matching line at the begin of the
         next data. Make beg point to their begin.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
        {
          ++i;
          do
            --beg;
          while (beg[-1] != eol);
        }

      /* Detect whether leading context is adjacent to previous output.  */
      if (beg != lastout)
        lastout = 0;

      /* Handle some details and read more data to scan.  */
      save = residue + lim - beg;
      if (out_byte)
        totalcc = add_count (totalcc, buflim - bufbeg - save);
      if (out_line)
        nlscan (beg);
      if (! fillbuf (save, st))
        {
          suppressible_error (errno);
          goto finish_grep;
        }
    }
  /* End of file: if the last line lacks a terminator, append one and
     scan that line too.  */
  if (residue)
    {
      *buflim++ = eol;
      if (outleft)
        nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  done_on_match = done_on_match_0;
  out_quiet = out_quiet_0;
  /* Report a binary match if output was suppressed for an encoding
     error, or if lines were counted after nulls were deduced.  */
  if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
      && (encoding_error_output
          || (0 <= nlines_first_null && nlines_first_null < nlines)))
    error (0, 0, _("%s: binary file matches"), input_filename ());
  return nlines;
}
1775
/* Process ENT, one entry produced by the traversal FTS.  COMMAND_LINE
   is true if the traversal was started for a command-line argument; it
   is only honoured for entries at the root level.  Entries that are
   skipped or cannot be searched yield true; otherwise the result is
   that of grepfile on the entry.  */
static bool
grepdirent (FTS *fts, FTSENT *ent, bool command_line)
{
  bool follow;
  command_line &= ent->fts_level == FTS_ROOTLEVEL;

  /* Nothing to do for the FTS_DP entry of a directory.  */
  if (ent->fts_info == FTS_DP)
    return true;

  /* Apply the exclusion patterns to everything below the command-line
     level, and keep the traversal out of what is skipped.  */
  if (!command_line
      && skipped_file (ent->fts_name, false,
                       (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
                        || ent->fts_info == FTS_DNR)))
    {
      fts_set (fts, ent, FTS_SKIP);
      return true;
    }

  filename = ent->fts_path;
  /* Skip the first two characters of the path when omit_dot_slash is
     set (presumably a leading "./" -- confirm against the caller).  */
  if (omit_dot_slash && filename[1])
    filename += 2;
  follow = (fts->fts_options & FTS_LOGICAL
            || (fts->fts_options & FTS_COMFOLLOW && command_line));

  switch (ent->fts_info)
    {
    case FTS_D:
      /* When recursing, the directory's contents arrive as later
         entries; otherwise do not descend.  */
      if (directories == RECURSE_DIRECTORIES)
        return true;
      fts_set (fts, ent, FTS_SKIP);
      break;

    case FTS_DC:
      if (!suppress_errors)
        error (0, 0, _("%s: warning: recursive directory loop"), filename);
      return true;

    case FTS_DNR:
    case FTS_ERR:
    case FTS_NS:
      suppressible_error (ent->fts_errno);
      return true;

    case FTS_DEFAULT:
    case FTS_NSOK:
      if (skip_devices (command_line))
        {
          struct stat *st = ent->fts_statp;
#if !defined(KMK_GREP) || !defined(_MSC_VER) /** @todo revisit this */
          struct stat st1;
          if (! st->st_mode)
            {
              /* The file type is not already known.  Get the file status
                 before opening, since opening might have side effects
                 on a device.  */
              int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
              if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
                {
                  suppressible_error (errno);
                  return true;
                }
              st = &st1;
            }
#endif
          if (is_device_mode (st->st_mode))
            return true;
        }
      break;

    case FTS_F:
    case FTS_SLNONE:
      break;

    case FTS_SL:
    case FTS_W:
      return true;

    default:
      abort ();
    }

  return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
}
1859
/* Return true if ERR is an errno value that 'open ("symlink", ...
   O_NOFOLLOW ...)' may fail with because the name is a symlink.
   POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on
   NetBSD; EFTYPE is only tested where the platform defines it.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool is_symlink_error = err == ELOOP || err == EMLINK;
#ifdef EFTYPE
  is_symlink_error = is_symlink_error || err == EFTYPE;
#endif
  return is_symlink_error;
}
1873
1874static bool
1875grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1876{
1877 int oflag = (O_RDONLY | O_NOCTTY
1878 | (IGNORE_DUPLICATE_BRANCH_WARNING
1879 (binary ? O_BINARY : 0))
1880 | (follow ? 0 : O_NOFOLLOW)
1881 | (skip_devices (command_line) ? O_NONBLOCK : 0));
1882 int desc = openat_safer (dirdesc, name, oflag);
1883 if (desc < 0)
1884 {
1885 if (follow || ! open_symlink_nofollow_error (errno))
1886 suppressible_error (errno);
1887 return true;
1888 }
1889 return grepdesc (desc, command_line);
1890}
1891
1892/* Read all data from FD, with status ST. Return true if successful,
1893 false (setting errno) otherwise. */
1894static bool
1895drain_input (int fd, struct stat const *st)
1896{
1897 ssize_t nbytes;
1898 if (S_ISFIFO (st->st_mode) && dev_null_output)
1899 {
1900#ifdef SPLICE_F_MOVE
1901 /* Should be faster, since it need not copy data to user space. */
1902 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1903 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1904 if (0 <= nbytes || errno != EINVAL)
1905 {
1906 while (0 < nbytes)
1907 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1908 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1909 return nbytes == 0;
1910 }
1911#endif
1912 }
1913 while ((nbytes = safe_read (fd, buffer, bufalloc)))
1914 if (nbytes == SAFE_READ_ERROR)
1915 return false;
1916 return true;
1917}
1918
1919/* Finish reading from FD, with status ST and where end-of-file has
1920 been seen if INEOF. Typically this is a no-op, but when reading
1921 from standard input this may adjust the file offset or drain a
1922 pipe. */
1923
1924static void
1925finalize_input (int fd, struct stat const *st, bool ineof)
1926{
1927 if (fd == STDIN_FILENO
1928 && (outleft
1929 ? (!ineof
1930 && (seek_failed
1931 || (lseek (fd, 0, SEEK_END) < 0
1932 /* Linux proc file system has EINVAL (Bug#25180). */
1933 && errno != EINVAL))
1934 && ! drain_input (fd, st))
1935 : (bufoffset != after_last_match && !seek_failed
1936 && lseek (fd, after_last_match, SEEK_SET) < 0)))
1937 suppressible_error (errno);
1938}
1939
/* Search the file open on descriptor DESC; COMMAND_LINE says whether
   it was named on the command line.  A directory is traversed with fts
   when recursion is enabled, and files that are to be skipped are
   closed without being searched.  Return true if no lines were counted
   as selected (this includes skipped files and early failures), false
   otherwise; for a directory the results of its entries are combined.
   DESC is closed before returning unless it is standard input.  */
static bool
grepdesc (int desc, bool command_line)
{
  intmax_t count;
  bool status = true;
  bool ineof = false;
  struct stat st;

  /* Get the file status, possibly for the second time.  This catches
     a race condition if the directory entry changes after the
     directory entry is read and before the file is opened.  For
     example, normally DESC is a directory only at the top level, but
     there is an exception if some other process substitutes a
     directory for a non-directory while 'grep' is running.  */
  if (fstat (desc, &st) != 0)
    {
      suppressible_error (errno);
      goto closeout;
    }

  if (desc != STDIN_FILENO && skip_devices (command_line)
      && is_device_mode (st.st_mode))
    goto closeout;

  /* Exclusion patterns for command-line names are applied here.  */
  if (desc != STDIN_FILENO && command_line
      && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
    goto closeout;

  /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'.  */
  if (out_file < 0)
    out_file = !!S_ISDIR (st.st_mode);

  if (desc != STDIN_FILENO
      && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
    {
      /* Traverse the directory starting with its full name, because
         unfortunately fts provides no way to traverse the directory
         starting from its file descriptor.  */

      FTS *fts;
      FTSENT *ent;
      int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
      char *fts_arg[2];

      /* Close DESC now, to conserve file descriptors if the race
         condition occurs many times in a deep recursion.  */
      if (close (desc) != 0)
        suppressible_error (errno);

      fts_arg[0] = (char *) filename;
      fts_arg[1] = NULL;
      fts = fts_open (fts_arg, opts, NULL);

      if (!fts)
        xalloc_die ();
      /* STATUS stays true only if every entry reports true.  */
      while ((ent = fts_read (fts)))
        status &= grepdirent (fts, ent, command_line);
      /* NOTE(review): this relies on fts_read leaving errno zero at
         the normal end of the traversal -- confirm against the fts
         implementation in use.  */
      if (errno)
        suppressible_error (errno);
      if (fts_close (fts) != 0)
        suppressible_error (errno);
      return status;
    }
  if (desc != STDIN_FILENO
      && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
          || ((devices == SKIP_DEVICES
               || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
              && is_device_mode (st.st_mode))))
    goto closeout;

  /* If there is a regular file on stdout and the current file refers
     to the same i-node, we have to report the problem and skip it.
     Otherwise when matching lines from some other input reach the
     disk before we open this file, we can end up reading and matching
     those lines and appending them to the file from which we're reading.
     Then we'd have what appears to be an infinite loop that'd terminate
     only upon filling the output file system or reaching a quota.
     However, there is no risk of an infinite loop if grep is generating
     no output, i.e., with --silent, --quiet, -q.
     Similarly, with any of these:
       --max-count=N (-m) (for N >= 2)
       --files-with-matches (-l)
       --files-without-match (-L)
     there is no risk of trouble.
     For --max-count=1, grep stops after printing the first match,
     so there is no risk of malfunction.  But even --max-count=2, with
     input==output, while there is no risk of infloop, there is a race
     condition that could result in "alternate" output.  */
  if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
      && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
    {
      if (! suppress_errors)
        error (0, 0, _("%s: input file is also the output"), input_filename ());
      errseen = true;
      goto closeout;
    }

  count = grep (desc, &st, &ineof);
  if (count_matches)
    {
      if (out_file)
        {
          print_filename ();
          if (filename_mask)
            print_sep (SEP_CHAR_SELECTED);
          else
            putchar_errno (0);
        }
      printf_errno ("%" PRIdMAX "\n", count);
      if (line_buffered)
        fflush_errno ();
    }

  /* True means that no lines were counted for this file.  */
  status = !count;

  if (list_files == LISTFILES_NONE)
    finalize_input (desc, &st, ineof);
  else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
    {
      print_filename ();
      /* The name is followed by a newline, or by a null byte when
         filename_mask is zero.  */
      putchar_errno ('\n' & filename_mask);
      if (line_buffered)
        fflush_errno ();
    }

 closeout:
  if (desc != STDIN_FILENO && close (desc) != 0)
    suppressible_error (errno);
  return status;
}
2070
2071static bool
2072grep_command_line_arg (char const *arg)
2073{
2074 if (STREQ (arg, "-"))
2075 {
2076 filename = label;
2077 if (binary)
2078 xset_binary_mode (STDIN_FILENO, O_BINARY);
2079 return grepdesc (STDIN_FILENO, true);
2080 }
2081 else
2082 {
2083 filename = arg;
2084 return grepfile (AT_FDCWD, arg, true, true);
2085 }
2086}
2087
/* Print a usage message and terminate the program with exit status STATUS.

   A nonzero STATUS is treated as a usage error: only a one-line synopsis
   and a pointer to --help are written, and they go to stderr.  A zero
   STATUS means help was requested: the full option summary is written to
   stdout, followed by the bug-reporting address.

   This function never returns; it always ends by calling exit.  */
_Noreturn void usage (int);
void
usage (int status)
{
  if (status != 0)
    {
      /* Error path: keep the diagnostic short and send it to stderr.  */
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
               getprogname ());
      fprintf (stderr, _("Try '%s --help' for more information.\n"),
               getprogname ());
    }
  else
    {
      /* Help path: the text is emitted in several printf calls, each
         with its own translatable string.  */
      printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
      printf (_("Search for PATTERNS in each FILE.\n"));
      printf (_("\
Example: %s -i 'hello world' menu.h main.c\n\
PATTERNS can contain multiple patterns separated by newlines.\n\
\n\
Pattern selection and interpretation:\n"), getprogname ());
      printf (_("\
 -E, --extended-regexp PATTERNS are extended regular expressions\n\
 -F, --fixed-strings PATTERNS are strings\n\
 -G, --basic-regexp PATTERNS are basic regular expressions\n\
 -P, --perl-regexp PATTERNS are Perl regular expressions\n"));
      /* -X is deliberately undocumented.  */
      printf (_("\
 -e, --regexp=PATTERNS use PATTERNS for matching\n\
 -f, --file=FILE take PATTERNS from FILE\n\
 -i, --ignore-case ignore case distinctions in patterns and data\n\
 --no-ignore-case do not ignore case distinctions (default)\n\
 -w, --word-regexp match only whole words\n\
 -x, --line-regexp match only whole lines\n\
 -z, --null-data a data line ends in 0 byte, not newline\n"));
      printf (_("\
\n\
Miscellaneous:\n\
 -s, --no-messages suppress error messages\n\
 -v, --invert-match select non-matching lines\n\
 -V, --version display version information and exit\n\
 --help display this help text and exit\n"));
      printf (_("\
\n\
Output control:\n\
 -m, --max-count=NUM stop after NUM selected lines\n\
 -b, --byte-offset print the byte offset with output lines\n\
 -n, --line-number print line number with output lines\n\
 --line-buffered flush output on every line\n\
 -H, --with-filename print file name with output lines\n\
 -h, --no-filename suppress the file name prefix on output\n\
 --label=LABEL use LABEL as the standard input file name prefix\n\
"));
      printf (_("\
 -o, --only-matching show only nonempty parts of lines that match\n\
 -q, --quiet, --silent suppress all normal output\n\
 --binary-files=TYPE assume that binary files are TYPE;\n\
 TYPE is 'binary', 'text', or 'without-match'\n\
 -a, --text equivalent to --binary-files=text\n\
"));
      printf (_("\
 -I equivalent to --binary-files=without-match\n\
 -d, --directories=ACTION how to handle directories;\n\
 ACTION is 'read', 'recurse', or 'skip'\n\
 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
 ACTION is 'read' or 'skip'\n\
 -r, --recursive like --directories=recurse\n\
 -R, --dereference-recursive likewise, but follow all symlinks\n\
"));
      printf (_("\
 --include=GLOB search only files that match GLOB (a file pattern)"
              "\n\
 --exclude=GLOB skip files that match GLOB\n\
 --exclude-from=FILE skip files that match any file pattern from FILE\n\
 --exclude-dir=GLOB skip directories that match GLOB\n\
"));
      printf (_("\
 -L, --files-without-match print only names of FILEs with no selected lines\n\
 -l, --files-with-matches print only names of FILEs with selected lines\n\
 -c, --count print only a count of selected lines per FILE\n\
 -T, --initial-tab make tabs line up (if needed)\n\
 -Z, --null print 0 byte after FILE name\n"));
      printf (_("\
\n\
Context control:\n\
 -B, --before-context=NUM print NUM lines of leading context\n\
 -A, --after-context=NUM print NUM lines of trailing context\n\
 -C, --context=NUM print NUM lines of output context\n\
"));
      printf (_("\
 -NUM same as --context=NUM\n\
 --group-separator=SEP print SEP on line between matches with context\n\
 --no-group-separator do not print separator for matches with context\n\
 --color[=WHEN],\n\
 --colour[=WHEN] use markers to highlight the matching strings;\n\
 WHEN is 'always', 'never', or 'auto'\n\
 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
\n"));
#ifdef KMK_GREP
      /* Options that exist only when built as kmk_grep.  */
      printf (_("\
kmk_grep extensions:\n\
 --codepage=NUM switches the locale to the given codepage, \n\
 affecting how input files are treated and outputted\n\
 windows only, ignored elsewhere\n\
 --utf8 shorthand for --codepage=UTF8\n\
\n"));
#endif
      printf (_("\
When FILE is '-', read standard input. With no FILE, read '.' if\n\
recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\
Exit status is 0 if any line is selected, 1 otherwise;\n\
if any error occurs and -q is not given, the exit status is 2.\n"));
      emit_bug_reporting_address ();
    }
  /* Both paths terminate the process here.  */
  exit (status);
}
2203
/* Pattern compilers and matchers.

   Each entry names a matcher and supplies the pair of functions used to
   compile a pattern and to search with the compiled result.  setmatcher
   looks entries up by NAME, and the chosen index is later used to pick
   the compile and execute functions.  */

static struct
{
  char name[12];        /* matcher name compared by setmatcher */
  int syntax; /* used if compile == GEAcompile */
  compile_fp_t compile; /* pattern compiler for this matcher */
  execute_fp_t execute; /* search function for this matcher */
} const matchers[] = {
  { "grep", RE_SYNTAX_GREP, (compile_fp_t)GEAcompile, (execute_fp_t)EGexecute },
  { "egrep", RE_SYNTAX_EGREP, (compile_fp_t)GEAcompile, (execute_fp_t)EGexecute },
  { "fgrep", 0, (compile_fp_t)Fcompile, (execute_fp_t)Fexecute },
  { "awk", RE_SYNTAX_AWK, (compile_fp_t)GEAcompile, (execute_fp_t)EGexecute },
  { "gawk", RE_SYNTAX_GNU_AWK, (compile_fp_t)GEAcompile, (execute_fp_t)EGexecute },
  { "posixawk", RE_SYNTAX_POSIX_AWK, (compile_fp_t)GEAcompile, (execute_fp_t)EGexecute },
#if HAVE_LIBPCRE
  /* Present only when the build has libpcre; otherwise setmatcher
     rejects "perl" with a dedicated diagnostic.  */
  { "perl", 0, (compile_fp_t)Pcompile, (execute_fp_t)Pexecute },
#endif
};
/* Keep these in sync with the 'matchers' table: each constant is the
   position of the "egrep", "fgrep" and "grep" entry respectively.  */
enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
2225
2226/* Return the index of the matcher corresponding to M if available.
2227 MATCHER is the index of the previous matcher, or -1 if none.
2228 Exit in case of conflicts or if M is not available. */
2229static int
2230setmatcher (char const *m, int matcher)
2231{
2232 for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2233 if (STREQ (m, matchers[i].name))
2234 {
2235 if (0 <= matcher && matcher != i)
2236 die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2237 return i;
2238 }
2239
2240#if !HAVE_LIBPCRE
2241 if (STREQ (m, "perl"))
2242 die (EXIT_TROUBLE, 0,
2243 _("Perl matching not supported in a --disable-perl-regexp build"));
2244#endif
2245 die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2246}
2247
2248/* Get the next non-digit option from ARGC and ARGV.
2249 Return -1 if there are no more options.
2250 Process any digit options that were encountered on the way,
2251 and store the resulting integer into *DEFAULT_CONTEXT. */
2252static int
2253get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2254{
2255 static int prev_digit_optind = -1;
2256 int this_digit_optind;
2257 bool was_digit;
2258 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2259 char *p = buf;
2260 int opt;
2261
2262 was_digit = false;
2263 this_digit_optind = optind;
2264 while (true)
2265 {
2266 opt = getopt_long (argc, (char **) argv, short_options,
2267 long_options, NULL);
2268 if (! c_isdigit (opt))
2269 break;
2270
2271 if (prev_digit_optind != this_digit_optind || !was_digit)
2272 {
2273 /* Reset to start another context length argument. */
2274 p = buf;
2275 }
2276 else
2277 {
2278 /* Suppress trivial leading zeros, to avoid incorrect
2279 diagnostic on strings like 00000000000. */
2280 p -= buf[0] == '0';
2281 }
2282
2283 if (p == buf + sizeof buf - 4)
2284 {
2285 /* Too many digits. Append "..." to make context_length_arg
2286 complain about "X...", where X contains the digits seen
2287 so far. */
2288 strcpy (p, "...");
2289 p += 3;
2290 break;
2291 }
2292 *p++ = opt;
2293
2294 was_digit = true;
2295 prev_digit_optind = this_digit_optind;
2296 this_digit_optind = optind;
2297 }
2298 if (p != buf)
2299 {
2300 *p = '\0';
2301 context_length_arg (buf, default_context);
2302 }
2303
2304 return opt;
2305}
2306
2307/* Parse GREP_COLORS. The default would look like:
2308 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2309 with boolean capabilities (ne and rv) unset (i.e., omitted).
2310 No character escaping is needed or supported. */
2311static void
2312parse_grep_colors (void)
2313{
2314 const char *p;
2315 char *q;
2316 char *name;
2317 char *val;
2318
2319 p = getenv ("GREP_COLORS"); /* Plural! */
2320 if (p == NULL || *p == '\0')
2321 return;
2322
2323 /* Work off a writable copy. */
2324 q = xstrdup (p);
2325
2326 name = q;
2327 val = NULL;
2328 /* From now on, be well-formed or you're gone. */
2329 for (;;)
2330 if (*q == ':' || *q == '\0')
2331 {
2332 char c = *q;
2333 struct color_cap const *cap;
2334
2335 *q++ = '\0'; /* Terminate name or val. */
2336 /* Empty name without val (empty cap)
2337 * won't match and will be ignored. */
2338 for (cap = color_dict; cap->name; cap++)
2339 if (STREQ (cap->name, name))
2340 break;
2341 /* If name unknown, go on for forward compatibility. */
2342 if (cap->var && val)
2343 *(cap->var) = val;
2344 if (cap->fct)
2345 cap->fct ();
2346 if (c == '\0')
2347 return;
2348 name = q;
2349 val = NULL;
2350 }
2351 else if (*q == '=')
2352 {
2353 if (q == name || val)
2354 return;
2355 *q++ = '\0'; /* Terminate name. */
2356 val = q; /* Can be the empty string. */
2357 }
2358 else if (val == NULL)
2359 q++; /* Accumulate name. */
2360 else if (*q == ';' || c_isdigit (*q))
2361 q++; /* Accumulate val. Protect the terminal from being sent crap. */
2362 else
2363 return;
2364}
2365
/* Return true if the PATLEN bytes starting at PAT contain an encoding
   error.  The bytes are scanned one character at a time with mb_clen,
   starting from a zeroed conversion state; a result of (size_t) -1 or
   (size_t) -2 from mb_clen is reported as an error.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      size_t clen = mb_clen (pat + pos, patlen - pos, &state);
      if (clen >= (size_t) -2)
        return true;
      pos += clen;
    }

  return false;
}
2381
2382/* When ignoring case and (-E or -F or -G), then for each single-byte
2383 character I, ok_fold[I] is 1 if every case folded counterpart of I
2384 is also single-byte, and is -1 otherwise. */
2385static signed char ok_fold[NCHAR];
2386static void
2387setup_ok_fold (void)
2388{
2389 for (int i = 0; i < NCHAR; i++)
2390 {
2391 wint_t wi = localeinfo.sbctowc[i];
2392 if (wi == WEOF)
2393 continue;
2394
2395 int ok = 1;
2396 wchar_t folded[CASE_FOLDED_BUFSIZE];
2397 for (int n = case_folded_counterparts (wi, folded); 0 <= --n; )
2398 {
2399 char buf[MB_LEN_MAX];
2400 mbstate_t s = { 0 };
2401 if (wcrtomb (buf, folded[n], &s) != 1)
2402 {
2403 ok = -1;
2404 break;
2405 }
2406 }
2407 ok_fold[i] = ok;
2408 }
2409}
2410
2411/* Return the number of bytes in the initial character of PAT, of size
2412 PATLEN, if Fcompile can handle that character. Return -1 if
2413 Fcompile cannot handle it. MBS is the multibyte conversion state.
2414 PATLEN must be nonzero. */
2415
2416static int
2417fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2418{
2419 unsigned char pat0 = pat[0];
2420
2421 /* If PAT starts with a single-byte character, Fcompile works if
2422 every case folded counterpart is also single-byte. */
2423 if (localeinfo.sbctowc[pat0] != WEOF)
2424 return ok_fold[pat0];
2425
2426 wchar_t wc;
2427 size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2428
2429 /* If PAT starts with an encoding error, Fcompile does not work. */
2430 if (MB_LEN_MAX < wn)
2431 return -1;
2432
2433 /* PAT starts with a multibyte character. Fcompile works if the
2434 character has no case folded counterparts and toupper translates
2435 none of its encoding's bytes. */
2436 wchar_t folded[CASE_FOLDED_BUFSIZE];
2437 if (case_folded_counterparts (wc, folded))
2438 return -1;
2439 for (int i = wn; 0 < --i; )
2440 {
2441 unsigned char c = pat[i];
2442 if (toupper (c) != c)
2443 return -1;
2444 }
2445 return wn;
2446}
2447
/* Return true if Fcompile can process the -F patterns PAT, of size
   PATLEN, when ignoring case: that is, if they consist solely of
   single-byte characters whose case-folded forms are all single-byte,
   and of multibyte characters that are not subject to case folding.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = {0,};
  size_t consumed = 0;

  while (consumed < patlen)
    {
      int clen = fgrep_icase_charlen (pat + consumed, patlen - consumed,
                                      &state);
      if (clen < 0)
        return false;
      consumed += clen;
    }

  return true;
}
2468
/* Rewrite the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style
   by putting a backslash before each single-byte character that is
   special to grep.  The old buffer is freed; *KEYS_P and *LEN_P are
   updated to describe the newly allocated result, which is followed by
   a newline that is not counted in *LEN_P.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char *src = *keys_p;
  size_t left = *len_p;
  mbstate_t mb_state = { 0 };

  /* Worst case every byte gains a backslash; one more slot is needed
     for the trailing newline.  */
  char *new_keys = xnmalloc (left + 1, 2);
  char *dst = new_keys;

  while (left != 0)
    {
      size_t n = mb_clen (src, left, &mb_state);

      /* Only a character reported as one byte long, or an invalid
         byte, is examined for escaping.  */
      bool const one_byte = n == 1 || n == (size_t) -1;

      if (n == (size_t) -1)
        {
          /* Encoding error: restart the conversion state and treat the
             offending byte as a character of its own.  */
          memset (&mb_state, 0, sizeof mb_state);
          n = 1;
        }
      else if (n == (size_t) -2)
        {
          /* Incomplete character: it extends to the end of the
             pattern.  */
          n = left;
        }

      if (one_byte)
        {
          switch (*src)
            {
            case '$': case '*': case '.': case '[': case '\\': case '^':
              *dst++ = '\\';
              break;
            }
          *dst++ = *src;
        }
      else
        dst = mempcpy (dst, src, n);

      src += n;
      left -= n;
    }

  *dst = '\n';
  free (*keys_p);
  *keys_p = new_keys;
  *len_p = dst - new_keys;
}
2513
/* If it is easy, convert the MATCHER-style patterns KEYS (of size
   *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
   return F_MATCHER_INDEX.  If not, leave KEYS and *LEN_P alone and
   return MATCHER.  This function is conservative and sometimes misses
   conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
   to the -F pattern "a".

   The converted text is built in a scratch buffer and copied back over
   KEYS only on success and only if its length differs from *LEN_P; the
   scratch buffer is freed on every path.  */

static int
try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
{
  int result = matcher;            /* returned unchanged on failure */
  size_t len = *len_p;             /* bytes of KEYS not yet examined */
  char *new_keys = xmalloc (len + 1);
  char *p = new_keys;              /* write position in new_keys */
  char const *q = keys;            /* read position in KEYS */
  mbstate_t mb_state = { 0 };

  while (len != 0)
    {
      /* First decide whether the byte at Q is a regex construct that
         rules out conversion, or a backslash escape to be stripped.  */
      switch (*q)
        {
        case '$': case '*': case '.': case '[': case '^':
          goto fail;

        case '(': case '+': case '?': case '{': case '|':
          /* There is no "case ')'" here, as "grep -E ')'" acts like
             "grep -E '\)'".  */
          if (matcher != G_MATCHER_INDEX)
            goto fail;
          break;

        case '\\':
          if (1 < len)
            switch (q[1])
              {
              case '\n':
              case 'B': case 'S': case 'W': case'\'': case '<':
              case 'b': case 's': case 'w': case '`': case '>':
              case '1': case '2': case '3': case '4':
              case '5': case '6': case '7': case '8': case '9':
                goto fail;

              case '(': case '+': case '?': case '{': case '|':
                /* Pass '\)' to GEAcompile so it can complain.  Otherwise,
                   "grep '\)'" would act like "grep ')'" while "grep '.*\)'"
                   would be an error.  */
              case ')':
                if (matcher == G_MATCHER_INDEX)
                  goto fail;
                FALLTHROUGH;
              default:
                /* Skip the backslash so that only the escaped character
                   is copied below.  */
                q++, len--;
                break;
              }
          break;
        }

      /* Then copy one whole character, failing if it cannot be handled
         as a fixed string.  */
      {
        size_t n;
        if (match_icase)
          {
            int ni = fgrep_icase_charlen (q, len, &mb_state);
            if (ni < 0)
              goto fail;
            n = ni;
          }
        else
          {
            n = mb_clen (q, len, &mb_state);
            /* A value above MB_LEN_MAX is one of mb_clen's error
               returns.  */
            if (MB_LEN_MAX < n)
              goto fail;
          }

        p = mempcpy (p, q, n);
        q += n;
        len -= n;
      }
    }

  /* Success.  KEYS needs rewriting only if backslashes were dropped,
     i.e., if the result is shorter than the input.  */
  if (*len_p != p - new_keys)
    {
      *len_p = p - new_keys;
      char *keys_end = mempcpy (keys, new_keys, p - new_keys);
      *keys_end = '\n';
    }
  result = F_MATCHER_INDEX;

 fail:
  free (new_keys);
  return result;
}
2605
/* Program entry point.

   In order: set defaults and the locale, parse the command-line options
   (collecting patterns from -e and -f into KEYS), take the first operand
   as the pattern if no -e/-f was given, settle the output mode, choose
   and possibly switch the matcher, compile the patterns, allocate the
   input buffer, and finally search every file operand.

   The return value is EXIT_TROUBLE if an error was recorded in errseen.
   Otherwise it is the AND of the per-operand statuses returned by
   grep_command_line_arg, which the usage text documents as 0 if any
   line is selected and 1 otherwise.  */
int
main (int argc, char **argv)
{
  char *keys = NULL;               /* concatenated patterns, newline-separated */
  size_t keycc = 0, keyalloc = 0;  /* bytes used / allocated in KEYS */
  int matcher = -1;                /* index into matchers[], -1 = none chosen */
  int opt;
  int prev_optind, last_recursive;
  int fread_errno;
  intmax_t default_context;
  FILE *fp;
  exit_failure = EXIT_TROUBLE;
  initialize_main (&argc, &argv);

  /* Which command-line options have been specified for filename output.
     -1 for -h, 1 for -H, 0 for neither.  */
  int filename_option = 0;

  eolbyte = '\n';
  filename_mask = ~0;

  max_count = INTMAX_MAX;

  /* The value -1 means to use DEFAULT_CONTEXT. */
  out_after = out_before = -1;
  /* Default before/after context: changed by -C/-NUM options */
  default_context = -1;
  /* Changed by -o option */
  only_matching = false;

  /* Internationalization.  */
#if defined HAVE_SETLOCALE
# if defined(KMK_GREP) && defined(KBUILD_OS_WINDOWS)
  /* On Windows kmk_grep builds, KMK_GREP_CODEPAGE takes the place of the
     default locale setup when it is set.  */
  if (getenv ("KMK_GREP_CODEPAGE"))
    kmk_grep_set_codepage (getenv ("KMK_GREP_CODEPAGE"));
  else
# endif
  setlocale (LC_ALL, "");
#endif
#if defined ENABLE_NLS
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);
#endif

  init_localeinfo (&localeinfo);

  atexit (clean_up_stdout);
  c_stack_action (NULL);

  last_recursive = 0;

  pattern_table = hash_initialize (0, 0, hash_pattern, compare_patterns, 0);
  if (!pattern_table)
    xalloc_die ();

  /* Option loop.  PREV_OPTIND records optind as it was before each
     option is fetched; the recursive options save it in
     LAST_RECURSIVE.  */
  while (prev_optind = optind,
         (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
    switch (opt)
      {
      case 'A':
        context_length_arg (optarg, &out_after);
        break;

      case 'B':
        context_length_arg (optarg, &out_before);
        break;

      case 'C':
        /* Set output match context, but let any explicit leading or
           trailing amount specified with -A or -B stand.  */
        context_length_arg (optarg, &default_context);
        break;

      case 'D':
        if (STREQ (optarg, "read"))
          devices = READ_DEVICES;
        else if (STREQ (optarg, "skip"))
          devices = SKIP_DEVICES;
        else
          die (EXIT_TROUBLE, 0, _("unknown devices method"));
        break;

      case 'E':
        matcher = setmatcher ("egrep", matcher);
        break;

      case 'F':
        matcher = setmatcher ("fgrep", matcher);
        break;

      case 'P':
        matcher = setmatcher ("perl", matcher);
        break;

      case 'G':
        matcher = setmatcher ("grep", matcher);
        break;

      case 'X': /* undocumented on purpose */
        matcher = setmatcher (optarg, matcher);
        break;

      case 'H':
        filename_option = 1;
        break;

      case 'I':
        binary_files = WITHOUT_MATCH_BINARY_FILES;
        break;

      case 'T':
        align_tabs = true;
        break;

      case 'U':
        /* Has an effect only where O_BINARY is nonzero.  */
        if (O_BINARY)
          binary = true;
        break;

      case 'u':
        /* Obsolete option; it had no effect; FIXME: remove in 2023 */
        error (0, 0, _("warning: --unix-byte-offsets (-u) is obsolete"));
        break;

      case 'V':
        show_version = true;
        break;

      case 'a':
        binary_files = TEXT_BINARY_FILES;
        break;

      case 'b':
        out_byte = true;
        break;

      case 'c':
        count_matches = true;
        break;

      case 'd':
        directories = XARGMATCH ("--directories", optarg,
                                 directories_args, directories_types);
        if (directories == RECURSE_DIRECTORIES)
          last_recursive = prev_optind;
        break;

      case 'e':
        {
          /* Append the pattern plus a newline to KEYS, growing the
             buffer first if it is too small.  */
          ptrdiff_t cc = strlen (optarg);
          if (keyalloc < keycc + cc + 1)
            {
              keyalloc = keycc + cc + 1;
              pattern_array = keys = x2realloc (keys, &keyalloc);
            }
          char *keyend = mempcpy (keys + keycc, optarg, cc);
          *keyend = '\n';
          keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
        }
        break;

      case 'f':
        {
          /* Read patterns from a file, or from standard input when the
             argument is "-".  */
          if (STREQ (optarg, "-"))
            {
              if (binary)
                xset_binary_mode (STDIN_FILENO, O_BINARY);
              fp = stdin;
            }
          else
            {
              fp = fopen (optarg, binary ? "rb" : "r");
              if (!fp)
                die (EXIT_TROUBLE, errno, "%s", optarg);
            }
          /* Read until fread returns 0, always keeping one spare byte
             in KEYS for the newline that may be appended below.  */
          ptrdiff_t newkeycc = keycc, cc;
          for (;; newkeycc += cc)
            {
              if (keyalloc <= newkeycc + 1)
                pattern_array = keys = x2realloc (keys, &keyalloc);
              cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
              if (cc == 0)
                break;
            }
          /* Save errno right away, before ferror or fclose can
             disturb it.  */
          fread_errno = errno;
          if (ferror (fp))
            die (EXIT_TROUBLE, fread_errno, "%s", optarg);
          if (fp != stdin)
            fclose (fp);
          /* Append final newline if file ended in non-newline. */
          if (newkeycc != keycc && keys[newkeycc - 1] != '\n')
            keys[newkeycc++] = '\n';
          keycc = update_patterns (keys, keycc, newkeycc, optarg);
        }
        break;

      case 'h':
        filename_option = -1;
        break;

      case 'i':
      case 'y': /* For old-timers . . . */
        match_icase = true;
        break;

      case NO_IGNORE_CASE_OPTION:
        match_icase = false;
        break;

      case 'L':
        /* Like -l, except list files that don't contain matches.
           Inspired by the same option in Hume's gre. */
        list_files = LISTFILES_NONMATCHING;
        break;

      case 'l':
        list_files = LISTFILES_MATCHING;
        break;

      case 'm':
        /* An overflowing count is accepted as well as a valid one.  */
        switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
          {
          case LONGINT_OK:
          case LONGINT_OVERFLOW:
            break;

          default:
            die (EXIT_TROUBLE, 0, _("invalid max count"));
          }
        break;

      case 'n':
        out_line = true;
        break;

      case 'o':
        only_matching = true;
        break;

      case 'q':
        exit_on_match = true;
        exit_failure = 0;
        break;

      case 'R':
        fts_options = basic_fts_options | FTS_LOGICAL;
        FALLTHROUGH;
      case 'r':
        directories = RECURSE_DIRECTORIES;
        last_recursive = prev_optind;
        break;

      case 's':
        suppress_errors = true;
        break;

      case 'v':
        out_invert = true;
        break;

      case 'w':
        wordinit ();
        match_words = true;
        break;

      case 'x':
        match_lines = true;
        break;

      case 'Z':
        filename_mask = 0;
        break;

      case 'z':
        eolbyte = '\0';
        break;

      case BINARY_FILES_OPTION:
        if (STREQ (optarg, "binary"))
          binary_files = BINARY_BINARY_FILES;
        else if (STREQ (optarg, "text"))
          binary_files = TEXT_BINARY_FILES;
        else if (STREQ (optarg, "without-match"))
          binary_files = WITHOUT_MATCH_BINARY_FILES;
        else
          die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
        break;

      case COLOR_OPTION:
        /* color_option: 0 = never, 1 = always, 2 = decide later from
           the output device.  An unrecognized argument requests the
           help text.  */
        if (optarg)
          {
            if (!c_strcasecmp (optarg, "always")
                || !c_strcasecmp (optarg, "yes")
                || !c_strcasecmp (optarg, "force"))
              color_option = 1;
            else if (!c_strcasecmp (optarg, "never")
                     || !c_strcasecmp (optarg, "no")
                     || !c_strcasecmp (optarg, "none"))
              color_option = 0;
            else if (!c_strcasecmp (optarg, "auto")
                     || !c_strcasecmp (optarg, "tty")
                     || !c_strcasecmp (optarg, "if-tty"))
              color_option = 2;
            else
              show_help = 1;
          }
        else
          color_option = 2;
        break;

      case EXCLUDE_OPTION:
      case INCLUDE_OPTION:
        /* Each pattern is registered twice, once per slot of
           excluded_patterns, with the options exclude_options returns
           for that slot.  */
        for (int cmd = 0; cmd < 2; cmd++)
          {
            if (!excluded_patterns[cmd])
              excluded_patterns[cmd] = new_exclude ();
            add_exclude (excluded_patterns[cmd], optarg,
                         ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
                          | exclude_options (cmd)));
          }
        break;
      case EXCLUDE_FROM_OPTION:
        for (int cmd = 0; cmd < 2; cmd++)
          {
            if (!excluded_patterns[cmd])
              excluded_patterns[cmd] = new_exclude ();
            if (add_exclude_file (add_exclude, excluded_patterns[cmd],
                                  optarg, exclude_options (cmd), '\n')
                != 0)
              die (EXIT_TROUBLE, errno, "%s", optarg);
          }
        break;

      case EXCLUDE_DIRECTORY_OPTION:
        strip_trailing_slashes (optarg);
        for (int cmd = 0; cmd < 2; cmd++)
          {
            if (!excluded_directory_patterns[cmd])
              excluded_directory_patterns[cmd] = new_exclude ();
            add_exclude (excluded_directory_patterns[cmd], optarg,
                         exclude_options (cmd));
          }
        break;

      case GROUP_SEPARATOR_OPTION:
        group_separator = optarg;
        break;

      case LINE_BUFFERED_OPTION:
        line_buffered = true;
        break;

      case LABEL_OPTION:
        label = optarg;
        break;

#ifdef KMK_GREP
      /* The --utf8 and --codepage <cp> options are mainly for windows where
         UCRT doesn't check any of the standard locale selecting environment
         variables and we have to give it directly to setlocale if we want
         any control beyond the Windows defaults.

         The UCRT setlocale has a nice feature of allowing us to set just
         the codepage, omitting the rest of the locale spec. */
      case UTF8_OPTION:
        kmk_grep_set_codepage (".UTF-8");
        break;
      case CODEPAGE_OPTION:
        kmk_grep_set_codepage (optarg);
        break;
#endif

      case 0:
        /* long options */
        break;

      default:
        usage (EXIT_TROUBLE);
        break;

      }

  if (show_version)
    {
      version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
                   (char *) NULL);
      puts (_("Written by Mike Haertel and others; see\n"
              "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
      return EXIT_SUCCESS;
    }

  if (show_help)
    usage (EXIT_SUCCESS);

  /* Settle the pattern source: -e/-f if given, otherwise the first
     operand; with neither, it is a usage error.  */
  if (keys)
    {
      if (keycc == 0)
        {
          /* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
          out_invert ^= true;
          match_lines = match_words = false;
          keys[keycc++] = '\n';
        }
    }
  else if (optind < argc)
    {
      /* Make a copy so that it can be reallocated or freed later.  */
      pattern_array = keys = xstrdup (argv[optind++]);
      ptrdiff_t patlen = strlen (keys);
      /* The copy's terminating NUL is overwritten with the newline
         separator.  */
      keys[patlen] = '\n';
      keycc = update_patterns (keys, 0, patlen + 1, "");
    }
  else
    usage (EXIT_TROUBLE);

  /* Strip trailing newline from keys.  */
  keycc--;

  hash_free (pattern_table);

  /* Classify standard output: remember a regular file in out_stat, and
     tell /dev/null apart from other character devices.  */
  bool possibly_tty = false;
  struct stat tmp_stat;
  if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
    {
      if (S_ISREG (tmp_stat.st_mode))
        out_stat = tmp_stat;
      else if (S_ISCHR (tmp_stat.st_mode))
        {
          struct stat null_stat;
          if (stat ("/dev/null", &null_stat) == 0
              && SAME_INODE (tmp_stat, null_stat))
            dev_null_output = true;
          else
            possibly_tty = true;
        }
    }

  /* POSIX says -c, -l and -q are mutually exclusive.  In this
     implementation, -q overrides -l and -L, which in turn override -c.  */
  if (exit_on_match | dev_null_output)
    list_files = LISTFILES_NONE;
  if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
    {
      count_matches = false;
      done_on_match = true;
    }
  out_quiet = count_matches | done_on_match;

  /* Context lengths not set by -A/-B fall back to the -C/-NUM value.  */
  if (out_after < 0)
    out_after = default_context;
  if (out_before < 0)
    out_before = default_context;

  /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
     /dev/null'), fail without reading the input.  */
  if ((max_count == 0
       || (keycc == 0 && out_invert && !match_lines && !match_words))
      && list_files != LISTFILES_NONMATCHING)
    return EXIT_FAILURE;

  /* Resolve the "auto" color setting now that the output is known.  */
  if (color_option == 2)
    color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
  init_colorize ();

  if (color_option)
    {
      /* Legacy.  */
      char *userval = getenv ("GREP_COLOR");
      if (userval != NULL && *userval != '\0')
        selected_match_color = context_match_color = userval;

      /* New GREP_COLORS has priority.  */
      parse_grep_colors ();
    }

  initialize_unibyte_mask ();

  /* With no matcher option, default to the "grep" matcher.  */
  if (matcher < 0)
    matcher = G_MATCHER_INDEX;

  if (matcher == F_MATCHER_INDEX
      || matcher == E_MATCHER_INDEX || matcher == G_MATCHER_INDEX)
    {
      if (match_icase)
        setup_ok_fold ();

      /* In a single-byte locale, switch from -F to -G if it is a single
         pattern that matches words, where -G is typically faster.  In a
         multibyte locale, switch if the patterns have an encoding error
         (where -F does not work) or if -i and the patterns will not work
         for -iF.  */
      if (matcher == F_MATCHER_INDEX)
        {
          if (! localeinfo.multibyte
              ? n_patterns == 1 && match_words
              : (contains_encoding_error (keys, keycc)
                 || (match_icase && !fgrep_icase_available (keys, keycc))))
            {
              /* The conversion replaces the pattern buffer, so refresh
                 KEYS from pattern_array afterwards.  */
              fgrep_to_grep_pattern (&pattern_array, &keycc);
              keys = pattern_array;
              matcher = G_MATCHER_INDEX;
            }
        }
      /* With two or more patterns, if -F works then switch from either -E
         or -G, as -F is probably faster then.  */
      else if (1 < n_patterns)
        matcher = try_fgrep_pattern (matcher, keys, &keycc);
    }

  execute = matchers[matcher].execute;
  compiled_pattern =
    matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
                               only_matching | color_option);
  /* Probe the matcher with a line consisting of just the end-of-line
     byte, to learn whether empty lines can be skipped.
     We need one byte prior and one after.  */
  char eolbytes[3] = { 0, eolbyte, 0 };
  size_t match_size;
  skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
                                &match_size, NULL) == 0)
                      == out_invert);

  /* Decide whether file names prefix the output.  With neither -h nor
     -H and at most one operand, the value is 0, or -1 when recursing.  */
  int num_operands = argc - optind;
  out_file = (filename_option == 0 && num_operands <= 1
              ? - (directories == RECURSE_DIRECTORIES)
              : 0 <= filename_option);

  if (binary)
    xset_binary_mode (STDOUT_FILENO, O_BINARY);

  /* Prefer sysconf for page size, as getpagesize typically returns int.  */
#ifdef _SC_PAGESIZE
  long psize = sysconf (_SC_PAGESIZE);
#else
  long psize = getpagesize ();
#endif
  /* Refuse a page size for which the buffer size computation below
     could overflow.  */
  if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
    abort ();
  pagesize = psize;
  bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
  buffer = xmalloc (bufalloc);

  if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
    devices = READ_DEVICES;

  /* Choose what to search: the operands if any, else "." when a
     recursive option was given, else standard input.  */
  char *const *files;
  if (0 < num_operands)
    {
      files = argv + optind;
    }
  else if (directories == RECURSE_DIRECTORIES && 0 < last_recursive)
    {
      static char *const cwd_only[] = { (char *) ".", NULL };
      files = cwd_only;
      omit_dot_slash = true;
    }
  else
    {
      static char *const stdin_only[] = { (char *) "-", NULL };
      files = stdin_only;
    }

  /* STATUS stays true only if every operand's search returned true.  */
  bool status = true;
  do
    status &= grep_command_line_arg (*files++);
  while (*files != NULL);

  /* We register via atexit to test stdout.  */
  return errseen ? EXIT_TROUBLE : status;
}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette