VirtualBox

source: kBuild/vendor/grep/current/src/kwsearch.c@ 3530

Last change on this file since 3530 was 3529, checked in by bird, 3 years ago

Imported grep 3.7 from grep-3.7.tar.gz (sha256: c22b0cf2d4f6bbe599c902387e8058990e1eee99aef333a203829e5fd3dbb342), applying minimal auto-props.

  • Property svn:eol-style set to native
File size: 7.4 KB
Line 
1/* kwsearch.c - searching subroutines using kwset for grep.
2 Copyright 1992, 1998, 2000, 2007, 2009-2021 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19/* Written August 1992 by Mike Haertel. */
20
21#include <config.h>
22#include "search.h"
23
24/* A compiled -F pattern list. */
25
26struct kwsearch
27{
28 /* The kwset for this pattern list. */
29 kwset_t kwset;
30
31 /* The number of user-specified patterns. This is less than
32 'kwswords (kwset)' when some extra one-character words have been
33 appended, one for each troublesome character that will require a
34 DFA search. */
35 ptrdiff_t words;
36
37 /* The user's pattern and its size in bytes. */
38 char *pattern;
39 size_t size;
40
41 /* The user's pattern compiled as a regular expression,
42 or null if it has not been compiled. */
43 void *re;
44};
45
46/* Compile the -F style PATTERN, containing SIZE bytes that are
47 followed by '\n'. Return a description of the compiled pattern. */
48
49void *
50Fcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
51{
52 kwset_t kwset;
53 char *buf = NULL;
54 size_t bufalloc = 0;
55
56 kwset = kwsinit (true);
57
58 char const *p = pattern;
59 do
60 {
61 char const *sep = rawmemchr (p, '\n');
62 ptrdiff_t len = sep - p;
63
64 if (match_lines)
65 {
66 if (eolbyte == '\n' && pattern < p)
67 p--;
68 else
69 {
70 if (bufalloc < len + 2)
71 {
72 free (buf);
73 bufalloc = len + 2;
74 buf = x2realloc (NULL, &bufalloc);
75 buf[0] = eolbyte;
76 }
77 memcpy (buf + 1, p, len);
78 buf[len + 1] = eolbyte;
79 p = buf;
80 }
81 len += 2;
82 }
83 kwsincr (kwset, p, len);
84
85 p = sep + 1;
86 }
87 while (p <= pattern + size);
88
89 free (buf);
90
91 ptrdiff_t words = kwswords (kwset);
92 kwsprep (kwset);
93
94 struct kwsearch *kwsearch = xmalloc (sizeof *kwsearch);
95 kwsearch->kwset = kwset;
96 kwsearch->words = words;
97 kwsearch->pattern = pattern;
98 kwsearch->size = size;
99 kwsearch->re = NULL;
100 return kwsearch;
101}
102
103/* Use the compiled pattern VCP to search the buffer BUF of size SIZE.
104 If found, return the offset of the first match and store its
105 size into *MATCH_SIZE. If not found, return SIZE_MAX.
106 If START_PTR is nonnull, start searching there. */
107size_t
108Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
109 char const *start_ptr)
110{
111 char const *beg, *end, *mb_start;
112 ptrdiff_t len;
113 char eol = eolbyte;
114 struct kwsearch *kwsearch = vcp;
115 kwset_t kwset = kwsearch->kwset;
116 bool mb_check = localeinfo.multibyte & !localeinfo.using_utf8 & !match_lines;
117 bool longest = (mb_check | !!start_ptr | match_words) & !match_lines;
118
119 for (mb_start = beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++)
120 {
121 struct kwsmatch kwsmatch;
122 ptrdiff_t offset = kwsexec (kwset, beg - match_lines,
123 buf + size - beg + match_lines, &kwsmatch,
124 longest);
125 if (offset < 0)
126 break;
127 len = kwsmatch.size - 2 * match_lines;
128
129 size_t mbclen = 0;
130 if (mb_check
131 && mb_goback (&mb_start, &mbclen, beg + offset, buf + size) != 0)
132 {
133 /* We have matched a single byte that is not at the beginning of a
134 multibyte character. mb_goback has advanced MB_START past that
135 multibyte character. Now, we want to position BEG so that the
136 next kwsexec search starts there. Thus, to compensate for the
137 for-loop's BEG++, above, subtract one here. This code is
138 unusually hard to reach, and exceptionally, let's show how to
139 trigger it here:
140
141 printf '\203AA\n'|LC_ALL=ja_JP.SHIFT_JIS src/grep -F A
142
143 That assumes the named locale is installed.
144 Note that your system's shift-JIS locale may have a different
145 name, possibly including "sjis". */
146 beg = mb_start - 1;
147 continue;
148 }
149 beg += offset;
150 if (!!start_ptr & !match_words)
151 goto success_in_beg_and_len;
152 if (match_lines)
153 {
154 len += start_ptr == NULL;
155 goto success_in_beg_and_len;
156 }
157 if (! match_words)
158 goto success;
159
160 /* We need a preceding mb_start pointer. Use the beginning of line
161 if there is a preceding newline. */
162 if (mbclen == 0)
163 {
164 char const *nl = memrchr (mb_start, eol, beg - mb_start);
165 if (nl)
166 mb_start = nl + 1;
167 }
168
169 /* Succeed if neither the preceding nor the following character is a
170 word constituent. If the preceding is not, yet the following
171 character IS a word constituent, keep trying with shorter matches. */
172 if (mbclen > 0
173 ? ! wordchar_next (beg - mbclen, buf + size)
174 : ! wordchar_prev (mb_start, beg, buf + size))
175 for (;;)
176 {
177 if (! wordchar_next (beg + len, buf + size))
178 {
179 if (start_ptr)
180 goto success_in_beg_and_len;
181 else
182 goto success;
183 }
184 if (!start_ptr && !localeinfo.multibyte)
185 {
186 if (! kwsearch->re)
187 {
188 fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size);
189 kwsearch->re = GEAcompile (kwsearch->pattern,
190 kwsearch->size,
191 RE_SYNTAX_GREP, !!start_ptr);
192 }
193 if (beg + len < buf + size)
194 {
195 end = rawmemchr (beg + len, eol);
196 end++;
197 }
198 else
199 end = buf + size;
200
201 if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL)
202 != (size_t) -1)
203 goto success_match_words;
204 beg = end - 1;
205 break;
206 }
207 if (!len)
208 break;
209
210 struct kwsmatch shorter_match;
211 if (kwsexec (kwset, beg, --len, &shorter_match, true) != 0)
212 break;
213 len = shorter_match.size;
214 }
215
216 /* No word match was found at BEG. Skip past word constituents,
217 since they cannot precede the next match and not skipping
218 them could make things much slower. */
219 beg += wordchars_size (beg, buf + size);
220 mb_start = beg;
221 }
222
223 return -1;
224
225 success:
226 if (beg + len < buf + size)
227 {
228 end = rawmemchr (beg + len, eol);
229 end++;
230 }
231 else
232 end = buf + size;
233 success_match_words:
234 beg = memrchr (buf, eol, beg - buf);
235 beg = beg ? beg + 1 : buf;
236 len = end - beg;
237 success_in_beg_and_len:;
238 *match_size = len;
239 return beg - buf;
240}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette