| 1 |
|
|---|
| 2 | #define DPRINTF(p) /*nothing */
|
|---|
| 3 | #define DPRINTF(p) printf p
|
|---|
| 4 | #define GETCHAR(c, eptr) c = *eptr;
|
|---|
| 5 | #define GETCHARINC(c, eptr) c = *eptr++;
|
|---|
| 6 | #define class pcre_class
|
|---|
| 7 | #define match_condassert 0x01 /* Called to check a condition assertion */
|
|---|
| 8 | #define match_isgroup 0x02 /* Set if start of bracketed group */
|
|---|
| 9 | #else
|
|---|
| 10 | #endif
|
|---|
| 11 | #ifdef DEBUG /* Sigh. Some compilers never learn. */
|
|---|
| 12 | #ifdef DEBUG
|
|---|
| 13 | #ifdef __cplusplus
|
|---|
| 14 | #include "internal.h"
|
|---|
| 15 | && length - re->max_match_size > start_offset)
|
|---|
| 16 | ((*ecode++ == OP_BEG_WORD) ? prev_is_word : cur_is_word))
|
|---|
| 17 | ((md->ctypes[*eptr] & ctype_word) != 0);
|
|---|
| 18 | ((md->ctypes[eptr[-1]] & ctype_word) != 0);
|
|---|
| 19 | (eptr == md->end_subject - 1 && *eptr != '\n'))
|
|---|
| 20 | (i.e. keep it out of the loop). Also we can test that there are at least
|
|---|
| 21 | (md->ctypes[*eptr++] & ctype_digit) != 0)
|
|---|
| 22 | (md->ctypes[*eptr++] & ctype_digit) == 0)
|
|---|
| 23 | (md->ctypes[*eptr++] & ctype_space) != 0)
|
|---|
| 24 | (md->ctypes[*eptr++] & ctype_space) == 0)
|
|---|
| 25 | (md->ctypes[*eptr++] & ctype_word) != 0)
|
|---|
| 26 | (md->ctypes[*eptr++] & ctype_word) == 0)
|
|---|
| 27 | (offsetcount - 2) * sizeof (int));
|
|---|
| 28 | (offsets == NULL && offsetcount > 0))
|
|---|
| 29 | (pcre_free) (match_block.offset_vector);
|
|---|
| 30 | (pcre_free) (save);
|
|---|
| 31 | (re->tables + fcc_offset)[req_char] : req_char;
|
|---|
| 32 | * Match a back-reference *
|
|---|
| 33 | * Execute a Regular Expression *
|
|---|
| 34 | * Match from current position *
|
|---|
| 35 | * Debugging function to print chars *
|
|---|
| 36 | * Perl-Compatible Regular Expressions *
|
|---|
| 37 | * Macros and tables for character handling *
|
|---|
| 38 | *************************************************/
|
|---|
| 39 | */
|
|---|
| 40 | *iptr = -1;
|
|---|
| 41 | *iptr++ = -1;
|
|---|
| 42 | *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
|
|---|
| 43 | *prev == OP_ONCE)
|
|---|
| 44 | -----------------------------------------------------------------------------
|
|---|
| 45 | -1 => failed to match
|
|---|
| 46 | /*
|
|---|
| 47 | /* "Once" brackets are like assertion brackets except that after a match,
|
|---|
| 48 | /* ... else fall through */
|
|---|
| 49 | /* Advance to a possible match for an initial string after study */
|
|---|
| 50 | /* Allow compilation as C++ source code, should anybody want to do that. */
|
|---|
| 51 | /* Always fail if not enough characters left */
|
|---|
| 52 | /* An alternation is the end of a branch; scan along to find the end of the
|
|---|
| 53 | /* Assert before internal newline if multiline, or before a terminating
|
|---|
| 54 | /* Assertion brackets. Check the alternative branches in turn - the
|
|---|
| 55 | /* At the start of a bracketed group, add the current subject pointer to the
|
|---|
| 56 | /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
|
|---|
| 57 | /* Caseful comparisons */
|
|---|
| 58 | /* Change option settings */
|
|---|
| 59 | /* Common code for all repeated single character type matches */
|
|---|
| 60 | /* Common code for all repeated single-character matches. We can give
|
|---|
| 61 | /* Compute the minimum number of offsets that we need to reset each time. Doing
|
|---|
| 62 | /* Conditional group: compilation checked that there are no more than
|
|---|
| 63 | /* Continue as from after the assertion, updating the offsets high water
|
|---|
| 64 | /* Continue from after the assertion, updating the offsets high water
|
|---|
| 65 | /* Control never gets here */
|
|---|
| 66 | /* Control never reaches here */
|
|---|
| 67 | /* Copy the offset information from temporary store if necessary */
|
|---|
| 68 | /* Do a single test if no case difference is set up */
|
|---|
| 69 | /* Do not stick any code in here without much thought; it is assumed
|
|---|
| 70 | /* End of a group, repeated or non-repeating. If we are at the end of
|
|---|
| 71 | /* End of subject assertion (\z) */
|
|---|
| 72 | /* End of subject or ending \n assertion (\Z) */
|
|---|
| 73 | /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
|
|---|
| 74 | /* First, ensure the minimum number of matches are present. */
|
|---|
| 75 | /* First, ensure the minimum number of matches are present. Use inline
|
|---|
| 76 | /* First, ensure the minimum number of matches are present. We get back
|
|---|
| 77 | /* Flag bits for the match() function */
|
|---|
| 78 | /* For a non-repeating ket, just continue at this level. This also
|
|---|
| 79 | /* For anchored or unanchored matches, there may be a "last known required
|
|---|
| 80 | /* For extended extraction brackets (large number), we have to fish out
|
|---|
| 81 | /* For extended extraction brackets (large number), we have to fish out the
|
|---|
| 82 | /* For matches anchored to the end of the pattern, we can often avoid
|
|---|
| 83 | /* If a back reference hasn't been set, the length that is passed is greater
|
|---|
| 84 | /* If checking an assertion for a condition, return TRUE. */
|
|---|
| 85 | /* If hit the end of the group (which could be repeated), fail */
|
|---|
| 86 | /* If max == min we can continue with the main loop without the
|
|---|
| 87 | /* If maximizing it is worth using inline code for speed, doing the type
|
|---|
| 88 | /* If maximizing, find the longest possible run, then work backwards. */
|
|---|
| 89 | /* If maximizing, find the longest string and work backwards */
|
|---|
| 90 | /* If min = max, continue at the same level without recursing */
|
|---|
| 91 | /* If min = max, continue at the same level without recursion.
|
|---|
| 92 | /* If minimizing, keep testing the rest of the expression and advancing
|
|---|
| 93 | /* If minimizing, keep trying and advancing the pointer */
|
|---|
| 94 | /* If minimizing, we have to test the rest of the pattern before each
|
|---|
| 95 | /* If req_char is set, we know that that character must appear in the subject
|
|---|
| 96 | /* If the expression has got more back references than the offsets supplied can
|
|---|
| 97 | /* If the length of the reference is zero, just continue with the
|
|---|
| 98 | /* If the reference is unset, set the length to be longer than the amount
|
|---|
| 99 | /* If we can't find the required character, break the matching loop */
|
|---|
| 100 | /* If we have found the required character, save the point where we
|
|---|
| 101 | /* In all other cases except a conditional group we have to check the
|
|---|
| 102 | /* In case the recursion has set more capturing values, save the final
|
|---|
| 103 | /* Include the internals header, which itself includes Standard C headers plus
|
|---|
| 104 | /* Insufficient room for saving captured contents */
|
|---|
| 105 | /* Loop for handling unanchored repeated matching attempts; for anchored regexs
|
|---|
| 106 | /* Match a back reference, possibly repeatedly. Look past the end of the
|
|---|
| 107 | /* Match a character class, possibly repeatedly. Look past the end of the
|
|---|
| 108 | /* Match a negated single character */
|
|---|
| 109 | /* Match a negated single character repeatedly. This is almost a repeat of
|
|---|
| 110 | /* Match a run of characters */
|
|---|
| 111 | /* Match a single character repeatedly; different opcodes share code. */
|
|---|
| 112 | /* Match a single character type repeatedly; several different opcodes
|
|---|
| 113 | /* Match a single character type; inline for speed */
|
|---|
| 114 | /* Min and max values for the common repeats; for the maxima, 0 => infinity */
|
|---|
| 115 | /* Move the subject pointer back. This occurs only at the start of
|
|---|
| 116 | /* Negative assertion: all branches must fail to match */
|
|---|
| 117 | /* Now start processing the operations. */
|
|---|
| 118 | /* OP_KETRMAX */
|
|---|
| 119 | /* On entry ecode points to the first opcode, and eptr to the first character
|
|---|
| 120 | /* Opening capturing bracket. If there is space in the offset vector, save
|
|---|
| 121 | /* Or to a non-unique first char after study */
|
|---|
| 122 | /* Or to a unique first char if possible */
|
|---|
| 123 | /* Or to just after \n for a multiline match if possible */
|
|---|
| 124 | /* Other types of node can be handled by a switch */
|
|---|
| 125 | /* Otherwise test for either case */
|
|---|
| 126 | /* Print a sequence of chars in printable format, stopping at the end of the
|
|---|
| 127 | /* Recursion matches the current regex, nested. If there are any capturing
|
|---|
| 128 | /* Reset the maximum number of extractions we might see. */
|
|---|
| 129 | /* Reset the value of the ims flags, in case they got changed during
|
|---|
| 130 | /* Reset the working variable associated with each extraction. These should
|
|---|
| 131 | /* Separate the caselesss case for speed */
|
|---|
| 132 | /* Set up for repetition, or handle the non-repeated case */
|
|---|
| 133 | /* Set up the first character to match, if available. The first_char value is
|
|---|
| 134 | /* Skip over conditional reference data or large extraction number data if
|
|---|
| 135 | /* Start of subject assertion */
|
|---|
| 136 | /* Start of subject unless notbol, or after internal newline if multiline */
|
|---|
| 137 | /* Structure for building a chain of data that actually lives on the
|
|---|
| 138 | /* The code is duplicated for the caseless and caseful cases, for speed,
|
|---|
| 139 | /* The condition is an assertion. Call match() to evaluate it - setting
|
|---|
| 140 | /* The ims options can vary during the matching as a result of the presence
|
|---|
| 141 | /* The repeating kets try the rest of the pattern or restart from the
|
|---|
| 142 | /* There's been some horrible disaster. */
|
|---|
| 143 | /* This "while" is the end of the "do" above */
|
|---|
| 144 | /* This function applies a compiled re to a subject string and picks out
|
|---|
| 145 | /* Use a macro for debugging printing, 'cause that limits the use of #ifdef
|
|---|
| 146 | /* We don't need to repeat the search if we haven't yet reached the
|
|---|
| 147 | /* When a match occurs, substrings will be set for all internal extractions;
|
|---|
| 148 | /* Word boundary assertions */
|
|---|
| 149 | /*************************************************
|
|---|
| 150 | 1. This software is distributed in the hope that it will be useful,
|
|---|
| 151 | 2. The origin of this software must not be misrepresented, either by
|
|---|
| 152 | 3. Altered versions must be plainly marked as such, and must not be
|
|---|
| 153 | 4. If PCRE is embedded in any software that is released under the GNU
|
|---|
| 154 | 5.005. If there is an options reset, it will get obeyed in the normal
|
|---|
| 155 | 6 : 3 + (ecode[1] << 8) + ecode[2]),
|
|---|
| 156 | < -1 => some kind of unexpected problem
|
|---|
| 157 | = 0 => success, but offsets is not big enough
|
|---|
| 158 | Arguments:
|
|---|
| 159 | BOOL anchored;
|
|---|
| 160 | BOOL cur_is_word = (eptr < md->end_subject) &&
|
|---|
| 161 | BOOL is_subject;
|
|---|
| 162 | BOOL minimize = FALSE;
|
|---|
| 163 | BOOL prev_is_word = (eptr != md->start_subject) &&
|
|---|
| 164 | BOOL rc;
|
|---|
| 165 | BOOL startline;
|
|---|
| 166 | BOOL using_temporary_offsets = FALSE;
|
|---|
| 167 | Copyright (c) 1997-2000 University of Cambridge
|
|---|
| 168 | DPRINTF ((">>>> returning %d\n", match_block.errorcode));
|
|---|
| 169 | DPRINTF ((">>>> returning %d\n", rc));
|
|---|
| 170 | DPRINTF (("Copied offsets from temporary memory\n"));
|
|---|
| 171 | DPRINTF (("Freeing temporary memory\n"));
|
|---|
| 172 | DPRINTF (("Got memory to hold back references\n"));
|
|---|
| 173 | DPRINTF (("Unknown opcode %d\n", *ecode));
|
|---|
| 174 | DPRINTF (("bracket %d failed\n", number));
|
|---|
| 175 | DPRINTF (("bracket 0 failed\n"));
|
|---|
| 176 | DPRINTF (("ims reset to %02lx\n", ims));
|
|---|
| 177 | DPRINTF (("ims set to %02lx at group repeat\n", ims));
|
|---|
| 178 | DPRINTF (("ims set to %02lx\n", ims));
|
|---|
| 179 | DPRINTF (("matching %c{%d,%d} against subject %.*s\n", c, min, max,
|
|---|
| 180 | DPRINTF (("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
|
|---|
| 181 | DPRINTF (("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
|
|---|
| 182 | DPRINTF (("start bracket 0\n"));
|
|---|
| 183 | GETCHAR (c, eptr) /* Get character */
|
|---|
| 184 | GETCHARINC (c, eptr) /* Get character; increment eptr */
|
|---|
| 185 | General Purpose Licence (GPL), then the terms of that licence shall
|
|---|
| 186 | However, if the referenced string is the empty string, always treat
|
|---|
| 187 | If the bracket fails to match, we need to restore this value and also the
|
|---|
| 188 | If there isn't enough space in the offset vector, treat this as if it were a
|
|---|
| 189 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|---|
| 190 | Otherwise, we can use the vector supplied, rounding down its size to a multiple
|
|---|
| 191 | Permission is granted to anyone to use this software for any purpose on any
|
|---|
| 192 | REPEATCHAR:
|
|---|
| 193 | REPEATNOTCHAR:
|
|---|
| 194 | REPEATTYPE:
|
|---|
| 195 | Returns: > 0 => success; value is the number of elements filled in
|
|---|
| 196 | Returns: TRUE if matched
|
|---|
| 197 | Returns: TRUE if matched
|
|---|
| 198 | Returns: nothing
|
|---|
| 199 | They are not both allowed to be zero. */
|
|---|
| 200 | This is a library of functions to support regular expressions whose syntax
|
|---|
| 201 | This is the forcible breaking of infinite loops as implemented in Perl
|
|---|
| 202 | Writing separate code makes it go faster, as does using an autoincrement and
|
|---|
| 203 | Written by: Philip Hazel <ph10@cam.ac.uk>
|
|---|
| 204 | a move back into the brackets. Check the alternative branches in turn - the
|
|---|
| 205 | address of eptr, so that eptr can be a register variable. */
|
|---|
| 206 | an assertion "group", stop matching and return TRUE, but record the
|
|---|
| 207 | an empty string - recursion will then try other alternatives, if any. */
|
|---|
| 208 | an error. Save the top 15 values on the stack, and accept that the rest
|
|---|
| 209 | an unanchored pattern, of course. If there's no first char and the pattern was
|
|---|
| 210 | analyzing most of the pattern. length > re->max_match_size is
|
|---|
| 211 | anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
|
|---|
| 212 | and advance one byte in the pattern code. */
|
|---|
| 213 | and reinstate them after the recursion. However, we don't know how many
|
|---|
| 214 | and semantics are as close as possible to those of the Perl 5 language. See
|
|---|
| 215 | and the required character in fact is caseful. */
|
|---|
| 216 | at run time, so we have to test for anchoring. The first char may be unset for
|
|---|
| 217 | avoid duplicate testing (which takes significant time). This covers the vast
|
|---|
| 218 | backing off on a match. */
|
|---|
| 219 | bmtable = extra->data.bmtable;
|
|---|
| 220 | both cases of the character. Otherwise set the two values the same, which will
|
|---|
| 221 | bracketed group and go to there. */
|
|---|
| 222 | brackets - for testing for empty matches
|
|---|
| 223 | brackets started but not finished, we have to save their starting points
|
|---|
| 224 | break;
|
|---|
| 225 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 226 | c != md->lcc[*eptr++])
|
|---|
| 227 | c = *ecode++ - OP_CRSTAR;
|
|---|
| 228 | c = *ecode++ - OP_NOTSTAR;
|
|---|
| 229 | c = *ecode++ - OP_STAR;
|
|---|
| 230 | c = *ecode++ - OP_TYPESTAR;
|
|---|
| 231 | c = *ecode++;
|
|---|
| 232 | c = *eptr++;
|
|---|
| 233 | c = 15;
|
|---|
| 234 | c = max - min;
|
|---|
| 235 | c = md->end_subject - eptr;
|
|---|
| 236 | c = md->lcc[c];
|
|---|
| 237 | c = md->offset_max;
|
|---|
| 238 | c == md->lcc[*eptr++])
|
|---|
| 239 | can't just fail here, because of the possibility of quantifiers with zero
|
|---|
| 240 | case OP_ALT:
|
|---|
| 241 | case OP_ANY:
|
|---|
| 242 | case OP_ASSERT:
|
|---|
| 243 | case OP_ASSERTBACK:
|
|---|
| 244 | case OP_ASSERTBACK_NOT:
|
|---|
| 245 | case OP_ASSERT_NOT:
|
|---|
| 246 | case OP_BEG_WORD:
|
|---|
| 247 | case OP_BRA: /* Non-capturing bracket: optimized */
|
|---|
| 248 | case OP_BRAMINZERO:
|
|---|
| 249 | case OP_BRANUMBER:
|
|---|
| 250 | case OP_BRAZERO:
|
|---|
| 251 | case OP_CHARS:
|
|---|
| 252 | case OP_CIRC:
|
|---|
| 253 | case OP_CLASS:
|
|---|
| 254 | case OP_COND:
|
|---|
| 255 | case OP_CREF:
|
|---|
| 256 | case OP_CRMINPLUS:
|
|---|
| 257 | case OP_CRMINQUERY:
|
|---|
| 258 | case OP_CRMINRANGE:
|
|---|
| 259 | case OP_CRMINSTAR:
|
|---|
| 260 | case OP_CRPLUS:
|
|---|
| 261 | case OP_CRQUERY:
|
|---|
| 262 | case OP_CRRANGE:
|
|---|
| 263 | case OP_CRSTAR:
|
|---|
| 264 | case OP_DIGIT:
|
|---|
| 265 | case OP_DOLL:
|
|---|
| 266 | case OP_END:
|
|---|
| 267 | case OP_END_WORD:
|
|---|
| 268 | case OP_EOD:
|
|---|
| 269 | case OP_EODN:
|
|---|
| 270 | case OP_EXACT:
|
|---|
| 271 | case OP_KET:
|
|---|
| 272 | case OP_KETRMAX:
|
|---|
| 273 | case OP_KETRMIN:
|
|---|
| 274 | case OP_MINPLUS:
|
|---|
| 275 | case OP_MINQUERY:
|
|---|
| 276 | case OP_MINSTAR:
|
|---|
| 277 | case OP_MINUPTO:
|
|---|
| 278 | case OP_NOT:
|
|---|
| 279 | case OP_NOTEXACT:
|
|---|
| 280 | case OP_NOTMINPLUS:
|
|---|
| 281 | case OP_NOTMINQUERY:
|
|---|
| 282 | case OP_NOTMINSTAR:
|
|---|
| 283 | case OP_NOTMINUPTO:
|
|---|
| 284 | case OP_NOTPLUS:
|
|---|
| 285 | case OP_NOTQUERY:
|
|---|
| 286 | case OP_NOTSTAR:
|
|---|
| 287 | case OP_NOTUPTO:
|
|---|
| 288 | case OP_NOT_DIGIT:
|
|---|
| 289 | case OP_NOT_WHITESPACE:
|
|---|
| 290 | case OP_NOT_WORDCHAR:
|
|---|
| 291 | case OP_NOT_WORD_BOUNDARY:
|
|---|
| 292 | case OP_ONCE:
|
|---|
| 293 | case OP_OPT:
|
|---|
| 294 | case OP_PLUS:
|
|---|
| 295 | case OP_QUERY:
|
|---|
| 296 | case OP_RECURSE:
|
|---|
| 297 | case OP_REF:
|
|---|
| 298 | case OP_REVERSE:
|
|---|
| 299 | case OP_SOD:
|
|---|
| 300 | case OP_STAR:
|
|---|
| 301 | case OP_TYPEEXACT:
|
|---|
| 302 | case OP_TYPEMINPLUS:
|
|---|
| 303 | case OP_TYPEMINQUERY:
|
|---|
| 304 | case OP_TYPEMINSTAR:
|
|---|
| 305 | case OP_TYPEMINUPTO:
|
|---|
| 306 | case OP_TYPEPLUS:
|
|---|
| 307 | case OP_TYPEQUERY:
|
|---|
| 308 | case OP_TYPESTAR:
|
|---|
| 309 | case OP_TYPEUPTO:
|
|---|
| 310 | case OP_UPTO:
|
|---|
| 311 | case OP_WHITESPACE:
|
|---|
| 312 | case OP_WORDCHAR:
|
|---|
| 313 | case OP_WORD_BOUNDARY:
|
|---|
| 314 | case matching may be when this character is hit, so test for it in both its
|
|---|
| 315 | caselessly, or if there are any changes of this flag within the regex, set up
|
|---|
| 316 | cases if necessary. However, the different cased versions will not be set up
|
|---|
| 317 | character" set. If the PCRE_CASELESS is set, implying that the match starts
|
|---|
| 318 | characters and work backwards. */
|
|---|
| 319 | code for maximizing the speed, and do the type test once at the start
|
|---|
| 320 | code to character type repeats - written out again for speed. */
|
|---|
| 321 | commoning these up that doesn't require a test of the positive/negative
|
|---|
| 322 | computer system, and to redistribute it freely, subject to the following
|
|---|
| 323 | const char *subject;
|
|---|
| 324 | const pcre *re;
|
|---|
| 325 | const pcre_extra *extra;
|
|---|
| 326 | const uschar *bmtable = NULL;
|
|---|
| 327 | const uschar *data = ecode + 1; /* Save for matching */
|
|---|
| 328 | const uschar *end_subject;
|
|---|
| 329 | const uschar *next = ecode + 1;
|
|---|
| 330 | const uschar *p = md->start_subject + md->offset_vector[offset];
|
|---|
| 331 | const uschar *p;
|
|---|
| 332 | const uschar *pp = eptr;
|
|---|
| 333 | const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
|
|---|
| 334 | const uschar *prev = ecode;
|
|---|
| 335 | const uschar *req_char_ptr = start_match - 1;
|
|---|
| 336 | const uschar *saved_eptr = eptr;
|
|---|
| 337 | const uschar *saved_eptr = eptrb->saved_eptr;
|
|---|
| 338 | const uschar *saved_eptr;
|
|---|
| 339 | const uschar *start_bits = NULL;
|
|---|
| 340 | const uschar *start_match = (const uschar *) subject + start_offset;
|
|---|
| 341 | continue; /* With the main loop */
|
|---|
| 342 | continue;
|
|---|
| 343 | course of events. */
|
|---|
| 344 | ctype = *ecode++; /* Code for the character type */
|
|---|
| 345 | cur_is_word == prev_is_word : cur_is_word != prev_is_word)
|
|---|
| 346 | current high water mark for use by positive assertions. Do this also
|
|---|
| 347 | default: /* No repeat follows */
|
|---|
| 348 | default:
|
|---|
| 349 | do
|
|---|
| 350 | each branch of a lookbehind assertion. If we are too close to the start to
|
|---|
| 351 | each substring: the offsets to the start and end of the substring.
|
|---|
| 352 | ecode position in code
|
|---|
| 353 | ecode + ((offset < offset_top && md->offset_vector[offset] >= 0) ?
|
|---|
| 354 | ecode += (ecode[1] << 8) + ecode[2];
|
|---|
| 355 | ecode += 2;
|
|---|
| 356 | ecode += 3 + (ecode[4] << 8) + ecode[5];
|
|---|
| 357 | ecode += 33; /* Advance past the item */
|
|---|
| 358 | ecode += 3; /* Advance past the item */
|
|---|
| 359 | ecode += 3;
|
|---|
| 360 | ecode += 5;
|
|---|
| 361 | ecode = next + 3;
|
|---|
| 362 | ecode++;
|
|---|
| 363 | else
|
|---|
| 364 | else if ((extra->options & PCRE_STUDY_BM) != 0)
|
|---|
| 365 | else if (first_char >= 0)
|
|---|
| 366 | else if (start_bits != NULL)
|
|---|
| 367 | else if (startline)
|
|---|
| 368 | encountered */
|
|---|
| 369 | end_subject = match_block.end_subject;
|
|---|
| 370 | eptr pointer in subject
|
|---|
| 371 | eptr points into the subject
|
|---|
| 372 | eptr += c;
|
|---|
| 373 | eptr += length;
|
|---|
| 374 | eptr += min;
|
|---|
| 375 | eptr -= (ecode[1] << 8) + ecode[2];
|
|---|
| 376 | eptr -= length;
|
|---|
| 377 | eptr = md->end_match_ptr;
|
|---|
| 378 | eptr++;
|
|---|
| 379 | eptrb pointer to chain of blocks containing eptr at start of
|
|---|
| 380 | eptrb = &newptrb;
|
|---|
| 381 | eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */
|
|---|
| 382 | eptrblock *eptrb;
|
|---|
| 383 | eptrblock newptrb;
|
|---|
| 384 | eptrblock;
|
|---|
| 385 | exactly what going to the ket would do. */
|
|---|
| 386 | explicit claim or by omission.
|
|---|
| 387 | external_extra points to "hints" from pcre_study() or is NULL
|
|---|
| 388 | external_re points to the compiled expression
|
|---|
| 389 | extraction by setting the offsets and bumping the high water mark. */
|
|---|
| 390 | first_char = match_block.lcc[first_char];
|
|---|
| 391 | first_char = re->first_char;
|
|---|
| 392 | flags can contain
|
|---|
| 393 | for (;;)
|
|---|
| 394 | for (i = 1; i <= c; i++)
|
|---|
| 395 | for (i = 1; i <= min; i++)
|
|---|
| 396 | for (i = min; i < max; i++)
|
|---|
| 397 | for (i = min;; i++)
|
|---|
| 398 | for the "once" (not-backup up) groups. */
|
|---|
| 399 | for the match to succeed. If the first character is set, req_char must be
|
|---|
| 400 | found it, so that we don't search again next time round the loop if
|
|---|
| 401 | from a previous iteration of this group, and be referred to by a reference
|
|---|
| 402 | goto REPEATCHAR;
|
|---|
| 403 | goto REPEATNOTCHAR;
|
|---|
| 404 | goto REPEATTYPE;
|
|---|
| 405 | group number back at the start and if necessary complete handling an
|
|---|
| 406 | happens for a repeating ket if no characters were matched in the group.
|
|---|
| 407 | here; that is handled in the code for KET. */
|
|---|
| 408 | hold, we get a temporary bit of working store to use during the matching.
|
|---|
| 409 | i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
|
|---|
| 410 | if (!anchored)
|
|---|
| 411 | if (!match (start_match, re->code, 2, &match_block, ims, NULL, match_isgroup))
|
|---|
| 412 | if (!match_ref (offset, eptr, length, md, ims))
|
|---|
| 413 | if (!md->endonly)
|
|---|
| 414 | if (!rc)
|
|---|
| 415 | if (!startline && extra != NULL)
|
|---|
| 416 | if ((*ecode++ == OP_WORD_BOUNDARY) ?
|
|---|
| 417 | if ((data[c / 8] & (1 << (c & 7))) != 0)
|
|---|
| 418 | if ((data[c / 8] & (1 << (c & 7))) == 0)
|
|---|
| 419 | if ((extra->options & PCRE_STUDY_MAPPED) != 0)
|
|---|
| 420 | if ((flags & match_condassert) != 0)
|
|---|
| 421 | if ((flags & match_isgroup) != 0)
|
|---|
| 422 | if ((ims & PCRE_CASELESS) != 0)
|
|---|
| 423 | if ((ims & PCRE_DOTALL) == 0 && c == '\n')
|
|---|
| 424 | if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
|
|---|
| 425 | if ((ims & PCRE_DOTALL) == 0)
|
|---|
| 426 | if ((ims & PCRE_MULTILINE) != 0)
|
|---|
| 427 | if ((md->ctypes[*eptr++] & ctype_digit) != 0)
|
|---|
| 428 | if ((md->ctypes[*eptr++] & ctype_digit) == 0)
|
|---|
| 429 | if ((md->ctypes[*eptr++] & ctype_space) != 0)
|
|---|
| 430 | if ((md->ctypes[*eptr++] & ctype_space) == 0)
|
|---|
| 431 | if ((md->ctypes[*eptr++] & ctype_word) != 0)
|
|---|
| 432 | if ((md->ctypes[*eptr++] & ctype_word) == 0)
|
|---|
| 433 | if ((md->ctypes[c] & ctype_digit) != 0)
|
|---|
| 434 | if ((md->ctypes[c] & ctype_digit) == 0)
|
|---|
| 435 | if ((md->ctypes[c] & ctype_space) != 0)
|
|---|
| 436 | if ((md->ctypes[c] & ctype_space) == 0)
|
|---|
| 437 | if ((md->ctypes[c] & ctype_word) != 0)
|
|---|
| 438 | if ((md->ctypes[c] & ctype_word) == 0)
|
|---|
| 439 | if ((options & ~PUBLIC_EXEC_OPTIONS) != 0)
|
|---|
| 440 | if ((re->options & PCRE_FIRSTSET) != 0)
|
|---|
| 441 | if ((re->options & PCRE_REQCHSET) != 0)
|
|---|
| 442 | if ((start_bits[c / 8] & (1 << (c & 7))) == 0)
|
|---|
| 443 | if (*ecode != OP_ONCE && *ecode != OP_ALT)
|
|---|
| 444 | if (*ecode == OP_KET || eptr == saved_eptr)
|
|---|
| 445 | if (*ecode == OP_KET)
|
|---|
| 446 | if (*ecode == OP_KETRMIN)
|
|---|
| 447 | if (*ecode++ != *eptr++)
|
|---|
| 448 | if (*ecode++ == *eptr++)
|
|---|
| 449 | if (*eptr != '\n')
|
|---|
| 450 | if (*eptr++ == '\n')
|
|---|
| 451 | if (*p++ != *eptr++)
|
|---|
| 452 | if (*p++ == req_char)
|
|---|
| 453 | if (*prev != OP_COND)
|
|---|
| 454 | if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
|
|---|
| 455 | if (bmtable != NULL)
|
|---|
| 456 | if (bmtable[*start_match])
|
|---|
| 457 | if (c != *eptr++)
|
|---|
| 458 | if (c != md->lcc[*eptr++])
|
|---|
| 459 | if (c < 16)
|
|---|
| 460 | if (c == *eptr++)
|
|---|
| 461 | if (c == md->lcc[*eptr++])
|
|---|
| 462 | if (c > md->end_subject - eptr)
|
|---|
| 463 | if (cur_is_word == prev_is_word ||
|
|---|
| 464 | if (ecode[3] == OP_CREF) /* Condition is extraction test */
|
|---|
| 465 | if (ecode[3] == OP_OPT)
|
|---|
| 466 | if (eptr != md->start_subject && eptr[-1] != '\n')
|
|---|
| 467 | if (eptr != md->start_subject)
|
|---|
| 468 | if (eptr < md->end_subject - 1 ||
|
|---|
| 469 | if (eptr < md->end_subject)
|
|---|
| 470 | if (eptr < md->start_subject)
|
|---|
| 471 | if (eptr >= md->end_subject ||
|
|---|
| 472 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
|
|---|
| 473 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
|
|---|
| 474 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
|
|---|
| 475 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
|
|---|
| 476 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
|
|---|
| 477 | if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
|
|---|
| 478 | if (eptr >= md->end_subject || *eptr == '\n')
|
|---|
| 479 | if (eptr >= md->end_subject || c != *eptr)
|
|---|
| 480 | if (eptr >= md->end_subject || c != md->lcc[*eptr])
|
|---|
| 481 | if (eptr >= md->end_subject || c == *eptr)
|
|---|
| 482 | if (eptr >= md->end_subject || c == md->lcc[*eptr])
|
|---|
| 483 | if (eptr >= md->end_subject)
|
|---|
| 484 | if (eptr++ >= md->end_subject)
|
|---|
| 485 | if (i >= max || !match_ref (offset, eptr, length, md, ims))
|
|---|
| 486 | if (i >= max || eptr >= md->end_subject ||
|
|---|
| 487 | if (i >= max || eptr >= md->end_subject || c != *eptr++)
|
|---|
| 488 | if (i >= max || eptr >= md->end_subject || c == *eptr++)
|
|---|
| 489 | if (i >= max || eptr >= md->end_subject)
|
|---|
| 490 | if (is_subject && length > md->end_subject - p)
|
|---|
| 491 | if (isprint (c = *(p++)))
|
|---|
| 492 | if (length == 0)
|
|---|
| 493 | if (length > md->end_subject - eptr)
|
|---|
| 494 | if (match (eptr, ecode + 3, offset_top, md, ims, NULL,
|
|---|
| 495 | if (match (eptr, ecode + 3, offset_top, md, ims, NULL, match_isgroup))
|
|---|
| 496 | if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0) ||
|
|---|
| 497 | if (match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup))
|
|---|
| 498 | if (match (eptr, ecode, offset_top, md, ims, eptrb, 0))
|
|---|
| 499 | if (match (eptr, next + 3, offset_top, md, ims, eptrb, match_isgroup))
|
|---|
| 500 | if (match (eptr, next, offset_top, md, ims, eptrb, match_isgroup))
|
|---|
| 501 | if (match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup) ||
|
|---|
| 502 | if (match (eptr--, ecode, offset_top, md, ims, eptrb, 0))
|
|---|
| 503 | if (match_block.end_offset_top > offsetcount)
|
|---|
| 504 | if (match_block.offset_vector != NULL)
|
|---|
| 505 | if (match_block.offset_vector == NULL)
|
|---|
| 506 | if (max == 0)
|
|---|
| 507 | if (md->lcc[*ecode++] != md->lcc[*eptr++])
|
|---|
| 508 | if (md->lcc[*ecode++] == md->lcc[*eptr++])
|
|---|
| 509 | if (md->lcc[*p++] != md->lcc[*eptr++])
|
|---|
| 510 | if (md->notbol && eptr == md->start_subject)
|
|---|
| 511 | if (md->notempty && eptr == md->start_match)
|
|---|
| 512 | if (md->noteol)
|
|---|
| 513 | if (min == max)
|
|---|
| 514 | if (min > 0)
|
|---|
| 515 | if (min > md->end_subject - eptr)
|
|---|
| 516 | if (minimize)
|
|---|
| 517 | if (number > 0)
|
|---|
| 518 | if (number > EXTRACT_BASIC_MAX)
|
|---|
| 519 | if (offset < md->offset_max)
|
|---|
| 520 | if (offset >= md->offset_max)
|
|---|
| 521 | if (offset_top <= offset)
|
|---|
| 522 | if (offsetcount < 2)
|
|---|
| 523 | if (offsetcount >= 4)
|
|---|
| 524 | if (op > OP_BRA)
|
|---|
| 525 | if (p > req_char_ptr)
|
|---|
| 526 | if (p >= end_subject)
|
|---|
| 527 | if (pp == req_char || pp == req_char2)
|
|---|
| 528 | if (re == NULL || subject == NULL ||
|
|---|
| 529 | if (re->magic_number != MAGIC_NUMBER)
|
|---|
| 530 | if (re->max_match_size >= 0
|
|---|
| 531 | if (re->top_backref > 0 && re->top_backref >= ocount / 3)
|
|---|
| 532 | if (req_char == req_char2)
|
|---|
| 533 | if (req_char >= 0)
|
|---|
| 534 | if (resetcount > offsetcount)
|
|---|
| 535 | if (save != stacksave)
|
|---|
| 536 | if (save == NULL)
|
|---|
| 537 | if (skipped_chars)
|
|---|
| 538 | if (start_match + bmtable[256] > end_subject)
|
|---|
| 539 | if (start_match > match_block.start_subject + start_offset)
|
|---|
| 540 | if (using_temporary_offsets)
|
|---|
| 541 | if certain parts of the pattern were not used. */
|
|---|
| 542 | if the malloc fails ... there is no way of returning to the top level with
|
|---|
| 543 | implied in the second condition, because start_offset > 0. */
|
|---|
| 544 | ims current /i, /m, and /s options
|
|---|
| 545 | ims the ims flags
|
|---|
| 546 | ims = (ims & ~PCRE_IMS) | ecode[4];
|
|---|
| 547 | ims = ecode[1];
|
|---|
| 548 | ims = original_ims;
|
|---|
| 549 | ims = re->options & (PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL);
|
|---|
| 550 | in the pattern. */
|
|---|
| 551 | in the subject string, while eptrb holds the value of eptr at the start of the
|
|---|
| 552 | initialize them to avoid reading uninitialized locations. */
|
|---|
| 553 | inline, and there are *still* stupid compilers about that don't like indented
|
|---|
| 554 | inside the group.
|
|---|
| 555 | int
|
|---|
| 556 | int *offsets;
|
|---|
| 557 | int *save;
|
|---|
| 558 | int c;
|
|---|
| 559 | int first_char = -1;
|
|---|
| 560 | int flags;
|
|---|
| 561 | int length;
|
|---|
| 562 | int min, max, ctype;
|
|---|
| 563 | int number = *prev - OP_BRA;
|
|---|
| 564 | int number = op - OP_BRA;
|
|---|
| 565 | int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled reference number */
|
|---|
| 566 | int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled reference number */
|
|---|
| 567 | int offset;
|
|---|
| 568 | int offset_top;
|
|---|
| 569 | int offsetcount;
|
|---|
| 570 | int op = (int) *ecode;
|
|---|
| 571 | int options;
|
|---|
| 572 | int rc;
|
|---|
| 573 | int req_char = -1;
|
|---|
| 574 | int req_char2 = -1;
|
|---|
| 575 | int resetcount, ocount;
|
|---|
| 576 | int save_offset1 = md->offset_vector[offset];
|
|---|
| 577 | int save_offset2 = md->offset_vector[offset + 1];
|
|---|
| 578 | int save_offset3 = md->offset_vector[md->offset_end - number];
|
|---|
| 579 | int skipped_chars = 0;
|
|---|
| 580 | int stacksave[15];
|
|---|
| 581 | int start_offset;
|
|---|
| 582 | is a bit large to put on the stack, but using malloc for small numbers
|
|---|
| 583 | is_subject TRUE if printing from within md->start_subject
|
|---|
| 584 | it as matched, any number of times (otherwise there could be infinite
|
|---|
| 585 | item to see if there is repeat information following. The code is similar
|
|---|
| 586 | item to see if there is repeat information following. Then obey similar
|
|---|
| 587 | last bracketed group - used for breaking infinite loops matching zero-length
|
|---|
| 588 | later in the subject; otherwise the test starts at the match point. This
|
|---|
| 589 | length length of subject string (may contain binary zeros)
|
|---|
| 590 | length length to be matched
|
|---|
| 591 | length number to print
|
|---|
| 592 | length = (offset >= offset_top || md->offset_vector[offset] < 0) ?
|
|---|
| 593 | length = md->end_subject - p;
|
|---|
| 594 | level without recursing. Otherwise, if minimizing, keep trying the rest of
|
|---|
| 595 | loop. */
|
|---|
| 596 | loops). */
|
|---|
| 597 | main loop. */
|
|---|
| 598 | majority of cases. It will be suboptimal when the case flag changes in a regex
|
|---|
| 599 | mark, since extracts may have been taken during the assertion. */
|
|---|
| 600 | mark, since extracts may have been taken. */
|
|---|
| 601 | match (eptr, ecode + 3, offset_top, md, ims, eptrb, 0))
|
|---|
| 602 | match (eptr, ecode, offset_top, md, ims, eptrb, flags)
|
|---|
| 603 | match (eptr, prev, offset_top, md, ims, eptrb, match_isgroup))
|
|---|
| 604 | match_block.ctypes = re->tables + ctypes_offset;
|
|---|
| 605 | match_block.end_subject = match_block.start_subject + length;
|
|---|
| 606 | match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
|
|---|
| 607 | match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
|
|---|
| 608 | match_block.errorcode == PCRE_ERROR_NOMATCH &&
|
|---|
| 609 | match_block.lcc = re->tables + lcc_offset;
|
|---|
| 610 | match_block.lcc[*start_match] != first_char)
|
|---|
| 611 | match_block.notbol = (options & PCRE_NOTBOL) != 0;
|
|---|
| 612 | match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
|
|---|
| 613 | match_block.noteol = (options & PCRE_NOTEOL) != 0;
|
|---|
| 614 | match_block.offset_end = ocount;
|
|---|
| 615 | match_block.offset_max = (2 * ocount) / 3;
|
|---|
| 616 | match_block.offset_overflow = FALSE;
|
|---|
| 617 | match_block.offset_overflow = TRUE;
|
|---|
| 618 | match_block.offset_vector = (int *) (pcre_malloc) (ocount * sizeof (int));
|
|---|
| 619 | match_block.offset_vector = offsets;
|
|---|
| 620 | match_block.start_match = start_match;
|
|---|
| 621 | match_block.start_pattern = re->code;
|
|---|
| 622 | match_block.start_subject = (const uschar *) subject;
|
|---|
| 623 | match_condassert - this is an assertion condition
|
|---|
| 624 | match_condassert | match_isgroup))
|
|---|
| 625 | match_data *md;
|
|---|
| 626 | match_data match_block;
|
|---|
| 627 | match_isgroup - this is the start of a bracketed group
|
|---|
| 628 | match_isgroup);
|
|---|
| 629 | match_ref (offset, eptr, length, md, ims)
|
|---|
| 630 | matches, we carry on as at the end of a normal bracket, leaving the subject
|
|---|
| 631 | matching won't pass the KET for an assertion. If any one branch matches,
|
|---|
| 632 | matching won't pass the KET for this kind of subpattern. If any one branch
|
|---|
| 633 | max = (ecode[1] << 8) + ecode[2];
|
|---|
| 634 | max = (ecode[3] << 8) + ecode[4];
|
|---|
| 635 | max = INT_MAX;
|
|---|
| 636 | max = rep_max[c]; /* zero for max => infinity */
|
|---|
| 637 | max, eptr));
|
|---|
| 638 | maximum. Alternatively, if maximizing, find the maximum number of
|
|---|
| 639 | may be wrong. */
|
|---|
| 640 | md pointer to "static" info for the match
|
|---|
| 641 | md pointer to matching data block, if is_subject is TRUE
|
|---|
| 642 | md points to match data block
|
|---|
| 643 | md->end_match_ptr = eptr; /* For ONCE */
|
|---|
| 644 | md->end_match_ptr = eptr; /* Record where we ended */
|
|---|
| 645 | md->end_offset_top = offset_top; /* and how many extracts were taken */
|
|---|
| 646 | md->end_offset_top = offset_top;
|
|---|
| 647 | md->end_subject - eptr + 1 :
|
|---|
| 648 | md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
|
|---|
| 649 | md->offset_overflow = TRUE;
|
|---|
| 650 | md->offset_vector[md->offset_end - i] = save[i];
|
|---|
| 651 | md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
|
|---|
| 652 | md->offset_vector[md->offset_end - number] = save_offset3;
|
|---|
| 653 | md->offset_vector[md->offset_end - number];
|
|---|
| 654 | md->offset_vector[offset + 1] - md->offset_vector[offset];
|
|---|
| 655 | md->offset_vector[offset + 1] = eptr - md->start_subject;
|
|---|
| 656 | md->offset_vector[offset + 1] = save_offset2;
|
|---|
| 657 | md->offset_vector[offset] =
|
|---|
| 658 | md->offset_vector[offset] = save_offset1;
|
|---|
| 659 | memcpy (offsets + 2, match_block.offset_vector + 2,
|
|---|
| 660 | min = (ecode[1] << 8) + ecode[2];
|
|---|
| 661 | min = 0;
|
|---|
| 662 | min = max = (ecode[1] << 8) + ecode[2];
|
|---|
| 663 | min = max = 1;
|
|---|
| 664 | min = rep_min[c]; /* Pick up values from tables; */
|
|---|
| 665 | minima. */
|
|---|
| 666 | minimize = (*ecode == OP_CRMINRANGE);
|
|---|
| 667 | minimize = (c & 1) != 0;
|
|---|
| 668 | minimize = *ecode == OP_MINUPTO;
|
|---|
| 669 | minimize = *ecode == OP_NOTMINUPTO;
|
|---|
| 670 | minimize = *ecode == OP_TYPEMINUPTO;
|
|---|
| 671 | minimize = TRUE;
|
|---|
| 672 | minimum number of matches are present. If min = max, continue at the same
|
|---|
| 673 | misrepresented as being the original software.
|
|---|
| 674 | move back, this match function fails. */
|
|---|
| 675 | mustn't change the current values of the data slot, because they may be set
|
|---|
| 676 | need to recurse. */
|
|---|
| 677 | never be used unless previously set, but they get saved and restored, and so we
|
|---|
| 678 | never set for an anchored regular expression, but the anchoring may be forced
|
|---|
| 679 | newline unless endonly is set, else end of subject unless noteol is set. */
|
|---|
| 680 | newptrb.prev = eptrb;
|
|---|
| 681 | newptrb.saved_eptr = eptr;
|
|---|
| 682 | next += (next[1] << 8) + next[2];
|
|---|
| 683 | non-capturing bracket. Don't worry about setting the flag for the error case
|
|---|
| 684 | number = (ecode[4] << 8) | ecode[5];
|
|---|
| 685 | number = (prev[4] << 8) | prev[5];
|
|---|
| 686 | number from a dummy opcode at the start. */
|
|---|
| 687 | number, then move along the subject till after the recursive match,
|
|---|
| 688 | ocount = offsetcount - (offsetcount % 3);
|
|---|
| 689 | ocount = re->top_backref * 3 + 3;
|
|---|
| 690 | of (?ims) items in the pattern. They are kept in a local variable so that
|
|---|
| 691 | of 3. */
|
|---|
| 692 | of subject left; this ensures that every attempt at a match fails. We
|
|---|
| 693 | offset index into the offset vector
|
|---|
| 694 | offset = number << 1;
|
|---|
| 695 | offset_top current top pointer
|
|---|
| 696 | offset_top = md->end_offset_top;
|
|---|
| 697 | offset_top = offset + 2;
|
|---|
| 698 | offset_top, md, ims, eptrb, match_isgroup);
|
|---|
| 699 | offsetcount the number of elements in the vector
|
|---|
| 700 | offsets points to a vector of ints to be filled in with offsets
|
|---|
| 701 | offsets[0] = start_match - match_block.start_subject;
|
|---|
| 702 | offsets[1] = match_block.end_match_ptr - match_block.start_subject;
|
|---|
| 703 | op = OP_BRA;
|
|---|
| 704 | opcode. */
|
|---|
| 705 | optimization can save a huge amount of backtracking in patterns with nested
|
|---|
| 706 | option for each character match. Maybe that wouldn't add very much to the
|
|---|
| 707 | options option bits
|
|---|
| 708 | p points to characters
|
|---|
| 709 | p--;
|
|---|
| 710 | past the end if there is only one branch, but that's OK because that is
|
|---|
| 711 | pchars (ecode, length, FALSE, md);
|
|---|
| 712 | pchars (eptr, 16, TRUE, md);
|
|---|
| 713 | pchars (eptr, length, TRUE, md);
|
|---|
| 714 | pchars (p, length, FALSE, md);
|
|---|
| 715 | pchars (p, length, is_subject, md)
|
|---|
| 716 | pchars (start_match, end_subject - start_match, TRUE, &match_block);
|
|---|
| 717 | pcre_exec (re, extra, subject, length, start_offset, options, offsets, offsetcount)
|
|---|
| 718 | place we found it at last time. */
|
|---|
| 719 | pointer. */
|
|---|
| 720 | portions of the string if it matches. Two elements in the vector are set for
|
|---|
| 721 | pre-processor statements. I suppose it's only been 10 years... */
|
|---|
| 722 | preceded by BRAZERO or BRAMINZERO. */
|
|---|
| 723 | preceding bracket, in the appropriate order. */
|
|---|
| 724 | preceding bracket, in the appropriate order. We need to reset any options
|
|---|
| 725 | printf (" against backref ");
|
|---|
| 726 | printf (" against pattern ");
|
|---|
| 727 | printf ("%c", c);
|
|---|
| 728 | printf (">>>> Match against: ");
|
|---|
| 729 | printf (">>>>> Skipped %d chars to reach first character\n",
|
|---|
| 730 | printf ("\\x%02x", c);
|
|---|
| 731 | printf ("\n");
|
|---|
| 732 | printf ("end bracket %d", number);
|
|---|
| 733 | printf ("matching subject ");
|
|---|
| 734 | printf ("matching subject <null> against pattern ");
|
|---|
| 735 | printf ("matching subject <null>");
|
|---|
| 736 | printf ("start bracket %d subject=", number);
|
|---|
| 737 | rc = 0;
|
|---|
| 738 | rc = match (eptr, md->start_pattern, offset_top, md, ims, eptrb,
|
|---|
| 739 | rc = match_block.offset_overflow ? 0 : match_block.end_offset_top / 2;
|
|---|
| 740 | register const uschar *ecode;
|
|---|
| 741 | register const uschar *eptr;
|
|---|
| 742 | register const uschar *p = start_match + ((first_char >= 0) ? 1 : 0);
|
|---|
| 743 | register int *iend = iptr + resetcount;
|
|---|
| 744 | register int *iend = iptr - resetcount / 2 + 1;
|
|---|
| 745 | register int *iptr = match_block.offset_vector + ocount;
|
|---|
| 746 | register int *iptr = match_block.offset_vector;
|
|---|
| 747 | register int c = *start_match;
|
|---|
| 748 | register int c;
|
|---|
| 749 | register int i;
|
|---|
| 750 | register int length = ecode[1];
|
|---|
| 751 | register int pp = *p++;
|
|---|
| 752 | repeat it in the interests of efficiency. */
|
|---|
| 753 | repeat limits are compiled as a number of copies, with the optional ones
|
|---|
| 754 | req_char = re->req_char;
|
|---|
| 755 | req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0) ?
|
|---|
| 756 | req_char_ptr = p;
|
|---|
| 757 | resetcount = 2 + re->top_bracket * 2;
|
|---|
| 758 | resetcount = ocount;
|
|---|
| 759 | restoring at the exit of a group is easy. */
|
|---|
| 760 | restrictions:
|
|---|
| 761 | return FALSE;
|
|---|
| 762 | return PCRE_ERROR_BADMAGIC;
|
|---|
| 763 | return PCRE_ERROR_BADOPTION;
|
|---|
| 764 | return PCRE_ERROR_NOMATCH;
|
|---|
| 765 | return PCRE_ERROR_NOMEMORY;
|
|---|
| 766 | return PCRE_ERROR_NULL;
|
|---|
| 767 | return TRUE;
|
|---|
| 768 | return match (eptr,
|
|---|
| 769 | return match (eptr, ecode + 3, offset_top, md, ims, eptrb, match_isgroup);
|
|---|
| 770 | return match_block.errorcode;
|
|---|
| 771 | return rc;
|
|---|
| 772 | save = (int *) (pcre_malloc) ((c + 1) * sizeof (int));
|
|---|
| 773 | save = stacksave;
|
|---|
| 774 | save[i] = md->offset_vector[md->offset_end - i];
|
|---|
| 775 | seems expensive. As a compromise, the stack is used when there are fewer
|
|---|
| 776 | share code. This is very similar to the code for single characters, but we
|
|---|
| 777 | similar code to character type repeats - written out again for speed.
|
|---|
| 778 | since matching characters is likely to be quite common. First, ensure the
|
|---|
| 779 | skipped_chars += bmtable[*start_match],
|
|---|
| 780 | skipped_chars += bmtable[256] - 1;
|
|---|
| 781 | skipped_chars -= bmtable[256] - 1;
|
|---|
| 782 | skipped_chars);
|
|---|
| 783 | skipped_chars++,
|
|---|
| 784 | stack of such pointers, to be re-instated at the end of the group when we hit
|
|---|
| 785 | stack, for holding the values of the subject pointer at the start of each
|
|---|
| 786 | start of each branch to move the current point backwards, so the code at
|
|---|
| 787 | start_bits = extra->data.start_bits;
|
|---|
| 788 | start_match += bmtable[*start_match];
|
|---|
| 789 | start_match += bmtable[256] - 1;
|
|---|
| 790 | start_match -= bmtable[256] - 1;
|
|---|
| 791 | start_match = (const uschar *) subject + length - re->max_match_size;
|
|---|
| 792 | start_match++ < end_subject);
|
|---|
| 793 | start_match++;
|
|---|
| 794 | start_offset where to start in the subject string
|
|---|
| 795 | startline = (re->options & PCRE_STARTLINE) != 0;
|
|---|
| 796 | static BOOL
|
|---|
| 797 | static const char rep_max[] =
|
|---|
| 798 | static const char rep_min[] =
|
|---|
| 799 | static void
|
|---|
| 800 | strings.
|
|---|
| 801 | struct eptrblock *prev;
|
|---|
| 802 | studied, there may be a bitmap of possible first characters. */
|
|---|
| 803 | subject points to the subject string
|
|---|
| 804 | subject if the requested.
|
|---|
| 805 | subpattern - to break infinite loops. */
|
|---|
| 806 | subpattern, so as to detect when an empty string has been matched by a
|
|---|
| 807 | subsequent match. */
|
|---|
| 808 | such there are (offset_top records the completed total) so we just have
|
|---|
| 809 | supersede any condition above with which it is incompatible.
|
|---|
| 810 | switch (*ecode)
|
|---|
| 811 | switch (ctype)
|
|---|
| 812 | switch (op)
|
|---|
| 813 | test once at the start (i.e. keep it out of the loop). */
|
|---|
| 814 | than 16 values to store; otherwise malloc is used. A problem is what to do
|
|---|
| 815 | than the number of characters left in the string, so the match fails.
|
|---|
| 816 | that "continue" in the code above comes out to here to repeat the main
|
|---|
| 817 | that changed within the bracket before re-running it, so check the next
|
|---|
| 818 | that it may occur zero times. It may repeat infinitely, or not at all -
|
|---|
| 819 | the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
|
|---|
| 820 | the closing ket. When match() is called in other circumstances, we don't add to
|
|---|
| 821 | the code for a repeated single character, but I haven't found a nice way of
|
|---|
| 822 | the current subject position in the working slot at the top of the vector. We
|
|---|
| 823 | the expression and advancing one matching character if failing, up to the
|
|---|
| 824 | the external pcre header. */
|
|---|
| 825 | the file Tech.Notes for some information on the internals.
|
|---|
| 826 | the final argument TRUE causes it to stop at the end of an assertion. */
|
|---|
| 827 | the group. */
|
|---|
| 828 | the length of the reference string explicitly rather than passing the
|
|---|
| 829 | the loop runs just once. */
|
|---|
| 830 | the minimum number of bytes before we start. */
|
|---|
| 831 | the number from a dummy opcode at the start. */
|
|---|
| 832 | the point in the subject string is not moved back. Thus there can never be
|
|---|
| 833 | the pointer while it matches the class. */
|
|---|
| 834 | the same bracket.
|
|---|
| 835 | the stack. */
|
|---|
| 836 | the start hasn't passed this character yet. */
|
|---|
| 837 | the subject. */
|
|---|
| 838 | there were too many extractions, set the return code to zero. In the case
|
|---|
| 839 | this level is identical to the lookahead case. */
|
|---|
| 840 | this makes a huge difference to execution time when there aren't many brackets
|
|---|
| 841 | those back references that we can. In this case there need not be overflow
|
|---|
| 842 | time taken, but character matching *is* what this is all about... */
|
|---|
| 843 | to save all the potential data. There may be up to 99 such values, which
|
|---|
| 844 | to that for character classes, but repeated for efficiency. Then obey
|
|---|
| 845 | two branches. If the condition is false, skipping the first branch takes us
|
|---|
| 846 | typedef struct eptrblock
|
|---|
| 847 | unless PCRE_CASELESS was given or the casing state changes within the regex.
|
|---|
| 848 | unlimited repeats that aren't going to match. We don't know what the state of
|
|---|
| 849 | unsigned long int ims = 0;
|
|---|
| 850 | unsigned long int ims;
|
|---|
| 851 | unsigned long int original_ims = ims; /* Save for resetting on ')' */
|
|---|
| 852 | up quickly if there are fewer than the minimum number of characters left in
|
|---|
| 853 | using_temporary_offsets = TRUE;
|
|---|
| 854 | values of the final offsets, in case they were set by a previous iteration of
|
|---|
| 855 | we just need to set up the whole thing as substring 0 before returning. If
|
|---|
| 856 | where we had to get some local store to hold offsets for backreferences, copy
|
|---|
| 857 | while (!anchored &&
|
|---|
| 858 | while (*ecode == OP_ALT)
|
|---|
| 859 | while (*ecode == OP_ALT);
|
|---|
| 860 | while (*next == OP_ALT);
|
|---|
| 861 | while (--iptr >= iend)
|
|---|
| 862 | while (eptr >= pp)
|
|---|
| 863 | while (iptr < iend)
|
|---|
| 864 | while (length-- > 0)
|
|---|
| 865 | while (p < end_subject)
|
|---|
| 866 | while (start_match < end_subject &&
|
|---|
| 867 | while (start_match < end_subject && *start_match != first_char)
|
|---|
| 868 | while (start_match < end_subject && start_match[-1] != '\n')
|
|---|
| 869 | while (start_match < end_subject)
|
|---|
| 870 | {
|
|---|
| 871 | {0, 0, 0, 0, 1, 1};
|
|---|
| 872 | {0, 0, 1, 1, 0, 0};
|
|---|
| 873 | } /* End of main loop */
|
|---|
| 874 | }
|
|---|