| 1 | /* GNU SED, a batch stream editor.
|
|---|
| 2 | Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
|
|---|
| 3 | Free Software Foundation, Inc.
|
|---|
| 4 |
|
|---|
| 5 | This program is free software; you can redistribute it and/or modify
|
|---|
| 6 | it under the terms of the GNU General Public License as published by
|
|---|
| 7 | the Free Software Foundation; either version 2, or (at your option)
|
|---|
| 8 | any later version.
|
|---|
| 9 |
|
|---|
| 10 | This program is distributed in the hope that it will be useful,
|
|---|
| 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 13 | GNU General Public License for more details.
|
|---|
| 14 |
|
|---|
| 15 | You should have received a copy of the GNU General Public License
|
|---|
| 16 | along with this program; if not, write to the Free Software
|
|---|
| 17 | Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
|---|
| 18 |
|
|---|
| 19 | #undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/
|
|---|
| 20 | #define INITIAL_BUFFER_SIZE 50 /* minimum capacity a struct line is grown to by resize_line() and line_copy() */
|
|---|
| 21 | #define FREAD_BUFFER_SIZE 8192 /* size of the stack buffer dump_append_queue() uses to copy queued files */
|
|---|
| 22 |
|
|---|
| 23 | #include "sed.h"
|
|---|
| 24 |
|
|---|
| 25 | #include <stdio.h>
|
|---|
| 26 | #include <ctype.h>
|
|---|
| 27 |
|
|---|
| 28 | #include <errno.h>
|
|---|
| 29 | #ifndef errno
|
|---|
| 30 | extern int errno;
|
|---|
| 31 | #endif
|
|---|
| 32 |
|
|---|
| 33 | #ifdef HAVE_UNISTD_H
|
|---|
| 34 | # include <unistd.h>
|
|---|
| 35 | #endif
|
|---|
| 36 |
|
|---|
| 37 | #ifdef __GNUC__
|
|---|
| 38 | # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7)
|
|---|
| 39 | /* silence warning about unused parameter even for "gcc -W -Wunused" */
|
|---|
| 40 | # define UNUSED __attribute__((unused))
|
|---|
| 41 | # endif
|
|---|
| 42 | #endif
|
|---|
| 43 | #ifndef UNUSED
|
|---|
| 44 | # define UNUSED
|
|---|
| 45 | #endif
|
|---|
| 46 |
|
|---|
| 47 | #ifdef HAVE_STRINGS_H
|
|---|
| 48 | # include <strings.h>
|
|---|
| 49 | #else
|
|---|
| 50 | # include <string.h>
|
|---|
| 51 | #endif /*HAVE_STRINGS_H*/
|
|---|
| 52 | #ifdef HAVE_MEMORY_H
|
|---|
| 53 | # include <memory.h>
|
|---|
| 54 | #endif
|
|---|
| 55 |
|
|---|
| 56 | #ifndef HAVE_STRCHR
|
|---|
| 57 | # define strchr index
|
|---|
| 58 | # define strrchr rindex
|
|---|
| 59 | #endif
|
|---|
| 60 |
|
|---|
| 61 | #ifdef HAVE_STDLIB_H
|
|---|
| 62 | # include <stdlib.h>
|
|---|
| 63 | #endif
|
|---|
| 64 | #ifndef EXIT_SUCCESS
|
|---|
| 65 | # define EXIT_SUCCESS 0
|
|---|
| 66 | #endif
|
|---|
| 67 |
|
|---|
| 68 | #ifdef HAVE_SYS_TYPES_H
|
|---|
| 69 | # include <sys/types.h>
|
|---|
| 70 | #endif
|
|---|
| 71 |
|
|---|
| 72 | #include <sys/stat.h>
|
|---|
| 73 |
|
|---|
| 74 | |
|---|
| 75 |
|
|---|
| 76 | /* Sed operates a line at a time. */
|
|---|
| 77 | struct line {
|
|---|
| 78 | char *text; /* Start of the allocated buffer; this is the pointer handed to REALLOC()/FREE(). */
|
|---|
| 79 | char *active; /* Start of the non-consumed data inside text; bytes before it are inactive. */
|
|---|
| 80 | size_t length; /* Number of valid bytes starting at active. */
|
|---|
| 81 | size_t alloc; /* Bytes available from active to the end of the buffer. */
|
|---|
| 82 | bool chomped; /* Was a trailing newline dropped?  read_file_line() clears it when the data read did not end in a newline. */
|
|---|
| 83 | #ifdef HAVE_MBRTOWC
|
|---|
| 84 | mbstate_t mbstate; /* Multibyte conversion state; updated by str_append() and copied by line_copy()/line_append(). */
|
|---|
| 85 | #endif
|
|---|
| 86 | };
|
|---|
| 87 |
|
|---|
| 88 | /* A queue of text to write out at the end of a cycle
|
|---|
| 89 | (filled by the "a", "r" and "R" commands.) */
|
|---|
| 90 | struct append_queue {
|
|---|
| 91 | const char *fname; /* If non-NULL, file whose contents dump_append_queue() copies to the output. */
|
|---|
| 92 | char *text; /* If non-NULL, textlen bytes written to the output before the file (if any). */
|
|---|
| 93 | size_t textlen; /* Number of bytes of text to write. */
|
|---|
| 94 | struct append_queue *next; /* Next queued entry, or NULL at the tail. */
|
|---|
| 95 | bool free; /* When true, release_append_queue() frees text along with the entry. */
|
|---|
| 96 | };
|
|---|
| 97 |
|
|---|
| 98 | /* State information for the input stream. */
|
|---|
| 99 | struct input {
|
|---|
| 100 | /* The list of yet-to-be-opened files. It is invalid for file_list
|
|---|
| 101 | to be NULL. When *file_list is NULL we are currently processing
|
|---|
| 102 | the last file. */
|
|---|
| 103 |
|
|---|
| 104 | char **file_list;
|
|---|
| 105 |
|
|---|
| 106 | /* Count of files we failed to open. */
|
|---|
| 107 | countT bad_count;
|
|---|
| 108 |
|
|---|
| 109 | /* Current input line number (over all files). */
|
|---|
| 110 | countT line_number;
|
|---|
| 111 |
|
|---|
| 112 | /* True if we'll reset line numbers and addresses before
|
|---|
| 113 | starting to process the next (possibly the first) file. */
|
|---|
| 114 | bool reset_at_next_file;
|
|---|
| 115 |
|
|---|
| 116 | /* Function to read one line. If FP is NULL, read_fn better not
|
|---|
| 117 | be one which uses fp; in particular, read_always_fail() is
|
|---|
| 118 | recommended. */
|
|---|
| 119 | bool (*read_fn) P_((struct input *)); /* read one line */
|
|---|
| 120 |
|
|---|
| 121 | char *out_file_name; /* Temporary output file name filled in by ck_mkstemp() for in-place editing; renamed and freed in closedown(). */
|
|---|
| 122 |
|
|---|
| 123 | const char *in_file_name; /* Name of the file being edited; set by open_next_file() only when in_place_extension is set. */
|
|---|
| 124 |
|
|---|
| 125 | /* if NULL, none of the following are valid */
|
|---|
| 126 | FILE *fp;
|
|---|
| 127 |
|
|---|
| 128 | bool no_buffering; /* NOTE(review): not referenced in the visible part of this file; meaning to be confirmed. */
|
|---|
| 129 | };
|
|---|
| 130 |
|
|---|
| 131 |
|
|---|
| 132 | /* Have we done any replacements lately? This is used by the `t' command. */
|
|---|
| 133 | static bool replaced = false;
|
|---|
| 134 |
|
|---|
| 135 | /* The current output file (stdout if -i is not being used). */
|
|---|
| 136 | static struct output output_file;
|
|---|
| 137 |
|
|---|
| 138 | /* The `current' input line. */
|
|---|
| 139 | static struct line line;
|
|---|
| 140 |
|
|---|
| 141 | /* An input line used to accumulate the result of the s and e commands. */
|
|---|
| 142 | static struct line s_accum;
|
|---|
| 143 |
|
|---|
| 144 | /* An input line that's been stored for later use by the program */
|
|---|
| 145 | static struct line hold;
|
|---|
| 146 |
|
|---|
| 147 | /* The buffered input look-ahead. The only field that should be
|
|---|
| 148 | used outside of read_mem_line() or line_init() is buffer.length. */
|
|---|
| 149 | static struct line buffer;
|
|---|
| 150 |
|
|---|
| 151 | static struct append_queue *append_head = NULL; /* First pending append entry; NULL when the queue is empty. */
|
|---|
| 152 | static struct append_queue *append_tail = NULL; /* Last pending append entry; new slots are linked after it. */
|
|---|
| 153 |
|
|---|
| 154 | |
|---|
| 155 |
|
|---|
#ifdef BOOTSTRAP
/* We can't be sure that the system we're bootstrapping on has
   memchr(), and ../lib/memchr.c requires configuration knowledge
   about how many bits are in a `long'.  This implementation
   is far from ideal, but it should get us up-and-limping well
   enough to run the configure script, which is all that matters.

   As with the standard memchr(), both C and the bytes of S are
   compared as unsigned char, so bytes >= 0x80 are found even on
   platforms where plain char is signed.
*/
# ifdef memchr
#  undef memchr
# endif
# define memchr bootstrap_memchr

/* Return a pointer to the first of the N bytes at S that equals
   (unsigned char) C, or a null pointer if there is none.  */
static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n));
static VOID *
bootstrap_memchr(s, c, n)
  const VOID *s;
  int c;
  size_t n;
{
  const unsigned char *p = (const unsigned char *)s;
  unsigned char wanted = (unsigned char)c;

  for (; n-- > 0; ++p)
    if (*p == wanted)
      return (VOID *)p;
  return CAST(VOID *)0;
}
#endif /*BOOTSTRAP*/
| 183 |
|
|---|
| 184 | /* increase a struct line's length, making some attempt at
|
|---|
| 185 | keeping realloc() calls under control by padding for future growth. */
|
|---|
| 186 | static void resize_line P_((struct line *, size_t));
|
|---|
| 187 | static void
|
|---|
| 188 | resize_line(lb, len)
|
|---|
| 189 | struct line *lb;
|
|---|
| 190 | size_t len;
|
|---|
| 191 | {
|
|---|
| 192 | int inactive;
|
|---|
| 193 | inactive = lb->active - lb->text;
|
|---|
| 194 |
|
|---|
| 195 | /* If the inactive part has got to more than two thirds of the buffer,
|
|---|
| 196 | * remove it. */
|
|---|
| 197 | if (inactive > lb->alloc * 2)
|
|---|
| 198 | {
|
|---|
| 199 | MEMMOVE(lb->text, lb->active, lb->length);
|
|---|
| 200 | lb->alloc += lb->active - lb->text;
|
|---|
| 201 | lb->active = lb->text;
|
|---|
| 202 | inactive = 0;
|
|---|
| 203 |
|
|---|
| 204 | if (lb->alloc > len)
|
|---|
| 205 | return;
|
|---|
| 206 | }
|
|---|
| 207 |
|
|---|
| 208 | lb->alloc *= 2;
|
|---|
| 209 | if (lb->alloc < len)
|
|---|
| 210 | lb->alloc = len;
|
|---|
| 211 | if (lb->alloc < INITIAL_BUFFER_SIZE)
|
|---|
| 212 | lb->alloc = INITIAL_BUFFER_SIZE;
|
|---|
| 213 |
|
|---|
| 214 | lb->text = REALLOC(lb->text, inactive + lb->alloc, char);
|
|---|
| 215 | lb->active = lb->text + inactive;
|
|---|
| 216 | }
|
|---|
| 217 |
|
|---|
| 218 | /* Append `length' bytes from `string' to the line `to'. */
|
|---|
| 219 | static void str_append P_((struct line *, const char *, size_t));
|
|---|
| 220 | static void
|
|---|
| 221 | str_append(to, string, length)
|
|---|
| 222 | struct line *to;
|
|---|
| 223 | const char *string;
|
|---|
| 224 | size_t length;
|
|---|
| 225 | {
|
|---|
| 226 | size_t new_length = to->length + length;
|
|---|
| 227 |
|
|---|
| 228 | if (to->alloc < new_length)
|
|---|
| 229 | resize_line(to, new_length);
|
|---|
| 230 | MEMCPY(to->active + to->length, string, length);
|
|---|
| 231 | to->length = new_length;
|
|---|
| 232 |
|
|---|
| 233 | #ifdef HAVE_MBRTOWC
|
|---|
| 234 | if (mb_cur_max == 1)
|
|---|
| 235 | return;
|
|---|
| 236 |
|
|---|
| 237 | while (length)
|
|---|
| 238 | {
|
|---|
| 239 | int n = MBRLEN (string, length, &to->mbstate);
|
|---|
| 240 |
|
|---|
| 241 | /* An invalid sequence is treated like a singlebyte character. */
|
|---|
| 242 | if (n == -1)
|
|---|
| 243 | {
|
|---|
| 244 | memset (&to->mbstate, 0, sizeof (to->mbstate));
|
|---|
| 245 | n = 1;
|
|---|
| 246 | }
|
|---|
| 247 |
|
|---|
| 248 | if (n > 0)
|
|---|
| 249 | length -= n;
|
|---|
| 250 | else
|
|---|
| 251 | break;
|
|---|
| 252 | }
|
|---|
| 253 | #endif
|
|---|
| 254 | }
|
|---|
| 255 |
|
|---|
| 256 | static void str_append_modified P_((struct line *, const char *, size_t,
|
|---|
| 257 | enum replacement_types));
|
|---|
| 258 | static void
|
|---|
| 259 | str_append_modified(to, string, length, type)
|
|---|
| 260 | struct line *to;
|
|---|
| 261 | const char *string;
|
|---|
| 262 | size_t length;
|
|---|
| 263 | enum replacement_types type;
|
|---|
| 264 | {
|
|---|
| 265 | size_t old_length = to->length;
|
|---|
| 266 | char *start, *end;
|
|---|
| 267 |
|
|---|
| 268 | if (length == 0)
|
|---|
| 269 | return;
|
|---|
| 270 |
|
|---|
| 271 | #ifdef HAVE_MBRTOWC
|
|---|
| 272 | {
|
|---|
| 273 | mbstate_t from_stat;
|
|---|
| 274 |
|
|---|
| 275 | if (type == REPL_ASIS)
|
|---|
| 276 | {
|
|---|
| 277 | str_append(to, string, length);
|
|---|
| 278 | return;
|
|---|
| 279 | }
|
|---|
| 280 |
|
|---|
| 281 | if (to->alloc - to->length < length * mb_cur_max)
|
|---|
| 282 | resize_line(to, to->length + length * mb_cur_max);
|
|---|
| 283 |
|
|---|
| 284 | MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t));
|
|---|
| 285 | while (length)
|
|---|
| 286 | {
|
|---|
| 287 | wchar_t wc;
|
|---|
| 288 | int n = MBRTOWC (&wc, string, length, &from_stat);
|
|---|
| 289 |
|
|---|
| 290 | /* An invalid sequence is treated like a singlebyte character. */
|
|---|
| 291 | if (n == -1)
|
|---|
| 292 | {
|
|---|
| 293 | memset (&to->mbstate, 0, sizeof (from_stat));
|
|---|
| 294 | n = 1;
|
|---|
| 295 | }
|
|---|
| 296 |
|
|---|
| 297 | if (n > 0)
|
|---|
| 298 | string += n, length -= n;
|
|---|
| 299 | else
|
|---|
| 300 | {
|
|---|
| 301 | /* Incomplete sequence, copy it manually. */
|
|---|
| 302 | str_append(to, string, length);
|
|---|
| 303 | return;
|
|---|
| 304 | }
|
|---|
| 305 |
|
|---|
| 306 | /* Convert the first character specially... */
|
|---|
| 307 | if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST))
|
|---|
| 308 | {
|
|---|
| 309 | if (type & REPL_UPPERCASE_FIRST)
|
|---|
| 310 | wc = towupper(wc);
|
|---|
| 311 | else
|
|---|
| 312 | wc = towlower(wc);
|
|---|
| 313 |
|
|---|
| 314 | type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST);
|
|---|
| 315 | if (type == REPL_ASIS)
|
|---|
| 316 | {
|
|---|
| 317 | n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
|
|---|
| 318 | to->length += n;
|
|---|
| 319 | str_append(to, string, length);
|
|---|
| 320 | return;
|
|---|
| 321 | }
|
|---|
| 322 | }
|
|---|
| 323 |
|
|---|
| 324 | else if (type & REPL_UPPERCASE)
|
|---|
| 325 | wc = towupper(wc);
|
|---|
| 326 | else
|
|---|
| 327 | wc = towlower(wc);
|
|---|
| 328 |
|
|---|
| 329 | /* Copy the new wide character to the end of the string. */
|
|---|
| 330 | n = WCRTOMB (to->active + to->length, wc, &to->mbstate);
|
|---|
| 331 | to->length += n;
|
|---|
| 332 | if (n == -1)
|
|---|
| 333 | {
|
|---|
| 334 | fprintf (stderr, "Case conversion produced an invalid character!");
|
|---|
| 335 | abort ();
|
|---|
| 336 | }
|
|---|
| 337 | }
|
|---|
| 338 | }
|
|---|
| 339 | #else
|
|---|
| 340 | str_append(to, string, length);
|
|---|
| 341 | start = to->active + old_length;
|
|---|
| 342 | end = start + length;
|
|---|
| 343 |
|
|---|
| 344 | /* Now do the required modifications. First \[lu]... */
|
|---|
| 345 | if (type & REPL_UPPERCASE_FIRST)
|
|---|
| 346 | {
|
|---|
| 347 | *start = toupper(*start);
|
|---|
| 348 | start++;
|
|---|
| 349 | type &= ~REPL_UPPERCASE_FIRST;
|
|---|
| 350 | }
|
|---|
| 351 | else if (type & REPL_LOWERCASE_FIRST)
|
|---|
| 352 | {
|
|---|
| 353 | *start = tolower(*start);
|
|---|
| 354 | start++;
|
|---|
| 355 | type &= ~REPL_LOWERCASE_FIRST;
|
|---|
| 356 | }
|
|---|
| 357 |
|
|---|
| 358 | if (type == REPL_ASIS)
|
|---|
| 359 | return;
|
|---|
| 360 |
|
|---|
| 361 | /* ...and then \[LU] */
|
|---|
| 362 | if (type == REPL_UPPERCASE)
|
|---|
| 363 | for (; start != end; start++)
|
|---|
| 364 | *start = toupper(*start);
|
|---|
| 365 | else
|
|---|
| 366 | for (; start != end; start++)
|
|---|
| 367 | *start = tolower(*start);
|
|---|
| 368 | #endif
|
|---|
| 369 | }
|
|---|
| 370 |
|
|---|
| 371 | /* initialize a "struct line" buffer */
|
|---|
| 372 | static void line_init P_((struct line *, size_t initial_size));
|
|---|
| 373 | static void
|
|---|
| 374 | line_init(buf, initial_size)
|
|---|
| 375 | struct line *buf;
|
|---|
| 376 | size_t initial_size;
|
|---|
| 377 | {
|
|---|
| 378 | buf->text = MALLOC(initial_size, char);
|
|---|
| 379 | buf->active = buf->text;
|
|---|
| 380 | buf->alloc = initial_size;
|
|---|
| 381 | buf->length = 0;
|
|---|
| 382 | buf->chomped = true;
|
|---|
| 383 |
|
|---|
| 384 | #ifdef HAVE_MBRTOWC
|
|---|
| 385 | memset (&buf->mbstate, 0, sizeof (buf->mbstate));
|
|---|
| 386 | #endif
|
|---|
| 387 |
|
|---|
| 388 | }
|
|---|
| 389 |
|
|---|
| 390 | /* Copy the contents of the line `from' into the line `to'.
|
|---|
| 391 | This destroys the old contents of `to'. */
|
|---|
| 392 | static void line_copy P_((struct line *from, struct line *to));
|
|---|
| 393 | static void
|
|---|
| 394 | line_copy(from, to)
|
|---|
| 395 | struct line *from;
|
|---|
| 396 | struct line *to;
|
|---|
| 397 | {
|
|---|
| 398 | /* Remove the inactive portion in the destination buffer. */
|
|---|
| 399 | to->alloc += to->active - to->text;
|
|---|
| 400 |
|
|---|
| 401 | if (to->alloc < from->length)
|
|---|
| 402 | {
|
|---|
| 403 | to->alloc *= 2;
|
|---|
| 404 | if (to->alloc < from->length)
|
|---|
| 405 | to->alloc = from->length;
|
|---|
| 406 | if (to->alloc < INITIAL_BUFFER_SIZE)
|
|---|
| 407 | to->alloc = INITIAL_BUFFER_SIZE;
|
|---|
| 408 | /* Use FREE()+MALLOC() instead of REALLOC() to
|
|---|
| 409 | avoid unnecessary copying of old text. */
|
|---|
| 410 | FREE(to->text);
|
|---|
| 411 | to->text = MALLOC(to->alloc, char);
|
|---|
| 412 | }
|
|---|
| 413 |
|
|---|
| 414 | to->active = to->text;
|
|---|
| 415 | to->length = from->length;
|
|---|
| 416 | to->chomped = from->chomped;
|
|---|
| 417 | MEMCPY(to->active, from->active, from->length);
|
|---|
| 418 |
|
|---|
| 419 | #ifdef HAVE_MBRTOWC
|
|---|
| 420 | MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate));
|
|---|
| 421 | #endif
|
|---|
| 422 | }
|
|---|
| 423 |
|
|---|
| 424 | /* Append the contents of the line `from' to the line `to'. */
|
|---|
| 425 | static void line_append P_((struct line *from, struct line *to));
|
|---|
| 426 | static void
|
|---|
| 427 | line_append(from, to)
|
|---|
| 428 | struct line *from;
|
|---|
| 429 | struct line *to;
|
|---|
| 430 | {
|
|---|
| 431 | str_append(to, "\n", 1);
|
|---|
| 432 | str_append(to, from->active, from->length);
|
|---|
| 433 | to->chomped = from->chomped;
|
|---|
| 434 |
|
|---|
| 435 | #ifdef HAVE_MBRTOWC
|
|---|
| 436 | MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate));
|
|---|
| 437 | #endif
|
|---|
| 438 | }
|
|---|
| 439 |
|
|---|
| 440 | /* Exchange the contents of two "struct line" buffers. */
|
|---|
| 441 | static void line_exchange P_((struct line *, struct line *));
|
|---|
| 442 | static void
|
|---|
| 443 | line_exchange(a, b)
|
|---|
| 444 | struct line *a;
|
|---|
| 445 | struct line *b;
|
|---|
| 446 | {
|
|---|
| 447 | struct line t;
|
|---|
| 448 |
|
|---|
| 449 | MEMCPY(&t, a, sizeof(struct line));
|
|---|
| 450 | MEMCPY( a, b, sizeof(struct line));
|
|---|
| 451 | MEMCPY( b, &t, sizeof(struct line));
|
|---|
| 452 | }
|
|---|
| 453 |
|
|---|
| 454 | |
|---|
| 455 |
|
|---|
| 456 | /* dummy function to simplify read_pattern_space() */
|
|---|
| 457 | static bool read_always_fail P_((struct input *));
|
|---|
| 458 | static bool
|
|---|
| 459 | read_always_fail(input)
|
|---|
| 460 | struct input *input UNUSED;
|
|---|
| 461 | {
|
|---|
| 462 | return false;
|
|---|
| 463 | }
|
|---|
| 464 |
|
|---|
| 465 | static bool read_file_line P_((struct input *));
|
|---|
| 466 | static bool
|
|---|
| 467 | read_file_line(input)
|
|---|
| 468 | struct input *input;
|
|---|
| 469 | {
|
|---|
| 470 | static char *b;
|
|---|
| 471 | static size_t blen;
|
|---|
| 472 |
|
|---|
| 473 | long result = ck_getline (&b, &blen, input->fp);
|
|---|
| 474 | if (result <= 0)
|
|---|
| 475 | return false;
|
|---|
| 476 |
|
|---|
| 477 | /* Remove the trailing new-line that is left by getline. */
|
|---|
| 478 | if (b[result - 1] == '\n')
|
|---|
| 479 | --result;
|
|---|
| 480 | else
|
|---|
| 481 | line.chomped = false;
|
|---|
| 482 |
|
|---|
| 483 | str_append(&line, b, result);
|
|---|
| 484 | return true;
|
|---|
| 485 | }
|
|---|
| 486 |
|
|---|
| 487 | |
|---|
| 488 |
|
|---|
| 489 | static inline void output_missing_newline P_((struct output *));
|
|---|
| 490 | static inline void
|
|---|
| 491 | output_missing_newline(outf)
|
|---|
| 492 | struct output *outf;
|
|---|
| 493 | {
|
|---|
| 494 | if (outf->missing_newline)
|
|---|
| 495 | {
|
|---|
| 496 | ck_fwrite("\n", 1, 1, outf->fp);
|
|---|
| 497 | outf->missing_newline = false;
|
|---|
| 498 | }
|
|---|
| 499 | }
|
|---|
| 500 |
|
|---|
| 501 | static inline void flush_output P_((FILE *));
|
|---|
| 502 | static inline void
|
|---|
| 503 | flush_output(fp)
|
|---|
| 504 | FILE *fp;
|
|---|
| 505 | {
|
|---|
| 506 | if (fp != stdout || unbuffered_output)
|
|---|
| 507 | ck_fflush(fp);
|
|---|
| 508 | }
|
|---|
| 509 |
|
|---|
| 510 | static void output_line P_((const char *, size_t, bool, struct output *));
|
|---|
| 511 | static void
|
|---|
| 512 | output_line(text, length, nl, outf)
|
|---|
| 513 | const char *text;
|
|---|
| 514 | size_t length;
|
|---|
| 515 | bool nl;
|
|---|
| 516 | struct output *outf;
|
|---|
| 517 | {
|
|---|
| 518 | output_missing_newline(outf);
|
|---|
| 519 |
|
|---|
| 520 | if (length)
|
|---|
| 521 | ck_fwrite(text, 1, length, outf->fp);
|
|---|
| 522 |
|
|---|
| 523 | if (nl)
|
|---|
| 524 | ck_fwrite("\n", 1, 1, outf->fp);
|
|---|
| 525 | else
|
|---|
| 526 | outf->missing_newline = true;
|
|---|
| 527 |
|
|---|
| 528 | flush_output(outf->fp);
|
|---|
| 529 | }
|
|---|
| 530 |
|
|---|
| 531 | static struct append_queue *next_append_slot P_((void));
|
|---|
| 532 | static struct append_queue *
|
|---|
| 533 | next_append_slot()
|
|---|
| 534 | {
|
|---|
| 535 | struct append_queue *n = MALLOC(1, struct append_queue);
|
|---|
| 536 |
|
|---|
| 537 | n->fname = NULL;
|
|---|
| 538 | n->text = NULL;
|
|---|
| 539 | n->textlen = 0;
|
|---|
| 540 | n->next = NULL;
|
|---|
| 541 | n->free = false;
|
|---|
| 542 |
|
|---|
| 543 | if (append_tail)
|
|---|
| 544 | append_tail->next = n;
|
|---|
| 545 | else
|
|---|
| 546 | append_head = n;
|
|---|
| 547 | return append_tail = n;
|
|---|
| 548 | }
|
|---|
| 549 |
|
|---|
| 550 | static void release_append_queue P_((void));
|
|---|
| 551 | static void
|
|---|
| 552 | release_append_queue()
|
|---|
| 553 | {
|
|---|
| 554 | struct append_queue *p, *q;
|
|---|
| 555 |
|
|---|
| 556 | for (p=append_head; p; p=q)
|
|---|
| 557 | {
|
|---|
| 558 | if (p->free)
|
|---|
| 559 | FREE(p->text);
|
|---|
| 560 |
|
|---|
| 561 | q = p->next;
|
|---|
| 562 | FREE(p);
|
|---|
| 563 | }
|
|---|
| 564 | append_head = append_tail = NULL;
|
|---|
| 565 | }
|
|---|
| 566 |
|
|---|
| 567 | static void dump_append_queue P_((void));
|
|---|
| 568 | static void
|
|---|
| 569 | dump_append_queue()
|
|---|
| 570 | {
|
|---|
| 571 | struct append_queue *p;
|
|---|
| 572 |
|
|---|
| 573 | output_missing_newline(&output_file);
|
|---|
| 574 | for (p=append_head; p; p=p->next)
|
|---|
| 575 | {
|
|---|
| 576 | if (p->text)
|
|---|
| 577 | ck_fwrite(p->text, 1, p->textlen, output_file.fp);
|
|---|
| 578 |
|
|---|
| 579 | if (p->fname)
|
|---|
| 580 | {
|
|---|
| 581 | char buf[FREAD_BUFFER_SIZE];
|
|---|
| 582 | size_t cnt;
|
|---|
| 583 | FILE *fp;
|
|---|
| 584 |
|
|---|
| 585 | /* "If _fname_ does not exist or cannot be read, it shall
|
|---|
| 586 | be treated as if it were an empty file, causing no error
|
|---|
| 587 | condition." IEEE Std 1003.2-1992
|
|---|
| 588 | So, don't fail. */
|
|---|
| 589 | fp = ck_fopen(p->fname, "r", false);
|
|---|
| 590 | if (fp)
|
|---|
| 591 | {
|
|---|
| 592 | while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0)
|
|---|
| 593 | ck_fwrite(buf, 1, cnt, output_file.fp);
|
|---|
| 594 | ck_fclose(fp);
|
|---|
| 595 | }
|
|---|
| 596 | }
|
|---|
| 597 | }
|
|---|
| 598 |
|
|---|
| 599 | flush_output(output_file.fp);
|
|---|
| 600 | release_append_queue();
|
|---|
| 601 | }
|
|---|
| 602 |
|
|---|
| 603 | |
|---|
| 604 |
|
|---|
| 605 | /* Compute the name of the backup file for in-place editing */
|
|---|
| 606 | static char *get_backup_file_name P_((const char *));
|
|---|
| 607 | static char *
|
|---|
| 608 | get_backup_file_name(name)
|
|---|
| 609 | const char *name;
|
|---|
| 610 | {
|
|---|
| 611 | char *old_asterisk, *asterisk, *backup, *p;
|
|---|
| 612 | int name_length = strlen(name), backup_length = strlen(in_place_extension);
|
|---|
| 613 |
|
|---|
| 614 | /* Compute the length of the backup file */
|
|---|
| 615 | for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
|
|---|
| 616 | asterisk = strchr(old_asterisk, '*');
|
|---|
| 617 | old_asterisk = asterisk + 1)
|
|---|
| 618 | backup_length += name_length - 1;
|
|---|
| 619 |
|
|---|
| 620 | p = backup = xmalloc(backup_length + 1);
|
|---|
| 621 |
|
|---|
| 622 | /* Each iteration gobbles up to an asterisk */
|
|---|
| 623 | for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1;
|
|---|
| 624 | asterisk = strchr(old_asterisk, '*');
|
|---|
| 625 | old_asterisk = asterisk + 1)
|
|---|
| 626 | {
|
|---|
| 627 | MEMCPY (p, old_asterisk, asterisk - old_asterisk);
|
|---|
| 628 | p += asterisk - old_asterisk;
|
|---|
| 629 | strcpy (p, name);
|
|---|
| 630 | p += name_length;
|
|---|
| 631 | }
|
|---|
| 632 |
|
|---|
| 633 | /* Tack on what's after the last asterisk */
|
|---|
| 634 | strcpy (p, old_asterisk);
|
|---|
| 635 | return backup;
|
|---|
| 636 | }
|
|---|
| 637 |
|
|---|
| 638 | /* Initialize a struct input for the named file. */
|
|---|
| 639 | static void open_next_file P_((const char *name, struct input *));
|
|---|
| 640 | static void
|
|---|
| 641 | open_next_file(name, input)
|
|---|
| 642 | const char *name;
|
|---|
| 643 | struct input *input;
|
|---|
| 644 | {
|
|---|
| 645 | buffer.length = 0;
|
|---|
| 646 |
|
|---|
| 647 | if (name[0] == '-' && name[1] == '\0' && !in_place_extension)
|
|---|
| 648 | {
|
|---|
| 649 | clearerr(stdin); /* clear any stale EOF indication */
|
|---|
| 650 | input->fp = stdin;
|
|---|
| 651 | }
|
|---|
| 652 | else if ( ! (input->fp = ck_fopen(name, "r", false)) )
|
|---|
| 653 | {
|
|---|
| 654 | const char *ptr = strerror(errno);
|
|---|
| 655 | fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr);
|
|---|
| 656 | input->read_fn = read_always_fail; /* a redundancy */
|
|---|
| 657 | ++input->bad_count;
|
|---|
| 658 | return;
|
|---|
| 659 | }
|
|---|
| 660 |
|
|---|
| 661 | input->read_fn = read_file_line;
|
|---|
| 662 |
|
|---|
| 663 | if (in_place_extension)
|
|---|
| 664 | {
|
|---|
| 665 | int output_fd;
|
|---|
| 666 | char *tmpdir = ck_strdup(name), *p;
|
|---|
| 667 | struct stat st;
|
|---|
| 668 |
|
|---|
| 669 | /* get the base name */
|
|---|
| 670 | if (p = strrchr(tmpdir, '/'))
|
|---|
| 671 | *(p + 1) = 0;
|
|---|
| 672 | else
|
|---|
| 673 | strcpy(tmpdir, ".");
|
|---|
| 674 |
|
|---|
| 675 | input->in_file_name = name;
|
|---|
| 676 |
|
|---|
| 677 | if (isatty (fileno (input->fp)))
|
|---|
| 678 | panic(_("couldn't edit %s: is a terminal"), input->in_file_name);
|
|---|
| 679 |
|
|---|
| 680 | fstat (fileno (input->fp), &st);
|
|---|
| 681 | if (!S_ISREG (st.st_mode))
|
|---|
| 682 | panic(_("couldn't edit %s: not a regular file"), input->in_file_name);
|
|---|
| 683 |
|
|---|
| 684 | output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed");
|
|---|
| 685 | output_file.missing_newline = false;
|
|---|
| 686 | free (tmpdir);
|
|---|
| 687 |
|
|---|
| 688 | if (!output_file.fp)
|
|---|
| 689 | panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno));
|
|---|
| 690 |
|
|---|
| 691 | output_fd = fileno (output_file.fp);
|
|---|
| 692 | #ifdef HAVE_FCHMOD
|
|---|
| 693 | fchmod (output_fd, st.st_mode);
|
|---|
| 694 | #endif
|
|---|
| 695 | #ifdef HAVE_FCHOWN
|
|---|
| 696 | if (fchown (output_fd, st.st_uid, st.st_gid) == -1)
|
|---|
| 697 | fchown (output_fd, -1, st.st_gid);
|
|---|
| 698 | #endif
|
|---|
| 699 | }
|
|---|
| 700 | else
|
|---|
| 701 | output_file.fp = stdout;
|
|---|
| 702 | }
|
|---|
| 703 |
|
|---|
| 704 |
|
|---|
| 705 | /* Clean up an input stream that we are done with. */
|
|---|
| 706 | static void closedown P_((struct input *));
|
|---|
| 707 | static void
|
|---|
| 708 | closedown(input)
|
|---|
| 709 | struct input *input;
|
|---|
| 710 | {
|
|---|
| 711 | input->read_fn = read_always_fail;
|
|---|
| 712 | if (!input->fp)
|
|---|
| 713 | return;
|
|---|
| 714 | if (input->fp != stdin) /* stdin can be reused on tty and tape devices */
|
|---|
| 715 | ck_fclose(input->fp);
|
|---|
| 716 |
|
|---|
| 717 | if (in_place_extension && output_file.fp != NULL)
|
|---|
| 718 | {
|
|---|
| 719 | ck_fclose (output_file.fp);
|
|---|
| 720 | if (strcmp(in_place_extension, "*") != 0)
|
|---|
| 721 | {
|
|---|
| 722 | char *backup_file_name = get_backup_file_name(input->in_file_name);
|
|---|
| 723 | ck_rename (input->in_file_name, backup_file_name, input->out_file_name);
|
|---|
| 724 | free (backup_file_name);
|
|---|
| 725 | }
|
|---|
| 726 |
|
|---|
| 727 | ck_rename (input->out_file_name, input->in_file_name, input->out_file_name);
|
|---|
| 728 | free (input->out_file_name);
|
|---|
| 729 | }
|
|---|
| 730 |
|
|---|
| 731 | input->fp = NULL;
|
|---|
| 732 | }
|
|---|
| 733 |
|
|---|
| 734 | /* Reset range commands so that they are marked as non-matching */
|
|---|
| 735 | static void reset_addresses P_((struct vector *));
|
|---|
| 736 | static void
|
|---|
| 737 | reset_addresses(vec)
|
|---|
| 738 | struct vector *vec;
|
|---|
| 739 | {
|
|---|
| 740 | struct sed_cmd *cur_cmd;
|
|---|
| 741 | int n;
|
|---|
| 742 |
|
|---|
| 743 | for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++)
|
|---|
| 744 | if (cur_cmd->a1
|
|---|
| 745 | && cur_cmd->a1->addr_type == ADDR_IS_NUM
|
|---|
| 746 | && cur_cmd->a1->addr_number == 0)
|
|---|
| 747 | cur_cmd->range_state = RANGE_ACTIVE;
|
|---|
| 748 | else
|
|---|
| 749 | cur_cmd->range_state = RANGE_INACTIVE;
|
|---|
| 750 | }
|
|---|
| 751 |
|
|---|
| 752 | /* Read in the next line of input, and store it in the pattern space.
|
|---|
| 753 | Return zero if there is nothing left to input. */
|
|---|
| 754 | static bool read_pattern_space P_((struct input *, struct vector *, bool));
|
|---|
| 755 | static bool
|
|---|
| 756 | read_pattern_space(input, the_program, append)
|
|---|
| 757 | struct input *input;
|
|---|
| 758 | struct vector *the_program;
|
|---|
| 759 | bool append;
|
|---|
| 760 | {
|
|---|
| 761 | if (append_head) /* redundant test to optimize for common case */
|
|---|
| 762 | dump_append_queue();
|
|---|
| 763 | replaced = false;
|
|---|
| 764 | if (!append)
|
|---|
| 765 | line.length = 0;
|
|---|
| 766 | line.chomped = true; /* default, until proved otherwise */
|
|---|
| 767 |
|
|---|
| 768 | while ( ! (*input->read_fn)(input) )
|
|---|
| 769 | {
|
|---|
| 770 | closedown(input);
|
|---|
| 771 |
|
|---|
| 772 | if (!*input->file_list)
|
|---|
| 773 | return false;
|
|---|
| 774 |
|
|---|
| 775 | if (input->reset_at_next_file)
|
|---|
| 776 | {
|
|---|
| 777 | input->line_number = 0;
|
|---|
| 778 | reset_addresses (the_program);
|
|---|
| 779 | rewind_read_files ();
|
|---|
| 780 |
|
|---|
| 781 | /* If doing in-place editing, we will never append the
|
|---|
| 782 | new-line to this file; but if the output goes to stdout,
|
|---|
| 783 | we might still have to output the missing new-line. */
|
|---|
| 784 | if (in_place_extension)
|
|---|
| 785 | output_file.missing_newline = false;
|
|---|
| 786 |
|
|---|
| 787 | input->reset_at_next_file = separate_files;
|
|---|
| 788 | }
|
|---|
| 789 |
|
|---|
| 790 | open_next_file (*input->file_list++, input);
|
|---|
| 791 | }
|
|---|
| 792 |
|
|---|
| 793 | ++input->line_number;
|
|---|
| 794 | return true;
|
|---|
| 795 | }
|
|---|
| 796 |
|
|---|
| 797 | |
|---|
| 798 |
|
|---|
| 799 | static bool last_file_with_data_p P_((struct input *));
|
|---|
| 800 | static bool
|
|---|
| 801 | last_file_with_data_p(input)
|
|---|
| 802 | struct input *input;
|
|---|
| 803 | {
|
|---|
| 804 | for (;;)
|
|---|
| 805 | {
|
|---|
| 806 | int ch;
|
|---|
| 807 |
|
|---|
| 808 | closedown(input);
|
|---|
| 809 | if (!*input->file_list)
|
|---|
| 810 | return true;
|
|---|
| 811 | open_next_file(*input->file_list++, input);
|
|---|
| 812 | if (input->fp)
|
|---|
| 813 | {
|
|---|
| 814 | if ((ch = getc(input->fp)) != EOF)
|
|---|
| 815 | {
|
|---|
| 816 | ungetc(ch, input->fp);
|
|---|
| 817 | return false;
|
|---|
| 818 | }
|
|---|
| 819 | }
|
|---|
| 820 | }
|
|---|
| 821 | }
|
|---|
| 822 |
|
|---|
| 823 | /* Determine if we match the `$' address. */
|
|---|
| 824 | static bool test_eof P_((struct input *));
|
|---|
| 825 | static bool
|
|---|
| 826 | test_eof(input)
|
|---|
| 827 | struct input *input;
|
|---|
| 828 | {
|
|---|
| 829 | int ch;
|
|---|
| 830 |
|
|---|
| 831 | if (buffer.length)
|
|---|
| 832 | return false;
|
|---|
| 833 | if (!input->fp)
|
|---|
| 834 | return separate_files || last_file_with_data_p(input);
|
|---|
| 835 | if (feof(input->fp))
|
|---|
| 836 | return separate_files || last_file_with_data_p(input);
|
|---|
| 837 | if ((ch = getc(input->fp)) == EOF)
|
|---|
| 838 | return separate_files || last_file_with_data_p(input);
|
|---|
| 839 | ungetc(ch, input->fp);
|
|---|
| 840 | return false;
|
|---|
| 841 | }
|
|---|
| 842 |
|
|---|
| 843 | /* Return non-zero if the current line matches the address
|
|---|
| 844 | pointed to by `addr'. */
|
|---|
| 845 | static bool match_an_address_p P_((struct addr *, struct input *));
|
|---|
| 846 | static bool
|
|---|
| 847 | match_an_address_p(addr, input)
|
|---|
| 848 | struct addr *addr;
|
|---|
| 849 | struct input *input;
|
|---|
| 850 | {
|
|---|
| 851 | switch (addr->addr_type)
|
|---|
| 852 | {
|
|---|
| 853 | case ADDR_IS_NULL:
|
|---|
| 854 | return true;
|
|---|
| 855 |
|
|---|
| 856 | case ADDR_IS_REGEX:
|
|---|
| 857 | return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0);
|
|---|
| 858 |
|
|---|
| 859 | case ADDR_IS_NUM_MOD:
|
|---|
| 860 | return (input->line_number >= addr->addr_number
|
|---|
| 861 | && ((input->line_number - addr->addr_number) % addr->addr_step) == 0);
|
|---|
| 862 |
|
|---|
| 863 | case ADDR_IS_STEP:
|
|---|
| 864 | case ADDR_IS_STEP_MOD:
|
|---|
| 865 | /* reminder: these are only meaningful for a2 addresses */
|
|---|
| 866 | /* a2->addr_number needs to be recomputed each time a1 address
|
|---|
| 867 | matches for the step and step_mod types */
|
|---|
| 868 | return (addr->addr_number <= input->line_number);
|
|---|
| 869 |
|
|---|
| 870 | case ADDR_IS_LAST:
|
|---|
| 871 | return test_eof(input);
|
|---|
| 872 |
|
|---|
| 873 | /* ADDR_IS_NUM is handled in match_address_p. */
|
|---|
| 874 | case ADDR_IS_NUM:
|
|---|
| 875 | default:
|
|---|
| 876 | panic("INTERNAL ERROR: bad address type");
|
|---|
| 877 | }
|
|---|
| 878 | /*NOTREACHED*/
|
|---|
| 879 | return false;
|
|---|
| 880 | }
|
|---|
| 881 |
|
|---|
| 882 | /* return non-zero if current address is valid for cmd */
|
|---|
| 883 | static bool match_address_p P_((struct sed_cmd *, struct input *));
|
|---|
| 884 | static bool
|
|---|
| 885 | match_address_p(cmd, input)
|
|---|
| 886 | struct sed_cmd *cmd;
|
|---|
| 887 | struct input *input;
|
|---|
| 888 | {
|
|---|
| 889 | if (!cmd->a1)
|
|---|
| 890 | return true;
|
|---|
| 891 |
|
|---|
| 892 | if (cmd->range_state != RANGE_ACTIVE)
|
|---|
| 893 | {
|
|---|
| 894 | /* Find if we are going to activate a range. Handle ADDR_IS_NUM
|
|---|
| 895 | specially: it represent an "absolute" state, it should not
|
|---|
| 896 | be computed like regexes. */
|
|---|
| 897 | if (cmd->a1->addr_type == ADDR_IS_NUM)
|
|---|
| 898 | {
|
|---|
| 899 | if (!cmd->a2)
|
|---|
| 900 | return (input->line_number == cmd->a1->addr_number);
|
|---|
| 901 |
|
|---|
| 902 | if (cmd->range_state == RANGE_CLOSED
|
|---|
| 903 | || input->line_number < cmd->a1->addr_number)
|
|---|
| 904 | return false;
|
|---|
| 905 | }
|
|---|
| 906 | else
|
|---|
| 907 | {
|
|---|
| 908 | if (!cmd->a2)
|
|---|
| 909 | return match_an_address_p(cmd->a1, input);
|
|---|
| 910 |
|
|---|
| 911 | if (!match_an_address_p(cmd->a1, input))
|
|---|
| 912 | return false;
|
|---|
| 913 | }
|
|---|
| 914 |
|
|---|
| 915 | /* Ok, start a new range. */
|
|---|
| 916 | cmd->range_state = RANGE_ACTIVE;
|
|---|
| 917 | switch (cmd->a2->addr_type)
|
|---|
| 918 | {
|
|---|
| 919 | case ADDR_IS_REGEX:
|
|---|
| 920 | /* Always include at least two lines. */
|
|---|
| 921 | return true;
|
|---|
| 922 | case ADDR_IS_NUM:
|
|---|
| 923 | /* Same handling as below, but always include at least one line. */
|
|---|
| 924 | if (input->line_number >= cmd->a2->addr_number)
|
|---|
| 925 | cmd->range_state = RANGE_CLOSED;
|
|---|
| 926 | return true;
|
|---|
| 927 | case ADDR_IS_STEP:
|
|---|
| 928 | cmd->a2->addr_number = input->line_number + cmd->a2->addr_step;
|
|---|
| 929 | return true;
|
|---|
| 930 | case ADDR_IS_STEP_MOD:
|
|---|
| 931 | cmd->a2->addr_number = input->line_number + cmd->a2->addr_step
|
|---|
| 932 | - (input->line_number%cmd->a2->addr_step);
|
|---|
| 933 | return true;
|
|---|
| 934 | default:
|
|---|
| 935 | break;
|
|---|
| 936 | }
|
|---|
| 937 | }
|
|---|
| 938 |
|
|---|
| 939 | /* cmd->range_state == RANGE_ACTIVE. Check if the range is
|
|---|
| 940 | ending; also handle ADDR_IS_NUM specially in this case. */
|
|---|
| 941 |
|
|---|
| 942 | if (cmd->a2->addr_type == ADDR_IS_NUM)
|
|---|
| 943 | {
|
|---|
| 944 | /* If the second address is a line number, and if we got past
|
|---|
| 945 | that line, fail to match (it can happen when you jump
|
|---|
| 946 | over such addresses with `b' and `t'. Use RANGE_CLOSED
|
|---|
| 947 | so that the range is not re-enabled anymore. */
|
|---|
| 948 | if (input->line_number >= cmd->a2->addr_number)
|
|---|
| 949 | cmd->range_state = RANGE_CLOSED;
|
|---|
| 950 |
|
|---|
| 951 | return (input->line_number <= cmd->a2->addr_number);
|
|---|
| 952 | }
|
|---|
| 953 |
|
|---|
| 954 | /* Other addresses are treated as usual. */
|
|---|
| 955 | if (match_an_address_p(cmd->a2, input))
|
|---|
| 956 | cmd->range_state = RANGE_CLOSED;
|
|---|
| 957 |
|
|---|
| 958 | return true;
|
|---|
| 959 | }
|
|---|
| 960 |
|
|---|
| 961 | |
|---|
| 962 |
|
|---|
| 963 | static void do_list P_((int line_len));
|
|---|
| 964 | static void
|
|---|
| 965 | do_list(line_len)
|
|---|
| 966 | int line_len;
|
|---|
| 967 | {
|
|---|
| 968 | unsigned char *p = CAST(unsigned char *)line.active;
|
|---|
| 969 | countT len = line.length;
|
|---|
| 970 | countT width = 0;
|
|---|
| 971 | char obuf[180]; /* just in case we encounter a 512-bit char (;-) */
|
|---|
| 972 | char *o;
|
|---|
| 973 | size_t olen;
|
|---|
| 974 | FILE *fp = output_file.fp;
|
|---|
| 975 |
|
|---|
| 976 | output_missing_newline(&output_file);
|
|---|
| 977 | for (; len--; ++p) {
|
|---|
| 978 | o = obuf;
|
|---|
| 979 |
|
|---|
| 980 | /* Some locales define 8-bit characters as printable. This makes the
|
|---|
| 981 | testsuite fail at 8to7.sed because the `l' command in fact will not
|
|---|
| 982 | convert the 8-bit characters. */
|
|---|
| 983 | #if defined isascii || defined HAVE_ISASCII
|
|---|
| 984 | if (isascii(*p) && ISPRINT(*p)) {
|
|---|
| 985 | #else
|
|---|
| 986 | if (ISPRINT(*p)) {
|
|---|
| 987 | #endif
|
|---|
| 988 | *o++ = *p;
|
|---|
| 989 | if (*p == '\\')
|
|---|
| 990 | *o++ = '\\';
|
|---|
| 991 | } else {
|
|---|
| 992 | *o++ = '\\';
|
|---|
| 993 | switch (*p) {
|
|---|
| 994 | #if defined __STDC__ && __STDC__-0
|
|---|
| 995 | case '\a': *o++ = 'a'; break;
|
|---|
| 996 | #else /* Not STDC; we'll just assume ASCII */
|
|---|
| 997 | case 007: *o++ = 'a'; break;
|
|---|
| 998 | #endif
|
|---|
| 999 | case '\b': *o++ = 'b'; break;
|
|---|
| 1000 | case '\f': *o++ = 'f'; break;
|
|---|
| 1001 | case '\n': *o++ = 'n'; break;
|
|---|
| 1002 | case '\r': *o++ = 'r'; break;
|
|---|
| 1003 | case '\t': *o++ = 't'; break;
|
|---|
| 1004 | case '\v': *o++ = 'v'; break;
|
|---|
| 1005 | default:
|
|---|
| 1006 | sprintf(o, "%03o", *p);
|
|---|
| 1007 | o += strlen(o);
|
|---|
| 1008 | break;
|
|---|
| 1009 | }
|
|---|
| 1010 | }
|
|---|
| 1011 | olen = o - obuf;
|
|---|
| 1012 | if (width+olen >= line_len && line_len > 0) {
|
|---|
| 1013 | ck_fwrite("\\\n", 1, 2, fp);
|
|---|
| 1014 | width = 0;
|
|---|
| 1015 | }
|
|---|
| 1016 | ck_fwrite(obuf, 1, olen, fp);
|
|---|
| 1017 | width += olen;
|
|---|
| 1018 | }
|
|---|
| 1019 | ck_fwrite("$\n", 1, 2, fp);
|
|---|
| 1020 | flush_output (fp);
|
|---|
| 1021 | }
|
|---|
| 1022 |
|
|---|
| 1023 |
|
|---|
| 1024 | static enum replacement_types append_replacement P_((struct line *, struct replacement *,
|
|---|
| 1025 | struct re_registers *,
|
|---|
| 1026 | enum replacement_types));
|
|---|
| 1027 | static enum replacement_types
|
|---|
| 1028 | append_replacement (buf, p, regs, repl_mod)
|
|---|
| 1029 | struct line *buf;
|
|---|
| 1030 | struct replacement *p;
|
|---|
| 1031 | struct re_registers *regs;
|
|---|
| 1032 | enum replacement_types repl_mod;
|
|---|
| 1033 | {
|
|---|
| 1034 | for (; p; p=p->next)
|
|---|
| 1035 | {
|
|---|
| 1036 | int i = p->subst_id;
|
|---|
| 1037 | enum replacement_types curr_type;
|
|---|
| 1038 |
|
|---|
| 1039 | /* Apply a \[lu] modifier that was given earlier, but which we
|
|---|
| 1040 | have not had yet the occasion to apply. But don't do it
|
|---|
| 1041 | if this replacement has a modifier of its own. */
|
|---|
| 1042 | curr_type = (p->repl_type & REPL_MODIFIERS)
|
|---|
| 1043 | ? p->repl_type
|
|---|
| 1044 | : p->repl_type | repl_mod;
|
|---|
| 1045 |
|
|---|
| 1046 | repl_mod = 0;
|
|---|
| 1047 | if (p->prefix_length)
|
|---|
| 1048 | {
|
|---|
| 1049 | str_append_modified(buf, p->prefix, p->prefix_length,
|
|---|
| 1050 | curr_type);
|
|---|
| 1051 | curr_type &= ~REPL_MODIFIERS;
|
|---|
| 1052 | }
|
|---|
| 1053 |
|
|---|
| 1054 | if (0 <= i)
|
|---|
| 1055 | if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS)
|
|---|
| 1056 | /* Save this modifier, we shall apply it later.
|
|---|
| 1057 | e.g. in s/()([a-z])/\u\1\2/
|
|---|
| 1058 | the \u modifier is applied to \2, not \1 */
|
|---|
| 1059 | repl_mod = curr_type & REPL_MODIFIERS;
|
|---|
| 1060 |
|
|---|
| 1061 | else
|
|---|
| 1062 | str_append_modified(buf, line.active + regs->start[i],
|
|---|
| 1063 | CAST(size_t)(regs->end[i] - regs->start[i]),
|
|---|
| 1064 | curr_type);
|
|---|
| 1065 | }
|
|---|
| 1066 |
|
|---|
| 1067 | return repl_mod;
|
|---|
| 1068 | }
|
|---|
| 1069 |
|
|---|
| 1070 | static void do_subst P_((struct subst *));
|
|---|
| 1071 | static void
|
|---|
| 1072 | do_subst(sub)
|
|---|
| 1073 | struct subst *sub;
|
|---|
| 1074 | {
|
|---|
| 1075 | size_t start = 0; /* where to start scan for (next) match in LINE */
|
|---|
| 1076 | size_t last_end = 0; /* where did the last successful match end in LINE */
|
|---|
| 1077 | countT count = 0; /* number of matches found */
|
|---|
| 1078 | bool again = true;
|
|---|
| 1079 |
|
|---|
| 1080 | static struct re_registers regs;
|
|---|
| 1081 |
|
|---|
| 1082 | if (s_accum.alloc == 0)
|
|---|
| 1083 | line_init(&s_accum, INITIAL_BUFFER_SIZE);
|
|---|
| 1084 | s_accum.length = 0;
|
|---|
| 1085 |
|
|---|
| 1086 | /* The first part of the loop optimizes s/xxx// when xxx is at the
|
|---|
| 1087 | start, and s/xxx$// */
|
|---|
| 1088 | if (!match_regex(sub->regx, line.active, line.length, start,
|
|---|
| 1089 | ®s, sub->max_id + 1))
|
|---|
| 1090 | return;
|
|---|
| 1091 |
|
|---|
| 1092 | if (!sub->replacement && sub->numb <= 1)
|
|---|
| 1093 | if (regs.start[0] == 0 && !sub->global)
|
|---|
| 1094 | {
|
|---|
| 1095 | /* We found a match, set the `replaced' flag. */
|
|---|
| 1096 | replaced = true;
|
|---|
| 1097 |
|
|---|
| 1098 | line.active += regs.end[0];
|
|---|
| 1099 | line.length -= regs.end[0];
|
|---|
| 1100 | line.alloc -= regs.end[0];
|
|---|
| 1101 | goto post_subst;
|
|---|
| 1102 | }
|
|---|
| 1103 | else if (regs.end[0] == line.length)
|
|---|
| 1104 | {
|
|---|
| 1105 | /* We found a match, set the `replaced' flag. */
|
|---|
| 1106 | replaced = true;
|
|---|
| 1107 |
|
|---|
| 1108 | line.length = regs.start[0];
|
|---|
| 1109 | goto post_subst;
|
|---|
| 1110 | }
|
|---|
| 1111 |
|
|---|
| 1112 | do
|
|---|
| 1113 | {
|
|---|
| 1114 | enum replacement_types repl_mod = 0;
|
|---|
| 1115 |
|
|---|
| 1116 | size_t offset = regs.start[0];
|
|---|
| 1117 | size_t matched = regs.end[0] - regs.start[0];
|
|---|
| 1118 |
|
|---|
| 1119 | /* Copy stuff to the left of this match into the output string. */
|
|---|
| 1120 | if (start < offset)
|
|---|
| 1121 | str_append(&s_accum, line.active + start, offset - start);
|
|---|
| 1122 |
|
|---|
| 1123 | /* If we're counting up to the Nth match, are we there yet?
|
|---|
| 1124 | And even if we are there, there is another case we have to
|
|---|
| 1125 | skip: are we matching an empty string immediately following
|
|---|
| 1126 | another match?
|
|---|
| 1127 |
|
|---|
| 1128 | This latter case avoids that baaaac, when passed through
|
|---|
| 1129 | s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is
|
|---|
| 1130 | unacceptable because it is not consistently applied (for
|
|---|
| 1131 | example, `baaaa' gives `xbx', not `xbxx'). */
|
|---|
| 1132 | if ((matched > 0 || count == 0 || offset > last_end)
|
|---|
| 1133 | && ++count >= sub->numb)
|
|---|
| 1134 | {
|
|---|
| 1135 | /* We found a match, set the `replaced' flag. */
|
|---|
| 1136 | replaced = true;
|
|---|
| 1137 |
|
|---|
| 1138 | /* Now expand the replacement string into the output string. */
|
|---|
| 1139 | repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod);
|
|---|
| 1140 | again = sub->global;
|
|---|
| 1141 | }
|
|---|
| 1142 | else
|
|---|
| 1143 | {
|
|---|
| 1144 | /* The match was not replaced. Copy the text until its
|
|---|
| 1145 | end; if it was vacuous, skip over one character and
|
|---|
| 1146 | add that character to the output. */
|
|---|
| 1147 | if (matched == 0)
|
|---|
| 1148 | {
|
|---|
| 1149 | if (start < line.length)
|
|---|
| 1150 | matched = 1;
|
|---|
| 1151 | else
|
|---|
| 1152 | break;
|
|---|
| 1153 | }
|
|---|
| 1154 |
|
|---|
| 1155 | str_append(&s_accum, line.active + offset, matched);
|
|---|
| 1156 | }
|
|---|
| 1157 |
|
|---|
| 1158 | /* Start after the match. last_end is the real end of the matched
|
|---|
| 1159 | substring, excluding characters that were skipped in case the RE
|
|---|
| 1160 | matched the empty string. */
|
|---|
| 1161 | start = offset + matched;
|
|---|
| 1162 | last_end = regs.end[0];
|
|---|
| 1163 | }
|
|---|
| 1164 | while (again
|
|---|
| 1165 | && start <= line.length
|
|---|
| 1166 | && match_regex(sub->regx, line.active, line.length, start,
|
|---|
| 1167 | ®s, sub->max_id + 1));
|
|---|
| 1168 |
|
|---|
| 1169 | /* Copy stuff to the right of the last match into the output string. */
|
|---|
| 1170 | if (start < line.length)
|
|---|
| 1171 | str_append(&s_accum, line.active + start, line.length-start);
|
|---|
| 1172 | s_accum.chomped = line.chomped;
|
|---|
| 1173 |
|
|---|
| 1174 | /* Exchange line and s_accum. This can be much cheaper
|
|---|
| 1175 | than copying s_accum.active into line.text (for huge lines). */
|
|---|
| 1176 | line_exchange(&line, &s_accum);
|
|---|
| 1177 |
|
|---|
| 1178 | /* Finish up. */
|
|---|
| 1179 | if (count < sub->numb)
|
|---|
| 1180 | return;
|
|---|
| 1181 |
|
|---|
| 1182 | post_subst:
|
|---|
| 1183 | if (sub->print & 1)
|
|---|
| 1184 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1185 |
|
|---|
| 1186 | if (sub->eval)
|
|---|
| 1187 | {
|
|---|
| 1188 | #ifdef HAVE_POPEN
|
|---|
| 1189 | FILE *pipe;
|
|---|
| 1190 | s_accum.length = 0;
|
|---|
| 1191 |
|
|---|
| 1192 | str_append (&line, "", 1);
|
|---|
| 1193 | pipe = popen(line.active, "r");
|
|---|
| 1194 |
|
|---|
| 1195 | if (pipe != NULL)
|
|---|
| 1196 | {
|
|---|
| 1197 | while (!feof (pipe))
|
|---|
| 1198 | {
|
|---|
| 1199 | char buf[4096];
|
|---|
| 1200 | int n = fread (buf, sizeof(char), 4096, pipe);
|
|---|
| 1201 | if (n > 0)
|
|---|
| 1202 | str_append(&s_accum, buf, n);
|
|---|
| 1203 | }
|
|---|
| 1204 |
|
|---|
| 1205 | pclose (pipe);
|
|---|
| 1206 |
|
|---|
| 1207 | line_exchange(&line, &s_accum);
|
|---|
| 1208 | if (line.length &&
|
|---|
| 1209 | line.active[line.length - 1] == '\n')
|
|---|
| 1210 | line.length--;
|
|---|
| 1211 | }
|
|---|
| 1212 | else
|
|---|
| 1213 | panic(_("error in subprocess"));
|
|---|
| 1214 | #else
|
|---|
| 1215 | panic(_("option `e' not supported"));
|
|---|
| 1216 | #endif
|
|---|
| 1217 | }
|
|---|
| 1218 |
|
|---|
| 1219 | if (sub->print & 2)
|
|---|
| 1220 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1221 | if (sub->outf)
|
|---|
| 1222 | output_line(line.active, line.length, line.chomped, sub->outf);
|
|---|
| 1223 | }
|
|---|
| 1224 |
|
|---|
| 1225 | #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
|
|---|
| 1226 | /* Used to attempt a simple-minded optimization. */
|
|---|
| 1227 |
|
|---|
| 1228 | static countT branches;
|
|---|
| 1229 |
|
|---|
| 1230 | static countT count_branches P_((struct vector *));
|
|---|
| 1231 | static countT
|
|---|
| 1232 | count_branches(program)
|
|---|
| 1233 | struct vector *program;
|
|---|
| 1234 | {
|
|---|
| 1235 | struct sed_cmd *cur_cmd = program->v;
|
|---|
| 1236 | countT isn_cnt = program->v_length;
|
|---|
| 1237 | countT cnt = 0;
|
|---|
| 1238 |
|
|---|
| 1239 | while (isn_cnt-- > 0)
|
|---|
| 1240 | {
|
|---|
| 1241 | switch (cur_cmd->cmd)
|
|---|
| 1242 | {
|
|---|
| 1243 | case 'b':
|
|---|
| 1244 | case 't':
|
|---|
| 1245 | case 'T':
|
|---|
| 1246 | case '{':
|
|---|
| 1247 | ++cnt;
|
|---|
| 1248 | }
|
|---|
| 1249 | }
|
|---|
| 1250 | return cnt;
|
|---|
| 1251 | }
|
|---|
| 1252 |
|
|---|
| 1253 | static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *));
|
|---|
| 1254 | static struct sed_cmd *
|
|---|
| 1255 | shrink_program(vec, cur_cmd)
|
|---|
| 1256 | struct vector *vec;
|
|---|
| 1257 | struct sed_cmd *cur_cmd;
|
|---|
| 1258 | {
|
|---|
| 1259 | struct sed_cmd *v = vec->v;
|
|---|
| 1260 | struct sed_cmd *last_cmd = v + vec->v_length;
|
|---|
| 1261 | struct sed_cmd *p;
|
|---|
| 1262 | countT cmd_cnt;
|
|---|
| 1263 |
|
|---|
| 1264 | for (p=v; p < cur_cmd; ++p)
|
|---|
| 1265 | if (p->cmd != '#')
|
|---|
| 1266 | MEMCPY(v++, p, sizeof *v);
|
|---|
| 1267 | cmd_cnt = v - vec->v;
|
|---|
| 1268 |
|
|---|
| 1269 | for (; p < last_cmd; ++p)
|
|---|
| 1270 | if (p->cmd != '#')
|
|---|
| 1271 | MEMCPY(v++, p, sizeof *v);
|
|---|
| 1272 | vec->v_length = v - vec->v;
|
|---|
| 1273 |
|
|---|
| 1274 | return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0;
|
|---|
| 1275 | }
|
|---|
| 1276 | #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
|
|---|
| 1277 |
|
|---|
| 1278 | /* Execute the program `vec' on the current input line.
|
|---|
| 1279 | Return exit status if caller should quit, -1 otherwise. */
|
|---|
| 1280 | static int execute_program P_((struct vector *, struct input *));
|
|---|
| 1281 | static int
|
|---|
| 1282 | execute_program(vec, input)
|
|---|
| 1283 | struct vector *vec;
|
|---|
| 1284 | struct input *input;
|
|---|
| 1285 | {
|
|---|
| 1286 | struct sed_cmd *cur_cmd;
|
|---|
| 1287 | struct sed_cmd *end_cmd;
|
|---|
| 1288 |
|
|---|
| 1289 | cur_cmd = vec->v;
|
|---|
| 1290 | end_cmd = vec->v + vec->v_length;
|
|---|
| 1291 | while (cur_cmd < end_cmd)
|
|---|
| 1292 | {
|
|---|
| 1293 | if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang)
|
|---|
| 1294 | {
|
|---|
| 1295 | switch (cur_cmd->cmd)
|
|---|
| 1296 | {
|
|---|
| 1297 | case 'a':
|
|---|
| 1298 | {
|
|---|
| 1299 | struct append_queue *aq = next_append_slot();
|
|---|
| 1300 | aq->text = cur_cmd->x.cmd_txt.text;
|
|---|
| 1301 | aq->textlen = cur_cmd->x.cmd_txt.text_length;
|
|---|
| 1302 | }
|
|---|
| 1303 | break;
|
|---|
| 1304 |
|
|---|
| 1305 | case '{':
|
|---|
| 1306 | case 'b':
|
|---|
| 1307 | cur_cmd = vec->v + cur_cmd->x.jump_index;
|
|---|
| 1308 | continue;
|
|---|
| 1309 |
|
|---|
| 1310 | case '}':
|
|---|
| 1311 | case '#':
|
|---|
| 1312 | case ':':
|
|---|
| 1313 | /* Executing labels and block-ends are easy. */
|
|---|
| 1314 | break;
|
|---|
| 1315 |
|
|---|
| 1316 | case 'c':
|
|---|
| 1317 | if (cur_cmd->range_state != RANGE_ACTIVE)
|
|---|
| 1318 | output_line(cur_cmd->x.cmd_txt.text,
|
|---|
| 1319 | cur_cmd->x.cmd_txt.text_length - 1, true,
|
|---|
| 1320 | &output_file);
|
|---|
| 1321 | /* POSIX.2 is silent about c starting a new cycle,
|
|---|
| 1322 | but it seems to be expected (and make sense). */
|
|---|
| 1323 | /* Fall Through */
|
|---|
| 1324 | case 'd':
|
|---|
| 1325 | return -1;
|
|---|
| 1326 |
|
|---|
| 1327 | case 'D':
|
|---|
| 1328 | {
|
|---|
| 1329 | char *p = memchr(line.active, '\n', line.length);
|
|---|
| 1330 | if (!p)
|
|---|
| 1331 | return -1;
|
|---|
| 1332 |
|
|---|
| 1333 | ++p;
|
|---|
| 1334 | line.alloc -= p - line.active;
|
|---|
| 1335 | line.length -= p - line.active;
|
|---|
| 1336 | line.active += p - line.active;
|
|---|
| 1337 |
|
|---|
| 1338 | /* reset to start next cycle without reading a new line: */
|
|---|
| 1339 | cur_cmd = vec->v;
|
|---|
| 1340 | continue;
|
|---|
| 1341 | }
|
|---|
| 1342 |
|
|---|
| 1343 | case 'e': {
|
|---|
| 1344 | #ifdef HAVE_POPEN
|
|---|
| 1345 | FILE *pipe;
|
|---|
| 1346 | int cmd_length = cur_cmd->x.cmd_txt.text_length;
|
|---|
| 1347 | if (s_accum.alloc == 0)
|
|---|
| 1348 | line_init(&s_accum, INITIAL_BUFFER_SIZE);
|
|---|
| 1349 | s_accum.length = 0;
|
|---|
| 1350 |
|
|---|
| 1351 | if (!cmd_length)
|
|---|
| 1352 | {
|
|---|
| 1353 | str_append (&line, "", 1);
|
|---|
| 1354 | pipe = popen(line.active, "r");
|
|---|
| 1355 | }
|
|---|
| 1356 | else
|
|---|
| 1357 | {
|
|---|
| 1358 | cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0;
|
|---|
| 1359 | pipe = popen(cur_cmd->x.cmd_txt.text, "r");
|
|---|
| 1360 | output_missing_newline(&output_file);
|
|---|
| 1361 | }
|
|---|
| 1362 |
|
|---|
| 1363 | if (pipe != NULL)
|
|---|
| 1364 | {
|
|---|
| 1365 | while (!feof (pipe))
|
|---|
| 1366 | {
|
|---|
| 1367 | char buf[4096];
|
|---|
| 1368 | int n = fread (buf, sizeof(char), 4096, pipe);
|
|---|
| 1369 | if (n > 0)
|
|---|
| 1370 | if (!cmd_length)
|
|---|
| 1371 | str_append(&s_accum, buf, n);
|
|---|
| 1372 | else
|
|---|
| 1373 | ck_fwrite(buf, 1, n, output_file.fp);
|
|---|
| 1374 | }
|
|---|
| 1375 |
|
|---|
| 1376 | pclose (pipe);
|
|---|
| 1377 | if (!cmd_length)
|
|---|
| 1378 | {
|
|---|
| 1379 | /* Store into pattern space for plain `e' commands */
|
|---|
| 1380 | if (s_accum.length &&
|
|---|
| 1381 | s_accum.active[s_accum.length - 1] == '\n')
|
|---|
| 1382 | s_accum.length--;
|
|---|
| 1383 |
|
|---|
| 1384 | /* Exchange line and s_accum. This can be much
|
|---|
| 1385 | cheaper than copying s_accum.active into line.text
|
|---|
| 1386 | (for huge lines). */
|
|---|
| 1387 | line_exchange(&line, &s_accum);
|
|---|
| 1388 | }
|
|---|
| 1389 | else
|
|---|
| 1390 | flush_output(output_file.fp);
|
|---|
| 1391 |
|
|---|
| 1392 | }
|
|---|
| 1393 | else
|
|---|
| 1394 | panic(_("error in subprocess"));
|
|---|
| 1395 | #else
|
|---|
| 1396 | panic(_("`e' command not supported"));
|
|---|
| 1397 | #endif
|
|---|
| 1398 | break;
|
|---|
| 1399 | }
|
|---|
| 1400 |
|
|---|
| 1401 | case 'g':
|
|---|
| 1402 | line_copy(&hold, &line);
|
|---|
| 1403 | break;
|
|---|
| 1404 |
|
|---|
| 1405 | case 'G':
|
|---|
| 1406 | line_append(&hold, &line);
|
|---|
| 1407 | break;
|
|---|
| 1408 |
|
|---|
| 1409 | case 'h':
|
|---|
| 1410 | line_copy(&line, &hold);
|
|---|
| 1411 | break;
|
|---|
| 1412 |
|
|---|
| 1413 | case 'H':
|
|---|
| 1414 | line_append(&line, &hold);
|
|---|
| 1415 | break;
|
|---|
| 1416 |
|
|---|
| 1417 | case 'i':
|
|---|
| 1418 | output_line(cur_cmd->x.cmd_txt.text,
|
|---|
| 1419 | cur_cmd->x.cmd_txt.text_length - 1,
|
|---|
| 1420 | true, &output_file);
|
|---|
| 1421 | break;
|
|---|
| 1422 |
|
|---|
| 1423 | case 'l':
|
|---|
| 1424 | do_list(cur_cmd->x.int_arg == -1
|
|---|
| 1425 | ? lcmd_out_line_len
|
|---|
| 1426 | : cur_cmd->x.int_arg);
|
|---|
| 1427 | break;
|
|---|
| 1428 |
|
|---|
| 1429 | case 'L':
|
|---|
| 1430 | output_missing_newline(&output_file);
|
|---|
| 1431 | fmt(line.active, line.active + line.length,
|
|---|
| 1432 | cur_cmd->x.int_arg == -1
|
|---|
| 1433 | ? lcmd_out_line_len
|
|---|
| 1434 | : cur_cmd->x.int_arg,
|
|---|
| 1435 | output_file.fp);
|
|---|
| 1436 | flush_output(output_file.fp);
|
|---|
| 1437 | break;
|
|---|
| 1438 |
|
|---|
| 1439 | case 'n':
|
|---|
| 1440 | if (!no_default_output)
|
|---|
| 1441 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1442 | if (test_eof(input) || !read_pattern_space(input, vec, false))
|
|---|
| 1443 | return -1;
|
|---|
| 1444 | break;
|
|---|
| 1445 |
|
|---|
| 1446 | case 'N':
|
|---|
| 1447 | str_append(&line, "\n", 1);
|
|---|
| 1448 |
|
|---|
| 1449 | if (test_eof(input) || !read_pattern_space(input, vec, true))
|
|---|
| 1450 | {
|
|---|
| 1451 | line.length--;
|
|---|
| 1452 | if (posixicity == POSIXLY_EXTENDED && !no_default_output)
|
|---|
| 1453 | output_line(line.active, line.length, line.chomped,
|
|---|
| 1454 | &output_file);
|
|---|
| 1455 | return -1;
|
|---|
| 1456 | }
|
|---|
| 1457 | break;
|
|---|
| 1458 |
|
|---|
| 1459 | case 'p':
|
|---|
| 1460 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1461 | break;
|
|---|
| 1462 |
|
|---|
| 1463 | case 'P':
|
|---|
| 1464 | {
|
|---|
| 1465 | char *p = memchr(line.active, '\n', line.length);
|
|---|
| 1466 | output_line(line.active, p ? p - line.active : line.length,
|
|---|
| 1467 | p ? true : line.chomped, &output_file);
|
|---|
| 1468 | }
|
|---|
| 1469 | break;
|
|---|
| 1470 |
|
|---|
| 1471 | case 'q':
|
|---|
| 1472 | if (!no_default_output)
|
|---|
| 1473 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1474 | dump_append_queue();
|
|---|
| 1475 |
|
|---|
| 1476 | case 'Q':
|
|---|
| 1477 | return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg;
|
|---|
| 1478 |
|
|---|
| 1479 | case 'r':
|
|---|
| 1480 | if (cur_cmd->x.fname)
|
|---|
| 1481 | {
|
|---|
| 1482 | struct append_queue *aq = next_append_slot();
|
|---|
| 1483 | aq->fname = cur_cmd->x.fname;
|
|---|
| 1484 | }
|
|---|
| 1485 | break;
|
|---|
| 1486 |
|
|---|
| 1487 | case 'R':
|
|---|
| 1488 | if (cur_cmd->x.fp && !feof (cur_cmd->x.fp))
|
|---|
| 1489 | {
|
|---|
| 1490 | struct append_queue *aq;
|
|---|
| 1491 | size_t buflen;
|
|---|
| 1492 | char *text = NULL;
|
|---|
| 1493 | int result;
|
|---|
| 1494 |
|
|---|
| 1495 | result = ck_getline (&text, &buflen, cur_cmd->x.fp);
|
|---|
| 1496 | if (result != EOF)
|
|---|
| 1497 | {
|
|---|
| 1498 | aq = next_append_slot();
|
|---|
| 1499 | aq->free = true;
|
|---|
| 1500 | aq->text = text;
|
|---|
| 1501 | aq->textlen = result;
|
|---|
| 1502 | }
|
|---|
| 1503 | }
|
|---|
| 1504 | break;
|
|---|
| 1505 |
|
|---|
| 1506 | case 's':
|
|---|
| 1507 | do_subst(cur_cmd->x.cmd_subst);
|
|---|
| 1508 | break;
|
|---|
| 1509 |
|
|---|
| 1510 | case 't':
|
|---|
| 1511 | if (replaced)
|
|---|
| 1512 | {
|
|---|
| 1513 | replaced = false;
|
|---|
| 1514 | cur_cmd = vec->v + cur_cmd->x.jump_index;
|
|---|
| 1515 | continue;
|
|---|
| 1516 | }
|
|---|
| 1517 | break;
|
|---|
| 1518 |
|
|---|
| 1519 | case 'T':
|
|---|
| 1520 | if (!replaced)
|
|---|
| 1521 | {
|
|---|
| 1522 | cur_cmd = vec->v + cur_cmd->x.jump_index;
|
|---|
| 1523 | continue;
|
|---|
| 1524 | }
|
|---|
| 1525 | else
|
|---|
| 1526 | replaced = false;
|
|---|
| 1527 | break;
|
|---|
| 1528 |
|
|---|
| 1529 | case 'w':
|
|---|
| 1530 | if (cur_cmd->x.fp)
|
|---|
| 1531 | output_line(line.active, line.length,
|
|---|
| 1532 | line.chomped, cur_cmd->x.outf);
|
|---|
| 1533 | break;
|
|---|
| 1534 |
|
|---|
| 1535 | case 'W':
|
|---|
| 1536 | if (cur_cmd->x.fp)
|
|---|
| 1537 | {
|
|---|
| 1538 | char *p = memchr(line.active, '\n', line.length);
|
|---|
| 1539 | output_line(line.active, p ? p - line.active : line.length,
|
|---|
| 1540 | p ? true : line.chomped, cur_cmd->x.outf);
|
|---|
| 1541 | }
|
|---|
| 1542 | break;
|
|---|
| 1543 |
|
|---|
| 1544 | case 'x':
|
|---|
| 1545 | line_exchange(&line, &hold);
|
|---|
| 1546 | break;
|
|---|
| 1547 |
|
|---|
| 1548 | case 'y':
|
|---|
| 1549 | {
|
|---|
| 1550 | #ifdef HAVE_MBRTOWC
|
|---|
| 1551 | if (mb_cur_max > 1)
|
|---|
| 1552 | {
|
|---|
| 1553 | int idx, prev_idx; /* index in the input line. */
|
|---|
| 1554 | char **trans;
|
|---|
| 1555 | mbstate_t mbstate;
|
|---|
| 1556 | memset(&mbstate, 0, sizeof(mbstate_t));
|
|---|
| 1557 | for (idx = 0; idx < line.length;)
|
|---|
| 1558 | {
|
|---|
| 1559 | int mbclen, i;
|
|---|
| 1560 | mbclen = MBRLEN (line.active + idx, line.length - idx,
|
|---|
| 1561 | &mbstate);
|
|---|
| 1562 | /* An invalid sequence, or a truncated multibyte
|
|---|
| 1563 | character. We treat it as a singlebyte character.
|
|---|
| 1564 | */
|
|---|
| 1565 | if (mbclen == (size_t) -1 || mbclen == (size_t) -2
|
|---|
| 1566 | || mbclen == 0)
|
|---|
| 1567 | mbclen = 1;
|
|---|
| 1568 |
|
|---|
| 1569 | trans = cur_cmd->x.translatemb;
|
|---|
| 1570 | /* `i' indicate i-th translate pair. */
|
|---|
| 1571 | for (i = 0; trans[2*i] != NULL; i++)
|
|---|
| 1572 | {
|
|---|
| 1573 | if (strncmp(line.active + idx, trans[2*i], mbclen) == 0)
|
|---|
| 1574 | {
|
|---|
| 1575 | bool move_remain_buffer = false;
|
|---|
| 1576 | int trans_len = strlen(trans[2*i+1]);
|
|---|
| 1577 |
|
|---|
| 1578 | if (mbclen < trans_len)
|
|---|
| 1579 | {
|
|---|
| 1580 | int new_len;
|
|---|
| 1581 | new_len = line.length + 1 + trans_len - mbclen;
|
|---|
| 1582 | /* We must extend the line buffer. */
|
|---|
| 1583 | if (line.alloc < new_len)
|
|---|
| 1584 | {
|
|---|
| 1585 | /* And we must resize the buffer. */
|
|---|
| 1586 | resize_line(&line, new_len);
|
|---|
| 1587 | }
|
|---|
| 1588 | move_remain_buffer = true;
|
|---|
| 1589 | }
|
|---|
| 1590 | else if (mbclen > trans_len)
|
|---|
| 1591 | {
|
|---|
| 1592 | /* We must truncate the line buffer. */
|
|---|
| 1593 | move_remain_buffer = true;
|
|---|
| 1594 | }
|
|---|
| 1595 | prev_idx = idx;
|
|---|
| 1596 | if (move_remain_buffer)
|
|---|
| 1597 | {
|
|---|
| 1598 | int move_len, move_offset;
|
|---|
| 1599 | char *move_from, *move_to;
|
|---|
| 1600 | /* Move the remaining with \0. */
|
|---|
| 1601 | move_from = line.active + idx + mbclen;
|
|---|
| 1602 | move_to = line.active + idx + trans_len;
|
|---|
| 1603 | move_len = line.length + 1 - idx - mbclen;
|
|---|
| 1604 | move_offset = trans_len - mbclen;
|
|---|
| 1605 | memmove(move_to, move_from, move_len);
|
|---|
| 1606 | line.length += move_offset;
|
|---|
| 1607 | idx += move_offset;
|
|---|
| 1608 | }
|
|---|
| 1609 | strncpy(line.active + prev_idx, trans[2*i+1],
|
|---|
| 1610 | trans_len);
|
|---|
| 1611 | break;
|
|---|
| 1612 | }
|
|---|
| 1613 | }
|
|---|
| 1614 | idx += mbclen;
|
|---|
| 1615 | }
|
|---|
| 1616 | }
|
|---|
| 1617 | else
|
|---|
| 1618 | #endif /* HAVE_MBRTOWC */
|
|---|
| 1619 | {
|
|---|
| 1620 | unsigned char *p, *e;
|
|---|
| 1621 | p = CAST(unsigned char *)line.active;
|
|---|
| 1622 | for (e=p+line.length; p<e; ++p)
|
|---|
| 1623 | *p = cur_cmd->x.translate[*p];
|
|---|
| 1624 | }
|
|---|
| 1625 | }
|
|---|
| 1626 | break;
|
|---|
| 1627 |
|
|---|
| 1628 | case '=':
|
|---|
| 1629 | output_missing_newline(&output_file);
|
|---|
| 1630 | fprintf(output_file.fp, "%lu\n",
|
|---|
| 1631 | CAST(unsigned long)input->line_number);
|
|---|
| 1632 | flush_output(output_file.fp);
|
|---|
| 1633 | break;
|
|---|
| 1634 |
|
|---|
| 1635 | default:
|
|---|
| 1636 | panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd);
|
|---|
| 1637 | }
|
|---|
| 1638 | }
|
|---|
| 1639 |
|
|---|
| 1640 | #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
|
|---|
| 1641 | /* If our top-level program consists solely of commands with
|
|---|
| 1642 | ADDR_IS_NUM addresses then once we past the last mentioned
|
|---|
| 1643 | line we should be able to quit if no_default_output is true,
|
|---|
| 1644 | or otherwise quickly copy input to output. Now whether this
|
|---|
| 1645 | optimization is a win or not depends on how cheaply we can
|
|---|
| 1646 | implement this for the cases where it doesn't help, as
|
|---|
| 1647 | compared against how much time is saved. One semantic
|
|---|
| 1648 | difference (which I think is an improvement) is that *this*
|
|---|
| 1649 | version will terminate after printing line two in the script
|
|---|
| 1650 | "yes | sed -n 2p".
|
|---|
| 1651 |
|
|---|
| 1652 | Don't use this when in-place editing is active, because line
|
|---|
| 1653 | numbers restart each time then. */
|
|---|
| 1654 | else if (!separate_files)
|
|---|
| 1655 | {
|
|---|
| 1656 | if (cur_cmd->a1->addr_type == ADDR_IS_NUM
|
|---|
| 1657 | && (cur_cmd->a2
|
|---|
| 1658 | ? cur_cmd->range_state == RANGE_CLOSED
|
|---|
| 1659 | : cur_cmd->a1->addr_number < input->line_number))
|
|---|
| 1660 | {
|
|---|
| 1661 | /* Skip this address next time */
|
|---|
| 1662 | cur_cmd->addr_bang = !cur_cmd->addr_bang;
|
|---|
| 1663 | cur_cmd->a1->addr_type = ADDR_IS_NULL;
|
|---|
| 1664 | if (cur_cmd->a2)
|
|---|
| 1665 | cur_cmd->a2->addr_type = ADDR_IS_NULL;
|
|---|
| 1666 |
|
|---|
| 1667 | /* can we make an optimization? */
|
|---|
| 1668 | if (cur_cmd->addr_bang)
|
|---|
| 1669 | {
|
|---|
| 1670 | if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't'
|
|---|
| 1671 | || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}')
|
|---|
| 1672 | branches--;
|
|---|
| 1673 |
|
|---|
| 1674 | cur_cmd->cmd = '#'; /* replace with no-op */
|
|---|
| 1675 | if (branches == 0)
|
|---|
| 1676 | cur_cmd = shrink_program(vec, cur_cmd);
|
|---|
| 1677 | if (!cur_cmd && no_default_output)
|
|---|
| 1678 | return 0;
|
|---|
| 1679 | end_cmd = vec->v + vec->v_length;
|
|---|
| 1680 | if (!cur_cmd)
|
|---|
| 1681 | cur_cmd = end_cmd;
|
|---|
| 1682 | continue;
|
|---|
| 1683 | }
|
|---|
| 1684 | }
|
|---|
| 1685 | }
|
|---|
| 1686 | #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
|
|---|
| 1687 |
|
|---|
| 1688 | /* this is buried down here so that a "continue" statement can skip it */
|
|---|
| 1689 | ++cur_cmd;
|
|---|
| 1690 | }
|
|---|
| 1691 |
|
|---|
| 1692 | if (!no_default_output)
|
|---|
| 1693 | output_line(line.active, line.length, line.chomped, &output_file);
|
|---|
| 1694 | return -1;
|
|---|
| 1695 | }
|
|---|
| 1696 |
|
|---|
| 1697 | |
|---|
| 1698 |
|
|---|
| 1699 |
|
|---|
| 1700 | /* Apply the compiled script to all the named files. */
|
|---|
| 1701 | int
|
|---|
| 1702 | process_files(the_program, argv)
|
|---|
| 1703 | struct vector *the_program;
|
|---|
| 1704 | char **argv;
|
|---|
| 1705 | {
|
|---|
| 1706 | static char dash[] = "-";
|
|---|
| 1707 | static char *stdin_argv[2] = { dash, NULL };
|
|---|
| 1708 | struct input input;
|
|---|
| 1709 | int status;
|
|---|
| 1710 |
|
|---|
| 1711 | line_init(&line, INITIAL_BUFFER_SIZE);
|
|---|
| 1712 | line_init(&hold, 0);
|
|---|
| 1713 | line_init(&buffer, 0);
|
|---|
| 1714 |
|
|---|
| 1715 | #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION
|
|---|
| 1716 | branches = count_branches(the_program);
|
|---|
| 1717 | #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/
|
|---|
| 1718 | input.reset_at_next_file = true;
|
|---|
| 1719 | if (argv && *argv)
|
|---|
| 1720 | input.file_list = argv;
|
|---|
| 1721 | else if (in_place_extension)
|
|---|
| 1722 | panic(_("no input files"));
|
|---|
| 1723 | else
|
|---|
| 1724 | input.file_list = stdin_argv;
|
|---|
| 1725 |
|
|---|
| 1726 | input.bad_count = 0;
|
|---|
| 1727 | input.line_number = 0;
|
|---|
| 1728 | input.read_fn = read_always_fail;
|
|---|
| 1729 | input.fp = NULL;
|
|---|
| 1730 |
|
|---|
| 1731 | status = EXIT_SUCCESS;
|
|---|
| 1732 | while (read_pattern_space(&input, the_program, false))
|
|---|
| 1733 | {
|
|---|
| 1734 | status = execute_program(the_program, &input);
|
|---|
| 1735 | if (status == -1)
|
|---|
| 1736 | status = EXIT_SUCCESS;
|
|---|
| 1737 | else
|
|---|
| 1738 | break;
|
|---|
| 1739 | }
|
|---|
| 1740 | closedown(&input);
|
|---|
| 1741 |
|
|---|
| 1742 | #ifdef DEBUG_LEAKS
|
|---|
| 1743 | /* We're about to exit, so these free()s are redundant.
|
|---|
| 1744 | But if we're running under a memory-leak detecting
|
|---|
| 1745 | implementation of malloc(), we want to explicitly
|
|---|
| 1746 | deallocate in order to avoid extraneous noise from
|
|---|
| 1747 | the allocator. */
|
|---|
| 1748 | release_append_queue();
|
|---|
| 1749 | FREE(buffer.text);
|
|---|
| 1750 | FREE(hold.text);
|
|---|
| 1751 | FREE(line.text);
|
|---|
| 1752 | FREE(s_accum.text);
|
|---|
| 1753 | #endif /*DEBUG_LEAKS*/
|
|---|
| 1754 |
|
|---|
| 1755 | if (input.bad_count)
|
|---|
| 1756 | status = 2;
|
|---|
| 1757 |
|
|---|
| 1758 | return status;
|
|---|
| 1759 | }
|
|---|