VirtualBox

source: kBuild/trunk/src/sed/sed/compile.c@ 800

Last change on this file since 800 was 599, checked in by bird, 18 years ago

GNU sed 4.1.5.

File size: 41.2 KB
Line 
1/* GNU SED, a batch stream editor.
2 Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006
3 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
18
19/* compile.c: translate sed source into internal form */
20
21#include "sed.h"
22#include "strverscmp.h"
23#include <stdio.h>
24#include <ctype.h>
25
26#ifdef HAVE_STRINGS_H
27# include <strings.h>
28# ifdef HAVE_MEMORY_H
29# include <memory.h>
30# endif
31#else
32# include <string.h>
33#endif /* HAVE_STRINGS_H */
34
35#ifdef HAVE_STDLIB_H
36# include <stdlib.h>
37#endif
38#ifndef EXIT_FAILURE
39# define EXIT_FAILURE 1
40#endif
41
42#ifdef HAVE_SYS_TYPES_H
43# include <sys/types.h>
44#endif
45
46#include <obstack.h>
47
48
49
50#define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/
51#define VECTOR_ALLOC_INCREMENT 40
52
53/* let's not confuse text editors that have only dumb bracket-matching... */
54#define OPEN_BRACKET '['
55#define CLOSE_BRACKET ']'
56#define OPEN_BRACE '{'
57#define CLOSE_BRACE '}'
58
/* Read position within the script that is currently being compiled.  */
struct prog_info {
  /* String input.  `base' is the first byte of the script string,
     `cur' is the next byte to be read and `end' marks the end of the
     string.  Carrying an explicit end pointer (rather than relying on
     a terminator) lets script strings contain NUL bytes.  */
  const unsigned char *base;
  const unsigned char *cur;
  const unsigned char *end;

  /* File input.  The script file being read, or NULL when the script
     comes from the string at `cur' instead.  `file' and `cur' must
     never both be NULL.  */
  FILE *file;
};
74
75/* Information used to give out useful and informative error messages. */
76struct error_info {
77 /* This is the name of the current script file. */
78 const char *name;
79
80 /* This is the number of the current script line that we're compiling. */
81 countT line;
82
83 /* This is the index of the "-e" expressions on the command line. */
84 countT string_expr_count;
85};
86
87
88/* Label structure used to resolve GOTO's, labels, and block beginnings. */
89struct sed_label {
90 countT v_index; /* index of vector element being referenced */
91 char *name; /* NUL-terminated name of the label */
92 struct error_info err_info; /* track where `{}' blocks start */
93 struct sed_label *next; /* linked list (stack) */
94};
95
96struct special_files {
97 struct output outf;
98 FILE **pfp;
99};
100
101FILE *my_stdin, *my_stdout, *my_stderr;
102struct special_files special_files[] = {
103 { { "/dev/stdin", false, NULL, NULL }, &my_stdin },
104 { { "/dev/stdout", false, NULL, NULL }, &my_stdout },
105 { { "/dev/stderr", false, NULL, NULL }, &my_stderr },
106 { { NULL, false, NULL, NULL }, NULL }
107};
108
109
110
111/* Where we are in the processing of the input. */
112static struct prog_info prog;
113static struct error_info cur_input;
114
115/* Information about labels and jumps-to-labels. This is used to do
116 the required backpatching after we have compiled all the scripts. */
117static struct sed_label *jumps = NULL;
118static struct sed_label *labels = NULL;
119
120/* We wish to detect #n magic only in the first input argument;
121 this flag tracks when we have consumed the first file of input. */
122static bool first_script = true;
123
124/* Allow for scripts like "sed -e 'i\' -e foo": */
125static struct buffer *pending_text = NULL;
126static struct text_buf *old_text_buf = NULL;
127
128/* Information about block start positions. This is used to backpatch
129 block end positions. */
130static struct sed_label *blocks = NULL;
131
132/* Use an obstack for compilation. */
133static struct obstack obs;
134
/* Every diagnostic this file can emit, stored back to back in a
   single array with a NUL after each message.  */
static const char errors[] =
  "multiple `!'s\0"
  "unexpected `,'\0"
  "invalid usage of +N or ~N as first address\0"
  "unmatched `{'\0"
  "unexpected `}'\0"
  "extra characters after command\0"
  "expected \\ after `a', `c' or `i'\0"
  "`}' doesn't want any addresses\0"
  ": doesn't want any addresses\0"
  "comments don't accept any addresses\0"
  "missing command\0"
  "command only uses one address\0"
  "unterminated address regex\0"
  "unterminated `s' command\0"
  "unterminated `y' command\0"
  "unknown option to `s'\0"
  "multiple `p' options to `s' command\0"
  "multiple `g' options to `s' command\0"
  "multiple number options to `s' command\0"
  "number option to `s' command may not be zero\0"
  "strings for `y' command are different lengths\0"
  "delimiter character is not a single-byte character\0"
  "expected newer version of sed\0"
  "invalid usage of line address 0\0"
  "unknown command: `%c'";

/* Each macro below is the address of one message inside `errors':
   the address of the previous message plus the size (terminating NUL
   included) of the previous message's text.  The literals here must
   therefore stay byte-identical to, and in the same order as, the
   entries of the table above.  (N_() is presumably the usual gettext
   no-op marker; it is defined outside this file.)  */
#define BAD_BANG            (errors)
#define BAD_COMMA           (BAD_BANG + sizeof(N_("multiple `!'s")))
#define BAD_STEP            (BAD_COMMA + sizeof(N_("unexpected `,'")))
#define EXCESS_OPEN_BRACE   (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address")))
#define EXCESS_CLOSE_BRACE  (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'")))
#define EXCESS_JUNK         (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'")))
#define EXPECTED_SLASH      (EXCESS_JUNK + sizeof(N_("extra characters after command")))
#define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'")))
#define NO_COLON_ADDR       (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses")))
#define NO_SHARP_ADDR       (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses")))
#define NO_COMMAND          (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses")))
#define ONE_ADDR            (NO_COMMAND + sizeof(N_("missing command")))
#define UNTERM_ADDR_RE      (ONE_ADDR + sizeof(N_("command only uses one address")))
#define UNTERM_S_CMD        (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex")))
#define UNTERM_Y_CMD        (UNTERM_S_CMD + sizeof(N_("unterminated `s' command")))
#define UNKNOWN_S_OPT       (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command")))
#define EXCESS_P_OPT        (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'")))
#define EXCESS_G_OPT        (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command")))
#define EXCESS_N_OPT        (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command")))
#define ZERO_N_OPT          (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command")))
#define Y_CMD_LEN           (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero")))
#define BAD_DELIM           (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths")))
#define ANCIENT_VERSION     (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character")))
#define INVALID_LINE_0      (ANCIENT_VERSION + sizeof(N_("expected newer version of sed")))
#define UNKNOWN_CMD         (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0")))
#define END_ERRORS          (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'")))
189
/* Heads of the lists of files that get_openfile() has opened so far:
   one list for files opened in "r" mode, one for files opened in "w"
   mode.  */
static struct output *file_read = NULL;
static struct output *file_write = NULL;
192
193
194
195/* Complain about an unknown command and exit. */
196void
197bad_command(ch)
198 char ch;
199{
200 const char *msg = _(UNKNOWN_CMD);
201 char *unknown_cmd = xmalloc(strlen(msg));
202 sprintf(unknown_cmd, msg, ch);
203 bad_prog(unknown_cmd);
204}
205
206/* Complain about a programming error and exit. */
207void
208bad_prog(why)
209 const char *why;
210{
211 if (cur_input.name)
212 fprintf(stderr, _("%s: file %s line %lu: %s\n"),
213 myname, cur_input.name, CAST(unsigned long)cur_input.line, why);
214 else
215 fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"),
216 myname,
217 CAST(unsigned long)cur_input.string_expr_count,
218 CAST(unsigned long)(prog.cur-prog.base),
219 why);
220 exit(EXIT_FAILURE);
221}
222
223
224
225/* Read the next character from the program. Return EOF if there isn't
226 anything to read. Keep cur_input.line up to date, so error messages
227 can be meaningful. */
228static int inchar P_((void));
229static int
230inchar()
231{
232 int ch = EOF;
233
234 if (prog.cur)
235 {
236 if (prog.cur < prog.end)
237 ch = *prog.cur++;
238 }
239 else if (prog.file)
240 {
241 if (!feof(prog.file))
242 ch = getc(prog.file);
243 }
244 if (ch == '\n')
245 ++cur_input.line;
246 return ch;
247}
248
249/* unget `ch' so the next call to inchar will return it. */
250static void savchar P_((int ch));
251static void
252savchar(ch)
253 int ch;
254{
255 if (ch == EOF)
256 return;
257 if (ch == '\n' && cur_input.line > 0)
258 --cur_input.line;
259 if (prog.cur)
260 {
261 if (prog.cur <= prog.base || *--prog.cur != ch)
262 panic("Called savchar() with unexpected pushback (%x)",
263 CAST(unsigned char)ch);
264 }
265 else
266 ungetc(ch, prog.file);
267}
268
269/* Read the next non-blank character from the program. */
270static int in_nonblank P_((void));
271static int
272in_nonblank()
273{
274 int ch;
275 do
276 ch = inchar();
277 while (ISBLANK(ch));
278 return ch;
279}
280
281/* Read an integer value from the program. */
282static countT in_integer P_((int ch));
283static countT
284in_integer(ch)
285 int ch;
286{
287 countT num = 0;
288
289 while (ISDIGIT(ch))
290 {
291 num = num * 10 + ch - '0';
292 ch = inchar();
293 }
294 savchar(ch);
295 return num;
296}
297
298static int add_then_next P_((struct buffer *b, int ch));
299static int
300add_then_next(b, ch)
301 struct buffer *b;
302 int ch;
303{
304 add1_buffer(b, ch);
305 return inchar();
306}
307
/* Return the numeric value (0-15) of the digit character `c', letters
   being accepted in either case, or -1 if `c' is not a hexadecimal
   digit.  */
static int convert_number_digit P_((int c));
static int
convert_number_digit(c)
  int c;
{
  if ('0' <= c && c <= '9')
    return c - '0';

  switch (c)
    {
    case 'A': case 'a': return 0xa;
    case 'B': case 'b': return 0xb;
    case 'C': case 'c': return 0xc;
    case 'D': case 'd': return 0xd;
    case 'E': case 'e': return 0xe;
    case 'F': case 'f': return 0xf;
    default:            return -1;
    }
}

/* Parse up to `maxdigits' digits of a number in base `base' from the
   text starting at `buf' and ending before `bufend'.  Parsing stops
   at the first character that is not a valid digit for `base'.

   The parsed value is stored, converted to char, in `*result'; if no
   digit was consumed, `default_char' is stored instead.  Returns a
   pointer to the first character that was not consumed.  */
static char * convert_number P_((char *, char *, const char *, int, int, int));
static char *
convert_number(result, buf, bufend, base, maxdigits, default_char)
  char *result;
  char *buf;
  const char *bufend;
  int base;
  int maxdigits;
  int default_char;
{
  int value = 0;
  char *p = buf;

  while (p < bufend && maxdigits-- > 0)
    {
      int d = convert_number_digit(*p);

      if (d < 0 || d >= base)
        break;

      value = value * base + d;
      ++p;
    }

  *result = (p == buf) ? default_char : value;
  return p;
}
353
354
355
356/* Read in a filename for a `r', `w', or `s///w' command. */
357static struct buffer *read_filename P_((void));
358static struct buffer *
359read_filename()
360{
361 struct buffer *b;
362 int ch;
363
364 b = init_buffer();
365 ch = in_nonblank();
366 while (ch != EOF && ch != '\n')
367 {
368#if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/
369 if (posixicity == POSIXLY_EXTENDED)
370 if (ch == ';' || ch == '#')
371 {
372 savchar(ch);
373 break;
374 }
375#endif
376 ch = add_then_next(b, ch);
377 }
378 add1_buffer(b, '\0');
379 return b;
380}
381
382static struct output *get_openfile P_((struct output **file_ptrs, char *mode, bool fail));
383static struct output *
384get_openfile(file_ptrs, mode, fail)
385 struct output **file_ptrs;
386 char *mode;
387 bool fail;
388{
389 struct buffer *b;
390 char *file_name;
391 struct output *p;
392 int is_stderr;
393
394 b = read_filename();
395 file_name = get_buffer(b);
396 for (p=*file_ptrs; p; p=p->link)
397 if (strcmp(p->name, file_name) == 0)
398 break;
399
400 if (posixicity == POSIXLY_EXTENDED)
401 {
402 /* Check whether it is a special file (stdin, stdout or stderr) */
403 struct special_files *special = special_files;
404
405 /* std* sometimes are not constants, so they
406 cannot be used in the initializer for special_files */
407 my_stdin = stdin; my_stdout = stdout; my_stderr = stderr;
408 for (special = special_files; special->outf.name; special++)
409 if (strcmp(special->outf.name, file_name) == 0)
410 {
411 special->outf.fp = *special->pfp;
412 free_buffer (b);
413 return &special->outf;
414 }
415 }
416
417 if (!p)
418 {
419 p = OB_MALLOC(&obs, 1, struct output);
420 p->name = ck_strdup(file_name);
421 p->fp = ck_fopen(p->name, mode, fail);
422 p->missing_newline = false;
423 p->link = *file_ptrs;
424 *file_ptrs = p;
425 }
426 free_buffer(b);
427 return p;
428}
429
430
431
432static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp));
433static struct sed_cmd *
434next_cmd_entry(vectorp)
435 struct vector **vectorp;
436{
437 struct sed_cmd *cmd;
438 struct vector *v;
439
440 v = *vectorp;
441 if (v->v_length == v->v_allocated)
442 {
443 v->v_allocated += VECTOR_ALLOC_INCREMENT;
444 v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd);
445 }
446
447 cmd = v->v + v->v_length;
448 cmd->a1 = NULL;
449 cmd->a2 = NULL;
450 cmd->range_state = RANGE_INACTIVE;
451 cmd->addr_bang = false;
452 cmd->cmd = '\0'; /* something invalid, to catch bugs early */
453
454 *vectorp = v;
455 return cmd;
456}
457
458static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat));
459static int
460snarf_char_class(b, cur_stat)
461 struct buffer *b;
462 mbstate_t *cur_stat;
463{
464 int ch;
465 int state = 0;
466 int delim;
467 bool pending_mb = 0;
468
469 ch = inchar();
470 if (ch == '^')
471 ch = add_then_next(b, ch);
472 if (ch == CLOSE_BRACKET)
473 ch = add_then_next(b, ch);
474
475 /* States are:
476 0 outside a collation element, character class or collation class
477 1 after the bracket
478 2 after the opening ./:/=
479 3 after the closing ./:/= */
480
481 for (;; ch = add_then_next (b, ch))
482 {
483 pending_mb = BRLEN (ch, cur_stat) != 1;
484
485 switch (ch)
486 {
487 case EOF:
488 case '\n':
489 return ch;
490
491 case '.':
492 case ':':
493 case '=':
494 if (pending_mb)
495 continue;
496
497 if (state == 1)
498 {
499 delim = ch;
500 state++;
501 }
502 else if (ch == delim && state == 2)
503 state++;
504 else
505 break;
506
507 continue;
508
509 case OPEN_BRACKET:
510 if (pending_mb)
511 continue;
512
513 state++;
514 continue;
515
516 case CLOSE_BRACKET:
517 if (pending_mb)
518 continue;
519
520 if (state == 0 || state == 1)
521 return ch;
522 else if (state == 3)
523 state = 0;
524
525 break;
526
527 default:
528 break;
529 }
530
531 /* Getting a character different from .=: whilst in state 1
532 goes back to state 0, getting a character different from ]
533 whilst in state 3 goes back to state 2. */
534 state &= ~1;
535 }
536}
537
538static struct buffer *match_slash P_((int slash, bool regex));
539static struct buffer *
540match_slash(slash, regex)
541 int slash;
542 bool regex;
543{
544 struct buffer *b;
545 int ch;
546 bool pending_mb = false;
547 mbstate_t cur_stat;
548
549 memset (&cur_stat, 0, sizeof (mbstate_t));
550
551 if (BRLEN (slash, &cur_stat) == -2)
552 if (BRLEN (slash, &cur_stat) == -2)
553 bad_prog (BAD_DELIM);
554
555 memset (&cur_stat, 0, sizeof (mbstate_t));
556
557 b = init_buffer();
558 while ((ch = inchar()) != EOF && ch != '\n')
559 {
560 pending_mb = BRLEN (ch, &cur_stat) != 1;
561 pending_mb = BRLEN (ch, &cur_stat) != 1;
562
563 if (!pending_mb)
564 {
565 if (ch == slash)
566 return b;
567 else if (ch == '\\')
568 {
569 ch = inchar();
570 if (ch == EOF)
571 break;
572#ifndef REG_PERL
573 else if (ch == 'n' && regex)
574 ch = '\n';
575#endif
576 else if (ch != '\n' && ch != slash)
577 add1_buffer(b, '\\');
578 }
579 else if (ch == OPEN_BRACKET && regex)
580 {
581 add1_buffer(b, ch);
582 ch = snarf_char_class(b, &cur_stat);
583 if (ch != CLOSE_BRACKET)
584 break;
585 }
586 }
587
588 add1_buffer(b, ch);
589 }
590
591 if (ch == '\n')
592 savchar(ch); /* for proper line number in error report */
593 free_buffer(b);
594 return NULL;
595}
596
597static int mark_subst_opts P_((struct subst *cmd));
598static int
599mark_subst_opts(cmd)
600 struct subst *cmd;
601{
602 int flags = 0;
603 int ch;
604
605 cmd->global = false;
606 cmd->print = false;
607 cmd->eval = false;
608 cmd->numb = 0;
609 cmd->outf = NULL;
610
611 for (;;)
612 switch ( (ch = in_nonblank()) )
613 {
614 case 'i': /* GNU extension */
615 case 'I': /* GNU extension */
616 flags |= REG_ICASE;
617 break;
618
619#ifdef REG_PERL
620 case 's': /* GNU extension */
621 case 'S': /* GNU extension */
622 if (extended_regexp_flags & REG_PERL)
623 flags |= REG_DOTALL;
624 break;
625
626 case 'x': /* GNU extension */
627 case 'X': /* GNU extension */
628 if (extended_regexp_flags & REG_PERL)
629 flags |= REG_EXTENDED;
630 break;
631#endif
632
633 case 'm': /* GNU extension */
634 case 'M': /* GNU extension */
635 flags |= REG_NEWLINE;
636 break;
637
638 case 'e':
639 cmd->eval = true;
640 break;
641
642 case 'p':
643 if (cmd->print)
644 bad_prog(_(EXCESS_P_OPT));
645 cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */
646 break;
647
648 case 'g':
649 if (cmd->global)
650 bad_prog(_(EXCESS_G_OPT));
651 cmd->global = true;
652 break;
653
654 case 'w':
655 cmd->outf = get_openfile(&file_write, "w", true);
656 return flags;
657
658 case '0': case '1': case '2': case '3': case '4':
659 case '5': case '6': case '7': case '8': case '9':
660 if (cmd->numb)
661 bad_prog(_(EXCESS_N_OPT));
662 cmd->numb = in_integer(ch);
663 if (!cmd->numb)
664 bad_prog(_(ZERO_N_OPT));
665 break;
666
667 case CLOSE_BRACE:
668 case '#':
669 savchar(ch);
670 /* Fall Through */
671 case EOF:
672 case '\n':
673 case ';':
674 return flags;
675
676 case '\r':
677 if (inchar() == '\n')
678 return flags;
679 /* FALLTHROUGH */
680
681 default:
682 bad_prog(_(UNKNOWN_S_OPT));
683 /*NOTREACHED*/
684 }
685}
686
687
688
689/* read in a label for a `:', `b', or `t' command */
690static char *read_label P_((void));
691static char *
692read_label()
693{
694 struct buffer *b;
695 int ch;
696 char *ret;
697
698 b = init_buffer();
699 ch = in_nonblank();
700
701 while (ch != EOF && ch != '\n'
702 && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#')
703 ch = add_then_next (b, ch);
704
705 savchar(ch);
706 add1_buffer(b, '\0');
707 ret = ck_strdup(get_buffer(b));
708 free_buffer(b);
709 return ret;
710}
711
712/* Store a label (or label reference) created by a `:', `b', or `t'
713 command so that the jump to/from the label can be backpatched after
714 compilation is complete, or a reference created by a `{' to be
715 backpatched when the corresponding `}' is found. */
716static struct sed_label *setup_label
717 P_((struct sed_label *, countT, char *, const struct error_info *));
718static struct sed_label *
719setup_label(list, idx, name, err_info)
720 struct sed_label *list;
721 countT idx;
722 char *name;
723 const struct error_info *err_info;
724{
725 struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label);
726 ret->v_index = idx;
727 ret->name = name;
728 if (err_info)
729 MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info));
730 ret->next = list;
731 return ret;
732}
733
734static struct sed_label *release_label P_((struct sed_label *list_head));
735static struct sed_label *
736release_label(list_head)
737 struct sed_label *list_head;
738{
739 struct sed_label *ret;
740
741 if (!list_head)
742 return NULL;
743 ret = list_head->next;
744
745 FREE(list_head->name);
746
747#if 0
748 /* We use obstacks */
749 FREE(list_head);
750#endif
751 return ret;
752}
753
754static struct replacement *new_replacement P_((char *, size_t,
755 enum replacement_types));
756static struct replacement *
757new_replacement(text, length, type)
758 char *text;
759 size_t length;
760 enum replacement_types type;
761{
762 struct replacement *r = OB_MALLOC(&obs, 1, struct replacement);
763
764 r->prefix = text;
765 r->prefix_length = length;
766 r->subst_id = -1;
767 r->repl_type = type;
768
769 /* r-> next = NULL; */
770 return r;
771}
772
773static void setup_replacement P_((struct subst *, const char *, size_t));
774static void
775setup_replacement(sub, text, length)
776 struct subst *sub;
777 const char *text;
778 size_t length;
779{
780 char *base;
781 char *p;
782 char *text_end;
783 enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS;
784 struct replacement root;
785 struct replacement *tail;
786
787 sub->max_id = 0;
788 base = MEMDUP(text, length, char);
789 length = normalize_text(base, length, TEXT_REPLACEMENT);
790
791 text_end = base + length;
792 tail = &root;
793
794 for (p=base; p<text_end; ++p)
795 {
796 if (*p == '\\')
797 {
798 /* Preceding the backslash may be some literal text: */
799 tail = tail->next =
800 new_replacement(base, CAST(size_t)(p - base), repl_type);
801
802 repl_type = save_type;
803
804 /* Skip the backslash and look for a numeric back-reference,
805 or a case-munging escape if not in POSIX mode: */
806 ++p;
807 if (p < text_end && (posixicity != POSIXLY_BASIC || ISDIGIT (*p)))
808 switch (*p)
809 {
810 case '0': case '1': case '2': case '3': case '4':
811 case '5': case '6': case '7': case '8': case '9':
812 tail->subst_id = *p - '0';
813 if (sub->max_id < tail->subst_id)
814 sub->max_id = tail->subst_id;
815 break;
816
817 case 'L':
818 repl_type = REPL_LOWERCASE;
819 save_type = REPL_LOWERCASE;
820 break;
821
822 case 'U':
823 repl_type = REPL_UPPERCASE;
824 save_type = REPL_UPPERCASE;
825 break;
826
827 case 'E':
828 repl_type = REPL_ASIS;
829 save_type = REPL_ASIS;
830 break;
831
832 case 'l':
833 save_type = repl_type;
834 repl_type |= REPL_LOWERCASE_FIRST;
835 break;
836
837 case 'u':
838 save_type = repl_type;
839 repl_type |= REPL_UPPERCASE_FIRST;
840 break;
841
842 default:
843 p[-1] = *p;
844 ++tail->prefix_length;
845 }
846
847 base = p + 1;
848 }
849 else if (*p == '&')
850 {
851 /* Preceding the ampersand may be some literal text: */
852 tail = tail->next =
853 new_replacement(base, CAST(size_t)(p - base), repl_type);
854
855 repl_type = save_type;
856 tail->subst_id = 0;
857 base = p + 1;
858 }
859 }
860 /* There may be some trailing literal text: */
861 if (base < text_end)
862 tail = tail->next =
863 new_replacement(base, CAST(size_t)(text_end - base), repl_type);
864
865 tail->next = NULL;
866 sub->replacement = root.next;
867}
868
869static void read_text P_((struct text_buf *buf, int leadin_ch));
870static void
871read_text(buf, leadin_ch)
872 struct text_buf *buf;
873 int leadin_ch;
874{
875 int ch;
876
877 /* Should we start afresh (as opposed to continue a partial text)? */
878 if (buf)
879 {
880 if (pending_text)
881 free_buffer(pending_text);
882 pending_text = init_buffer();
883 buf->text = NULL;
884 buf->text_length = 0;
885 old_text_buf = buf;
886 }
887 /* assert(old_text_buf != NULL); */
888
889 if (leadin_ch == EOF)
890 return;
891
892 if (leadin_ch != '\n')
893 add1_buffer(pending_text, leadin_ch);
894
895 ch = inchar();
896 while (ch != EOF && ch != '\n')
897 {
898 if (ch == '\\')
899 {
900 ch = inchar();
901 if (ch != EOF)
902 add1_buffer (pending_text, '\\');
903 }
904
905 if (ch == EOF)
906 {
907 add1_buffer (pending_text, '\n');
908 return;
909 }
910
911 ch = add_then_next (pending_text, ch);
912 }
913
914 add1_buffer(pending_text, '\n');
915 if (!buf)
916 buf = old_text_buf;
917 buf->text_length = normalize_text (get_buffer (pending_text),
918 size_buffer (pending_text), TEXT_BUFFER);
919 buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char);
920 free_buffer(pending_text);
921 pending_text = NULL;
922}
923
924
925
926/* Try to read an address for a sed command. If it succeeds,
927 return non-zero and store the resulting address in `*addr'.
928 If the input doesn't look like an address read nothing
929 and return zero. */
930static bool compile_address P_((struct addr *addr, int ch));
931static bool
932compile_address(addr, ch)
933 struct addr *addr;
934 int ch;
935{
936 addr->addr_type = ADDR_IS_NULL;
937 addr->addr_step = 0;
938 addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */
939 addr->addr_regex = NULL;
940
941 if (ch == '/' || ch == '\\')
942 {
943 int flags = 0;
944 struct buffer *b;
945 addr->addr_type = ADDR_IS_REGEX;
946 if (ch == '\\')
947 ch = inchar();
948 if ( !(b = match_slash(ch, true)) )
949 bad_prog(_(UNTERM_ADDR_RE));
950
951 for(;;)
952 {
953 ch = in_nonblank();
954 switch(ch)
955 {
956 case 'I': /* GNU extension */
957 flags |= REG_ICASE;
958 break;
959
960#ifdef REG_PERL
961 case 'S': /* GNU extension */
962 if (extended_regexp_flags & REG_PERL)
963 flags |= REG_DOTALL;
964 break;
965
966 case 'X': /* GNU extension */
967 if (extended_regexp_flags & REG_PERL)
968 flags |= REG_EXTENDED;
969 break;
970#endif
971
972 case 'M': /* GNU extension */
973 flags |= REG_NEWLINE;
974 break;
975
976 default:
977 savchar (ch);
978 addr->addr_regex = compile_regex (b, flags, 0);
979 free_buffer(b);
980 return true;
981 }
982 }
983 }
984 else if (ISDIGIT(ch))
985 {
986 addr->addr_number = in_integer(ch);
987 addr->addr_type = ADDR_IS_NUM;
988 ch = in_nonblank();
989 if (ch != '~')
990 {
991 savchar(ch);
992 }
993 else
994 {
995 countT step = in_integer(in_nonblank());
996 if (step > 0)
997 {
998 addr->addr_step = step;
999 addr->addr_type = ADDR_IS_NUM_MOD;
1000 }
1001 }
1002 }
1003 else if (ch == '+' || ch == '~')
1004 {
1005 addr->addr_step = in_integer(in_nonblank());
1006 if (addr->addr_step==0)
1007 ; /* default to ADDR_IS_NULL; forces matching to stop on next line */
1008 else if (ch == '+')
1009 addr->addr_type = ADDR_IS_STEP;
1010 else
1011 addr->addr_type = ADDR_IS_STEP_MOD;
1012 }
1013 else if (ch == '$')
1014 {
1015 addr->addr_type = ADDR_IS_LAST;
1016 }
1017 else
1018 return false;
1019
1020 return true;
1021}
1022
1023/* Read a program (or a subprogram within `{' `}' pairs) in and store
1024 the compiled form in `*vector'. Return a pointer to the new vector. */
1025static struct vector *compile_program P_((struct vector *));
1026static struct vector *
1027compile_program(vector)
1028 struct vector *vector;
1029{
1030 struct sed_cmd *cur_cmd;
1031 struct buffer *b;
1032 int ch;
1033
1034 if (!vector)
1035 {
1036 vector = MALLOC(1, struct vector);
1037 vector->v = NULL;
1038 vector->v_allocated = 0;
1039 vector->v_length = 0;
1040
1041 obstack_init (&obs);
1042 }
1043 if (pending_text)
1044 read_text(NULL, '\n');
1045
1046 for (;;)
1047 {
1048 struct addr a;
1049
1050 while ((ch=inchar()) == ';' || ISSPACE(ch))
1051 ;
1052 if (ch == EOF)
1053 break;
1054
1055 cur_cmd = next_cmd_entry(&vector);
1056 if (compile_address(&a, ch))
1057 {
1058 if (a.addr_type == ADDR_IS_STEP
1059 || a.addr_type == ADDR_IS_STEP_MOD)
1060 bad_prog(_(BAD_STEP));
1061
1062 cur_cmd->a1 = MEMDUP(&a, 1, struct addr);
1063 ch = in_nonblank();
1064 if (ch == ',')
1065 {
1066 if (!compile_address(&a, in_nonblank()))
1067 bad_prog(_(BAD_COMMA));
1068
1069 cur_cmd->a2 = MEMDUP(&a, 1, struct addr);
1070 ch = in_nonblank();
1071 }
1072
1073 if (cur_cmd->a1->addr_type == ADDR_IS_NUM
1074 && cur_cmd->a1->addr_number == 0
1075 && (!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX))
1076 bad_prog(_(INVALID_LINE_0));
1077 }
1078 if (ch == '!')
1079 {
1080 cur_cmd->addr_bang = true;
1081 ch = in_nonblank();
1082 if (ch == '!')
1083 bad_prog(_(BAD_BANG));
1084 }
1085
1086 /* Do not accept extended commands in --posix mode. Also,
1087 a few commands only accept one address in that mode. */
1088 if (posixicity == POSIXLY_BASIC)
1089 switch (ch)
1090 {
1091 case 'v': case 'L': case 'Q': case 'T':
1092 case 'R': case 'W':
1093 bad_command(ch);
1094
1095 case 'a': case 'i': case 'l':
1096 case '=': case 'r':
1097 if (cur_cmd->a2)
1098 bad_prog(_(ONE_ADDR));
1099 }
1100
1101 cur_cmd->cmd = ch;
1102 switch (ch)
1103 {
1104 case '#':
1105 if (cur_cmd->a1)
1106 bad_prog(_(NO_SHARP_ADDR));
1107 ch = inchar();
1108 if (ch=='n' && first_script && cur_input.line < 2)
1109 if ( (prog.base && prog.cur==2+prog.base)
1110 || (prog.file && !prog.base && 2==ftell(prog.file)))
1111 no_default_output = true;
1112 while (ch != EOF && ch != '\n')
1113 ch = inchar();
1114 continue; /* restart the for (;;) loop */
1115
1116 case 'v':
1117 /* This is an extension. Programs needing GNU sed might start
1118 * with a `v' command so that other seds will stop.
1119 * We compare the version and ignore POSIXLY_CORRECT.
1120 */
1121 {
1122 char *version = read_label ();
1123 char *compared_version;
1124 compared_version = (*version == '\0') ? "4.0" : version;
1125 if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0)
1126 bad_prog(_(ANCIENT_VERSION));
1127
1128 free (version);
1129 posixicity = POSIXLY_EXTENDED;
1130 }
1131 continue;
1132
1133 case '{':
1134 blocks = setup_label(blocks, vector->v_length, NULL, &cur_input);
1135 cur_cmd->addr_bang = !cur_cmd->addr_bang;
1136 break;
1137
1138 case '}':
1139 if (!blocks)
1140 bad_prog(_(EXCESS_CLOSE_BRACE));
1141 if (cur_cmd->a1)
1142 bad_prog(_(NO_CLOSE_BRACE_ADDR));
1143 ch = in_nonblank();
1144 if (ch == CLOSE_BRACE || ch == '#')
1145 savchar(ch);
1146 else if (ch != EOF && ch != '\n' && ch != ';')
1147 bad_prog(_(EXCESS_JUNK));
1148
1149 vector->v[blocks->v_index].x.jump_index = vector->v_length;
1150 blocks = release_label(blocks); /* done with this entry */
1151 break;
1152
1153 case 'e':
1154 ch = in_nonblank();
1155 if (ch == EOF || ch == '\n')
1156 {
1157 cur_cmd->x.cmd_txt.text_length = 0;
1158 break;
1159 }
1160 else
1161 goto read_text_to_slash;
1162
1163 case 'a':
1164 case 'i':
1165 case 'c':
1166 ch = in_nonblank();
1167
1168 read_text_to_slash:
1169 if (ch == EOF)
1170 bad_prog(_(EXPECTED_SLASH));
1171
1172 if (ch == '\\')
1173 ch = inchar();
1174 else
1175 {
1176 savchar(ch);
1177 ch = '\n';
1178 }
1179
1180 read_text(&cur_cmd->x.cmd_txt, ch);
1181 break;
1182
1183 case ':':
1184 if (cur_cmd->a1)
1185 bad_prog(_(NO_COLON_ADDR));
1186 labels = setup_label(labels, vector->v_length, read_label(), NULL);
1187 break;
1188
1189 case 'T':
1190 case 'b':
1191 case 't':
1192 jumps = setup_label(jumps, vector->v_length, read_label(), NULL);
1193 break;
1194
1195 case 'Q':
1196 case 'q':
1197 if (cur_cmd->a2)
1198 bad_prog(_(ONE_ADDR));
1199 /* Fall through */
1200
1201 case 'L':
1202 case 'l':
1203 ch = in_nonblank();
1204 if (ISDIGIT(ch))
1205 {
1206 cur_cmd->x.int_arg = in_integer(ch);
1207 ch = in_nonblank();
1208 }
1209 else
1210 cur_cmd->x.int_arg = -1;
1211
1212 if (ch == CLOSE_BRACE || ch == '#')
1213 savchar(ch);
1214 else if (ch != EOF && ch != '\n' && ch != ';')
1215 bad_prog(_(EXCESS_JUNK));
1216
1217 break;
1218
1219 case '=':
1220 case 'd':
1221 case 'D':
1222 case 'g':
1223 case 'G':
1224 case 'h':
1225 case 'H':
1226 case 'n':
1227 case 'N':
1228 case 'p':
1229 case 'P':
1230 case 'x':
1231 ch = in_nonblank();
1232 if (ch == CLOSE_BRACE || ch == '#')
1233 savchar(ch);
1234 else if (ch != EOF && ch != '\n' && ch != ';')
1235 bad_prog(_(EXCESS_JUNK));
1236 break;
1237
1238 case 'r':
1239 b = read_filename();
1240 cur_cmd->x.fname = ck_strdup(get_buffer(b));
1241 free_buffer(b);
1242 break;
1243
1244 case 'R':
1245 cur_cmd->x.fp = get_openfile(&file_read, "r", false)->fp;
1246 break;
1247
1248 case 'W':
1249 case 'w':
1250 cur_cmd->x.outf = get_openfile(&file_write, "w", true);
1251 break;
1252
1253 case 's':
1254 {
1255 struct buffer *b2;
1256 int flags;
1257 int slash;
1258
1259 slash = inchar();
1260 if ( !(b = match_slash(slash, true)) )
1261 bad_prog(_(UNTERM_S_CMD));
1262 if ( !(b2 = match_slash(slash, false)) )
1263 bad_prog(_(UNTERM_S_CMD));
1264
1265 cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst);
1266 setup_replacement(cur_cmd->x.cmd_subst,
1267 get_buffer(b2), size_buffer(b2));
1268 free_buffer(b2);
1269
1270 flags = mark_subst_opts(cur_cmd->x.cmd_subst);
1271 cur_cmd->x.cmd_subst->regx =
1272 compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1);
1273 free_buffer(b);
1274 }
1275 break;
1276
1277 case 'y':
1278 {
1279 size_t len, dest_len;
1280 int slash;
1281 struct buffer *b2;
1282 char *src_buf, *dest_buf;
1283
1284 slash = inchar();
1285 if ( !(b = match_slash(slash, false)) )
1286 bad_prog(_(UNTERM_Y_CMD));
1287 src_buf = get_buffer(b);
1288 len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER);
1289
1290 if ( !(b2 = match_slash(slash, false)) )
1291 bad_prog(_(UNTERM_Y_CMD));
1292 dest_buf = get_buffer(b2);
1293 dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER);
1294
1295 if (mb_cur_max > 1)
1296 {
1297 int i, j, idx, src_char_num;
1298 size_t *src_lens = MALLOC(len, size_t);
1299 char **trans_pairs;
1300 size_t mbclen;
1301 mbstate_t cur_stat;
1302
1303 /* Enumerate how many character the source buffer has. */
1304 memset(&cur_stat, 0, sizeof(mbstate_t));
1305 for (i = 0, j = 0; i < len;)
1306 {
1307 mbclen = MBRLEN (src_buf + i, len - i, &cur_stat);
1308 /* An invalid sequence, or a truncated multibyte character.
1309 We treat it as a singlebyte character. */
1310 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1311 || mbclen == 0)
1312 mbclen = 1;
1313 src_lens[j++] = mbclen;
1314 i += mbclen;
1315 }
1316 src_char_num = j;
1317
1318 memset(&cur_stat, 0, sizeof(mbstate_t));
1319 idx = 0;
1320
1321 /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL}
1322 src(i) : pointer to i-th source character.
1323 dest(i) : pointer to i-th destination character.
1324 NULL : terminator */
1325 trans_pairs = MALLOC(2 * src_char_num + 1, char*);
1326 cur_cmd->x.translatemb = trans_pairs;
1327 for (i = 0; i < src_char_num; i++)
1328 {
1329 if (idx >= dest_len)
1330 bad_prog(_(Y_CMD_LEN));
1331
1332 /* Set the i-th source character. */
1333 trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char);
1334 strncpy(trans_pairs[2 * i], src_buf, src_lens[i]);
1335 trans_pairs[2 * i][src_lens[i]] = '\0';
1336 src_buf += src_lens[i]; /* Forward to next character. */
1337
1338 /* Fetch the i-th destination character. */
1339 mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat);
1340 /* An invalid sequence, or a truncated multibyte character.
1341 We treat it as a singlebyte character. */
1342 if (mbclen == (size_t) -1 || mbclen == (size_t) -2
1343 || mbclen == 0)
1344 mbclen = 1;
1345
1346 /* Set the i-th destination character. */
1347 trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char);
1348 strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen);
1349 trans_pairs[2 * i + 1][mbclen] = '\0';
1350 idx += mbclen; /* Forward to next character. */
1351 }
1352 trans_pairs[2 * i] = NULL;
1353 if (idx != dest_len)
1354 bad_prog(_(Y_CMD_LEN));
1355 }
1356 else
1357 {
1358 char *translate = OB_MALLOC(&obs, YMAP_LENGTH, char);
1359 unsigned char *ustring = CAST(unsigned char *)src_buf;
1360
1361 if (len != dest_len)
1362 bad_prog(_(Y_CMD_LEN));
1363
1364 for (len = 0; len < YMAP_LENGTH; len++)
1365 translate[len] = len;
1366
1367 while (dest_len--)
1368 translate[(unsigned char)*ustring++] = *dest_buf++;
1369
1370 cur_cmd->x.translate = translate;
1371 }
1372
1373 if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';')
1374 bad_prog(_(EXCESS_JUNK));
1375
1376 free_buffer(b);
1377 free_buffer(b2);
1378 }
1379 break;
1380
1381 case EOF:
1382 bad_prog(_(NO_COMMAND));
1383 /*NOTREACHED*/
1384
1385 default:
1386 bad_command (ch);
1387 /*NOTREACHED*/
1388 }
1389
1390 /* this is buried down here so that "continue" statements will miss it */
1391 ++vector->v_length;
1392 }
1393 return vector;
1394}
1395
1396
1397
1398/* deal with \X escapes */
1399size_t
1400normalize_text(buf, len, buftype)
1401 char *buf;
1402 size_t len;
1403 enum text_types buftype;
1404{
1405 const char *bufend = buf + len;
1406 char *p = buf;
1407 char *q = buf;
1408
1409 /* This variable prevents normalizing text within bracket
1410 subexpressions when conforming to POSIX. If 0, we
1411 are not within a bracket expression. If -1, we are within a
1412 bracket expression but are not within [.FOO.], [=FOO=],
1413 or [:FOO:]. Otherwise, this is the '.', '=', or ':'
1414 respectively within these three types of subexpressions. */
1415 int bracket_state = 0;
1416
1417 int mbclen;
1418 mbstate_t cur_stat;
1419 memset(&cur_stat, 0, sizeof(mbstate_t));
1420
1421 while (p < bufend)
1422 {
1423 int c;
1424 mbclen = MBRLEN (p, bufend - p, &cur_stat);
1425 if (mbclen != 1)
1426 {
1427 /* An invalid sequence, or a truncated multibyte character.
1428 We treat it as a singlebyte character. */
1429 if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
1430 mbclen = 1;
1431
1432 memmove (q, p, mbclen);
1433 q += mbclen;
1434 p += mbclen;
1435 continue;
1436 }
1437
1438 if (*p == '\\' && p+1 < bufend && bracket_state == 0)
1439 switch ( (c = *++p) )
1440 {
1441#if defined __STDC__ && __STDC__-0
1442 case 'a': *q++ = '\a'; p++; continue;
1443#else /* Not STDC; we'll just assume ASCII */
1444 case 'a': *q++ = '\007'; p++; continue;
1445#endif
1446 /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */
1447 case 'f': *q++ = '\f'; p++; continue;
1448 case '\n': /*fall through */
1449 case 'n': *q++ = '\n'; p++; continue;
1450 case 'r': *q++ = '\r'; p++; continue;
1451 case 't': *q++ = '\t'; p++; continue;
1452 case 'v': *q++ = '\v'; p++; continue;
1453
1454 case 'd': /* decimal byte */
1455 p = convert_number(q, p+1, bufend, 10, 3, 'd');
1456 q++;
1457 continue;
1458
1459 case 'x': /* hexadecimal byte */
1460 p = convert_number(q, p+1, bufend, 16, 2, 'x');
1461 q++;
1462 continue;
1463
1464#ifdef REG_PERL
1465 case '0': case '1': case '2': case '3':
1466 case '4': case '5': case '6': case '7':
1467 if ((extended_regexp_flags & REG_PERL)
1468 && p+1 < bufend
1469 && p[1] >= '0' && p[1] <= '9')
1470 {
1471 p = convert_number(q, p, bufend, 8, 3, *p);
1472 q++;
1473 }
1474 else
1475 {
1476 /* we just pass the \ up one level for interpretation */
1477 if (buftype != TEXT_BUFFER)
1478 *q++ = '\\';
1479 }
1480
1481 continue;
1482
1483 case 'o': /* octal byte */
1484 if (!(extended_regexp_flags & REG_PERL))
1485 {
1486 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1487 q++;
1488 }
1489 else
1490 {
1491 /* we just pass the \ up one level for interpretation */
1492 if (buftype != TEXT_BUFFER)
1493 *q++ = '\\';
1494 }
1495
1496 continue;
1497#else
1498 case 'o': /* octal byte */
1499 p = convert_number(q, p+1, bufend, 8, 3, 'o');
1500 q++;
1501 continue;
1502#endif
1503
1504 case 'c':
1505 if (++p < bufend)
1506 {
1507 *q++ = toupper(*p) ^ 0x40;
1508 p++;
1509 continue;
1510 }
1511 else
1512 {
1513 /* we just pass the \ up one level for interpretation */
1514 if (buftype != TEXT_BUFFER)
1515 *q++ = '\\';
1516 continue;
1517 }
1518
1519 default:
1520 /* we just pass the \ up one level for interpretation */
1521 if (buftype != TEXT_BUFFER)
1522 *q++ = '\\';
1523 break;
1524 }
1525 else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED)
1526 switch (*p)
1527 {
1528 case '[':
1529 if (!bracket_state)
1530 bracket_state = -1;
1531 break;
1532
1533 case ':':
1534 case '.':
1535 case '=':
1536 if (bracket_state == -1 && p[-1] == '[')
1537 bracket_state = *p;
1538 break;
1539
1540 case ']':
1541 if (bracket_state == 0)
1542 ;
1543 else if (bracket_state == -1)
1544 bracket_state = 0;
1545 else if (p[-2] != bracket_state && p[-1] == bracket_state)
1546 bracket_state = -1;
1547 break;
1548 }
1549
1550 *q++ = *p++;
1551 }
1552 return (size_t)(q - buf);
1553}
1554
1555
1556/* `str' is a string (from the command line) that contains a sed command.
1557 Compile the command, and add it to the end of `cur_program'. */
1558struct vector *
1559compile_string(cur_program, str, len)
1560 struct vector *cur_program;
1561 char *str;
1562 size_t len;
1563{
1564 static countT string_expr_count = 0;
1565 struct vector *ret;
1566
1567 prog.file = NULL;
1568 prog.base = CAST(unsigned char *)str;
1569 prog.cur = prog.base;
1570 prog.end = prog.cur + len;
1571
1572 cur_input.line = 0;
1573 cur_input.name = NULL;
1574 cur_input.string_expr_count = ++string_expr_count;
1575
1576 ret = compile_program(cur_program);
1577 prog.base = NULL;
1578 prog.cur = NULL;
1579 prog.end = NULL;
1580
1581 first_script = false;
1582 return ret;
1583}
1584
1585/* `cmdfile' is the name of a file containing sed commands.
1586 Read them in and add them to the end of `cur_program'.
1587 */
1588struct vector *
1589compile_file(cur_program, cmdfile)
1590 struct vector *cur_program;
1591 const char *cmdfile;
1592{
1593 size_t len;
1594 struct vector *ret;
1595
1596 prog.file = stdin;
1597 if (cmdfile[0] != '-' || cmdfile[1] != '\0')
1598 prog.file = ck_fopen(cmdfile, "rt", true);
1599
1600 cur_input.line = 1;
1601 cur_input.name = cmdfile;
1602 cur_input.string_expr_count = 0;
1603
1604 ret = compile_program(cur_program);
1605 if (prog.file != stdin)
1606 ck_fclose(prog.file);
1607 prog.file = NULL;
1608
1609 first_script = false;
1610 return ret;
1611}
1612
1613/* Make any checks which require the whole program to have been read.
1614 In particular: this backpatches the jump targets.
1615 Any cleanup which can be done after these checks is done here also. */
1616void
1617check_final_program(program)
1618 struct vector *program;
1619{
1620 struct sed_label *go;
1621 struct sed_label *lbl;
1622
1623 /* do all "{"s have a corresponding "}"? */
1624 if (blocks)
1625 {
1626 /* update info for error reporting: */
1627 MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input));
1628 bad_prog(_(EXCESS_OPEN_BRACE));
1629 }
1630
1631 /* was the final command an unterminated a/c/i command? */
1632 if (pending_text)
1633 {
1634 old_text_buf->text_length = size_buffer(pending_text);
1635 old_text_buf->text = MEMDUP(get_buffer(pending_text),
1636 old_text_buf->text_length, char);
1637 free_buffer(pending_text);
1638 pending_text = NULL;
1639 }
1640
1641 for (go = jumps; go; go = release_label(go))
1642 {
1643 for (lbl = labels; lbl; lbl = lbl->next)
1644 if (strcmp(lbl->name, go->name) == 0)
1645 break;
1646 if (lbl)
1647 {
1648 program->v[go->v_index].x.jump_index = lbl->v_index;
1649 }
1650 else
1651 {
1652 if (*go->name)
1653 panic(_("can't find label for jump to `%s'"), go->name);
1654 program->v[go->v_index].x.jump_index = program->v_length;
1655 }
1656 }
1657 jumps = NULL;
1658
1659 for (lbl = labels; lbl; lbl = release_label(lbl))
1660 ;
1661 labels = NULL;
1662
1663 /* There is no longer a need to track file names: */
1664 {
1665 struct output *p;
1666
1667 for (p=file_read; p; p=p->link)
1668 if (p->name)
1669 {
1670 FREE(p->name);
1671 p->name = NULL;
1672 }
1673
1674 for (p=file_write; p; p=p->link)
1675 if (p->name)
1676 {
1677 FREE(p->name);
1678 p->name = NULL;
1679 }
1680 }
1681}
1682
1683/* Rewind all resources which were allocated in this module. */
1684void
1685rewind_read_files()
1686{
1687 struct output *p;
1688
1689 for (p=file_read; p; p=p->link)
1690 if (p->fp)
1691 rewind(p->fp);
1692}
1693
1694/* Release all resources which were allocated in this module. */
1695void
1696finish_program(program)
1697 struct vector *program;
1698{
1699 /* close all files... */
1700 {
1701 struct output *p, *q;
1702
1703 for (p=file_read; p; p=q)
1704 {
1705 if (p->fp)
1706 ck_fclose(p->fp);
1707 q = p->link;
1708#if 0
1709 /* We use obstacks. */
1710 FREE(p);
1711#endif
1712 }
1713
1714 for (p=file_write; p; p=q)
1715 {
1716 if (p->fp)
1717 ck_fclose(p->fp);
1718 q = p->link;
1719#if 0
1720 /* We use obstacks. */
1721 FREE(p);
1722#endif
1723 }
1724 file_read = file_write = NULL;
1725 }
1726
1727#ifdef DEBUG_LEAKS
1728 obstack_free (&obs, NULL);
1729#endif /*DEBUG_LEAKS*/
1730}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette