VirtualBox

source: kBuild/vendor/grep/2.12/src/dosbuf.c@ 2595

Last change on this file since 2595 was 2595, checked in by bird, 12 years ago

gnu grep version 2.12 (grep-2.12.tar.xz, md5sum=8d2f0346d08b13c18afb81f0e8aa1e2f)

  • Property svn:eol-style set to native
File size: 6.2 KB
Line 
/* dosbuf.c
   Copyright (C) 1992, 1997-2002, 2004-2012 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   02110-1301, USA.  */
18
/* Messy DOS-specific code for correctly treating binary, Unix text
   and DOS text files.

   This has several aspects:

     * Guessing the file type (unless the user tells us);
     * Stripping CR characters from DOS text files (otherwise regex
       functions won't work correctly);
     * Reporting correct byte count with -b for any kind of file.

*/
30
31#include <config.h>
32
/* How the contents of the current input file are to be treated.  */
typedef enum {
  UNKNOWN,      /* not decided yet; undossify_input will call guess_type */
  DOS_BINARY,   /* a NUL byte was seen: leave the data alone */
  DOS_TEXT,     /* a CR-LF pair was seen: strip CR characters */
  UNIX_TEXT     /* neither NUL nor CR-LF was seen */
} File_type;

/* One entry of the table that maps offsets in the CR-stripped text back
   to offsets in the file as stored on disk.  */
struct dos_map {
  off_t pos;    /* position in buffer passed to matcher */
  off_t add;    /* how much to add when reporting char position */
};

/* Nonzero means report offsets as if the CRs were not there: no map is
   built and dossified_pos returns its argument unchanged.  */
static int dos_report_unix_offset = 0;

/* Type of the file currently being read.  */
static File_type dos_file_type = UNKNOWN;
/* Value dos_file_type is reset to at the start of every new file.  */
static File_type dos_use_file_type = UNKNOWN;
/* Number of CR characters counted so far in the current file.  */
static off_t dos_stripped_crs = 0;
/* The position map, the number of entries allocated for it, and the
   index of its last valid entry.  */
static struct dos_map *dos_pos_map;
static int dos_pos_map_size = 0;
static int dos_pos_map_used = 0;
/* Cursors into the map: where undossify_input writes next, and where
   dossified_pos found its previous answer.  */
static int inp_map_idx = 0, out_map_idx = 1;

/* Guess DOS file type by looking at its contents.

   BUF points to BUFLEN bytes of file data; nothing outside that range
   is read.  Return DOS_BINARY as soon as a NUL byte is found, DOS_TEXT
   if no NUL was found and at least one CR is immediately followed by
   LF, and UNIX_TEXT otherwise (including for an empty buffer).  */
static inline File_type
guess_type (const char *buf, size_t buflen)
{
  int crlf_seen = 0;
  const char *bp = buf;

  while (buflen--)
    {
      /* Treat a file as binary if it has a NUL character.  */
      if (*bp == '\0')
        return DOS_BINARY;

      /* CR before LF means DOS text file (unless we later see
         binary characters).  BUFLEN has already been decremented, so
         it counts the bytes after *BP; testing it keeps bp[1] inside
         the buffer.  */
      if (*bp == '\r' && buflen && bp[1] == '\n')
        crlf_seen = 1;

      bp++;
    }

  return crlf_seen ? DOS_TEXT : UNIX_TEXT;
}
75
76/* Convert external DOS file representation to internal.
77 Return the count of characters left in the buffer.
78 Build table to map character positions when reporting byte counts. */
79static inline int
80undossify_input (char *buf, size_t buflen)
81{
82 int chars_left = 0;
83
84 if (totalcc == 0)
85 {
86 /* New file: forget everything we knew about character
87 position mapping table and file type. */
88 inp_map_idx = 0;
89 out_map_idx = 1;
90 dos_pos_map_used = 0;
91 dos_stripped_crs = 0;
92 dos_file_type = dos_use_file_type;
93 }
94
95 /* Guess if this file is binary, unless we already know that. */
96 if (dos_file_type == UNKNOWN)
97 dos_file_type = guess_type(buf, buflen);
98
99 /* If this file is to be treated as DOS Text, strip the CR characters
100 and maybe build the table for character position mapping on output. */
101 if (dos_file_type == DOS_TEXT)
102 {
103 char *destp = buf;
104
105 while (buflen--)
106 {
107 if (*buf != '\r')
108 {
109 *destp++ = *buf++;
110 chars_left++;
111 }
112 else
113 {
114 buf++;
115 if (out_byte && !dos_report_unix_offset)
116 {
117 dos_stripped_crs++;
118 while (buflen && *buf == '\r')
119 {
120 dos_stripped_crs++;
121 buflen--;
122 buf++;
123 }
124 if (inp_map_idx >= dos_pos_map_size - 1)
125 {
126 dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000;
127 dos_pos_map = xrealloc(dos_pos_map,
128 dos_pos_map_size *
129 sizeof(struct dos_map));
130 }
131
132 if (!inp_map_idx)
133 {
134 /* Add sentinel entry. */
135 dos_pos_map[inp_map_idx].pos = 0;
136 dos_pos_map[inp_map_idx++].add = 0;
137
138 /* Initialize first real entry. */
139 dos_pos_map[inp_map_idx].add = 0;
140 }
141
142 /* Put the new entry. If the stripped CR characters
143 precede a Newline (the usual case), pretend that
144 they were found *after* the Newline. This makes
145 displayed byte offsets more reasonable in some
146 cases, and fits better the intuitive notion that
147 the line ends *before* the CR, not *after* it. */
148 inp_map_idx++;
149 dos_pos_map[inp_map_idx-1].pos =
150 (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc;
151 dos_pos_map[inp_map_idx].add = dos_stripped_crs;
152 dos_pos_map_used = inp_map_idx;
153
154 /* The following will be updated on the next pass. */
155 dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1;
156 }
157 }
158 }
159
160 return chars_left;
161 }
162
163 return buflen;
164}
165
166/* Convert internal byte count into external. */
167static inline off_t
168dossified_pos (off_t byteno)
169{
170 off_t pos_lo;
171 off_t pos_hi;
172
173 if (dos_file_type != DOS_TEXT || dos_report_unix_offset)
174 return byteno;
175
176 /* Optimization: usually the file will be scanned sequentially.
177 So in most cases, this byte position will be found in the
178 table near the previous one, as recorded in `out_map_idx'. */
179 pos_lo = dos_pos_map[out_map_idx-1].pos;
180 pos_hi = dos_pos_map[out_map_idx].pos;
181
182 /* If the initial guess failed, search up or down, as
183 appropriate, beginning with the previous place. */
184 if (byteno >= pos_hi)
185 {
186 out_map_idx++;
187 while (out_map_idx < dos_pos_map_used &&
188 byteno >= dos_pos_map[out_map_idx].pos)
189 out_map_idx++;
190 }
191
192 else if (byteno < pos_lo)
193 {
194 out_map_idx--;
195 while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos)
196 out_map_idx--;
197 }
198
199 return byteno + dos_pos_map[out_map_idx].add;
200}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette