1 |
|
|
/* $OpenBSD: filter.c,v 1.9 2017/08/30 02:54:07 lteo Exp $ */ |
2 |
|
|
|
3 |
|
|
/* filter - postprocessing of flex output through filters */ |
4 |
|
|
|
5 |
|
|
/* This file is part of flex. */ |
6 |
|
|
|
7 |
|
|
/* Redistribution and use in source and binary forms, with or without */ |
8 |
|
|
/* modification, are permitted provided that the following conditions */ |
9 |
|
|
/* are met: */ |
10 |
|
|
|
11 |
|
|
/* 1. Redistributions of source code must retain the above copyright */ |
12 |
|
|
/* notice, this list of conditions and the following disclaimer. */ |
13 |
|
|
/* 2. Redistributions in binary form must reproduce the above copyright */ |
14 |
|
|
/* notice, this list of conditions and the following disclaimer in the */ |
15 |
|
|
/* documentation and/or other materials provided with the distribution. */ |
16 |
|
|
|
17 |
|
|
/* Neither the name of the University nor the names of its contributors */ |
18 |
|
|
/* may be used to endorse or promote products derived from this software */ |
19 |
|
|
/* without specific prior written permission. */ |
20 |
|
|
|
21 |
|
|
/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ |
22 |
|
|
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ |
23 |
|
|
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ |
24 |
|
|
/* PURPOSE. */ |
25 |
|
|
|
26 |
|
|
#include "flexdef.h" |
/*
 * Line of m4 input that filter_tee_header() writes first to every output
 * stream.  The embedded error text tells the user that GNU m4 is required
 * and how to point flex at it.
 */
static const char *check_4_gnu_m4 =
	"m4_dnl ifdef(`__gnu__', ,"
	"`errprint(Flex requires GNU M4. "
	"Set the PATH or set the M4 environment variable to its path name.)"
	" m4exit(2)')\n";

/** Head of the global output filter chain; NULL until a filter is created. */
struct filter *output_chain = NULL;
35 |
|
|
|
36 |
|
|
/* Allocate and initialize an external filter. |
37 |
|
|
* @param chain the current chain or NULL for new chain |
38 |
|
|
* @param cmd the command to execute. |
39 |
|
|
* @param ... a NULL terminated list of (const char*) arguments to command, |
40 |
|
|
* not including argv[0]. |
41 |
|
|
* @return newest filter in chain |
42 |
|
|
*/ |
43 |
|
|
struct filter * |
44 |
|
|
filter_create_ext(struct filter * chain, const char *cmd, |
45 |
|
|
...) |
46 |
|
|
{ |
47 |
|
|
struct filter *f; |
48 |
|
|
int max_args; |
49 |
|
|
const char *s; |
50 |
|
60 |
va_list ap; |
51 |
|
|
|
52 |
|
|
/* allocate and initialize new filter */ |
53 |
|
30 |
f = calloc(sizeof(struct filter), 1); |
54 |
✗✓ |
30 |
if (!f) |
55 |
|
|
flexerror(_("calloc failed (f) in filter_create_ext")); |
56 |
|
30 |
f->filter_func = NULL; |
57 |
|
30 |
f->extra = NULL; |
58 |
|
30 |
f->next = NULL; |
59 |
|
30 |
f->argc = 0; |
60 |
|
|
|
61 |
✓✗ |
30 |
if (chain != NULL) { |
62 |
|
|
/* append f to end of chain */ |
63 |
✗✓ |
30 |
while (chain->next) |
64 |
|
|
chain = chain->next; |
65 |
|
30 |
chain->next = f; |
66 |
|
30 |
} |
67 |
|
|
/* allocate argv, and populate it with the argument list. */ |
68 |
|
|
max_args = 8; |
69 |
|
30 |
f->argv = malloc(sizeof(char *) * (max_args + 1)); |
70 |
✗✓ |
30 |
if (!f->argv) |
71 |
|
|
flexerror(_("malloc failed (f->argv) in filter_create_ext")); |
72 |
|
30 |
f->argv[f->argc++] = cmd; |
73 |
|
|
|
74 |
|
30 |
va_start(ap, cmd); |
75 |
✓✗✓✓
|
240 |
while ((s = va_arg(ap, const char *)) != NULL) { |
76 |
✗✓ |
30 |
if (f->argc >= max_args) { |
77 |
|
|
max_args += 8; |
78 |
|
|
f->argv = realloc(f->argv, |
79 |
|
|
sizeof(char *) * (max_args + 1)); |
80 |
|
|
} |
81 |
|
30 |
f->argv[f->argc++] = s; |
82 |
|
|
} |
83 |
|
30 |
f->argv[f->argc] = NULL; |
84 |
|
|
|
85 |
|
30 |
va_end(ap); |
86 |
|
30 |
return f; |
87 |
|
30 |
} |
88 |
|
|
|
89 |
|
|
/* Allocate and initialize an internal filter. |
90 |
|
|
* @param chain the current chain or NULL for new chain |
91 |
|
|
* @param filter_func The function that will perform the filtering. |
92 |
|
|
* filter_func should return 0 if successful, and -1 |
93 |
|
|
* if an error occurs -- or it can simply exit(). |
94 |
|
|
* @param extra optional user-defined data to pass to the filter. |
95 |
|
|
* @return newest filter in chain |
96 |
|
|
*/ |
97 |
|
|
struct filter * |
98 |
|
|
filter_create_int(struct filter * chain, |
99 |
|
|
int (*filter_func) (struct filter *), |
100 |
|
|
void *extra) |
101 |
|
|
{ |
102 |
|
|
struct filter *f; |
103 |
|
|
|
104 |
|
|
/* allocate and initialize new filter */ |
105 |
|
120 |
f = calloc(sizeof(struct filter), 1); |
106 |
✗✓ |
60 |
if (!f) |
107 |
|
|
flexerror(_("calloc failed in filter_create_int")); |
108 |
|
60 |
f->next = NULL; |
109 |
|
60 |
f->argc = 0; |
110 |
|
60 |
f->argv = NULL; |
111 |
|
|
|
112 |
|
60 |
f->filter_func = filter_func; |
113 |
|
60 |
f->extra = extra; |
114 |
|
|
|
115 |
✓✓ |
60 |
if (chain != NULL) { |
116 |
|
|
/* append f to end of chain */ |
117 |
✓✓ |
90 |
while (chain->next) |
118 |
|
|
chain = chain->next; |
119 |
|
30 |
chain->next = f; |
120 |
|
30 |
} |
121 |
|
60 |
return f; |
122 |
|
|
} |
123 |
|
|
|
124 |
|
|
/** Fork and exec entire filter chain. |
125 |
|
|
* @param chain The head of the chain. |
126 |
|
|
* @return true on success. |
127 |
|
|
*/ |
128 |
|
|
bool |
129 |
|
|
filter_apply_chain(struct filter * chain) |
130 |
|
|
{ |
131 |
|
240 |
int pid, pipes[2]; |
132 |
|
|
|
133 |
|
|
/* |
134 |
|
|
* Tricky recursion, since we want to begin the chain at the END. |
135 |
|
|
* Why? Because we need all the forked processes to be children of |
136 |
|
|
* the main flex process. |
137 |
|
|
*/ |
138 |
✓✓ |
120 |
if (chain) |
139 |
|
70 |
filter_apply_chain(chain->next); |
140 |
|
|
else |
141 |
|
30 |
return true; |
142 |
|
|
|
143 |
|
|
/* |
144 |
|
|
* Now we are the right-most unprocessed link in the chain. |
145 |
|
|
*/ |
146 |
|
|
|
147 |
|
70 |
fflush(stdout); |
148 |
|
70 |
fflush(stderr); |
149 |
|
|
|
150 |
|
|
|
151 |
✗✓ |
70 |
if (pipe(pipes) == -1) |
152 |
|
|
flexerror(_("pipe failed")); |
153 |
|
|
|
154 |
✗✓ |
70 |
if ((pid = fork()) == -1) |
155 |
|
|
flexerror(_("fork failed")); |
156 |
|
|
|
157 |
✓✓ |
70 |
if (pid == 0) { |
158 |
|
|
/* child */ |
159 |
|
|
|
160 |
|
|
/* |
161 |
|
|
* We need stdin (the FILE* stdin) to connect to this new |
162 |
|
|
* pipe. There is no portable way to set stdin to a new file |
163 |
|
|
* descriptor, as stdin is not an lvalue on some systems |
164 |
|
|
* (BSD). So we dup the new pipe onto the stdin descriptor |
165 |
|
|
* and use a no-op fseek to sync the stream. This is a Hail |
166 |
|
|
* Mary situation. It seems to work. |
167 |
|
|
*/ |
168 |
|
20 |
close(pipes[1]); |
169 |
✓✗ |
40 |
clearerr(stdin); |
170 |
✓✗✗✓
|
60 |
if (dup2(pipes[0], fileno(stdin)) == -1) |
171 |
|
|
flexfatal(_("dup2(pipes[0],0)")); |
172 |
|
20 |
close(pipes[0]); |
173 |
|
20 |
fseek(stdin, 0, SEEK_CUR); |
174 |
|
|
|
175 |
|
|
/* run as a filter, either internally or by exec */ |
176 |
✓✗ |
20 |
if (chain->filter_func) { |
177 |
✗✓ |
10 |
if (chain->filter_func(chain) == -1) |
178 |
|
|
flexfatal(_("filter_func failed")); |
179 |
|
|
exit(0); |
180 |
|
|
} else { |
181 |
|
|
execvp(chain->argv[0], |
182 |
|
|
(char **const) (chain->argv)); |
183 |
|
|
lerrsf_fatal(_("exec of %s failed"), |
184 |
|
|
chain->argv[0]); |
185 |
|
|
} |
186 |
|
|
|
187 |
|
|
exit(1); |
188 |
|
|
} |
189 |
|
|
/* Parent */ |
190 |
|
50 |
close(pipes[0]); |
191 |
✓✗✗✓
|
150 |
if (dup2(pipes[1], fileno(stdout)) == -1) |
192 |
|
|
flexfatal(_("dup2(pipes[1],1)")); |
193 |
|
50 |
close(pipes[1]); |
194 |
|
50 |
fseek(stdout, 0, SEEK_CUR); |
195 |
|
|
|
196 |
|
50 |
return true; |
197 |
|
80 |
} |
198 |
|
|
|
199 |
|
|
/** Truncate the chain to max_len number of filters. |
200 |
|
|
* @param chain the current chain. |
201 |
|
|
* @param max_len the maximum length of the chain. |
202 |
|
|
* @return the resulting length of the chain. |
203 |
|
|
*/ |
204 |
|
|
int |
205 |
|
|
filter_truncate(struct filter * chain, int max_len) |
206 |
|
|
{ |
207 |
|
|
int len = 1; |
208 |
|
|
|
209 |
✗✓ |
60 |
if (!chain) |
210 |
|
|
return 0; |
211 |
|
|
|
212 |
✓✓✓✓
|
300 |
while (chain->next && len < max_len) { |
213 |
|
60 |
chain = chain->next; |
214 |
|
60 |
++len; |
215 |
|
|
} |
216 |
|
|
|
217 |
|
30 |
chain->next = NULL; |
218 |
|
30 |
return len; |
219 |
|
30 |
} |
220 |
|
|
|
221 |
|
|
/** Splits the chain in order to write to a header file. |
222 |
|
|
* Similar in spirit to the 'tee' program. |
223 |
|
|
* The header file name is in extra. |
224 |
|
|
* @return 0 (zero) on success, and -1 on failure. |
225 |
|
|
*/ |
226 |
|
|
int |
227 |
|
|
filter_tee_header(struct filter * chain) |
228 |
|
|
{ |
229 |
|
|
/* |
230 |
|
|
* This function reads from stdin and writes to both the C file and |
231 |
|
|
* the header file at the same time. |
232 |
|
|
*/ |
233 |
|
|
|
234 |
|
|
const int readsz = 512; |
235 |
|
|
char *buf; |
236 |
|
|
int to_cfd = -1; |
237 |
|
|
FILE *to_c = NULL, *to_h = NULL; |
238 |
|
|
bool write_header; |
239 |
|
|
|
240 |
|
20 |
write_header = (chain->extra != NULL); |
241 |
|
|
|
242 |
|
|
/* |
243 |
|
|
* Store a copy of the stdout pipe, which is already piped to C file |
244 |
|
|
* through the running chain. Then create a new pipe to the H file as |
245 |
|
|
* stdout, and fork the rest of the chain again. |
246 |
|
|
*/ |
247 |
|
|
|
248 |
✗✓ |
10 |
if ((to_cfd = dup(1)) == -1) |
249 |
|
|
flexfatal(_("dup(1) failed")); |
250 |
|
10 |
to_c = fdopen(to_cfd, "w"); |
251 |
|
|
|
252 |
✗✓ |
10 |
if (write_header) { |
253 |
|
|
if (freopen((char *) chain->extra, "w", stdout) == NULL) |
254 |
|
|
flexfatal(_("freopen(headerfilename) failed")); |
255 |
|
|
|
256 |
|
|
filter_apply_chain(chain->next); |
257 |
|
|
to_h = stdout; |
258 |
|
|
} |
259 |
|
|
/* |
260 |
|
|
* Now to_c is a pipe to the C branch, and to_h is a pipe to the H |
261 |
|
|
* branch. |
262 |
|
|
*/ |
263 |
|
|
|
264 |
✗✓ |
10 |
if (write_header) { |
265 |
|
|
fputs(check_4_gnu_m4, to_h); |
266 |
|
|
fputs("m4_changecom`'m4_dnl\n", to_h); |
267 |
|
|
fputs("m4_changequote`'m4_dnl\n", to_h); |
268 |
|
|
fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_h); |
269 |
|
|
fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_h); |
270 |
|
|
fputs("m4_define( [[M4_YY_IN_HEADER]],[[]])m4_dnl\n", |
271 |
|
|
to_h); |
272 |
|
|
fprintf(to_h, "#ifndef %sHEADER_H\n", prefix); |
273 |
|
|
fprintf(to_h, "#define %sHEADER_H 1\n", prefix); |
274 |
|
|
fprintf(to_h, "#define %sIN_HEADER 1\n\n", prefix); |
275 |
|
|
fprintf(to_h, |
276 |
|
|
"m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", |
277 |
|
|
headerfilename ? headerfilename : "<stdout>"); |
278 |
|
|
|
279 |
|
|
} |
280 |
|
10 |
fputs(check_4_gnu_m4, to_c); |
281 |
|
10 |
fputs("m4_changecom`'m4_dnl\n", to_c); |
282 |
|
10 |
fputs("m4_changequote`'m4_dnl\n", to_c); |
283 |
|
10 |
fputs("m4_changequote([[,]])[[]]m4_dnl\n", to_c); |
284 |
|
10 |
fputs("m4_define([[M4_YY_NOOP]])[[]]m4_dnl\n", to_c); |
285 |
|
10 |
fprintf(to_c, "m4_define( [[M4_YY_OUTFILE_NAME]],[[%s]])m4_dnl\n", |
286 |
|
10 |
outfilename ? outfilename : "<stdout>"); |
287 |
|
|
|
288 |
|
10 |
buf = malloc(readsz); |
289 |
✗✓ |
10 |
if (!buf) |
290 |
|
|
flexerror(_("malloc failed in filter_tee_header")); |
291 |
✓✓ |
36943 |
while (fgets(buf, readsz, stdin)) { |
292 |
|
36933 |
fputs(buf, to_c); |
293 |
✓✗ |
36933 |
if (write_header) |
294 |
|
|
fputs(buf, to_h); |
295 |
|
|
} |
296 |
|
|
|
297 |
✗✓ |
10 |
if (write_header) { |
298 |
|
|
fprintf(to_h, "\n"); |
299 |
|
|
|
300 |
|
|
/* |
301 |
|
|
* write a fake line number. It will get fixed by the linedir |
302 |
|
|
* filter. |
303 |
|
|
*/ |
304 |
|
|
fprintf(to_h, "#line 4000 \"M4_YY_OUTFILE_NAME\"\n"); |
305 |
|
|
|
306 |
|
|
fprintf(to_h, "#undef %sIN_HEADER\n", prefix); |
307 |
|
|
fprintf(to_h, "#endif /* %sHEADER_H */\n", prefix); |
308 |
|
|
fputs("m4_undefine( [[M4_YY_IN_HEADER]])m4_dnl\n", to_h); |
309 |
|
|
|
310 |
|
|
fflush(to_h); |
311 |
|
|
if (ferror(to_h)) |
312 |
|
|
lerrsf(_("error writing output file %s"), |
313 |
|
|
(char *) chain->extra); |
314 |
|
|
|
315 |
|
|
else if (fclose(to_h)) |
316 |
|
|
lerrsf(_("error closing output file %s"), |
317 |
|
|
(char *) chain->extra); |
318 |
|
|
} |
319 |
|
10 |
fflush(to_c); |
320 |
✓✗✗✓ ✗✗ |
20 |
if (ferror(to_c)) |
321 |
|
|
lerrsf(_("error writing output file %s"), |
322 |
|
|
outfilename ? outfilename : "<stdout>"); |
323 |
|
|
|
324 |
✗✓ |
10 |
else if (fclose(to_c)) |
325 |
|
|
lerrsf(_("error closing output file %s"), |
326 |
|
|
outfilename ? outfilename : "<stdout>"); |
327 |
|
|
|
328 |
✗✓ |
10 |
while (wait(0) > 0); |
329 |
|
|
|
330 |
|
|
exit(0); |
331 |
|
|
return 0; |
332 |
|
|
} |
333 |
|
|
|
334 |
|
|
/** Adjust the line numbers in the #line directives of the generated scanner. |
335 |
|
|
* After the m4 expansion, the line numbers are incorrect since the m4 macros |
336 |
|
|
* can add or remove lines. This only adjusts line numbers for generated code, |
337 |
|
|
* not user code. This also happens to be a good place to squeeze multiple |
338 |
|
|
* blank lines into a single blank line. |
339 |
|
|
*/ |
340 |
|
|
int |
341 |
|
|
filter_fix_linedirs(struct filter * chain) |
342 |
|
|
{ |
343 |
|
|
char *buf; |
344 |
|
|
const int readsz = 512; |
345 |
|
|
int lineno = 1; |
346 |
|
|
bool in_gen = true; /* in generated code */ |
347 |
|
|
bool last_was_blank = false; |
348 |
|
|
|
349 |
✗✓ |
20 |
if (!chain) |
350 |
|
|
return 0; |
351 |
|
|
|
352 |
|
10 |
buf = malloc(readsz); |
353 |
✗✓ |
10 |
if (!buf) |
354 |
|
|
flexerror(_("malloc failed in filter_fix_linedirs")); |
355 |
|
|
|
356 |
✓✓ |
32432 |
while (fgets(buf, readsz, stdin)) { |
357 |
|
|
|
358 |
|
32422 |
regmatch_t m[10]; |
359 |
|
|
|
360 |
|
|
/* Check for #line directive. */ |
361 |
✓✓ |
35125 |
if (buf[0] == '#' |
362 |
✓✓ |
35125 |
&& regexec(®ex_linedir, buf, 3, m, 0) == 0) { |
363 |
|
|
|
364 |
|
|
int num; |
365 |
|
|
char *fname; |
366 |
|
|
|
367 |
|
|
/* extract the line number and filename */ |
368 |
|
658 |
num = regmatch_strtol(&m[1], buf, NULL, 0); |
369 |
|
658 |
fname = regmatch_dup(&m[2], buf); |
370 |
|
|
|
371 |
✗✓ |
1272 |
if (strcmp(fname, |
372 |
✓✓ |
1316 |
outfilename ? outfilename : "<stdout>") == 0 || |
373 |
|
614 |
strcmp(fname, headerfilename ? headerfilename : |
374 |
|
614 |
"<stdout>") == 0) { |
375 |
|
|
|
376 |
|
|
char *s1, *s2; |
377 |
|
44 |
char filename[MAXLINE]; |
378 |
|
|
|
379 |
|
|
s1 = fname; |
380 |
|
44 |
s2 = filename; |
381 |
|
|
|
382 |
✓✗✓✓
|
1158 |
while ((s2 - filename) < (MAXLINE - 1) && *s1) { |
383 |
|
|
/* Escape the backslash */ |
384 |
✗✓ |
342 |
if (*s1 == '\\') |
385 |
|
|
*s2++ = '\\'; |
386 |
|
|
/* Escape the double quote */ |
387 |
✗✓ |
342 |
if (*s1 == '\"') |
388 |
|
|
*s2++ = '\\'; |
389 |
|
|
/* Copy the character as usual */ |
390 |
|
342 |
*s2++ = *s1++; |
391 |
|
|
} |
392 |
|
|
|
393 |
|
44 |
*s2 = '\0'; |
394 |
|
|
|
395 |
|
|
/* Adjust the line directives. */ |
396 |
|
|
in_gen = true; |
397 |
|
44 |
snprintf(buf, readsz, "#line %d \"%s\"\n", |
398 |
|
44 |
lineno + 1, filename); |
399 |
|
44 |
} else { |
400 |
|
|
/* |
401 |
|
|
* it's a #line directive for code we didn't |
402 |
|
|
* write |
403 |
|
|
*/ |
404 |
|
|
in_gen = false; |
405 |
|
|
} |
406 |
|
|
|
407 |
|
658 |
free(fname); |
408 |
|
|
last_was_blank = false; |
409 |
|
658 |
} |
410 |
|
|
/* squeeze blank lines from generated code */ |
411 |
✓✓✓✓
|
58358 |
else if (in_gen && |
412 |
|
26594 |
regexec(®ex_blank_line, buf, 0, NULL, 0) == 0) { |
413 |
✓✓ |
7599 |
if (last_was_blank) |
414 |
|
3811 |
continue; |
415 |
|
|
else |
416 |
|
|
last_was_blank = true; |
417 |
|
3788 |
} else { |
418 |
|
|
/* it's a line of normal, non-empty code. */ |
419 |
|
|
last_was_blank = false; |
420 |
|
|
} |
421 |
|
|
|
422 |
|
28611 |
fputs(buf, stdout); |
423 |
|
28611 |
lineno++; |
424 |
|
61033 |
} |
425 |
|
10 |
fflush(stdout); |
426 |
✓✗✗✓ ✗✗ |
20 |
if (ferror(stdout)) |
427 |
|
|
lerrsf(_("error writing output file %s"), |
428 |
|
|
outfilename ? outfilename : "<stdout>"); |
429 |
|
|
|
430 |
✗✓ |
10 |
else if (fclose(stdout)) |
431 |
|
|
lerrsf(_("error closing output file %s"), |
432 |
|
|
outfilename ? outfilename : "<stdout>"); |
433 |
|
|
|
434 |
|
10 |
return 0; |
435 |
|
10 |
} |