| GCC Code Coverage Report | |||||||||||||||||||||
        
  | 
    |||||||||||||||||||||
| Line | Branch | Exec | Source | 
1  | 
    /* $OpenBSD: fmt.c,v 1.38 2017/02/20 15:48:00 schwarze Exp $ */  | 
    ||
2  | 
    /*  | 
    ||
3  | 
    * This file is a derived work.  | 
    ||
4  | 
    * The changes are covered by the following Copyright and license:  | 
    ||
5  | 
    *  | 
    ||
6  | 
    * Copyright (c) 2015, 2016 Ingo Schwarze <schwarze@openbsd.org>  | 
    ||
7  | 
    * Copyright (c) 2000 Paul Janzen <pjanzen@foatdi.net>  | 
    ||
8  | 
    *  | 
    ||
9  | 
    * Permission to use, copy, modify, and distribute this software for any  | 
    ||
10  | 
    * purpose with or without fee is hereby granted, provided that the above  | 
    ||
11  | 
    * copyright notice and this permission notice appear in all copies.  | 
    ||
12  | 
    *  | 
    ||
13  | 
    * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES  | 
    ||
14  | 
    * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF  | 
    ||
15  | 
    * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR  | 
    ||
16  | 
    * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES  | 
    ||
17  | 
    * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN  | 
    ||
18  | 
    * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF  | 
    ||
19  | 
    * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  | 
    ||
20  | 
    *  | 
    ||
21  | 
    *  | 
    ||
22  | 
    * The unchanged parts are covered by the following Copyright and license:  | 
    ||
23  | 
    *  | 
    ||
24  | 
    * Copyright (c) 1997 Gareth McCaughan. All rights reserved.  | 
    ||
25  | 
    *  | 
    ||
26  | 
    * Redistribution and use of this code, in source or binary forms,  | 
    ||
27  | 
    * with or without modification, are permitted subject to the following  | 
    ||
28  | 
    * conditions:  | 
    ||
29  | 
    *  | 
    ||
30  | 
    * - Redistribution of source code must retain the above copyright  | 
    ||
31  | 
    * notice, this list of conditions and the following disclaimer.  | 
    ||
32  | 
    *  | 
    ||
33  | 
    * - If you distribute modified source code it must also include  | 
    ||
34  | 
    * a notice saying that it has been modified, and giving a brief  | 
    ||
35  | 
    * description of what changes have been made.  | 
    ||
36  | 
    *  | 
    ||
37  | 
    * Disclaimer: I am not responsible for the results of using this code.  | 
    ||
38  | 
    * If it formats your hard disc, sends obscene messages to  | 
    ||
39  | 
    * your boss and kills your children then that's your problem  | 
    ||
40  | 
    * not mine. I give absolutely no warranty of any sort as to  | 
    ||
41  | 
    * what the program will do, and absolutely refuse to be held  | 
    ||
42  | 
    * liable for any consequences of your using it.  | 
    ||
43  | 
    * Thank you. Have a nice day.  | 
    ||
44  | 
    *  | 
    ||
45  | 
    *  | 
    ||
46  | 
    * Brief overview of the changes made by OpenBSD:  | 
    ||
47  | 
    * Added UTF-8 support (2016).  | 
    ||
48  | 
    * Added pledge(2) support (2015).  | 
    ||
49  | 
    * ANSI function syntax and KNF (2004).  | 
    ||
50  | 
    * Added -w option (2000).  | 
    ||
51  | 
    * Some minor changes can be seen in the public OpenBSD CVS repository.  | 
    ||
52  | 
    */  | 
    ||
53  | 
    |||
54  | 
    /* Sensible version of fmt  | 
    ||
55  | 
    *  | 
    ||
56  | 
    * Syntax: fmt [ options ] [ goal [ max ] ] [ filename ... ]  | 
    ||
57  | 
    *  | 
    ||
58  | 
    * Since the documentation for the original fmt is so poor, here  | 
    ||
59  | 
    * is an accurate description of what this one does. It's usually  | 
    ||
60  | 
    * the same. The *mechanism* used may differ from that suggested  | 
    ||
61  | 
    * here. Note that we are *not* entirely compatible with fmt,  | 
    ||
62  | 
    * because fmt gets so many things wrong.  | 
    ||
63  | 
    *  | 
    ||
64  | 
    * 1. Tabs are expanded, assuming 8-space tab stops.  | 
    ||
65  | 
    * If the `-t <n>' option is given, we assume <n>-space  | 
    ||
66  | 
    * tab stops instead.  | 
    ||
67  | 
    * Trailing blanks are removed from all lines.  | 
    ||
68  | 
    * x\b == nothing, for any x other than \b.  | 
    ||
69  | 
    * Other control characters are simply stripped. This  | 
    ||
70  | 
    * includes \r.  | 
    ||
71  | 
    * 2. Each line is split into leading whitespace and  | 
    ||
72  | 
    * everything else. Maximal consecutive sequences of  | 
    ||
73  | 
    * lines with the same leading whitespace are considered  | 
    ||
74  | 
    * to form paragraphs, except that a blank line is always  | 
    ||
75  | 
    * a paragraph to itself.  | 
    ||
76  | 
    * If the `-p' option is given then the first line of a  | 
    ||
77  | 
    * paragraph is permitted to have indentation different  | 
    ||
78  | 
    * from that of the other lines.  | 
    ||
79  | 
    * If the `-m' option is given then a line that looks  | 
    ||
80  | 
    * like a mail message header, if it is not immediately  | 
    ||
81  | 
    * preceded by a non-blank non-message-header line, is  | 
    ||
82  | 
    * taken to start a new paragraph, which also contains  | 
    ||
83  | 
    * any subsequent lines with non-empty leading whitespace.  | 
    ||
84  | 
    * Unless the `-n' option is given, lines beginning with  | 
    ||
85  | 
    * a . (dot) are not formatted.  | 
    ||
86  | 
    * 3. The "everything else" is split into words; a word  | 
    ||
87  | 
    * includes its trailing whitespace, and a word at the  | 
    ||
88  | 
    * end of a line is deemed to be followed by a single  | 
    ||
89  | 
    * space, or two spaces if it ends with a sentence-end  | 
    ||
90  | 
    * character. (See the `-d' option for how to change that.)  | 
    ||
91  | 
    * If the `-s' option has been given, then a word's trailing  | 
    ||
92  | 
    * whitespace is replaced by what it would have had if it  | 
    ||
93  | 
    * had occurred at end of line.  | 
    ||
94  | 
    * 4. Each paragraph is sent to standard output as follows.  | 
    ||
95  | 
    * We output the leading whitespace, and then enough words  | 
    ||
96  | 
    * to make the line length as near as possible to the goal  | 
    ||
97  | 
    * without exceeding the maximum. (If a single word would  | 
    ||
98  | 
    * exceed the maximum, we output that anyway.) Of course  | 
    ||
99  | 
    * the trailing whitespace of the last word is ignored.  | 
    ||
100  | 
    * We then emit a newline and start again if there are any  | 
    ||
101  | 
    * words left.  | 
    ||
102  | 
    * Note that for a blank line this translates as "We emit  | 
    ||
103  | 
    * a newline".  | 
    ||
104  | 
    * If the `-l <n>' option is given, then leading whitespace  | 
    ||
105  | 
    * is modified slightly: <n> spaces are replaced by a tab.  | 
    ||
106  | 
    * Indented paragraphs (see above under `-p') make matters  | 
    ||
107  | 
    * more complicated than this suggests. Actually every paragraph  | 
    ||
108  | 
    * has two `leading whitespace' values; the value for the first  | 
    ||
109  | 
    * line, and the value for the most recent line. (While processing  | 
    ||
110  | 
    * the first line, the two are equal. When `-p' has not been  | 
    ||
111  | 
    * given, they are always equal.) The leading whitespace  | 
    ||
112  | 
    * actually output is that of the first line (for the first  | 
    ||
113  | 
    * line of *output*) or that of the most recent line (for  | 
    ||
114  | 
    * all other lines of output).  | 
    ||
115  | 
    * When `-m' has been given, message header paragraphs are  | 
    ||
116  | 
    * taken as having first-leading-whitespace empty and  | 
    ||
117  | 
    * subsequent-leading-whitespace two spaces.  | 
    ||
118  | 
    *  | 
    ||
119  | 
    * Multiple input files are formatted one at a time, so that a file  | 
    ||
120  | 
    * never ends in the middle of a line.  | 
    ||
121  | 
    *  | 
    ||
122  | 
    * There's an alternative mode of operation, invoked by giving  | 
    ||
123  | 
    * the `-c' option. In that case we just center every line,  | 
    ||
124  | 
    * and most of the other options are ignored. This should  | 
    ||
125  | 
    * really be in a separate program, but we must stay compatible  | 
    ||
126  | 
    * with old `fmt'.  | 
    ||
127  | 
    *  | 
    ||
128  | 
    * QUERY: Should `-m' also try to do the right thing with quoted text?  | 
    ||
129  | 
    * QUERY: `-b' to treat backslashed whitespace as old `fmt' does?  | 
    ||
130  | 
    * QUERY: Option meaning `never join lines'?  | 
    ||
131  | 
    * QUERY: Option meaning `split in mid-word to avoid overlong lines'?  | 
    ||
132  | 
    * (Those last two might not be useful, since we have `fold'.)  | 
    ||
133  | 
    *  | 
    ||
134  | 
    * Differences from old `fmt':  | 
    ||
135  | 
    *  | 
    ||
136  | 
    * - We have many more options. Options that aren't understood  | 
    ||
137  | 
    * generate a lengthy usage message, rather than being  | 
    ||
138  | 
    * treated as filenames.  | 
    ||
139  | 
    * - Even with `-m', our handling of message headers is  | 
    ||
140  | 
    * significantly different. (And much better.)  | 
    ||
141  | 
    * - We don't treat `\ ' as non-word-breaking.  | 
    ||
142  | 
    * - Downward changes of indentation start new paragraphs  | 
    ||
143  | 
    * for us, as well as upward. (I think old `fmt' behaves  | 
    ||
144  | 
    * in the way it does in order to allow indented paragraphs,  | 
    ||
145  | 
    * but this is a broken way of making indented paragraphs  | 
    ||
146  | 
    * behave right.)  | 
    ||
147  | 
    * - Given the choice of going over or under |goal_length|  | 
    ||
148  | 
    * by the same amount, we go over; old `fmt' goes under.  | 
    ||
149  | 
    * - We treat `?' as ending a sentence, and not `:'. Old `fmt'  | 
    ||
150  | 
    * does the reverse.  | 
    ||
151  | 
    * - We return approved return codes. Old `fmt' returns  | 
    ||
152  | 
    * 1 for some errors, and *the number of unopenable files*  | 
    ||
153  | 
    * when that was all that went wrong.  | 
    ||
154  | 
    * - We have fewer crashes and more helpful error messages.  | 
    ||
155  | 
    * - We don't turn spaces into tabs at starts of lines unless  | 
    ||
156  | 
    * specifically requested.  | 
    ||
157  | 
    * - New `fmt' is somewhat smaller and slightly faster than  | 
    ||
158  | 
    * old `fmt'.  | 
    ||
159  | 
    *  | 
    ||
160  | 
    * Bugs:  | 
    ||
161  | 
    *  | 
    ||
162  | 
    * None known. There probably are some, though.  | 
    ||
163  | 
    *  | 
    ||
164  | 
    * Portability:  | 
    ||
165  | 
    *  | 
    ||
166  | 
    * I believe this code to be pretty portable. It does require  | 
    ||
167  | 
    * that you have `getopt'. If you need to include "getopt.h"  | 
    ||
168  | 
    * for this (e.g., if your system didn't come with `getopt'  | 
    ||
169  | 
    * and you installed it yourself) then you should arrange for  | 
    ||
170  | 
    * NEED_getopt_h to be #defined.  | 
    ||
171  | 
    *  | 
    ||
172  | 
    * Everything here should work OK even on nasty 16-bit  | 
    ||
173  | 
    * machines and nice 64-bit ones. However, it's only really  | 
    ||
174  | 
    * been tested on my FreeBSD machine. Your mileage may vary.  | 
    ||
175  | 
    */  | 
    ||
176  | 
    |||
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
186  | 
    |||
187  | 
    /* Something that, we hope, will never be a genuine line length,  | 
    ||
188  | 
    * indentation etc.  | 
    ||
189  | 
    */  | 
    ||
190  | 
    #define SILLY ((size_t)-1)  | 
    ||
191  | 
    |||
/* Parse |s| as a strictly positive integer.
 *
 * I used to use |strtoul| for this, but (1) not all systems have it
 * and (2) it's probably better to use |strtol| to detect negative
 * numbers better.  The base is auto-detected by |strtol| (base 0).
 *
 * A string that is not a number at all (empty, or followed by trailing
 * junk) is a "non-number": if |fussyP==0| we return 0 for it so that the
 * caller can treat the argument as something else; otherwise we exit
 * with |err_mess|.
 * A string that is a number but is zero, negative, or out of range for
 * |long| is a "bad number" and always makes us exit with |err_mess|.
 */
static size_t
get_positive(const char *s, const char *err_mess, int fussyP)
{
	char *t;
	long result;

	/* |strtol| reports overflow only through |errno|, so clear it first. */
	errno = 0;
	result = strtol(s, &t, 0);

	/* Nothing converted, or something left over: not a number. */
	if (t == s || *t != '\0') {
		if (fussyP)
			errx(1, "%s", err_mess);
		return 0;
	}

	/* A number, but not one we can accept. */
	if (errno == ERANGE || result <= 0)
		errx(1, "%s", err_mess);

	return (size_t) result;
}
217  | 
    |||
/* Global variables */

/* Settings, normally changed only while |main| parses the command line. */
static int centerP = 0;			/* Try to center lines? */
static size_t goal_length = 0;		/* Target length for output lines */
static size_t max_length = 0;		/* Maximum length for output lines */
static int coalesce_spaces_P = 0;	/* Coalesce multiple whitespace -> ' ' ? */
static int allow_indented_paragraphs = 0; /* Can first line have diff. ind.? */
static int tab_width = 8;		/* Number of spaces per tab stop */
static size_t output_tab_width = 0;	/* Ditto, when squashing leading spaces */
static const char *sentence_enders = ".?!"; /* Double-space after these */
static int grok_mail_headers = 0;	/* treat embedded mail headers magically? */
static int format_troff = 0;		/* Format troff? */

/* Running state of the output machinery. */
static int n_errors = 0;		/* Number of failed files. */
static size_t x;			/* Horizontal position in output line */
static size_t x0;			/* Ditto, ignoring leading whitespace */
static size_t pending_spaces;		/* Spaces to add before next word */
static int output_in_paragraph = 0;	/* Any of current para written out yet? */

/* Prototypes */

static void process_named_file(const char *name);
static void process_stream(FILE *stream, const char *name);
static size_t indent_length(const char *line);
static int might_be_header(const char *line);
static void new_paragraph(size_t indent);
static void output_word(size_t indent0, size_t indent1, const char *word,
    int length, int width, int spaces);
static void output_indent(size_t n_spaces);
static void center_stream(FILE *stream, const char *name);
static char *get_line(FILE *stream);
static void *xrealloc(void *ptr, size_t nbytes);
void usage(void);

/* Increment the error counter |x|, saturating at 127.
 * Note that |x| is evaluated twice, so pass a plain variable.
 */
#define ERRS(x) ((x) >= 127 ? 127 : ++(x))
252  | 
    |||
253  | 
    /* Here is perhaps the right place to mention that this code is  | 
    ||
254  | 
    * all in top-down order. Hence, |main| comes first.  | 
    ||
255  | 
    */  | 
    ||
256  | 
    int  | 
    ||
257  | 
    main(int argc, char *argv[])  | 
    ||
258  | 
    { | 
    ||
259  | 
    int ch; /* used for |getopt| processing */  | 
    ||
260  | 
    |||
261  | 
    1980  | 
    (void)setlocale(LC_CTYPE, "");  | 
    |
262  | 
    |||
263  | 
    ✗✓ | 990  | 
    	if (pledge("stdio rpath flock cpath wpath", NULL) == -1) | 
    
264  | 
    err(1, "pledge");  | 
    ||
265  | 
    |||
266  | 
    /* 1. Grok parameters. */  | 
    ||
267  | 
    ✓✓ | 1386  | 
    	while ((ch = getopt(argc, argv, "0123456789cd:hl:mnpst:w:")) != -1) { | 
    
268  | 
    ✓✗✓✓ ✓✓✓✗ ✗✗✗✗ ✗✗✗✗ ✗✗✗✗  | 
    396  | 
    		switch (ch) { | 
    
269  | 
    case 'c':  | 
    ||
270  | 
    72  | 
    centerP = 1;  | 
    |
271  | 
    72  | 
    break;  | 
    |
272  | 
    case 'd':  | 
    ||
273  | 
    sentence_enders = optarg;  | 
    ||
274  | 
    break;  | 
    ||
275  | 
    case 'l':  | 
    ||
276  | 
    output_tab_width  | 
    ||
277  | 
    18  | 
    = get_positive(optarg, "output tab width must be positive", 1);  | 
    |
278  | 
    18  | 
    break;  | 
    |
279  | 
    case 'm':  | 
    ||
280  | 
    144  | 
    grok_mail_headers = 1;  | 
    |
281  | 
    144  | 
    break;  | 
    |
282  | 
    case 'n':  | 
    ||
283  | 
    72  | 
    format_troff = 1;  | 
    |
284  | 
    72  | 
    break;  | 
    |
285  | 
    case 'p':  | 
    ||
286  | 
    54  | 
    allow_indented_paragraphs = 1;  | 
    |
287  | 
    54  | 
    break;  | 
    |
288  | 
    case 's':  | 
    ||
289  | 
    36  | 
    coalesce_spaces_P = 1;  | 
    |
290  | 
    36  | 
    break;  | 
    |
291  | 
    case 't':  | 
    ||
292  | 
    tab_width = get_positive(optarg, "tab width must be positive", 1);  | 
    ||
293  | 
    break;  | 
    ||
294  | 
    case 'w':  | 
    ||
295  | 
    goal_length = get_positive(optarg, "width must be positive", 1);  | 
    ||
296  | 
    max_length = goal_length;  | 
    ||
297  | 
    break;  | 
    ||
298  | 
    case '0': case '1': case '2': case '3': case '4': case '5':  | 
    ||
299  | 
    case '6': case '7': case '8': case '9':  | 
    ||
300  | 
    /* XXX this is not a stylistically approved use of getopt() */  | 
    ||
301  | 
    			if (goal_length == 0) { | 
    ||
302  | 
    char *p;  | 
    ||
303  | 
    |||
304  | 
    p = argv[optind - 1];  | 
    ||
305  | 
    if (p[0] == '-' && p[1] == ch && !p[2])  | 
    ||
306  | 
    goal_length = get_positive(++p, "width must be nonzero", 1);  | 
    ||
307  | 
    else  | 
    ||
308  | 
    goal_length = get_positive(argv[optind]+1,  | 
    ||
309  | 
    "width must be nonzero", 1);  | 
    ||
310  | 
    max_length = goal_length;  | 
    ||
311  | 
    }  | 
    ||
312  | 
    break;  | 
    ||
313  | 
    case 'h':  | 
    ||
314  | 
    default:  | 
    ||
315  | 
    usage();  | 
    ||
316  | 
    /* NOT REACHED */  | 
    ||
317  | 
    }  | 
    ||
318  | 
    }  | 
    ||
319  | 
    |||
320  | 
    990  | 
    argc -= optind;  | 
    |
321  | 
    990  | 
    argv += optind;  | 
    |
322  | 
    |||
323  | 
    /* [ goal [ maximum ] ] */  | 
    ||
324  | 
    ✓✓✓✗ | 
    1296  | 
    if (argc > 0 && goal_length == 0 &&  | 
    
325  | 
    306  | 
    	    (goal_length = get_positive(*argv,"goal length must be positive", 0)) != 0) { | 
    |
326  | 
    306  | 
    --argc;  | 
    |
327  | 
    306  | 
    ++argv;  | 
    |
328  | 
    ✓✓✓✗ | 
    522  | 
    		if (argc > 0 && (max_length = get_positive(*argv,"max length must be positive", 0)) != 0) { | 
    
329  | 
    216  | 
    --argc;  | 
    |
330  | 
    216  | 
    ++argv;  | 
    |
331  | 
    ✗✓ | 216  | 
    if (max_length < goal_length)  | 
    
332  | 
    errx(1, "max length must be >= goal length");  | 
    ||
333  | 
    }  | 
    ||
334  | 
    }  | 
    ||
335  | 
    |||
336  | 
    ✓✓ | 990  | 
    if (goal_length == 0)  | 
    
337  | 
    684  | 
    goal_length = 65;  | 
    |
338  | 
    ✓✓ | 990  | 
    if (max_length == 0)  | 
    
339  | 
    774  | 
    max_length = goal_length+10;  | 
    |
340  | 
    |||
341  | 
    /* 2. Process files. */  | 
    ||
342  | 
    |||
343  | 
    ✗✓ | 990  | 
    	if (argc > 0) { | 
    
344  | 
    while (argc-- > 0)  | 
    ||
345  | 
    process_named_file(*argv++);  | 
    ||
346  | 
    	} else { | 
    ||
347  | 
    ✗✓ | 990  | 
    		if (pledge("stdio flock rpath cpath wpath", NULL) == -1) | 
    
348  | 
    err(1, "pledge");  | 
    ||
349  | 
    990  | 
    process_stream(stdin, "standard input");  | 
    |
350  | 
    }  | 
    ||
351  | 
    |||
352  | 
    /* We're done. */  | 
    ||
353  | 
    990  | 
    return n_errors;  | 
    |
354  | 
    |||
355  | 
    }  | 
    ||
356  | 
    |||
357  | 
    /* Process a single file, given its name.  | 
    ||
358  | 
    */  | 
    ||
359  | 
    static void  | 
    ||
360  | 
    process_named_file(const char *name)  | 
    ||
361  | 
    { | 
    ||
362  | 
    FILE *f;  | 
    ||
363  | 
    |||
364  | 
    	if ((f = fopen(name, "r")) == NULL) { | 
    ||
365  | 
    		warn("%s", name); | 
    ||
366  | 
    ERRS(n_errors);  | 
    ||
367  | 
    	} else { | 
    ||
368  | 
    process_stream(f, name);  | 
    ||
369  | 
    fclose(f);  | 
    ||
370  | 
    }  | 
    ||
371  | 
    }  | 
    ||
372  | 
    |||
/* Types of mail header continuation lines:
 * how |process_stream| classifies each input line for the `-m' logic.
 * The numeric order matters: the code tests |> hdr_NonHeader| to ask
 * "was it a header or a continuation?".
 */
typedef enum {
	hdr_ParagraphStart = -1,	/* no previous line yet, or it was blank */
	hdr_NonHeader = 0,		/* ordinary text */
	hdr_Header = 1,			/* unindented line accepted by |might_be_header| */
	hdr_Continuation = 2		/* indented line following a header or continuation */
} HdrType;
381  | 
    |||
382  | 
    /* Process a stream. This is where the real work happens,  | 
    ||
383  | 
    * except that centering is handled separately.  | 
    ||
384  | 
    */  | 
    ||
385  | 
    static void  | 
    ||
386  | 
    process_stream(FILE *stream, const char *name)  | 
    ||
387  | 
    { | 
    ||
388  | 
    const char *wordp, *cp;  | 
    ||
389  | 
    1980  | 
    wchar_t wc;  | 
    |
390  | 
    size_t np;  | 
    ||
391  | 
    size_t last_indent = SILLY; /* how many spaces in last indent? */  | 
    ||
392  | 
    size_t para_line_number = 0; /* how many lines already read in this para? */  | 
    ||
393  | 
    size_t first_indent = SILLY; /* indentation of line 0 of paragraph */  | 
    ||
394  | 
    int wcl; /* number of bytes in wide character */  | 
    ||
395  | 
    int wcw; /* display width of wide character */  | 
    ||
396  | 
    int word_length; /* number of bytes in word */  | 
    ||
397  | 
    int word_width; /* display width of word */  | 
    ||
398  | 
    int space_width; /* display width of space after word */  | 
    ||
399  | 
    int line_width; /* display width of line */  | 
    ||
400  | 
    HdrType prev_header_type = hdr_ParagraphStart;  | 
    ||
401  | 
    HdrType header_type;  | 
    ||
402  | 
    |||
403  | 
    /* ^-- header_type of previous line; -1 at para start */  | 
    ||
404  | 
    const char *line;  | 
    ||
405  | 
    |||
406  | 
    ✓✓ | 990  | 
    	if (centerP) { | 
    
407  | 
    72  | 
    center_stream(stream, name);  | 
    |
408  | 
    72  | 
    return;  | 
    |
409  | 
    }  | 
    ||
410  | 
    |||
411  | 
    ✓✓ | 4302  | 
    	while ((line = get_line(stream)) != NULL) { | 
    
412  | 
    1764  | 
    np = indent_length(line);  | 
    |
413  | 
    header_type = hdr_NonHeader;  | 
    ||
414  | 
    ✓✓ | 1764  | 
    		if (grok_mail_headers && prev_header_type != hdr_NonHeader) { | 
    
415  | 
    ✓✓✓✓ | 
    450  | 
    if (np == 0 && might_be_header(line))  | 
    
416  | 
    126  | 
    header_type = hdr_Header;  | 
    |
417  | 
    ✓✓ | 144  | 
    else if (np > 0 && prev_header_type>hdr_NonHeader)  | 
    
418  | 
    90  | 
    header_type = hdr_Continuation;  | 
    |
419  | 
    }  | 
    ||
420  | 
    |||
421  | 
    /* We need a new paragraph if and only if:  | 
    ||
422  | 
    * this line is blank,  | 
    ||
423  | 
    * OR it's a troff request,  | 
    ||
424  | 
    * OR it's a mail header,  | 
    ||
425  | 
    * OR it's not a mail header AND the last line was one,  | 
    ||
426  | 
    * OR the indentation has changed  | 
    ||
427  | 
    * AND the line isn't a mail header continuation line  | 
    ||
428  | 
    * AND this isn't the second line of an indented paragraph.  | 
    ||
429  | 
    */  | 
    ||
430  | 
    ✓✓✓✓ | 
    5148  | 
    if (*line == '\0' || (*line == '.' && !format_troff) ||  | 
    
431  | 
    1692  | 
    header_type == hdr_Header ||  | 
    |
432  | 
    ✓✗ | 1494  | 
    (header_type == hdr_NonHeader && prev_header_type > hdr_NonHeader) ||  | 
    
433  | 
    ✓✓ | 1494  | 
    (np != last_indent && header_type != hdr_Continuation &&  | 
    
434  | 
    ✓✓ | 936  | 
    		    (!allow_indented_paragraphs || para_line_number != 1)) ) { | 
    
435  | 
    1152  | 
    new_paragraph(np);  | 
    |
436  | 
    para_line_number = 0;  | 
    ||
437  | 
    first_indent = np;  | 
    ||
438  | 
    last_indent = np;  | 
    ||
439  | 
    |||
440  | 
    /* nroff compatibility */  | 
    ||
441  | 
    ✓✓ | 1152  | 
    			if (*line == '.' && !format_troff) { | 
    
442  | 
    72  | 
    puts(line);  | 
    |
443  | 
    72  | 
    continue;  | 
    |
444  | 
    }  | 
    ||
445  | 
    ✓✓ | 1080  | 
    if (header_type == hdr_Header)  | 
    
446  | 
    126  | 
    last_indent = 2; /* for cont. lines */  | 
    |
447  | 
    ✓✓ | 1080  | 
    			if (*line == '\0') { | 
    
448  | 
    ✓✗ | 144  | 
    				putchar('\n'); | 
    
449  | 
    prev_header_type = hdr_ParagraphStart;  | 
    ||
450  | 
    72  | 
    continue;  | 
    |
451  | 
    			} else { | 
    ||
452  | 
    /* If this is an indented paragraph other than a mail header  | 
    ||
453  | 
    * continuation, set |last_indent|.  | 
    ||
454  | 
    */  | 
    ||
455  | 
    ✓✓ | 1008  | 
    if (np != last_indent && header_type != hdr_Continuation)  | 
    
456  | 
    126  | 
    last_indent = np;  | 
    |
457  | 
    }  | 
    ||
458  | 
    prev_header_type = header_type;  | 
    ||
459  | 
    1008  | 
    }  | 
    |
460  | 
    |||
461  | 
    1620  | 
    line_width = np;  | 
    |
462  | 
    ✓✓ | 7200  | 
    		for (wordp = line; *wordp != '\0'; wordp = cp) { | 
    
463  | 
    word_length = 0;  | 
    ||
464  | 
    word_width = space_width = 0;  | 
    ||
465  | 
    ✓✓ | 11880  | 
    			for (cp = wordp; *cp != '\0'; cp += wcl) { | 
    
466  | 
    4320  | 
    wcl = mbtowc(&wc, cp, MB_CUR_MAX);  | 
    |
467  | 
    ✗✓ | 4320  | 
    				if (wcl == -1) { | 
    
468  | 
    (void)mbtowc(NULL, NULL, MB_CUR_MAX);  | 
    ||
469  | 
    wc = L'?';  | 
    ||
470  | 
    wcl = 1;  | 
    ||
471  | 
    wcw = 1;  | 
    ||
472  | 
    ✓✓ | 4320  | 
    } else if (wc == L'\t')  | 
    
473  | 
    252  | 
    wcw = (line_width / tab_width + 1) *  | 
    |
474  | 
    126  | 
    tab_width - line_width;  | 
    |
475  | 
    4194  | 
    else if ((wcw = wcwidth(wc)) == -1)  | 
    |
476  | 
    wcw = 1;  | 
    ||
477  | 
    ✓✓ | 4320  | 
    				if (iswblank(wc) && wc != 0xa0) { | 
    
478  | 
    /* Skip whitespace at start of line. */  | 
    ||
479  | 
    ✓✓ | 1152  | 
    					if (word_length == 0) { | 
    
480  | 
    756  | 
    wordp += wcl;  | 
    |
481  | 
    756  | 
    continue;  | 
    |
482  | 
    }  | 
    ||
483  | 
    /* Count whitespace after word. */  | 
    ||
484  | 
    396  | 
    space_width += wcw;  | 
    |
485  | 
    396  | 
    				} else { | 
    |
486  | 
    /* Detect end of word. */  | 
    ||
487  | 
    ✓✓ | 3168  | 
    if (space_width > 0)  | 
    
488  | 
    break;  | 
    ||
489  | 
    /* Measure word. */  | 
    ||
490  | 
    2808  | 
    word_length += wcl;  | 
    |
491  | 
    2808  | 
    word_width += wcw;  | 
    |
492  | 
    }  | 
    ||
493  | 
    3204  | 
    line_width += wcw;  | 
    |
494  | 
    3204  | 
    }  | 
    |
495  | 
    |||
496  | 
    /* Send the word to the output machinery. */  | 
    ||
497  | 
    1980  | 
    output_word(first_indent, last_indent, wordp,  | 
    |
498  | 
    word_length, word_width, space_width);  | 
    ||
499  | 
    }  | 
    ||
500  | 
    1620  | 
    ++para_line_number;  | 
    |
501  | 
    }  | 
    ||
502  | 
    |||
503  | 
    918  | 
    new_paragraph(0);  | 
    |
504  | 
    ✓✗✗✓ ✗✗  | 
    1836  | 
    	if (ferror(stream)) { | 
    
505  | 
    		warn("%s", name); | 
    ||
506  | 
    ERRS(n_errors);  | 
    ||
507  | 
    }  | 
    ||
508  | 
    1908  | 
    }  | 
    |
509  | 
    |||
510  | 
    /* How long is the indent on this line?  | 
    ||
511  | 
    */  | 
    ||
512  | 
    static size_t  | 
    ||
513  | 
    indent_length(const char *line)  | 
    ||
514  | 
    { | 
    ||
515  | 
    size_t n = 0;  | 
    ||
516  | 
    |||
517  | 
    3528  | 
    	for (;;) { | 
    |
518  | 
    ✓✓✓ | 2520  | 
    		switch(*line++) { | 
    
519  | 
    case ' ':  | 
    ||
520  | 
    684  | 
    ++n;  | 
    |
521  | 
    684  | 
    continue;  | 
    |
522  | 
    case '\t':  | 
    ||
523  | 
    72  | 
    n = (n / tab_width + 1) * tab_width;  | 
    |
524  | 
    72  | 
    continue;  | 
    |
525  | 
    default:  | 
    ||
526  | 
    break;  | 
    ||
527  | 
    }  | 
    ||
528  | 
    break;  | 
    ||
529  | 
    }  | 
    ||
530  | 
    1764  | 
    return n;  | 
    |
531  | 
    }  | 
    ||
532  | 
    |||
/* Might this line be a mail header?
 * We deem a line to be a possible header if it matches the
 * Perl regexp /^[A-Z][-A-Za-z0-9]*:\s/. This is *not* the same
 * as in RFC whatever-number-it-is; we want to be gratuitously
 * conservative to avoid mangling ordinary civilised text.
 * Returns 1 for a possible header and 0 otherwise.
 */
static int
might_be_header(const char *line)
{
	const unsigned char *p = (const unsigned char *)line;

	/* The field name starts with an upper-case letter ... */
	if (!isupper(*p))
		return 0;

	/* ... continues with letters, digits and dashes ... */
	for (p++; isalnum(*p) || *p == '-'; p++)
		continue;

	/* ... and is closed by a colon followed by whitespace. */
	if (*p != ':')
		return 0;
	return isspace(p[1]) ? 1 : 0;
}
549  | 
    |||
550  | 
    /* Begin a new paragraph with an indent of |indent| spaces.  | 
    ||
551  | 
    */  | 
    ||
552  | 
    static void  | 
    ||
553  | 
    new_paragraph(size_t indent)  | 
    ||
554  | 
    { | 
    ||
555  | 
    |||
556  | 
    ✓✓ | 4140  | 
    if (x0 > 0)  | 
    
557  | 
    ✓✗ | 2052  | 
    		putchar('\n'); | 
    
558  | 
    2070  | 
    x = indent;  | 
    |
559  | 
    2070  | 
    x0 = 0;  | 
    |
560  | 
    2070  | 
    pending_spaces = 0;  | 
    |
561  | 
    2070  | 
    output_in_paragraph = 0;  | 
    |
562  | 
    2070  | 
    }  | 
    |
563  | 
    |||
564  | 
    /* Output spaces or tabs for leading indentation.  | 
    ||
565  | 
    */  | 
    ||
566  | 
    static void  | 
    ||
567  | 
    output_indent(size_t n_spaces)  | 
    ||
568  | 
    { | 
    ||
569  | 
    |||
570  | 
    ✓✓ | 2502  | 
    if (n_spaces == 0)  | 
    
571  | 
    return;  | 
    ||
572  | 
    ✓✓ | 252  | 
    	if (output_tab_width) { | 
    
573  | 
    ✓✓ | 90  | 
    		while (n_spaces >= output_tab_width) { | 
    
574  | 
    ✓✗ | 36  | 
    			putchar('\t'); | 
    
575  | 
    18  | 
    n_spaces -= output_tab_width;  | 
    |
576  | 
    }  | 
    ||
577  | 
    }  | 
    ||
578  | 
    ✓✓ | 612  | 
    while (n_spaces-- > 0)  | 
    
579  | 
    ✓✗ | 720  | 
    		putchar(' '); | 
    
580  | 
    1251  | 
    }  | 
    |
581  | 
    |||
582  | 
    /* Output a single word.  | 
    ||
583  | 
    * indent0 and indent1 are the indents to use on the first and subsequent  | 
    ||
584  | 
    * lines of a paragraph. They'll often be the same, of course.  | 
    ||
585  | 
    */  | 
    ||
586  | 
    static void  | 
    ||
587  | 
    output_word(size_t indent0, size_t indent1, const char *word,  | 
    ||
588  | 
    int length, int width, int spaces)  | 
    ||
589  | 
    { | 
    ||
590  | 
    3960  | 
    size_t new_x = x + pending_spaces + width;  | 
    |
591  | 
    |||
592  | 
    /* If either |spaces==0| (at end of line) or |coalesce_spaces_P|  | 
    ||
593  | 
    * (squashing internal whitespace), then add just one space;  | 
    ||
594  | 
    * except that if the last character was a sentence-ender we  | 
    ||
595  | 
    * actually add two spaces.  | 
    ||
596  | 
    */  | 
    ||
597  | 
    ✓✓ | 1980  | 
    if (coalesce_spaces_P || spaces == 0)  | 
    
598  | 
    1656  | 
    spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1;  | 
    |
599  | 
    |||
600  | 
    ✓✓ | 1980  | 
    if (x0 == 0)  | 
    
601  | 
    1026  | 
    output_indent(output_in_paragraph ? indent1 : indent0);  | 
    |
602  | 
    ✓✓✓✓ ✓✓  | 
    1935  | 
    else if (new_x > max_length || x >= goal_length ||  | 
    
603  | 
    ✓✓ | 891  | 
    	    (new_x > goal_length && new_x-goal_length > goal_length-x)) { | 
    
604  | 
    ✓✗ | 450  | 
    		putchar('\n'); | 
    
605  | 
    225  | 
    output_indent(indent1);  | 
    |
606  | 
    225  | 
    x0 = 0;  | 
    |
607  | 
    225  | 
    x = indent1;  | 
    |
608  | 
    225  | 
    	} else { | 
    |
609  | 
    729  | 
    x0 += pending_spaces;  | 
    |
610  | 
    729  | 
    x += pending_spaces;  | 
    |
611  | 
    ✓✓ | 2511  | 
    while (pending_spaces--)  | 
    
612  | 
    ✓✗ | 2106  | 
    			putchar(' '); | 
    
613  | 
    }  | 
    ||
614  | 
    1980  | 
    x0 += width;  | 
    |
615  | 
    1980  | 
    x += width;  | 
    |
616  | 
    ✓✓ | 6840  | 
    while(length--)  | 
    
617  | 
    ✓✗ | 5760  | 
    putchar(*word++);  | 
    
618  | 
    1980  | 
    pending_spaces = spaces;  | 
    |
619  | 
    1980  | 
    output_in_paragraph = 1;  | 
    |
620  | 
    1980  | 
    }  | 
    |
621  | 
    |||
622  | 
    /* Process a stream, but just center its lines rather than trying to  | 
    ||
623  | 
    * format them neatly.  | 
    ||
624  | 
    */  | 
    ||
625  | 
    static void  | 
    ||
626  | 
    center_stream(FILE *stream, const char *name)  | 
    ||
627  | 
    { | 
    ||
628  | 
    char *line, *cp;  | 
    ||
629  | 
    144  | 
    wchar_t wc;  | 
    |
630  | 
    size_t l; /* Display width of the line. */  | 
    ||
631  | 
    int wcw; /* Display width of one character. */  | 
    ||
632  | 
    int wcl; /* Length in bytes of one character. */  | 
    ||
633  | 
    |||
634  | 
    ✓✓ | 576  | 
    	while ((line = get_line(stream)) != NULL) { | 
    
635  | 
    l = 0;  | 
    ||
636  | 
    ✓✓ | 1800  | 
    		for (cp = line; *cp != '\0'; cp += wcl) { | 
    
637  | 
    ✓✓ | 684  | 
    if (*cp == '\t')  | 
    
638  | 
    72  | 
    *cp = ' ';  | 
    |
639  | 
    ✗✓ | 684  | 
    			if ((wcl = mbtowc(&wc, cp, MB_CUR_MAX)) == -1) { | 
    
640  | 
    (void)mbtowc(NULL, NULL, MB_CUR_MAX);  | 
    ||
641  | 
    *cp = '?';  | 
    ||
642  | 
    wcl = 1;  | 
    ||
643  | 
    wcw = 1;  | 
    ||
644  | 
    684  | 
    } else if ((wcw = wcwidth(wc)) == -1)  | 
    |
645  | 
    wcw = 1;  | 
    ||
646  | 
    ✓✓✓✓ | 
    1044  | 
    if (l == 0 && iswspace(wc))  | 
    
647  | 
    144  | 
    line += wcl;  | 
    |
648  | 
    else  | 
    ||
649  | 
    540  | 
    l += wcw;  | 
    |
650  | 
    }  | 
    ||
651  | 
    ✓✓ | 648  | 
    		while (l < goal_length) { | 
    
652  | 
    ✓✗ | 432  | 
    			putchar(' '); | 
    
653  | 
    216  | 
    l += 2;  | 
    |
654  | 
    }  | 
    ||
655  | 
    216  | 
    puts(line);  | 
    |
656  | 
    }  | 
    ||
657  | 
    |||
658  | 
    ✓✗✗✓ ✗✗  | 
    144  | 
    	if (ferror(stream)) { | 
    
659  | 
    		warn("%s", name); | 
    ||
660  | 
    ERRS(n_errors);  | 
    ||
661  | 
    }  | 
    ||
662  | 
    72  | 
    }  | 
    |
663  | 
    |||
664  | 
    /* Get a single line from a stream. Strip control  | 
    ||
665  | 
    * characters and trailing whitespace, and handle backspaces.  | 
    ||
666  | 
    * Return the address of the buffer containing the line.  | 
    ||
667  | 
    * This can cope with arbitrarily long lines, and with lines  | 
    ||
668  | 
    * without terminating \n.  | 
    ||
669  | 
    * If there are no characters left or an error happens, we  | 
    ||
670  | 
    * return NULL.  | 
    ||
671  | 
    */  | 
    ||
672  | 
    static char *  | 
    ||
673  | 
    get_line(FILE *stream)  | 
    ||
674  | 
    { | 
    ||
675  | 
    int ch;  | 
    ||
676  | 
    int troff = 0;  | 
    ||
677  | 
    static char *buf = NULL;  | 
    ||
678  | 
    static size_t length = 0;  | 
    ||
679  | 
    size_t len = 0;  | 
    ||
680  | 
    |||
681  | 
    ✓✓ | 5940  | 
    	if (buf == NULL) { | 
    
682  | 
    990  | 
    length = 100;  | 
    |
683  | 
    990  | 
    buf = xrealloc(NULL, length);  | 
    |
684  | 
    990  | 
    }  | 
    |
685  | 
    |||
686  | 
    ✓✗✓✓ ✓✓  | 
    32904  | 
    	while ((ch = getc(stream)) != '\n' && ch != EOF) { | 
    
687  | 
    ✓✓ | 5256  | 
    if ((len == 0) && (ch == '.' && !format_troff))  | 
    
688  | 
    72  | 
    troff = 1;  | 
    |
689  | 
    ✓✓✓✓ | 
    10008  | 
    		if (troff || ch == '\t' || !iscntrl(ch)) { | 
    
690  | 
    ✗✓ | 5148  | 
    			if (len >= length) { | 
    
691  | 
    length *= 2;  | 
    ||
692  | 
    buf = xrealloc(buf, length);  | 
    ||
693  | 
    }  | 
    ||
694  | 
    5148  | 
    buf[len++] = ch;  | 
    |
695  | 
    ✓✓ | 5364  | 
    		} else if (ch == '\b') { | 
    
696  | 
    108  | 
    if (len)  | 
    |
697  | 
    54  | 
    --len;  | 
    |
698  | 
    }  | 
    ||
699  | 
    }  | 
    ||
700  | 
    ✓✓✓✓ | 
    5256  | 
    while (len > 0 && isspace((unsigned char)buf[len-1]))  | 
    
701  | 
    126  | 
    --len;  | 
    |
702  | 
    2970  | 
    buf[len] = '\0';  | 
    |
703  | 
    2970  | 
    return (len > 0 || ch != EOF) ? buf : NULL;  | 
    |
704  | 
    }  | 
    ||
705  | 
    |||
706  | 
    /* realloc() wrapper that never returns NULL: if the request cannot be
     * satisfied the program is terminated through errx() with exit
     * status 1 and the message "out of memory".
     */
    static void *
    xrealloc(void *ptr, size_t nbytes)
    {
        void *fresh = realloc(ptr, nbytes);

        if (!fresh)
            errx(1, "out of memory");
        return fresh;
    }
    ||
718  | 
    |||
719  | 
    /* Print the command synopsis, prefixed with the program name taken
     * from __progname, on standard error and exit with status 1.
     */
    void
    usage(void)
    {
        extern char *__progname;
        static const char synopsis[] =
            "usage: %s [-cmnps] [-d chars] [-l number] [-t number]\n"
            "\t[goal [maximum] | -width | -w width] [file ...]\n";

        fprintf(stderr, synopsis, __progname);
        exit(1);
    }
    
| Generated by: GCOVR (Version 3.3) |