1 |
|
|
/* $OpenBSD: fmt.c,v 1.36 2016/01/07 18:02:43 schwarze Exp $ */ |
2 |
|
|
|
3 |
|
|
/* Sensible version of fmt |
4 |
|
|
* |
5 |
|
|
* Syntax: fmt [ options ] [ goal [ max ] ] [ filename ... ] |
6 |
|
|
* |
7 |
|
|
* Since the documentation for the original fmt is so poor, here |
8 |
|
|
* is an accurate description of what this one does. It's usually |
9 |
|
|
* the same. The *mechanism* used may differ from that suggested |
10 |
|
|
* here. Note that we are *not* entirely compatible with fmt, |
11 |
|
|
* because fmt gets so many things wrong. |
12 |
|
|
* |
13 |
|
|
* 1. Tabs are expanded, assuming 8-space tab stops. |
14 |
|
|
* If the `-t <n>' option is given, we assume <n>-space |
15 |
|
|
* tab stops instead. |
16 |
|
|
* Trailing blanks are removed from all lines. |
17 |
|
|
* x\b == nothing, for any x other than \b. |
18 |
|
|
* Other control characters are simply stripped. This |
19 |
|
|
* includes \r. |
20 |
|
|
* 2. Each line is split into leading whitespace and |
21 |
|
|
* everything else. Maximal consecutive sequences of |
22 |
|
|
* lines with the same leading whitespace are considered |
23 |
|
|
* to form paragraphs, except that a blank line is always |
24 |
|
|
* a paragraph to itself. |
25 |
|
|
* If the `-p' option is given then the first line of a |
26 |
|
|
* paragraph is permitted to have indentation different |
27 |
|
|
* from that of the other lines. |
28 |
|
|
* If the `-m' option is given then a line that looks |
29 |
|
|
* like a mail message header, if it is not immediately |
30 |
|
|
* preceded by a non-blank non-message-header line, is |
31 |
|
|
* taken to start a new paragraph, which also contains |
32 |
|
|
* any subsequent lines with non-empty leading whitespace. |
33 |
|
|
* Unless the `-n' option is given, lines beginning with |
34 |
|
|
* a . (dot) are not formatted. |
35 |
|
|
* 3. The "everything else" is split into words; a word |
36 |
|
|
* includes its trailing whitespace, and a word at the |
37 |
|
|
* end of a line is deemed to be followed by a single |
38 |
|
|
* space, or two spaces if it ends with a sentence-end |
39 |
|
|
* character. (See the `-d' option for how to change that.) |
40 |
|
|
* If the `-s' option has been given, then a word's trailing |
41 |
|
|
* whitespace is replaced by what it would have had if it |
42 |
|
|
* had occurred at end of line. |
43 |
|
|
* 4. Each paragraph is sent to standard output as follows. |
44 |
|
|
* We output the leading whitespace, and then enough words |
45 |
|
|
* to make the line length as near as possible to the goal |
46 |
|
|
* without exceeding the maximum. (If a single word would |
47 |
|
|
* exceed the maximum, we output that anyway.) Of course |
48 |
|
|
* the trailing whitespace of the last word is ignored. |
49 |
|
|
* We then emit a newline and start again if there are any |
50 |
|
|
* words left. |
51 |
|
|
* Note that for a blank line this translates as "We emit |
52 |
|
|
* a newline". |
53 |
|
|
* If the `-l <n>' option is given, then leading whitespace |
54 |
|
|
* is modified slightly: <n> spaces are replaced by a tab. |
55 |
|
|
* Indented paragraphs (see above under `-p') make matters |
56 |
|
|
* more complicated than this suggests. Actually every paragraph |
57 |
|
|
* has two `leading whitespace' values; the value for the first |
58 |
|
|
* line, and the value for the most recent line. (While processing |
59 |
|
|
* the first line, the two are equal. When `-p' has not been |
60 |
|
|
* given, they are always equal.) The leading whitespace |
61 |
|
|
* actually output is that of the first line (for the first |
62 |
|
|
* line of *output*) or that of the most recent line (for |
63 |
|
|
* all other lines of output). |
64 |
|
|
* When `-m' has been given, message header paragraphs are |
65 |
|
|
* taken as having first-leading-whitespace empty and |
66 |
|
|
* subsequent-leading-whitespace two spaces. |
67 |
|
|
* |
68 |
|
|
* Multiple input files are formatted one at a time, so that a file |
69 |
|
|
* never ends in the middle of a line. |
70 |
|
|
* |
71 |
|
|
* There's an alternative mode of operation, invoked by giving |
72 |
|
|
* the `-c' option. In that case we just center every line, |
73 |
|
|
* and most of the other options are ignored. This should |
74 |
|
|
* really be in a separate program, but we must stay compatible |
75 |
|
|
* with old `fmt'. |
76 |
|
|
* |
77 |
|
|
* QUERY: Should `-m' also try to do the right thing with quoted text? |
78 |
|
|
* QUERY: `-b' to treat backslashed whitespace as old `fmt' does? |
79 |
|
|
* QUERY: Option meaning `never join lines'? |
80 |
|
|
* QUERY: Option meaning `split in mid-word to avoid overlong lines'? |
81 |
|
|
* (Those last two might not be useful, since we have `fold'.) |
82 |
|
|
* |
83 |
|
|
* Differences from old `fmt': |
84 |
|
|
* |
85 |
|
|
* - We have many more options. Options that aren't understood |
86 |
|
|
* generate a lengthy usage message, rather than being |
87 |
|
|
* treated as filenames. |
88 |
|
|
* - Even with `-m', our handling of message headers is |
89 |
|
|
* significantly different. (And much better.) |
90 |
|
|
* - We don't treat `\ ' as non-word-breaking. |
91 |
|
|
* - Downward changes of indentation start new paragraphs |
92 |
|
|
* for us, as well as upward. (I think old `fmt' behaves |
93 |
|
|
* in the way it does in order to allow indented paragraphs, |
94 |
|
|
* but this is a broken way of making indented paragraphs |
95 |
|
|
* behave right.) |
96 |
|
|
* - Given the choice of going over or under |goal_length| |
97 |
|
|
* by the same amount, we go over; old `fmt' goes under. |
98 |
|
|
* - We treat `?' as ending a sentence, and not `:'. Old `fmt' |
99 |
|
|
* does the reverse. |
100 |
|
|
* - We return approved return codes. Old `fmt' returns |
101 |
|
|
* 1 for some errors, and *the number of unopenable files* |
102 |
|
|
* when that was all that went wrong. |
103 |
|
|
* - We have fewer crashes and more helpful error messages. |
104 |
|
|
* - We don't turn spaces into tabs at starts of lines unless |
105 |
|
|
* specifically requested. |
106 |
|
|
* - New `fmt' is somewhat smaller and slightly faster than |
107 |
|
|
* old `fmt'. |
108 |
|
|
* |
109 |
|
|
* Bugs: |
110 |
|
|
* |
111 |
|
|
* None known. There probably are some, though. |
112 |
|
|
* |
113 |
|
|
* Portability: |
114 |
|
|
* |
115 |
|
|
* I believe this code to be pretty portable. It does require |
116 |
|
|
* that you have `getopt'. If you need to include "getopt.h" |
117 |
|
|
* for this (e.g., if your system didn't come with `getopt' |
118 |
|
|
* and you installed it yourself) then you should arrange for |
119 |
|
|
* NEED_getopt_h to be #defined. |
120 |
|
|
* |
121 |
|
|
* Everything here should work OK even on nasty 16-bit |
122 |
|
|
* machines and nice 64-bit ones. However, it's only really |
123 |
|
|
* been tested on my FreeBSD machine. Your mileage may vary. |
124 |
|
|
*/ |
125 |
|
|
|
126 |
|
|
/* Copyright (c) 1997 Gareth McCaughan. All rights reserved. |
127 |
|
|
* |
128 |
|
|
* Redistribution and use of this code, in source or binary forms, |
129 |
|
|
* with or without modification, are permitted subject to the following |
130 |
|
|
* conditions: |
131 |
|
|
* |
132 |
|
|
* - Redistribution of source code must retain the above copyright |
133 |
|
|
* notice, this list of conditions and the following disclaimer. |
134 |
|
|
* |
135 |
|
|
* - If you distribute modified source code it must also include |
136 |
|
|
* a notice saying that it has been modified, and giving a brief |
137 |
|
|
* description of what changes have been made. |
138 |
|
|
* |
139 |
|
|
* Disclaimer: I am not responsible for the results of using this code. |
140 |
|
|
* If it formats your hard disc, sends obscene messages to |
141 |
|
|
* your boss and kills your children then that's your problem |
142 |
|
|
* not mine. I give absolutely no warranty of any sort as to |
143 |
|
|
* what the program will do, and absolutely refuse to be held |
144 |
|
|
* liable for any consequences of your using it. |
145 |
|
|
* Thank you. Have a nice day. |
146 |
|
|
*/ |
147 |
|
|
|
148 |
|
|
/* RCS change log: |
149 |
|
|
* Revision 1.5 1998/03/02 18:02:21 gjm11 |
150 |
|
|
* Minor changes for portability. |
151 |
|
|
* |
152 |
|
|
* Revision 1.4 1997/10/01 11:51:28 gjm11 |
153 |
|
|
* Repair broken indented-paragraph handling. |
154 |
|
|
* Add mail message header stuff. |
155 |
|
|
* Improve comments and layout. |
156 |
|
|
* Make usable with non-BSD systems. |
157 |
|
|
* Add revision display to usage message. |
158 |
|
|
* |
159 |
|
|
* Revision 1.3 1997/09/30 16:24:47 gjm11 |
160 |
|
|
* Add copyright notice, rcsid string and log message. |
161 |
|
|
* |
162 |
|
|
* Revision 1.2 1997/09/30 16:13:39 gjm11 |
163 |
|
|
* Add options: -d <chars>, -l <width>, -p, -s, -t <width>, -h . |
164 |
|
|
* Parse options with `getopt'. Clean up code generally. |
165 |
|
|
* Make comments more accurate. |
166 |
|
|
* |
167 |
|
|
* Revision 1.1 1997/09/30 11:29:57 gjm11 |
168 |
|
|
* Initial revision |
169 |
|
|
*/ |
170 |
|
|
|
171 |
|
|
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
180 |
|
|
|
181 |
|
|
/* Something that, we hope, will never be a genuine line length,
 * indentation etc.
 * It is the largest value a |size_t| can hold, and is used as the
 * initial "no indentation seen yet" value in |process_stream|.
 */
#define SILLY ((size_t)-1)
185 |
|
|
|
186 |
|
|
/* Parse |s| as a strictly positive integer and return it.
 *
 * |strtol| (rather than |strtoul|) is used so that negative numbers
 * are recognised and rejected instead of wrapping around.  The base is
 * auto-detected by |strtol| (decimal, 0x hex, leading-0 octal).
 *
 * If |s| is not entirely a number, the outcome depends on |fussyP|:
 *   fussyP != 0: exit with the message |err_mess|;
 *   fussyP == 0: return 0, so that the caller can treat |s| as
 *                something else (|main| then takes it as a file name).
 * A well-formed number that is zero, negative, or too large to fit in
 * a |long| always makes us exit with |err_mess|.
 */
static size_t
get_positive(const char *s, const char *err_mess, int fussyP)
{
	char *end;
	long value;

	/* |strtol| reports overflow only through |errno|. */
	errno = 0;
	value = strtol(s, &end, 0);

	/* Trailing garbage: this is not a number at all. */
	if (*end != '\0') {
		if (!fussyP)
			return 0;
		errx(1, "%s", err_mess);
	}

	/* A number, but not one we can use. */
	if (errno == ERANGE || value <= 0)
		errx(1, "%s", err_mess);

	return (size_t)value;
}
211 |
|
|
|
212 |
|
|
/* Global variables */

/* Settings; all of these are filled in by |main| from the command line. */
static int centerP = 0;			/* Try to center lines? (-c) */
static size_t goal_length = 0;		/* Target length for output lines; 0 = not set yet */
static size_t max_length = 0;		/* Maximum length for output lines; 0 = not set yet */
static int coalesce_spaces_P = 0;	/* Coalesce multiple whitespace -> ' ' ? (-s) */
static int allow_indented_paragraphs = 0; /* Can first line have diff. ind.? (-p) */
static int tab_width = 8;		/* Number of spaces per tab stop (-t) */
static size_t output_tab_width = 0;	/* Ditto, when squashing leading spaces (-l); 0 = never emit tabs */
static const char *sentence_enders = ".?!"; /* Double-space after these (-d) */
static int grok_mail_headers = 0;	/* treat embedded mail headers magically? (-m) */
static int format_troff = 0;		/* Format troff? (-n); when 0, lines starting with '.' are copied verbatim */

/* Running state of the output machinery. */
static int n_errors = 0;		/* Number of failed files; returned by |main| */
static size_t x;			/* Horizontal position in output line */
static size_t x0;			/* Ditto, ignoring leading whitespace */
static size_t pending_spaces;		/* Spaces to add before next word */
static int output_in_paragraph = 0;	/* Any of current para written out yet? */
230 |
|
|
|
231 |
|
|
/* Prototypes, listed in the order in which the definitions appear. */

static void	 process_named_file(const char *name);
static void	 process_stream(FILE *stream, const char *name);
static size_t	 indent_length(const char *line);
static int	 might_be_header(const char *line);
static void	 new_paragraph(size_t indent);
static void	 output_word(size_t indent0, size_t indent1, const char *word,
		    int length, int width, int spaces);
static void	 output_indent(size_t n_spaces);
static void	 center_stream(FILE *stream, const char *name);
static char	*get_line(FILE *stream);
static void	*xrealloc(void *ptr, size_t nbytes);
void		 usage(void);
244 |
|
|
|
245 |
|
|
/* Count one more failure in the lvalue |x|, saturating at 127.
 * |x| is evaluated more than once, so it must not have side effects.
 */
#define ERRS(x) ((x) >= 127 ? 127 : ++(x))
246 |
|
|
|
247 |
|
|
/* Here is perhaps the right place to mention that this code is |
248 |
|
|
* all in top-down order. Hence, |main| comes first. |
249 |
|
|
*/ |
250 |
|
|
int |
251 |
|
|
main(int argc, char *argv[]) |
252 |
|
|
{ |
253 |
|
|
int ch; /* used for |getopt| processing */ |
254 |
|
|
|
255 |
|
|
(void)setlocale(LC_CTYPE, ""); |
256 |
|
|
|
257 |
|
|
if (pledge("stdio rpath wpath cpath", NULL) == -1) |
258 |
|
|
err(1, "pledge"); |
259 |
|
|
|
260 |
|
|
/* 1. Grok parameters. */ |
261 |
|
|
while ((ch = getopt(argc, argv, "0123456789cd:hl:mnpst:w:")) != -1) { |
262 |
|
|
switch (ch) { |
263 |
|
|
case 'c': |
264 |
|
|
centerP = 1; |
265 |
|
|
break; |
266 |
|
|
case 'd': |
267 |
|
|
sentence_enders = optarg; |
268 |
|
|
break; |
269 |
|
|
case 'l': |
270 |
|
|
output_tab_width |
271 |
|
|
= get_positive(optarg, "output tab width must be positive", 1); |
272 |
|
|
break; |
273 |
|
|
case 'm': |
274 |
|
|
grok_mail_headers = 1; |
275 |
|
|
break; |
276 |
|
|
case 'n': |
277 |
|
|
format_troff = 1; |
278 |
|
|
break; |
279 |
|
|
case 'p': |
280 |
|
|
allow_indented_paragraphs = 1; |
281 |
|
|
break; |
282 |
|
|
case 's': |
283 |
|
|
coalesce_spaces_P = 1; |
284 |
|
|
break; |
285 |
|
|
case 't': |
286 |
|
|
tab_width = get_positive(optarg, "tab width must be positive", 1); |
287 |
|
|
break; |
288 |
|
|
case 'w': |
289 |
|
|
goal_length = get_positive(optarg, "width must be positive", 1); |
290 |
|
|
max_length = goal_length; |
291 |
|
|
break; |
292 |
|
|
case '0': case '1': case '2': case '3': case '4': case '5': |
293 |
|
|
case '6': case '7': case '8': case '9': |
294 |
|
|
/* XXX this is not a stylistically approved use of getopt() */ |
295 |
|
|
if (goal_length == 0) { |
296 |
|
|
char *p; |
297 |
|
|
|
298 |
|
|
p = argv[optind - 1]; |
299 |
|
|
if (p[0] == '-' && p[1] == ch && !p[2]) |
300 |
|
|
goal_length = get_positive(++p, "width must be nonzero", 1); |
301 |
|
|
else |
302 |
|
|
goal_length = get_positive(argv[optind]+1, |
303 |
|
|
"width must be nonzero", 1); |
304 |
|
|
max_length = goal_length; |
305 |
|
|
} |
306 |
|
|
break; |
307 |
|
|
case 'h': |
308 |
|
|
default: |
309 |
|
|
usage(); |
310 |
|
|
/* NOT REACHED */ |
311 |
|
|
} |
312 |
|
|
} |
313 |
|
|
|
314 |
|
|
argc -= optind; |
315 |
|
|
argv += optind; |
316 |
|
|
|
317 |
|
|
/* [ goal [ maximum ] ] */ |
318 |
|
|
if (argc > 0 && goal_length == 0 && |
319 |
|
|
(goal_length = get_positive(*argv,"goal length must be positive", 0)) != 0) { |
320 |
|
|
--argc; |
321 |
|
|
++argv; |
322 |
|
|
if (argc > 0 && (max_length = get_positive(*argv,"max length must be positive", 0)) != 0) { |
323 |
|
|
--argc; |
324 |
|
|
++argv; |
325 |
|
|
if (max_length < goal_length) |
326 |
|
|
errx(1, "max length must be >= goal length"); |
327 |
|
|
} |
328 |
|
|
} |
329 |
|
|
|
330 |
|
|
if (goal_length == 0) |
331 |
|
|
goal_length = 65; |
332 |
|
|
if (max_length == 0) |
333 |
|
|
max_length = goal_length+10; |
334 |
|
|
|
335 |
|
|
/* 2. Process files. */ |
336 |
|
|
|
337 |
|
|
if (argc > 0) { |
338 |
|
|
while (argc-- > 0) |
339 |
|
|
process_named_file(*argv++); |
340 |
|
|
} else { |
341 |
|
|
if (pledge("stdio wpath cpath rpath", NULL) == -1) |
342 |
|
|
err(1, "pledge"); |
343 |
|
|
process_stream(stdin, "standard input"); |
344 |
|
|
} |
345 |
|
|
|
346 |
|
|
/* We're done. */ |
347 |
|
|
return n_errors; |
348 |
|
|
|
349 |
|
|
} |
350 |
|
|
|
351 |
|
|
/* Process a single file, given its name. |
352 |
|
|
*/ |
353 |
|
|
static void |
354 |
|
|
process_named_file(const char *name) |
355 |
|
|
{ |
356 |
|
|
FILE *f; |
357 |
|
|
|
358 |
|
|
if ((f = fopen(name, "r")) == NULL) { |
359 |
|
|
warn("%s", name); |
360 |
|
|
ERRS(n_errors); |
361 |
|
|
} else { |
362 |
|
|
process_stream(f, name); |
363 |
|
|
fclose(f); |
364 |
|
|
} |
365 |
|
|
} |
366 |
|
|
|
367 |
|
|
/* Classification of a line for mail header handling.
 * The numeric values matter: |process_stream| tests for
 * `> hdr_NonHeader' to mean "header or continuation".
 */
typedef enum {
	hdr_ParagraphStart = -1,	/* no previous line in this paragraph */
	hdr_NonHeader = 0,		/* ordinary text */
	hdr_Header = 1,			/* looks like `Name: value' */
	hdr_Continuation = 2		/* indented line following a header */
} HdrType;
375 |
|
|
|
376 |
|
|
/* Process a stream. This is where the real work happens, |
377 |
|
|
* except that centering is handled separately. |
378 |
|
|
*/ |
379 |
|
|
static void |
380 |
|
|
process_stream(FILE *stream, const char *name) |
381 |
|
|
{ |
382 |
|
|
const char *wordp, *cp; |
383 |
|
|
wchar_t wc; |
384 |
|
|
size_t np; |
385 |
|
|
size_t last_indent = SILLY; /* how many spaces in last indent? */ |
386 |
|
|
size_t para_line_number = 0; /* how many lines already read in this para? */ |
387 |
|
|
size_t first_indent = SILLY; /* indentation of line 0 of paragraph */ |
388 |
|
|
int wcl; /* number of bytes in wide character */ |
389 |
|
|
int wcw; /* display width of wide character */ |
390 |
|
|
int word_length; /* number of bytes in word */ |
391 |
|
|
int word_width; /* display width of word */ |
392 |
|
|
int space_width; /* display width of space after word */ |
393 |
|
|
int line_width; /* display width of line */ |
394 |
|
|
HdrType prev_header_type = hdr_ParagraphStart; |
395 |
|
|
HdrType header_type; |
396 |
|
|
|
397 |
|
|
/* ^-- header_type of previous line; -1 at para start */ |
398 |
|
|
const char *line; |
399 |
|
|
|
400 |
|
|
if (centerP) { |
401 |
|
|
center_stream(stream, name); |
402 |
|
|
return; |
403 |
|
|
} |
404 |
|
|
|
405 |
|
|
while ((line = get_line(stream)) != NULL) { |
406 |
|
|
np = indent_length(line); |
407 |
|
|
header_type = hdr_NonHeader; |
408 |
|
|
if (grok_mail_headers && prev_header_type != hdr_NonHeader) { |
409 |
|
|
if (np == 0 && might_be_header(line)) |
410 |
|
|
header_type = hdr_Header; |
411 |
|
|
else if (np > 0 && prev_header_type>hdr_NonHeader) |
412 |
|
|
header_type = hdr_Continuation; |
413 |
|
|
} |
414 |
|
|
|
415 |
|
|
/* We need a new paragraph if and only if: |
416 |
|
|
* this line is blank, |
417 |
|
|
* OR it's a troff request, |
418 |
|
|
* OR it's a mail header, |
419 |
|
|
* OR it's not a mail header AND the last line was one, |
420 |
|
|
* OR the indentation has changed |
421 |
|
|
* AND the line isn't a mail header continuation line |
422 |
|
|
* AND this isn't the second line of an indented paragraph. |
423 |
|
|
*/ |
424 |
|
|
if (*line == '\0' || (*line == '.' && !format_troff) || |
425 |
|
|
header_type == hdr_Header || |
426 |
|
|
(header_type == hdr_NonHeader && prev_header_type > hdr_NonHeader) || |
427 |
|
|
(np != last_indent && header_type != hdr_Continuation && |
428 |
|
|
(!allow_indented_paragraphs || para_line_number != 1)) ) { |
429 |
|
|
new_paragraph(np); |
430 |
|
|
para_line_number = 0; |
431 |
|
|
first_indent = np; |
432 |
|
|
last_indent = np; |
433 |
|
|
|
434 |
|
|
/* nroff compatibility */ |
435 |
|
|
if (*line == '.' && !format_troff) { |
436 |
|
|
puts(line); |
437 |
|
|
continue; |
438 |
|
|
} |
439 |
|
|
if (header_type == hdr_Header) |
440 |
|
|
last_indent = 2; /* for cont. lines */ |
441 |
|
|
if (*line == '\0') { |
442 |
|
|
putchar('\n'); |
443 |
|
|
prev_header_type = hdr_ParagraphStart; |
444 |
|
|
continue; |
445 |
|
|
} else { |
446 |
|
|
/* If this is an indented paragraph other than a mail header |
447 |
|
|
* continuation, set |last_indent|. |
448 |
|
|
*/ |
449 |
|
|
if (np != last_indent && header_type != hdr_Continuation) |
450 |
|
|
last_indent = np; |
451 |
|
|
} |
452 |
|
|
prev_header_type = header_type; |
453 |
|
|
} |
454 |
|
|
|
455 |
|
|
line_width = np; |
456 |
|
|
for (wordp = line; *wordp != '\0'; wordp = cp) { |
457 |
|
|
word_length = 0; |
458 |
|
|
word_width = space_width = 0; |
459 |
|
|
for (cp = wordp; *cp != '\0'; cp += wcl) { |
460 |
|
|
wcl = mbtowc(&wc, cp, MB_CUR_MAX); |
461 |
|
|
if (wcl == -1) { |
462 |
|
|
(void)mbtowc(NULL, NULL, MB_CUR_MAX); |
463 |
|
|
wc = L'?'; |
464 |
|
|
wcl = 1; |
465 |
|
|
wcw = 1; |
466 |
|
|
} else if (wc == L'\t') |
467 |
|
|
wcw = (line_width / tab_width + 1) * |
468 |
|
|
tab_width - line_width; |
469 |
|
|
else if ((wcw = wcwidth(wc)) == -1) |
470 |
|
|
wcw = 1; |
471 |
|
|
if (iswblank(wc)) { |
472 |
|
|
/* Skip whitespace at start of line. */ |
473 |
|
|
if (word_length == 0) { |
474 |
|
|
wordp += wcl; |
475 |
|
|
continue; |
476 |
|
|
} |
477 |
|
|
/* Count whitespace after word. */ |
478 |
|
|
space_width += wcw; |
479 |
|
|
} else { |
480 |
|
|
/* Detect end of word. */ |
481 |
|
|
if (space_width > 0) |
482 |
|
|
break; |
483 |
|
|
/* Measure word. */ |
484 |
|
|
word_length += wcl; |
485 |
|
|
word_width += wcw; |
486 |
|
|
} |
487 |
|
|
line_width += wcw; |
488 |
|
|
} |
489 |
|
|
|
490 |
|
|
/* Send the word to the output machinery. */ |
491 |
|
|
output_word(first_indent, last_indent, wordp, |
492 |
|
|
word_length, word_width, space_width); |
493 |
|
|
} |
494 |
|
|
++para_line_number; |
495 |
|
|
} |
496 |
|
|
|
497 |
|
|
new_paragraph(0); |
498 |
|
|
if (ferror(stream)) { |
499 |
|
|
warn("%s", name); |
500 |
|
|
ERRS(n_errors); |
501 |
|
|
} |
502 |
|
|
} |
503 |
|
|
|
504 |
|
|
/* How long is the indent on this line? |
505 |
|
|
*/ |
506 |
|
|
static size_t |
507 |
|
|
indent_length(const char *line) |
508 |
|
|
{ |
509 |
|
|
size_t n = 0; |
510 |
|
|
|
511 |
|
|
for (;;) { |
512 |
|
|
switch(*line++) { |
513 |
|
|
case ' ': |
514 |
|
|
++n; |
515 |
|
|
continue; |
516 |
|
|
case '\t': |
517 |
|
|
n = (n / tab_width + 1) * tab_width; |
518 |
|
|
continue; |
519 |
|
|
default: |
520 |
|
|
break; |
521 |
|
|
} |
522 |
|
|
break; |
523 |
|
|
} |
524 |
|
|
return n; |
525 |
|
|
} |
526 |
|
|
|
527 |
|
|
/* Might this line be a mail header?
 * A line qualifies if it looks like the Perl regexp
 * /^[A-Z][-A-Za-z0-9]*:\s/ (with the character classes as decided
 * by |isupper|, |isalnum| and |isspace|).  This is deliberately
 * stricter than what the mail RFCs allow, so that ordinary text
 * is not mistaken for a header.
 * Returns 1 for a possible header and 0 otherwise.
 */
static int
might_be_header(const char *line)
{
	const unsigned char *p = (const unsigned char *)line;

	/* First character: an upper-case letter. */
	if (!isupper(*p))
		return 0;

	/* Rest of the field name: letters, digits and dashes. */
	for (++p; isalnum(*p) || *p == '-'; ++p)
		continue;

	/* Then a colon followed by whitespace. */
	if (*p != ':')
		return 0;
	return isspace(p[1]) != 0;
}
543 |
|
|
|
544 |
|
|
/* Begin a new paragraph with an indent of |indent| spaces. |
545 |
|
|
*/ |
546 |
|
|
static void |
547 |
|
|
new_paragraph(size_t indent) |
548 |
|
|
{ |
549 |
|
|
|
550 |
|
|
if (x0 > 0) |
551 |
|
|
putchar('\n'); |
552 |
|
|
x = indent; |
553 |
|
|
x0 = 0; |
554 |
|
|
pending_spaces = 0; |
555 |
|
|
output_in_paragraph = 0; |
556 |
|
|
} |
557 |
|
|
|
558 |
|
|
/* Output spaces or tabs for leading indentation. |
559 |
|
|
*/ |
560 |
|
|
static void |
561 |
|
|
output_indent(size_t n_spaces) |
562 |
|
|
{ |
563 |
|
|
|
564 |
|
|
if (n_spaces == 0) |
565 |
|
|
return; |
566 |
|
|
if (output_tab_width) { |
567 |
|
|
while (n_spaces >= output_tab_width) { |
568 |
|
|
putchar('\t'); |
569 |
|
|
n_spaces -= output_tab_width; |
570 |
|
|
} |
571 |
|
|
} |
572 |
|
|
while (n_spaces-- > 0) |
573 |
|
|
putchar(' '); |
574 |
|
|
} |
575 |
|
|
|
576 |
|
|
/* Output a single word. |
577 |
|
|
* indent0 and indent1 are the indents to use on the first and subsequent |
578 |
|
|
* lines of a paragraph. They'll often be the same, of course. |
579 |
|
|
*/ |
580 |
|
|
static void |
581 |
|
|
output_word(size_t indent0, size_t indent1, const char *word, |
582 |
|
|
int length, int width, int spaces) |
583 |
|
|
{ |
584 |
|
|
size_t new_x = x + pending_spaces + width; |
585 |
|
|
|
586 |
|
|
/* If either |spaces==0| (at end of line) or |coalesce_spaces_P| |
587 |
|
|
* (squashing internal whitespace), then add just one space; |
588 |
|
|
* except that if the last character was a sentence-ender we |
589 |
|
|
* actually add two spaces. |
590 |
|
|
*/ |
591 |
|
|
if (coalesce_spaces_P || spaces == 0) |
592 |
|
|
spaces = strchr(sentence_enders, word[length-1]) ? 2 : 1; |
593 |
|
|
|
594 |
|
|
if (x0 == 0) |
595 |
|
|
output_indent(output_in_paragraph ? indent1 : indent0); |
596 |
|
|
else if (new_x > max_length || x >= goal_length || |
597 |
|
|
(new_x > goal_length && new_x-goal_length > goal_length-x)) { |
598 |
|
|
putchar('\n'); |
599 |
|
|
output_indent(indent1); |
600 |
|
|
x0 = 0; |
601 |
|
|
x = indent1; |
602 |
|
|
} else { |
603 |
|
|
x0 += pending_spaces; |
604 |
|
|
x += pending_spaces; |
605 |
|
|
while (pending_spaces--) |
606 |
|
|
putchar(' '); |
607 |
|
|
} |
608 |
|
|
x0 += width; |
609 |
|
|
x += width; |
610 |
|
|
while(length--) |
611 |
|
|
putchar(*word++); |
612 |
|
|
pending_spaces = spaces; |
613 |
|
|
output_in_paragraph = 1; |
614 |
|
|
} |
615 |
|
|
|
616 |
|
|
/* Process a stream, but just center its lines rather than trying to |
617 |
|
|
* format them neatly. |
618 |
|
|
*/ |
619 |
|
|
static void |
620 |
|
|
center_stream(FILE *stream, const char *name) |
621 |
|
|
{ |
622 |
|
|
char *line, *cp; |
623 |
|
|
wchar_t wc; |
624 |
|
|
size_t l; /* Display width of the line. */ |
625 |
|
|
int wcw; /* Display width of one character. */ |
626 |
|
|
int wcl; /* Length in bytes of one character. */ |
627 |
|
|
|
628 |
|
|
while ((line = get_line(stream)) != NULL) { |
629 |
|
|
l = 0; |
630 |
|
|
for (cp = line; *cp != '\0'; cp += wcl) { |
631 |
|
|
if (*cp == '\t') |
632 |
|
|
*cp = ' '; |
633 |
|
|
if ((wcl = mbtowc(&wc, cp, MB_CUR_MAX)) == -1) { |
634 |
|
|
(void)mbtowc(NULL, NULL, MB_CUR_MAX); |
635 |
|
|
*cp = '?'; |
636 |
|
|
wcl = 1; |
637 |
|
|
wcw = 1; |
638 |
|
|
} else if ((wcw = wcwidth(wc)) == -1) |
639 |
|
|
wcw = 1; |
640 |
|
|
if (l == 0 && iswspace(wc)) |
641 |
|
|
line += wcl; |
642 |
|
|
else |
643 |
|
|
l += wcw; |
644 |
|
|
} |
645 |
|
|
while (l < goal_length) { |
646 |
|
|
putchar(' '); |
647 |
|
|
l += 2; |
648 |
|
|
} |
649 |
|
|
puts(line); |
650 |
|
|
} |
651 |
|
|
|
652 |
|
|
if (ferror(stream)) { |
653 |
|
|
warn("%s", name); |
654 |
|
|
ERRS(n_errors); |
655 |
|
|
} |
656 |
|
|
} |
657 |
|
|
|
658 |
|
|
/* Get a single line from a stream. Strip control |
659 |
|
|
* characters and trailing whitespace, and handle backspaces. |
660 |
|
|
* Return the address of the buffer containing the line. |
661 |
|
|
* This can cope with arbitrarily long lines, and with lines |
662 |
|
|
* without terminating \n. |
663 |
|
|
* If there are no characters left or an error happens, we |
664 |
|
|
* return NULL. |
665 |
|
|
*/ |
666 |
|
|
static char * |
667 |
|
|
get_line(FILE *stream) |
668 |
|
|
{ |
669 |
|
|
int ch; |
670 |
|
|
int troff = 0; |
671 |
|
|
static char *buf = NULL; |
672 |
|
|
static size_t length = 0; |
673 |
|
|
size_t len = 0; |
674 |
|
|
|
675 |
|
|
if (buf == NULL) { |
676 |
|
|
length = 100; |
677 |
|
|
buf = xrealloc(NULL, length); |
678 |
|
|
} |
679 |
|
|
|
680 |
|
|
while ((ch = getc(stream)) != '\n' && ch != EOF) { |
681 |
|
|
if ((len == 0) && (ch == '.' && !format_troff)) |
682 |
|
|
troff = 1; |
683 |
|
|
if (troff || ch == '\t' || !iscntrl(ch)) { |
684 |
|
|
if (len >= length) { |
685 |
|
|
length *= 2; |
686 |
|
|
buf = xrealloc(buf, length); |
687 |
|
|
} |
688 |
|
|
buf[len++] = ch; |
689 |
|
|
} else if (ch == '\b') { |
690 |
|
|
if (len) |
691 |
|
|
--len; |
692 |
|
|
} |
693 |
|
|
} |
694 |
|
|
while (len > 0 && isspace((unsigned char)buf[len-1])) |
695 |
|
|
--len; |
696 |
|
|
buf[len] = '\0'; |
697 |
|
|
return (len > 0 || ch != EOF) ? buf : NULL; |
698 |
|
|
} |
699 |
|
|
|
700 |
|
|
/* Wrapper around |realloc| that never returns NULL: if the
 * allocation fails the program exits with status 1 and the
 * message "out of memory".
 */
static void *
xrealloc(void *ptr, size_t nbytes)
{
	void *fresh = realloc(ptr, nbytes);

	if (!fresh)
		errx(1, "out of memory");
	return fresh;
}
712 |
|
|
|
713 |
|
|
/* Print the two-line usage summary on standard error and exit
 * with status 1.
 */
void
usage(void)
{
	extern char *__progname;

	fprintf(stderr,
	    "usage: %s [-cmnps] [-d chars] [-l number] [-t number]\n",
	    __progname);
	fputs("\t[goal [maximum] | -width | -w width] [file ...]\n", stderr);
	exit(1);
}