1 |
|
|
/* $OpenBSD: spellprog.c,v 1.13 2017/07/28 17:16:35 nicm Exp $ */ |
2 |
|
|
|
3 |
|
|
/* |
4 |
|
|
* Copyright (c) 1991, 1993 |
5 |
|
|
* The Regents of the University of California. All rights reserved. |
6 |
|
|
* |
7 |
|
|
* Redistribution and use in source and binary forms, with or without |
8 |
|
|
* modification, are permitted provided that the following conditions |
9 |
|
|
* are met: |
10 |
|
|
* 1. Redistributions of source code must retain the above copyright |
11 |
|
|
* notice, this list of conditions and the following disclaimer. |
12 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer in the |
14 |
|
|
* documentation and/or other materials provided with the distribution. |
15 |
|
|
* 3. Neither the name of the University nor the names of its contributors |
16 |
|
|
* may be used to endorse or promote products derived from this software |
17 |
|
|
* without specific prior written permission. |
18 |
|
|
* |
19 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
20 |
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 |
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
23 |
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 |
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
25 |
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
26 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
27 |
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
28 |
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
29 |
|
|
* SUCH DAMAGE. |
30 |
|
|
* |
31 |
|
|
* @(#)spell.h 8.1 (Berkeley) 6/6/93 |
32 |
|
|
*/ |
33 |
|
|
/* |
34 |
|
|
* Copyright (C) Caldera International Inc. 2001-2002. |
35 |
|
|
* All rights reserved. |
36 |
|
|
* |
37 |
|
|
* Redistribution and use in source and binary forms, with or without |
38 |
|
|
* modification, are permitted provided that the following conditions |
39 |
|
|
* are met: |
40 |
|
|
* 1. Redistributions of source code and documentation must retain the above |
41 |
|
|
* copyright notice, this list of conditions and the following disclaimer. |
42 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
43 |
|
|
* notice, this list of conditions and the following disclaimer in the |
44 |
|
|
* documentation and/or other materials provided with the distribution. |
45 |
|
|
* 3. All advertising materials mentioning features or use of this software |
46 |
|
|
* must display the following acknowledgement: |
47 |
|
|
* This product includes software developed or owned by Caldera |
48 |
|
|
* International, Inc. |
49 |
|
|
* 4. Neither the name of Caldera International, Inc. nor the names of other |
50 |
|
|
* contributors may be used to endorse or promote products derived from |
51 |
|
|
* this software without specific prior written permission. |
52 |
|
|
* |
53 |
|
|
* USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA |
54 |
|
|
* INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR |
55 |
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
56 |
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
57 |
|
|
* IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, |
58 |
|
|
* INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
59 |
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
60 |
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
61 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
62 |
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
63 |
|
|
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
64 |
|
|
* POSSIBILITY OF SUCH DAMAGE. |
65 |
|
|
*/ |
66 |
|
|
|
67 |
|
|
#include <sys/mman.h> |
68 |
|
|
#include <sys/stat.h> |
69 |
|
|
|
70 |
|
|
#include <ctype.h> |
71 |
|
|
#include <err.h> |
72 |
|
|
#include <errno.h> |
73 |
|
|
#include <fcntl.h> |
74 |
|
|
#include <limits.h> |
75 |
|
|
#include <locale.h> |
76 |
|
|
#include <stdint.h> |
77 |
|
|
#include <stdio.h> |
78 |
|
|
#include <stdlib.h> |
79 |
|
|
#include <string.h> |
80 |
|
|
#include <unistd.h> |
81 |
|
|
|
82 |
|
|
#define DLEV 2 |
83 |
|
|
|
84 |
|
|
int an(char *, char *, char *, int); |
85 |
|
|
int bility(char *, char *, char *, int); |
86 |
|
|
int es(char *, char *, char *, int); |
87 |
|
|
int dict(char *, char *); |
88 |
|
|
int i_to_y(char *, char *, char *, int); |
89 |
|
|
int ily(char *, char *, char *, int); |
90 |
|
|
int ize(char *, char *, char *, int); |
91 |
|
|
int metry(char *, char *, char *, int); |
92 |
|
|
int monosyl(char *, char *); |
93 |
|
|
int ncy(char *, char *, char *, int); |
94 |
|
|
int nop(char *, char *, char *, int); |
95 |
|
|
int trypref(char *, char *, int); |
96 |
|
|
int tryword(char *, char *, int); |
97 |
|
|
int s(char *, char *, char *, int); |
98 |
|
|
int strip(char *, char *, char *, int); |
99 |
|
|
int suffix(char *, int); |
100 |
|
|
int tion(char *, char *, char *, int); |
101 |
|
|
int vowel(unsigned char); |
102 |
|
|
int y_to_e(char *, char *, char *, int); |
103 |
|
|
int CCe(char *, char *, char *, int); |
104 |
|
|
int VCe(char *, char *, char *, int); |
105 |
|
|
char *lookuppref(char **, char *); |
106 |
|
|
char *skipv(char *); |
107 |
|
|
char *estrdup(const char *); |
108 |
|
|
void ise(void); |
109 |
|
|
void print_word(FILE *); |
110 |
|
|
void ztos(char *); |
111 |
|
|
__dead void usage(void); |
112 |
|
|
|
113 |
|
|
/* from look.c */ |
114 |
|
|
int look(unsigned char *, unsigned char *, unsigned char *); |
115 |
|
|
|
116 |
|
|
struct suftab { |
117 |
|
|
char *suf; |
118 |
|
|
int (*p1)(char *, char *, char *, int); |
119 |
|
|
int n1; |
120 |
|
|
char *d1; |
121 |
|
|
char *a1; |
122 |
|
|
int (*p2)(char *, char *, char *, int); |
123 |
|
|
int n2; |
124 |
|
|
char *d2; |
125 |
|
|
char *a2; |
126 |
|
|
} suftab[] = { |
127 |
|
|
{"ssen", ily, 4, "-y+iness", "+ness" }, |
128 |
|
|
{"ssel", ily, 4, "-y+i+less", "+less" }, |
129 |
|
|
{"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" }, |
130 |
|
|
{"s'", s, 2, "", "+'s"}, |
131 |
|
|
{"s", s, 1, "", "+s"}, |
132 |
|
|
{"ecn", ncy, 1, "", "-t+ce"}, |
133 |
|
|
{"ycn", ncy, 1, "", "-cy+t"}, |
134 |
|
|
{"ytilb", nop, 0, "", ""}, |
135 |
|
|
{"ytilib", bility, 5, "-le+ility", ""}, |
136 |
|
|
{"elbaif", i_to_y, 4, "-y+iable", ""}, |
137 |
|
|
{"elba", CCe, 4, "-e+able", "+able"}, |
138 |
|
|
{"yti", CCe, 3, "-e+ity", "+ity"}, |
139 |
|
|
{"ylb", y_to_e, 1, "-e+y", ""}, |
140 |
|
|
{"yl", ily, 2, "-y+ily", "+ly"}, |
141 |
|
|
{"laci", strip, 2, "", "+al"}, |
142 |
|
|
{"latnem", strip, 2, "", "+al"}, |
143 |
|
|
{"lanoi", strip, 2, "", "+al"}, |
144 |
|
|
{"tnem", strip, 4, "", "+ment"}, |
145 |
|
|
{"gni", CCe, 3, "-e+ing", "+ing"}, |
146 |
|
|
{"reta", nop, 0, "", ""}, |
147 |
|
|
{"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"}, |
148 |
|
|
{"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"}, |
149 |
|
|
{"citsi", strip, 2, "", "+ic"}, |
150 |
|
|
{"cihparg", i_to_y, 1, "-y+ic", ""}, |
151 |
|
|
{"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"}, |
152 |
|
|
{"cirtem", i_to_y, 1, "-y+ic", ""}, |
153 |
|
|
{"yrtem", metry, 0, "-ry+er", ""}, |
154 |
|
|
{"cigol", i_to_y, 1, "-y+ic", ""}, |
155 |
|
|
{"tsigol", i_to_y, 2, "-y+ist", ""}, |
156 |
|
|
{"tsi", VCe, 3, "-e+ist", "+ist"}, |
157 |
|
|
{"msi", VCe, 3, "-e+ism", "+ist"}, |
158 |
|
|
{"noitacif", i_to_y, 6, "-y+ication", ""}, |
159 |
|
|
{"noitazi", ize, 5, "-e+ation", ""}, |
160 |
|
|
{"rota", tion, 2, "-e+or", ""}, |
161 |
|
|
{"noit", tion, 3, "-e+ion", "+ion"}, |
162 |
|
|
{"naino", an, 3, "", "+ian"}, |
163 |
|
|
{"na", an, 1, "", "+n"}, |
164 |
|
|
{"evit", tion, 3, "-e+ive", "+ive"}, |
165 |
|
|
{"ezi", CCe, 3, "-e+ize", "+ize"}, |
166 |
|
|
{"pihs", strip, 4, "", "+ship"}, |
167 |
|
|
{"dooh", ily, 4, "-y+hood", "+hood"}, |
168 |
|
|
{"ekil", strip, 4, "", "+like"}, |
169 |
|
|
{ NULL } |
170 |
|
|
}; |
171 |
|
|
|
172 |
|
|
/*
 * Prefixes that lookuppref() is willing to strip.  The table is searched
 * front to back and the first match wins, so the order of the entries
 * matters.  A NULL pointer marks the end.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	NULL
};
209 |
|
|
|
210 |
|
|
struct wlist { |
211 |
|
|
int fd; |
212 |
|
|
unsigned char *front; |
213 |
|
|
unsigned char *back; |
214 |
|
|
} *wlists; |
215 |
|
|
|
216 |
|
|
int vflag; |
217 |
|
|
int xflag; |
218 |
|
|
char word[LINE_MAX]; |
219 |
|
|
char original[LINE_MAX]; |
220 |
|
|
char *deriv[40]; |
221 |
|
|
char affix[40]; |
222 |
|
|
|
223 |
|
|
/*
 * The spellprog utility accepts a newline-delimited list of words
 * on stdin.  For arguments it expects the path to a word list and
 * the path to a file in which to store found words.
 *
 * In normal usage, spell is called twice.  The first time it is
 * called with a stop list to flag commonly misspelled words.  The
 * remaining words are then passed to spell again, this time with
 * the dictionary file as the first (non-flag) argument.
 *
 * Unlike historic versions of spellprog, this one does not use
 * hashed files.  Instead it simply requires that files be sorted
 * lexicographically and uses the same algorithm as the look utility.
 *
 * Note that spellprog should be called via the spell shell script
 * and is not meant to be invoked directly by the user.
 */
240 |
|
|
|
241 |
|
|
int |
242 |
|
|
main(int argc, char **argv) |
243 |
|
|
{ |
244 |
|
|
char *ep, *cp, *dp; |
245 |
|
|
char *outfile; |
246 |
|
|
int ch, fold, i; |
247 |
|
|
struct stat sb; |
248 |
|
|
FILE *file, *found; |
249 |
|
|
|
250 |
|
|
setlocale(LC_ALL, ""); |
251 |
|
|
|
252 |
|
|
if (pledge("stdio rpath wpath cpath flock", NULL) == -1) |
253 |
|
|
err(1, "pledge"); |
254 |
|
|
|
255 |
|
|
outfile = NULL; |
256 |
|
|
while ((ch = getopt(argc, argv, "bvxo:")) != -1) { |
257 |
|
|
switch (ch) { |
258 |
|
|
case 'b': |
259 |
|
|
/* Use British dictionary and convert ize -> ise. */ |
260 |
|
|
ise(); |
261 |
|
|
break; |
262 |
|
|
case 'o': |
263 |
|
|
outfile = optarg; |
264 |
|
|
break; |
265 |
|
|
case 'v': |
266 |
|
|
/* Also write derivations to "found" file. */ |
267 |
|
|
vflag = 1; |
268 |
|
|
break; |
269 |
|
|
case 'x': |
270 |
|
|
/* Print plausible stems to stdout. */ |
271 |
|
|
xflag = 1; |
272 |
|
|
break; |
273 |
|
|
default: |
274 |
|
|
usage(); |
275 |
|
|
} |
276 |
|
|
|
277 |
|
|
} |
278 |
|
|
argc -= optind; |
279 |
|
|
argv += optind; |
280 |
|
|
if (argc < 1) |
281 |
|
|
usage(); |
282 |
|
|
|
283 |
|
|
/* Open and mmap the word/stop lists. */ |
284 |
|
|
if ((wlists = calloc(sizeof(struct wlist), (argc + 1))) == NULL) |
285 |
|
|
err(1, "malloc"); |
286 |
|
|
for (i = 0; argc--; i++) { |
287 |
|
|
wlists[i].fd = open(argv[i], O_RDONLY, 0); |
288 |
|
|
if (wlists[i].fd == -1 || fstat(wlists[i].fd, &sb) != 0) |
289 |
|
|
err(1, "%s", argv[i]); |
290 |
|
|
if (sb.st_size > SIZE_MAX) |
291 |
|
|
errc(1, EFBIG, "%s", argv[i]); |
292 |
|
|
wlists[i].front = mmap(NULL, (size_t)sb.st_size, PROT_READ, |
293 |
|
|
MAP_PRIVATE, wlists[i].fd, (off_t)0); |
294 |
|
|
if (wlists[i].front == MAP_FAILED) |
295 |
|
|
err(1, "%s", argv[i]); |
296 |
|
|
wlists[i].back = wlists[i].front + sb.st_size; |
297 |
|
|
} |
298 |
|
|
wlists[i].fd = -1; |
299 |
|
|
|
300 |
|
|
/* Open file where found words are to be saved. */ |
301 |
|
|
if (outfile == NULL) |
302 |
|
|
found = NULL; |
303 |
|
|
else if ((found = fopen(outfile, "w")) == NULL) |
304 |
|
|
err(1, "cannot open %s", outfile); |
305 |
|
|
|
306 |
|
|
for (;; print_word(file)) { |
307 |
|
|
affix[0] = '\0'; |
308 |
|
|
file = found; |
309 |
|
|
for (ep = word; (*ep = ch = getchar()) != '\n'; ep++) { |
310 |
|
|
if (ep - word == sizeof(word) - 1) { |
311 |
|
|
*ep = '\0'; |
312 |
|
|
warnx("word too long (%s)", word); |
313 |
|
|
while ((ch = getchar()) != '\n') |
314 |
|
|
; /* slurp until EOL */ |
315 |
|
|
} |
316 |
|
|
if (ch == EOF) { |
317 |
|
|
if (found != NULL) |
318 |
|
|
fclose(found); |
319 |
|
|
exit(0); |
320 |
|
|
} |
321 |
|
|
} |
322 |
|
|
for (cp = word, dp = original; cp < ep; ) |
323 |
|
|
*dp++ = *cp++; |
324 |
|
|
*dp = '\0'; |
325 |
|
|
fold = 0; |
326 |
|
|
for (cp = word; cp < ep; cp++) |
327 |
|
|
if (islower((unsigned char)*cp)) |
328 |
|
|
goto lcase; |
329 |
|
|
if (trypref(ep, ".", 0)) |
330 |
|
|
continue; |
331 |
|
|
++fold; |
332 |
|
|
for (cp = original + 1, dp = word + 1; dp < ep; dp++, cp++) |
333 |
|
|
*dp = tolower((unsigned char)*cp); |
334 |
|
|
lcase: |
335 |
|
|
if (trypref(ep, ".", 0) || suffix(ep, 0)) |
336 |
|
|
continue; |
337 |
|
|
if (isupper((unsigned char)word[0])) { |
338 |
|
|
for (cp = original, dp = word; (*dp = *cp++); dp++) { |
339 |
|
|
if (fold) |
340 |
|
|
*dp = tolower((unsigned char)*dp); |
341 |
|
|
} |
342 |
|
|
word[0] = tolower((unsigned char)word[0]); |
343 |
|
|
goto lcase; |
344 |
|
|
} |
345 |
|
|
file = stdout; |
346 |
|
|
} |
347 |
|
|
|
348 |
|
|
exit(0); |
349 |
|
|
} |
350 |
|
|
|
351 |
|
|
void |
352 |
|
|
print_word(FILE *f) |
353 |
|
|
{ |
354 |
|
|
|
355 |
|
|
if (f != NULL) { |
356 |
|
|
if (vflag && affix[0] != '\0' && affix[0] != '.') |
357 |
|
|
fprintf(f, "%s\t%s\n", affix, original); |
358 |
|
|
else |
359 |
|
|
fprintf(f, "%s\n", original); |
360 |
|
|
} |
361 |
|
|
} |
362 |
|
|
|
363 |
|
|
/* |
364 |
|
|
* For each matching suffix in suftab, call the function associated |
365 |
|
|
* with that suffix (p1 and p2). |
366 |
|
|
*/ |
367 |
|
|
int |
368 |
|
|
suffix(char *ep, int lev) |
369 |
|
|
{ |
370 |
|
|
struct suftab *t; |
371 |
|
|
char *cp, *sp; |
372 |
|
|
|
373 |
|
|
lev += DLEV; |
374 |
|
|
deriv[lev] = deriv[lev-1] = 0; |
375 |
|
|
for (t = suftab; (sp = t->suf); t++) { |
376 |
|
|
cp = ep; |
377 |
|
|
while (*sp) { |
378 |
|
|
if (*--cp != *sp++) |
379 |
|
|
goto next; |
380 |
|
|
} |
381 |
|
|
for (sp = cp; --sp >= word && !vowel(*sp);) |
382 |
|
|
; /* nothing */ |
383 |
|
|
if (sp < word) |
384 |
|
|
return (0); |
385 |
|
|
if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1)) |
386 |
|
|
return (1); |
387 |
|
|
if (t->p2 != NULL) { |
388 |
|
|
deriv[lev] = deriv[lev+1] = 0; |
389 |
|
|
return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev)); |
390 |
|
|
} |
391 |
|
|
return (0); |
392 |
|
|
next: ; |
393 |
|
|
} |
394 |
|
|
return (0); |
395 |
|
|
} |
396 |
|
|
|
397 |
|
|
/*
 * Suffix handler that never succeeds: ignores all of its arguments and
 * returns 0.
 */
int
nop(char *ep, char *d, char *a, int lev)
{
	(void)ep;
	(void)d;
	(void)a;
	(void)lev;

	return (0);
}
403 |
|
|
|
404 |
|
|
/*
 * Look up the word ending at ep, first through trypref() with a as the
 * derivation string and, if that fails, through suffix().  d is not
 * used.  Returns 1 on success and 0 on failure.
 */
int
strip(char *ep, char *d, char *a, int lev)
{
	if (trypref(ep, a, lev))
		return (1);
	return (suffix(ep, lev) != 0);
}
410 |
|
|
|
411 |
|
|
int |
412 |
|
|
s(char *ep, char *d, char *a, int lev) |
413 |
|
|
{ |
414 |
|
|
|
415 |
|
|
if (lev > DLEV + 1) |
416 |
|
|
return (0); |
417 |
|
|
if (*ep == 's' && ep[-1] == 's') |
418 |
|
|
return (0); |
419 |
|
|
return (strip(ep, d, a, lev)); |
420 |
|
|
} |
421 |
|
|
|
422 |
|
|
int |
423 |
|
|
an(char *ep, char *d, char *a, int lev) |
424 |
|
|
{ |
425 |
|
|
|
426 |
|
|
if (!isupper((unsigned char)*word)) /* must be proper name */ |
427 |
|
|
return (0); |
428 |
|
|
return (trypref(ep,a,lev)); |
429 |
|
|
} |
430 |
|
|
|
431 |
|
|
/*
 * Store an 'e' at ep and look up the word that now ends just after it,
 * using strip() with d as the derivation string.  The letter that was
 * overwritten is not put back.
 */
int
ize(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'e';
	return (strip(ep + 1, "", d, lev));
}
438 |
|
|
|
439 |
|
|
/*
 * Try the stem with an 'e' put at ep: the word ending just after the
 * 'e' is looked up with strip(), d being the derivation string.
 * Returns 1 on success.  On failure the letter previously at ep is
 * restored and 0 is returned.
 */
int
y_to_e(char *ep, char *d, char *a, int lev)
{
	const char saved = ep[0];

	ep[0] = 'e';
	if (strip(ep + 1, "", d, lev) != 0)
		return (1);
	ep[0] = saved;
	return (0);
}
450 |
|
|
|
451 |
|
|
/*
 * Dispatch on the last letter of the stem: a stem ending in 'i' is
 * handled by i_to_y(), any other stem by strip().
 */
int
ily(char *ep, char *d, char *a, int lev)
{
	return (ep[-1] == 'i' ? i_to_y(ep, d, a, lev) : strip(ep, d, a, lev));
}
460 |
|
|
|
461 |
|
|
int |
462 |
|
|
ncy(char *ep, char *d, char *a, int lev) |
463 |
|
|
{ |
464 |
|
|
|
465 |
|
|
if (skipv(skipv(ep-1)) < word) |
466 |
|
|
return (0); |
467 |
|
|
ep[-1] = 't'; |
468 |
|
|
return (strip(ep, d, a, lev)); |
469 |
|
|
} |
470 |
|
|
|
471 |
|
|
/*
 * Store an 'l' at ep and let y_to_e() try an 'e' in the position that
 * follows it.  The letter overwritten by the 'l' is not restored.
 */
int
bility(char *ep, char *d, char *a, int lev)
{
	ep[0] = 'l';
	return (y_to_e(ep + 1, d, a, lev));
}
478 |
|
|
|
479 |
|
|
/*
 * If the stem ends in 'i', turn that letter into 'y' and use d as the
 * derivation string; otherwise leave the stem alone and use a.  The
 * stem is then looked up with strip().
 */
int
i_to_y(char *ep, char *d, char *a, int lev)
{
	char *label = a;

	if (ep[-1] == 'i') {
		ep[-1] = 'y';
		label = d;
	}
	return (strip(ep, "", label, lev));
}
489 |
|
|
|
490 |
|
|
int |
491 |
|
|
es(char *ep, char *d, char *a, int lev) |
492 |
|
|
{ |
493 |
|
|
|
494 |
|
|
if (lev > DLEV) |
495 |
|
|
return (0); |
496 |
|
|
|
497 |
|
|
switch (ep[-1]) { |
498 |
|
|
default: |
499 |
|
|
return (0); |
500 |
|
|
case 'i': |
501 |
|
|
return (i_to_y(ep, d, a, lev)); |
502 |
|
|
case 's': |
503 |
|
|
case 'h': |
504 |
|
|
case 'z': |
505 |
|
|
case 'x': |
506 |
|
|
return (strip(ep, d, a, lev)); |
507 |
|
|
} |
508 |
|
|
} |
509 |
|
|
|
510 |
|
|
/*
 * Overwrite the two letters in front of ep with "er" and look the
 * result up with strip().  The original letters are not restored.
 */
int
metry(char *ep, char *d, char *a, int lev)
{
	char *tail = ep - 2;

	tail[0] = 'e';
	tail[1] = 'r';
	return (strip(ep, d, a, lev));
}
518 |
|
|
|
519 |
|
|
/*
 * Dispatch on the letter two places in front of ep: 'c' or 'r' means
 * the stem is looked up with trypref(), 'a' means y_to_e() is tried,
 * and anything else fails.
 */
int
tion(char *ep, char *d, char *a, int lev)
{
	const char c = ep[-2];

	if (c == 'c' || c == 'r')
		return (trypref(ep, a, lev));
	if (c == 'a')
		return (y_to_e(ep, d, a, lev));
	return (0);
}
532 |
|
|
|
533 |
|
|
/*
 * Possible consonant-consonant-e ending.
 *
 * The last two letters of the stem decide whether the stem is tried
 * with an 'e' put back (y_to_e()), rejected, or passed on to VCe().
 */
int
CCe(char *ep, char *d, char *a, int lev)
{
	const char last = ep[-1];
	const char prev = ep[-2];
	int is_scg, is_vz;

	if (last == 'l') {
		/* vowel, l, r or w before the l: leave it to VCe() */
		if (vowel(prev) || prev == 'l' || prev == 'r' || prev == 'w')
			return (VCe(ep, d, a, lev));
		return (y_to_e(ep, d, a, lev));
	}

	is_scg = (last == 's' || last == 'c' || last == 'g');
	is_vz = (last == 'v' || last == 'z');
	if (!is_scg && !is_vz && last != 'u')
		return (VCe(ep, d, a, lev));

	if (last == 's' && prev == 's')
		return (VCe(ep, d, a, lev));
	/* s, c and g are rejected when the removed suffix began with 'a' */
	if (is_scg && *ep == 'a')
		return (0);
	if ((is_scg || is_vz) && vowel(prev))
		return (VCe(ep, d, a, lev));

	if (y_to_e(ep, d, a, lev))
		return (1);
	/*
	 * Read the stem again instead of using last/prev: handlers
	 * reached through y_to_e() can modify the word in place.
	 */
	if (!(ep[-2] == 'n' && ep[-1] == 'g'))
		return (0);
	return (VCe(ep, d, a, lev));
}
572 |
|
|
|
573 |
|
|
/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem that already ends in 'e' fails.  When the stem ends in a
 * consonant preceded by a vowel, it is first tried with an 'e' appended
 * (derivation string d); if that does not work the overwritten letter
 * is restored.  The plain stem is then looked up with strip().
 */
int
VCe(char *ep, char *d, char *a, int lev)
{
	const char last = ep[-1];
	char saved;

	if (last == 'e')
		return (0);

	if (!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if (trypref(ep + 1, d, lev) || suffix(ep + 1, lev))
			return (1);
		ep[0] = saved;
	}
	return (strip(ep, d, a, lev));
}
594 |
|
|
|
595 |
|
|
char * |
596 |
|
|
lookuppref(char **wp, char *ep) |
597 |
|
|
{ |
598 |
|
|
char **sp; |
599 |
|
|
char *bp,*cp; |
600 |
|
|
|
601 |
|
|
for (sp = preftab; *sp; sp++) { |
602 |
|
|
bp = *wp; |
603 |
|
|
for (cp = *sp; *cp; cp++, bp++) { |
604 |
|
|
if (tolower((unsigned char)*bp) != *cp) |
605 |
|
|
goto next; |
606 |
|
|
} |
607 |
|
|
for (cp = bp; cp < ep; cp++) { |
608 |
|
|
if (vowel(*cp)) { |
609 |
|
|
*wp = bp; |
610 |
|
|
return (*sp); |
611 |
|
|
} |
612 |
|
|
} |
613 |
|
|
next: ; |
614 |
|
|
} |
615 |
|
|
return (0); |
616 |
|
|
} |
617 |
|
|
|
618 |
|
|
/* |
619 |
|
|
* If the word is not in the dictionary, try stripping off prefixes |
620 |
|
|
* until the word is found or we run out of prefixes to check. |
621 |
|
|
*/ |
622 |
|
|
int |
623 |
|
|
trypref(char *ep, char *a, int lev) |
624 |
|
|
{ |
625 |
|
|
char *cp; |
626 |
|
|
char *bp; |
627 |
|
|
char *pp; |
628 |
|
|
int val = 0; |
629 |
|
|
char space[20]; |
630 |
|
|
|
631 |
|
|
deriv[lev] = a; |
632 |
|
|
if (tryword(word, ep, lev)) |
633 |
|
|
return (1); |
634 |
|
|
bp = word; |
635 |
|
|
pp = space; |
636 |
|
|
deriv[lev+1] = pp; |
637 |
|
|
while ((cp = lookuppref(&bp, ep))) { |
638 |
|
|
*pp++ = '+'; |
639 |
|
|
while ((*pp = *cp++)) |
640 |
|
|
pp++; |
641 |
|
|
if (tryword(bp, ep, lev+1)) { |
642 |
|
|
val = 1; |
643 |
|
|
break; |
644 |
|
|
} |
645 |
|
|
if (pp - space >= sizeof(space)) |
646 |
|
|
return (0); |
647 |
|
|
} |
648 |
|
|
deriv[lev+1] = deriv[lev+2] = 0; |
649 |
|
|
return (val); |
650 |
|
|
} |
651 |
|
|
|
652 |
|
|
int |
653 |
|
|
tryword(char *bp, char *ep, int lev) |
654 |
|
|
{ |
655 |
|
|
int i, j; |
656 |
|
|
char duple[3]; |
657 |
|
|
|
658 |
|
|
if (ep-bp <= 1) |
659 |
|
|
return (0); |
660 |
|
|
if (vowel(*ep) && monosyl(bp, ep)) |
661 |
|
|
return (0); |
662 |
|
|
|
663 |
|
|
i = dict(bp, ep); |
664 |
|
|
if (i == 0 && vowel(*ep) && ep[-1] == ep[-2] && monosyl(bp, ep-1)) { |
665 |
|
|
ep--; |
666 |
|
|
deriv[++lev] = duple; |
667 |
|
|
duple[0] = '+'; |
668 |
|
|
duple[1] = *ep; |
669 |
|
|
duple[2] = '\0'; |
670 |
|
|
i = dict(bp, ep); |
671 |
|
|
} |
672 |
|
|
if (vflag == 0 || i == 0) |
673 |
|
|
return (i); |
674 |
|
|
|
675 |
|
|
/* Also tack on possible derivations. (XXX - warn on truncation?) */ |
676 |
|
|
for (j = lev; j > 0; j--) { |
677 |
|
|
if (deriv[j]) |
678 |
|
|
strlcat(affix, deriv[j], sizeof(affix)); |
679 |
|
|
} |
680 |
|
|
return (i); |
681 |
|
|
} |
682 |
|
|
|
683 |
|
|
/*
 * Return 1 if bp..ep is at least two letters long, ends in a vowel
 * followed by a consonant other than 'x' or 'w', and has no vowel in
 * front of that pair; return 0 otherwise.
 */
int
monosyl(char *bp, char *ep)
{
	char *p;

	if (ep - bp < 2)
		return (0);
	if (vowel(ep[-1]) || !vowel(ep[-2]))
		return (0);
	if (ep[-1] == 'x' || ep[-1] == 'w')
		return (0);

	for (p = bp; p < ep - 2; p++) {
		if (vowel(*p))
			return (0);
	}
	return (1);
}
696 |
|
|
|
697 |
|
|
char * |
698 |
|
|
skipv(char *s) |
699 |
|
|
{ |
700 |
|
|
|
701 |
|
|
if (s >= word && vowel(*s)) |
702 |
|
|
s--; |
703 |
|
|
while (s >= word && !vowel(*s)) |
704 |
|
|
s--; |
705 |
|
|
return (s); |
706 |
|
|
} |
707 |
|
|
|
708 |
|
|
/*
 * Return 1 if c, after conversion with tolower(), is one of
 * a, e, i, o, u or y; return 0 otherwise.
 */
int
vowel(unsigned char c)
{
	const int lc = tolower(c);

	return (lc == 'a' || lc == 'e' || lc == 'i' ||
	    lc == 'o' || lc == 'u' || lc == 'y');
}
723 |
|
|
|
724 |
|
|
/* |
725 |
|
|
* Crummy way to Britishise. |
726 |
|
|
*/ |
727 |
|
|
void |
728 |
|
|
ise(void) |
729 |
|
|
{ |
730 |
|
|
struct suftab *tab; |
731 |
|
|
|
732 |
|
|
for (tab = suftab; tab->suf; tab++) { |
733 |
|
|
/* Assume that suffix will contain 'z' if a1 or d1 do */ |
734 |
|
|
if (strchr(tab->suf, 'z')) { |
735 |
|
|
tab->suf = estrdup(tab->suf); |
736 |
|
|
ztos(tab->suf); |
737 |
|
|
if (strchr(tab->d1, 'z')) { |
738 |
|
|
tab->d1 = estrdup(tab->d1); |
739 |
|
|
ztos(tab->d1); |
740 |
|
|
} |
741 |
|
|
if (strchr(tab->a1, 'z')) { |
742 |
|
|
tab->a1 = estrdup(tab->a1); |
743 |
|
|
ztos(tab->a1); |
744 |
|
|
} |
745 |
|
|
} |
746 |
|
|
} |
747 |
|
|
} |
748 |
|
|
|
749 |
|
|
/*
 * Replace every lower-case 'z' in the NUL-terminated string s with 's',
 * in place.
 */
void
ztos(char *s)
{
	while (*s != '\0') {
		if (*s == 'z')
			*s = 's';
		s++;
	}
}
757 |
|
|
|
758 |
|
|
/*
 * strdup() that does not return on failure: if the copy cannot be
 * allocated the program is terminated through err(1, "strdup").
 */
char *
estrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		err(1, "strdup");
	return (copy);
}
767 |
|
|
|
768 |
|
|
/* |
769 |
|
|
* Look up a word in the dictionary. |
770 |
|
|
* Returns 1 if found, 0 if not. |
771 |
|
|
*/ |
772 |
|
|
int |
773 |
|
|
dict(char *bp, char *ep) |
774 |
|
|
{ |
775 |
|
|
char c; |
776 |
|
|
int i, rval; |
777 |
|
|
|
778 |
|
|
c = *ep; |
779 |
|
|
*ep = '\0'; |
780 |
|
|
if (xflag) |
781 |
|
|
printf("=%s\n", bp); |
782 |
|
|
for (i = rval = 0; wlists[i].fd != -1; i++) { |
783 |
|
|
if ((rval = look((unsigned char *)bp, wlists[i].front, |
784 |
|
|
wlists[i].back)) == 1) |
785 |
|
|
break; |
786 |
|
|
} |
787 |
|
|
*ep = c; |
788 |
|
|
return (rval); |
789 |
|
|
} |
790 |
|
|
|
791 |
|
|
/*
 * Print the command synopsis to stderr and exit with status 1.
 */
__dead void
usage(void)
{
	extern char *__progname;

	(void)fprintf(stderr,
	    "usage: %s [-bvx] [-o found-words] word-list ...\n", __progname);
	exit(1);
}