3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #pragma ident "%Z%%M% %I% %E% SMI"
31
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <string.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <locale.h>
39 #include "hash.h"
40
/*
 * Tolower(c) yields the lower-case form of c when c is an upper-case
 * letter and c itself otherwise.  The value is converted to unsigned char
 * before it reaches <ctype.h>, because handing isupper()/tolower() a
 * negative char value is undefined.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	Tolower(c)	(isupper((unsigned char)(c)) ? \
			    tolower((unsigned char)(c)) : (c))

/* amount trysuff() adds to its level argument on entry */
#define	DLEV	2
43
44 /*
45 * ANSI prototypes
46 */
47 static int ily(char *, char *, char *, int);
48 static int s(char *, char *, char *, int);
49 static int es(char *, char *, char *, int);
50 static int subst(char *, char *, char *, int);
51 static int nop(void);
52 static int bility(char *, char *, char *, int);
53 static int i_to_y(char *, char *, char *, int);
54 static int CCe(char *, char *, char *, int);
55 static int y_to_e(char *, char *, char *, int);
56 static int strip(char *, char *, char *, int);
57 static int ize(char *, char *, char *, int);
58 static int tion(char *, char *, char *, int);
59 static int an(char *, char *, char *, int);
60 int prime(char *);
61 static void ise(void);
62 static int tryword(char *, char *, int);
63 static int trypref(char *, char *, int);
64 static int trysuff(char *, int);
65 static int vowel(int);
66 static int dict(char *, char *);
67 static int monosyl(char *, char *);
68 static int VCe(char *, char *, char *, int);
69 static char *skipv(char *);
70 static void ztos(char *);
71
72 static struct suftab {
73 char *suf;
74 int (*p1)();
75 int n1;
76 char *d1;
77 char *a1;
78 int (*p2)();
79 int n2;
80 char *d2;
81 char *a2;
82 } suftab[] = {
83 {"ssen", ily, 4, "-y+iness", "+ness" },
84 {"ssel", ily, 4, "-y+i+less", "+less" },
85 {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
86 {"s'", s, 2, "", "+'s"},
87 {"s", s, 1, "", "+s"},
88 {"ecn", subst, 1, "-t+ce", ""},
89 {"ycn", subst, 1, "-t+cy", ""},
90 {"ytilb", nop, 0, "", ""},
91 {"ytilib", bility, 5, "-le+ility", ""},
92 {"elbaif", i_to_y, 4, "-y+iable", ""},
93 {"elba", CCe, 4, "-e+able", "+able"},
94 {"yti", CCe, 3, "-e+ity", "+ity"},
95 {"ylb", y_to_e, 1, "-e+y", ""},
96 {"yl", ily, 2, "-y+ily", "+ly"},
97 {"laci", strip, 2, "", "+al"},
98 {"latnem", strip, 2, "", "+al"},
99 {"lanoi", strip, 2, "", "+al"},
100 {"tnem", strip, 4, "", "+ment"},
101 {"gni", CCe, 3, "-e+ing", "+ing"},
102 {"reta", nop, 0, "", ""},
112 {"cigol", i_to_y, 1, "-y+ic", ""},
113 {"tsigol", i_to_y, 2, "-y+ist", ""},
114 {"tsi", CCe, 3, "-e+ist", "+ist"},
115 {"msi", CCe, 3, "-e+ism", "+ist"},
116 {"noitacifi", i_to_y, 6, "-y+ication", ""},
117 {"noitazi", ize, 4, "-e+ation", ""},
118 {"rota", tion, 2, "-e+or", ""},
119 {"rotc", tion, 2, "", "+or"},
120 {"noit", tion, 3, "-e+ion", "+ion"},
121 {"naino", an, 3, "", "+ian"},
122 {"na", an, 1, "", "+n"},
123 {"evi", subst, 0, "-ion+ive", ""},
124 {"ezi", CCe, 3, "-e+ize", "+ize"},
125 {"pihs", strip, 4, "", "+ship"},
126 {"dooh", ily, 4, "-y+ihood", "+hood"},
127 {"luf", ily, 3, "-y+iful", "+ful"},
128 {"ekil", strip, 4, "", "+like"},
129 0
130 };
131
132 static char *preftab[] = {
133 "anti",
134 "auto",
135 "bio",
136 "counter",
137 "dis",
138 "electro",
139 "en",
140 "fore",
141 "geo",
142 "hyper",
143 "intra",
144 "inter",
145 "iso",
146 "kilo",
147 "magneto",
148 "meta",
149 "micro",
150 "mid",
151 "milli",
156 "out",
157 "over",
158 "photo",
159 "poly",
160 "pre",
161 "pseudo",
162 "psycho",
163 "re",
164 "semi",
165 "stereo",
166 "sub",
167 "super",
168 "tele",
169 "thermo",
170 "ultra",
171 "under", /* must precede un */
172 "un",
173 0
174 };
175
176 static int vflag;
177 static int xflag;
178 static char *prog;
179 static char word[LINE_MAX];
180 static char original[LINE_MAX];
181 static char *deriv[LINE_MAX];
182 static char affix[LINE_MAX];
183 static FILE *file, *found;
184 /*
185 * deriv is stack of pointers to notes like +micro +ed
186 * affix is concatenated string of notes
187 * the buffers above are sized by LINE_MAX (this note used to cite a fixed size of 141).
188 */
189
190 /*
191 * in an attempt to defray future maintenance misunderstandings, here is
192 * an attempt to describe the input/output expectations of the spell
193 * program.
194 *
195 * spellprog is intended to be called from the shell file spell.
196 * because of this, there is little error checking (this is historical, not
197 * necessarily advisable).
236 main(int argc, char **argv)
237 {
238 char *ep, *cp;
239 char *dp;
240 int fold;
241 int c, j;
242 int pass;
243
244 /* Set locale environment variables local definitions */
245 (void) setlocale(LC_ALL, "");
246 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
247 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
248 #endif
249 (void) textdomain(TEXT_DOMAIN);
250
251
252 prog = argv[0];
253 while ((c = getopt(argc, argv, "bvx")) != EOF) {
254 switch (c) {
255 case 'b':
256 ise();
257 break;
258 case 'v':
259 vflag++;
260 break;
261 case 'x':
262 xflag++;
263 break;
264 }
265 }
266
267 argc -= optind;
268 argv = &argv[optind];
269
270 if ((argc < 2) || !prime(*argv)) {
271 (void) fprintf(stderr,
272 gettext("%s: cannot initialize hash table\n"), prog);
273 exit(1);
274 }
275 argc--;
276 argv++;
277
278 /*
279 * if pass is not 1, it is assumed to be a filename.
280 * found words are written to this file.
281 */
282 pass = **argv;
283 if (pass != '1')
284 found = fopen(*argv, "w");
285
286 for (;;) {
287 affix[0] = 0;
288 file = stdout;
289 for (ep = word; (*ep = j = getchar()) != '\n'; ep++)
290 if (j == EOF)
291 exit(0);
292 /*
293 * here is the hyphen processing. these words were found in the stop
294 * list. however, if they exist as is, (no derivations tried) in the
295 * dictionary, let them through as correct.
296 *
297 */
338 (void) fprintf(file, "%s\t%s\n", affix,
339 original);
340 }
341 }
342 }
343
344 /*
345 * strip exactly one suffix and do
346 * indicated routine(s), which may recursively
347 * strip suffixes
348 */
349
350 static int
351 trysuff(char *ep, int lev)
352 {
353 struct suftab *t;
354 char *cp, *sp;
355
356 lev += DLEV;
357 deriv[lev] = deriv[lev-1] = 0;
358 for (t = &suftab[0]; (sp = t->suf) != 0; t++) {
359 cp = ep;
360 while (*sp)
361 if (*--cp != *sp++)
362 goto next;
363 for (sp = cp; --sp >= word && !vowel(*sp); );
364 if (sp < word)
365 return (0);
366 if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
367 return (1);
368 if (t->p2 != 0) {
369 deriv[lev] = deriv[lev+1] = 0;
370 return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
371 }
372 return (0);
373 next:;
374 }
375 return (0);
376 }
377
/*
 * Suffix routine that never accepts.  Table entries such as "ytilb" and
 * "reta" name it as their only routine, so when one of them matches,
 * trysuff() returns 0 without going on to later table entries.
 */
static int
nop(void)
{
	return 0;
}
383
686 while (s >= word && !vowel(*s))
687 s--;
688 return (s);
689 }
690
/*
 * Report whether c counts as a vowel: a, e, i, o, u or y, in either case.
 *
 * Callers hand in plain char values, which may be negative; c is therefore
 * converted to unsigned char before it is passed to tolower(), since a
 * negative argument other than EOF is undefined for the <ctype.h> functions.
 *
 * Returns 1 for a vowel and 0 for anything else.
 */
static int
vowel(int c)
{
	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return (1);
	default:
		return (0);
	}
}
705
706 /* crummy way to Britishise */
707 static void
708 ise(void)
709 {
710 struct suftab *p;
711
712 for (p = suftab; p->suf; p++) {
713 ztos(p->suf);
714 ztos(p->d1);
715 ztos(p->a1);
716 }
717 }
718
/*
 * Overwrite each lower-case 'z' in the NUL-terminated string s with 's'.
 * The string is modified in place; every other character is left alone.
 */
static void
ztos(char *s)
{
	while (*s != '\0') {
		if (*s == 'z')
			*s = 's';
		s++;
	}
}
726
727 static int
728 dict(char *bp, char *ep)
729 {
730 int temp, result;
731 if (xflag)
732 (void) fprintf(stdout, "=%.*s\n", ep-bp, bp);
733 temp = *ep;
734 *ep = 0;
735 result = hashlook(bp);
736 *ep = temp;
737 return (result);
738 }
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2015 Gary Mills
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <limits.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <ctype.h>
37 #include <locale.h>
38 #include "hash.h"
39
/*
 * Tolower(c) yields the lower-case form of c when c is an upper-case
 * letter and c itself otherwise.  The value is converted to unsigned char
 * before it reaches <ctype.h>, because handing isupper()/tolower() a
 * negative char value is undefined.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define	Tolower(c)	(isupper((unsigned char)(c)) ? \
			    tolower((unsigned char)(c)) : (c))

/* amount trysuff() adds to its level argument on entry */
#define	DLEV	2
42
43 /*
44 * ANSI prototypes
45 */
46 static int ily(char *, char *, char *, int);
47 static int s(char *, char *, char *, int);
48 static int es(char *, char *, char *, int);
49 static int subst(char *, char *, char *, int);
50 static int nop(void);
51 static int bility(char *, char *, char *, int);
52 static int i_to_y(char *, char *, char *, int);
53 static int CCe(char *, char *, char *, int);
54 static int y_to_e(char *, char *, char *, int);
55 static int strip(char *, char *, char *, int);
56 static int ize(char *, char *, char *, int);
57 static int tion(char *, char *, char *, int);
58 static int an(char *, char *, char *, int);
59 int prime(char *);
60 static int tryword(char *, char *, int);
61 static int trypref(char *, char *, int);
62 static int trysuff(char *, int);
63 static int vowel(int);
64 static int dict(char *, char *);
65 static int monosyl(char *, char *);
66 static int VCe(char *, char *, char *, int);
67 static char *skipv(char *);
68
/*
 * One suffix rule.  trysuff() compares suf against the end of the word
 * and, on a match, calls p1 and then (if p1 fails and p2 is set) p2.
 */
struct suftab {
	char *suf;	/* the suffix, spelled backwards ("ssen" = -ness) */
	int (*p1)();	/* first routine to try */
	int n1;		/* p1 is handed ep - n1 as its first argument */
	char *d1;	/* second argument to p1, e.g. "-y+iness" */
	char *a1;	/* third argument to p1, e.g. "+ness" */
	int (*p2)();	/* optional second routine; 0 if there is none */
	int n2;		/* p2 is handed ep - n2 as its first argument */
	char *d2;	/* second argument to p2 */
	char *a2;	/* third argument to p2 */
};
80
81 static struct suftab sufa[] = {
82 {"ssen", ily, 4, "-y+iness", "+ness" },
83 {"ssel", ily, 4, "-y+i+less", "+less" },
84 {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
85 {"s'", s, 2, "", "+'s"},
86 {"s", s, 1, "", "+s"},
87 {"ecn", subst, 1, "-t+ce", ""},
88 {"ycn", subst, 1, "-t+cy", ""},
89 {"ytilb", nop, 0, "", ""},
90 {"ytilib", bility, 5, "-le+ility", ""},
91 {"elbaif", i_to_y, 4, "-y+iable", ""},
92 {"elba", CCe, 4, "-e+able", "+able"},
93 {"yti", CCe, 3, "-e+ity", "+ity"},
94 {"ylb", y_to_e, 1, "-e+y", ""},
95 {"yl", ily, 2, "-y+ily", "+ly"},
96 {"laci", strip, 2, "", "+al"},
97 {"latnem", strip, 2, "", "+al"},
98 {"lanoi", strip, 2, "", "+al"},
99 {"tnem", strip, 4, "", "+ment"},
100 {"gni", CCe, 3, "-e+ing", "+ing"},
101 {"reta", nop, 0, "", ""},
111 {"cigol", i_to_y, 1, "-y+ic", ""},
112 {"tsigol", i_to_y, 2, "-y+ist", ""},
113 {"tsi", CCe, 3, "-e+ist", "+ist"},
114 {"msi", CCe, 3, "-e+ism", "+ist"},
115 {"noitacifi", i_to_y, 6, "-y+ication", ""},
116 {"noitazi", ize, 4, "-e+ation", ""},
117 {"rota", tion, 2, "-e+or", ""},
118 {"rotc", tion, 2, "", "+or"},
119 {"noit", tion, 3, "-e+ion", "+ion"},
120 {"naino", an, 3, "", "+ian"},
121 {"na", an, 1, "", "+n"},
122 {"evi", subst, 0, "-ion+ive", ""},
123 {"ezi", CCe, 3, "-e+ize", "+ize"},
124 {"pihs", strip, 4, "", "+ship"},
125 {"dooh", ily, 4, "-y+ihood", "+hood"},
126 {"luf", ily, 3, "-y+iful", "+ful"},
127 {"ekil", strip, 4, "", "+like"},
128 0
129 };
130
131 static struct suftab sufb[] = {
132 {"ssen", ily, 4, "-y+iness", "+ness" },
133 {"ssel", ily, 4, "-y+i+less", "+less" },
134 {"se", s, 1, "", "+s", es, 2, "-y+ies", "+es" },
135 {"s'", s, 2, "", "+'s"},
136 {"s", s, 1, "", "+s"},
137 {"ecn", subst, 1, "-t+ce", ""},
138 {"ycn", subst, 1, "-t+cy", ""},
139 {"ytilb", nop, 0, "", ""},
140 {"ytilib", bility, 5, "-le+ility", ""},
141 {"elbaif", i_to_y, 4, "-y+iable", ""},
142 {"elba", CCe, 4, "-e+able", "+able"},
143 {"yti", CCe, 3, "-e+ity", "+ity"},
144 {"ylb", y_to_e, 1, "-e+y", ""},
145 {"yl", ily, 2, "-y+ily", "+ly"},
146 {"laci", strip, 2, "", "+al"},
147 {"latnem", strip, 2, "", "+al"},
148 {"lanoi", strip, 2, "", "+al"},
149 {"tnem", strip, 4, "", "+ment"},
150 {"gni", CCe, 3, "-e+ing", "+ing"},
151 {"reta", nop, 0, "", ""},
152 {"retc", nop, 0, "", ""},
153 {"re", strip, 1, "", "+r", i_to_y, 2, "-y+ier", "+er"},
154 {"de", strip, 1, "", "+d", i_to_y, 2, "-y+ied", "+ed"},
155 {"citsi", strip, 2, "", "+ic"},
156 {"citi", ize, 1, "-ic+e", ""},
157 {"cihparg", i_to_y, 1, "-y+ic", ""},
158 {"tse", strip, 2, "", "+st", i_to_y, 3, "-y+iest", "+est"},
159 {"cirtem", i_to_y, 1, "-y+ic", ""},
160 {"yrtem", subst, 0, "-er+ry", ""},
161 {"cigol", i_to_y, 1, "-y+ic", ""},
162 {"tsigol", i_to_y, 2, "-y+ist", ""},
163 {"tsi", CCe, 3, "-e+ist", "+ist"},
164 {"msi", CCe, 3, "-e+ism", "+ist"},
165 {"noitacifi", i_to_y, 6, "-y+ication", ""},
166 {"noitasi", ize, 4, "-e+ation", ""},
167 {"rota", tion, 2, "-e+or", ""},
168 {"rotc", tion, 2, "", "+or"},
169 {"noit", tion, 3, "-e+ion", "+ion"},
170 {"naino", an, 3, "", "+ian"},
171 {"na", an, 1, "", "+n"},
172 {"evi", subst, 0, "-ion+ive", ""},
173 {"esi", CCe, 3, "-e+ise", "+ise"},
174 {"pihs", strip, 4, "", "+ship"},
175 {"dooh", ily, 4, "-y+ihood", "+hood"},
176 {"luf", ily, 3, "-y+iful", "+ful"},
177 {"ekil", strip, 4, "", "+like"},
178 0
179 };
180
181 static char *preftab[] = {
182 "anti",
183 "auto",
184 "bio",
185 "counter",
186 "dis",
187 "electro",
188 "en",
189 "fore",
190 "geo",
191 "hyper",
192 "intra",
193 "inter",
194 "iso",
195 "kilo",
196 "magneto",
197 "meta",
198 "micro",
199 "mid",
200 "milli",
205 "out",
206 "over",
207 "photo",
208 "poly",
209 "pre",
210 "pseudo",
211 "psycho",
212 "re",
213 "semi",
214 "stereo",
215 "sub",
216 "super",
217 "tele",
218 "thermo",
219 "ultra",
220 "under", /* must precede un */
221 "un",
222 0
223 };
224
225 static int bflag;
226 static int vflag;
227 static int xflag;
228 static struct suftab *suftab;
229 static char *prog;
230 static char word[LINE_MAX];
231 static char original[LINE_MAX];
232 static char *deriv[LINE_MAX];
233 static char affix[LINE_MAX];
234 static FILE *file, *found;
235 /*
236 * deriv is stack of pointers to notes like +micro +ed
237 * affix is concatenated string of notes
238 * the buffers above are sized by LINE_MAX (this note used to cite a fixed size of 141).
239 */
240
241 /*
242 * in an attempt to defray future maintenance misunderstandings, here is
243 * an attempt to describe the input/output expectations of the spell
244 * program.
245 *
246 * spellprog is intended to be called from the shell file spell.
247 * because of this, there is little error checking (this is historical, not
248 * necessarily advisable).
287 main(int argc, char **argv)
288 {
289 char *ep, *cp;
290 char *dp;
291 int fold;
292 int c, j;
293 int pass;
294
295 /* Set locale environment variables local definitions */
296 (void) setlocale(LC_ALL, "");
297 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
298 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
299 #endif
300 (void) textdomain(TEXT_DOMAIN);
301
302
303 prog = argv[0];
304 while ((c = getopt(argc, argv, "bvx")) != EOF) {
305 switch (c) {
306 case 'b':
307 bflag++;
308 break;
309 case 'v':
310 vflag++;
311 break;
312 case 'x':
313 xflag++;
314 break;
315 }
316 }
317
318 argc -= optind;
319 argv = &argv[optind];
320
321 if ((argc < 2) || !prime(*argv)) {
322 (void) fprintf(stderr,
323 gettext("%s: cannot initialize hash table\n"), prog);
324 exit(1);
325 }
326 argc--;
327 argv++;
328
329 /* Select the correct suffix table */
330 suftab = (bflag == 0) ? sufa : sufb;
331
332 /*
333 * if pass is not 1, it is assumed to be a filename.
334 * found words are written to this file.
335 */
336 pass = **argv;
337 if (pass != '1')
338 found = fopen(*argv, "w");
339
340 for (;;) {
341 affix[0] = 0;
342 file = stdout;
343 for (ep = word; (*ep = j = getchar()) != '\n'; ep++)
344 if (j == EOF)
345 exit(0);
346 /*
347 * here is the hyphen processing. these words were found in the stop
348 * list. however, if they exist as is, (no derivations tried) in the
349 * dictionary, let them through as correct.
350 *
351 */
392 (void) fprintf(file, "%s\t%s\n", affix,
393 original);
394 }
395 }
396 }
397
398 /*
399 * strip exactly one suffix and do
400 * indicated routine(s), which may recursively
401 * strip suffixes
402 */
403
404 static int
405 trysuff(char *ep, int lev)
406 {
407 struct suftab *t;
408 char *cp, *sp;
409
410 lev += DLEV;
411 deriv[lev] = deriv[lev-1] = 0;
412 for (t = &suftab[0]; (t != 0 && (sp = t->suf) != 0); t++) {
413 cp = ep;
414 while (*sp)
415 if (*--cp != *sp++)
416 goto next;
417 for (sp = cp; --sp >= word && !vowel(*sp); )
418 ;
419 if (sp < word)
420 return (0);
421 if ((*t->p1)(ep-t->n1, t->d1, t->a1, lev+1))
422 return (1);
423 if (t->p2 != 0) {
424 deriv[lev] = deriv[lev+1] = 0;
425 return ((*t->p2)(ep-t->n2, t->d2, t->a2, lev));
426 }
427 return (0);
428 next:;
429 }
430 return (0);
431 }
432
/*
 * Suffix routine that never accepts.  Table entries such as "ytilb",
 * "reta" and "retc" name it as their only routine, so when one of them
 * matches, trysuff() returns 0 without going on to later table entries.
 */
static int
nop(void)
{
	return 0;
}
438
741 while (s >= word && !vowel(*s))
742 s--;
743 return (s);
744 }
745
/*
 * Report whether c counts as a vowel: a, e, i, o, u or y, in either case.
 *
 * Callers hand in plain char values, which may be negative; c is therefore
 * converted to unsigned char before it is passed to tolower(), since a
 * negative argument other than EOF is undefined for the <ctype.h> functions.
 *
 * Returns 1 for a vowel and 0 for anything else.
 */
static int
vowel(int c)
{
	switch (tolower((unsigned char)c)) {
	case 'a':
	case 'e':
	case 'i':
	case 'o':
	case 'u':
	case 'y':
		return (1);
	default:
		return (0);
	}
}
760
761 static int
762 dict(char *bp, char *ep)
763 {
764 int temp, result;
765 if (xflag)
766 (void) fprintf(stdout, "=%.*s\n", ep-bp, bp);
767 temp = *ep;
768 *ep = 0;
769 result = hashlook(bp);
770 *ep = temp;
771 return (result);
772 }
|