1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
14 * Copyright 2013 David Hoeppner. All rights reserved.
15 */
16
17 /*
 * Functions to scan (tokenize) charmap files.
19 */
20
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>

#include "iconv.h"
#include "parser.tab.h"
25
/*
 * Charmap-wide settings.  None of these are static, so they are visible
 * to other translation units.
 */
int com_char = '#';	/* starts a comment that runs to end of line */
int esc_char = '\\';	/* makes the scanner treat the next char specially */
int mb_cur_min = 1;	/* presumably min bytes per character -- confirm */
int mb_cur_max = 1;	/* presumably max bytes per character -- confirm */
33
34 int lineno = 1;
35 static FILE *input = stdin;
36 static const char *filename = "<stdin>";
37 static int escaped = 0;
38 static int instring = 0;
39 static int nextline;
40
/*
 * Token accumulation state.
 */
static char *token = NULL;	/* heap buffer grown by add_tok() */
static int tokidx = 0;		/* bytes currently stored in token */
static int toksz = 0;		/* bytes allocated for token */
static int hadtok = 0;		/* non-zero once the current line had a token */
48
/*
 * Wide string accumulation state.  Only wideidx is touched by the code
 * visible in this file (reset_scanner() zeroes it).
 */
static wchar_t	*widestr = NULL;
static int	wideidx = 0;
static int	widesz = 0;
55
56 /*
57 * Keywords related.
58 */
59 static int category = T_END;
60
61 static struct token {
62 int id;
63 const char *name;
64 } keywords[] = {
65 { T_COM_CHAR, "comment_char" },
66 { -1, NULL },
67 };
68
69 /*
70 * Charmap reserved keywords.
71 */
72 static struct token symwords[] = {
73 { T_COM_CHAR, "comment_char" },
74 { -1, NULL },
75 };
76
77 /*
78 * Reset the scanner variables and open the supplied charmap file.
79 */
80 void
81 reset_scanner(const char *fname)
82 {
83 input = fopen(fname, "r");
84 if (input == NULL) {
85 perror("fopen");
86 exit(4);
87 }
88
89 filename = fname;
90 com_char = '#';
91 esc_char = '\\';
92 instring = 0;
93 escaped = 0;
94 lineno = 1;
95 nextline = 1;
96 tokidx = 0;
97 wideidx = 0;
98 }
99
100 static int
101 scanc(void)
102 {
103 int c;
104
105 c = getc(input);
106 lineno = nextline;
107 if (c == '\n') {
108 nextline++;
109 }
110
111 return (c);
112 }
113
114 static void
115 unscanc(int c)
116 {
117 if (c == '\n') {
118 nextline--;
119 }
120
121 if (ungetc(c, input) < 0) {
122 yyerror(_("ungetc failed"));
123 }
124 }
125
126 void
127 add_tok(int c)
128 {
129 if ((tokidx + 1) >= toksz) {
130 toksz += 64;
131
132 if ((token = realloc(token, toksz)) == NULL) {
133 yyerror(_("out of memory"));
134 tokidx = 0;
135 toksz = 0;
136 return;
137 }
138 }
139
140 token[tokidx++] = (char)c;
141 token[tokidx] = 0;
142 }
143
/*
 * Translate the character that follows the escape character.
 *
 * The letters n, r, t, f, v, b and a map to the matching C control
 * character; every other value is returned unchanged.
 */
int
get_escaped(int c)
{
	static const char letters[] = "nrtfvba";
	static const char values[] = "\n\r\t\f\v\b\a";
	int i;

	for (i = 0; letters[i] != '\0'; i++) {
		if (c == letters[i]) {
			return (values[i]);
		}
	}

	return (c);
}
166
167 int
168 get_symbol(void)
169 {
170 int c;
171
172 while ((c = scanc()) != EOF) {
173 if (escaped == 1) {
174 escaped = 0;
175 if (c == '\n') {
176 continue;
177 }
178
179 add_tok(get_escaped(c));
180 continue;
181 }
182
183 if (c == esc_char) {
184 escaped = 1;
185 continue;
186 }
187
188 if (c == '\n') { /* Well that's strange! */
189 yyerror(_("unterminated symbolic name"));
190 continue;
191 }
192
193 if (c == '>') { /* End of symbol */
194 /*
195 * This restarts the token from the beginning
196 * the next time we scan a character. (This
197 * token is complete.)
198 */
199 if (token == NULL) {
200 yyerror(_("missing symbolic name"));
201 return (T_NULL);
202 }
203
204 tokidx = 0;
205
206 /* XXX */
207 if (category == T_END) {
208
209 }
210
211 /* XXX */
212
213 /* Its an undefined symbol */
214 yylval.token = strdup(token);
215 token = NULL;
216 toksz = 0;
217 tokidx = 0;
218
219 return (T_SYMBOL);
220 }
221
222 add_tok(c);
223 }
224
225 yyerror(_("unterminated symbolic name"));
226
227 return (EOF);
228 }
229
230 static int
231 consume_token(void)
232 {
233 /* XXX */
234
235 printf("XXX consume_token XXX\n");
236
237 return (T_NAME);
238 }
239
/*
 * Discard input up to and including the next newline.  Reaching end of
 * file first is reported through errf().
 */
void
scan_to_eol(void)
{
	int ch;

	for (;;) {
		ch = scanc();
		if (ch == '\n') {
			break;
		}

		if (ch == EOF) {
			/* end of file without newline! */
			errf(_("missing newline"));
			return;
		}
	}

	assert(ch == '\n');
}
255
256 int
257 yylex(void)
258 {
259 int c;
260 printf("yylex\n");
261 while ((c = scanc()) != EOF) {
262
263 /* If it is the escape character itself note it */
264 if (c == esc_char) {
265 escaped = 1;
266 continue;
267 }
268
269 /* Remove from the comment character to end of line */
270 if (c == com_char) {
271 while (c != '\n') {
272 if ((c = scanc()) == EOF) {
273 /* End of file without newline */
274 return (EOF);
275 }
276 }
277
278 assert(c == '\n');
279
280 printf("NEWLINE\n");
281 if (hadtok == 0) {
282 /*
283 * If there were no tokens on this line,
284 * then just pretend it didn't exist at all.
285 */
286 continue;
287 }
288
289 hadtok = 0;
290 return (T_NL);
291 }
292
293 if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
294 /*
295 * These are all token delimiters. If there
296 * is a token already in progress, we need to
297 * process it.
298 */
299 unscanc(c);
300 return (consume_token());
301 }
302
303 switch (c) {
304 case '\n':
305 return (T_NL);
306 case '>':
307 hadtok = 1;
308 return (T_GT);
309 case '<':
310 /* Symbol start! */
311 hadtok = 1;
312 return (get_symbol());
313 case ' ':
314 case '\t':
315 /* Whitespace, just ignore */
316 continue;
317 case '"':
318 hadtok = 1;
319 instring = 1;
320 return (T_QUOTE);
321 default:
322 hadtok = 1;
323 add_tok(c);
324 continue;
325 }
326 }
327
328 return (EOF);
329 }
330
331 void
332 yyerror(const char *msg)
333 {
334 (void) fprintf(stderr, _("%s: %d: error: %s\n"),
335 filename, lineno, msg);
336 exit(4);
337 }
338
/*
 * Report a fatal error described by a printf-style format and exit.
 *
 * fmt and the arguments after it are formatted as by printf() and
 * written to stderr, followed by a newline, so the user can see why the
 * program stopped.  The process then exits with status 4.  Does not
 * return.
 */
void
errf(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	(void) fputc('\n', stderr);

	exit(4);
}