1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 14 * Copyright 2013 David Hoeppner. All rights reserved. 15 */ 16 17 /* 18 * Functions to charmap . 19 */ 20 21 #include <assert.h> 22 23 #include "iconv.h" 24 #include "parser.tab.h" 25 26 /* 27 * Charmap specific. 28 */ 29 int com_char = '#'; 30 int esc_char = '\\'; 31 int mb_cur_max = 1; 32 int mb_cur_min = 1; 33 34 int lineno = 1; 35 static FILE *input = stdin; 36 static const char *filename = "<stdin>"; 37 static int escaped = 0; 38 static int instring = 0; 39 static int nextline; 40 41 /* 42 * Tokens. 43 */ 44 static char *token = NULL; 45 static int tokidx; 46 static int toksz = 0; 47 static int hadtok = 0; 48 49 /* 50 * Wide strings. 51 */ 52 static wchar_t *widestr = NULL; 53 static int wideidx = 0; 54 static int widesz = 0; 55 56 /* 57 * Keywords related. 58 */ 59 static int category = T_END; 60 61 static struct token { 62 int id; 63 const char *name; 64 } keywords[] = { 65 { T_COM_CHAR, "comment_char" }, 66 { -1, NULL }, 67 }; 68 69 /* 70 * Charmap reserved keywords. 71 */ 72 static struct token symwords[] = { 73 { T_COM_CHAR, "comment_char" }, 74 { -1, NULL }, 75 }; 76 77 /* 78 * Reset the scanner variables and open the supplied charmap file. 79 */ 80 void 81 reset_scanner(const char *fname) 82 { 83 input = fopen(fname, "r"); 84 if (input == NULL) { 85 perror("fopen"); 86 exit(4); 87 } 88 89 filename = fname; 90 com_char = '#'; 91 esc_char = '\\'; 92 instring = 0; 93 escaped = 0; 94 lineno = 1; 95 nextline = 1; 96 tokidx = 0; 97 wideidx = 0; 98 } 99 100 static int 101 scanc(void) 102 { 103 int c; 104 105 c = getc(input); 106 lineno = nextline; 107 if (c == '\n') { 108 nextline++; 109 } 110 111 return (c); 112 } 113 114 static void 115 unscanc(int c) 116 { 117 if (c == '\n') { 118 nextline--; 119 } 120 121 if (ungetc(c, input) < 0) { 122 yyerror(_("ungetc failed")); 123 } 124 } 125 126 void 127 add_tok(int c) 128 { 129 if ((tokidx + 1) >= toksz) { 130 toksz += 64; 131 132 if ((token = realloc(token, toksz)) == NULL) { 133 yyerror(_("out of memory")); 134 tokidx = 0; 135 toksz = 0; 136 return; 137 } 138 } 139 140 token[tokidx++] = (char)c; 141 token[tokidx] = 0; 142 } 143 144 int 145 get_escaped(int c) 146 { 147 switch (c) { 148 case 'n': 149 return ('\n'); 150 case 'r': 151 return ('\r'); 152 case 't': 153 return ('\t'); 154 case 'f': 155 return ('\f'); 156 case 'v': 157 return ('\v'); 158 case 'b': 159 return ('\b'); 160 case 'a': 161 return ('\a'); 162 default: 163 return (c); 164 } 165 } 166 167 int 168 get_symbol(void) 169 { 170 int c; 171 172 while ((c = scanc()) != EOF) { 173 if (escaped == 1) { 174 escaped = 0; 175 if (c == '\n') { 176 continue; 177 } 178 179 add_tok(get_escaped(c)); 180 continue; 181 } 182 183 if (c == esc_char) { 184 escaped = 1; 185 continue; 186 } 187 188 if (c == '\n') { /* Well that's strange! */ 189 yyerror(_("unterminated symbolic name")); 190 continue; 191 } 192 193 if (c == '>') { /* End of symbol */ 194 /* 195 * This restarts the token from the beginning 196 * the next time we scan a character. (This 197 * token is complete.) 198 */ 199 if (token == NULL) { 200 yyerror(_("missing symbolic name")); 201 return (T_NULL); 202 } 203 204 tokidx = 0; 205 206 /* XXX */ 207 if (category == T_END) { 208 209 } 210 211 /* XXX */ 212 213 /* Its an undefined symbol */ 214 yylval.token = strdup(token); 215 token = NULL; 216 toksz = 0; 217 tokidx = 0; 218 219 return (T_SYMBOL); 220 } 221 222 add_tok(c); 223 } 224 225 yyerror(_("unterminated symbolic name")); 226 227 return (EOF); 228 } 229 230 static int 231 consume_token(void) 232 { 233 /* XXX */ 234 235 printf("XXX consume_token XXX\n"); 236 237 return (T_NAME); 238 } 239 240 void 241 scan_to_eol(void) 242 { 243 int c; 244 245 while ((c = scanc()) != '\n') { 246 if (c == EOF) { 247 /* end of file without newline! */ 248 errf(_("missing newline")); 249 return; 250 } 251 } 252 253 assert(c == '\n'); 254 } 255 256 int 257 yylex(void) 258 { 259 int c; 260 printf("yylex\n"); 261 while ((c = scanc()) != EOF) { 262 263 /* If it is the escape character itself note it */ 264 if (c == esc_char) { 265 escaped = 1; 266 continue; 267 } 268 269 /* Remove from the comment character to end of line */ 270 if (c == com_char) { 271 while (c != '\n') { 272 if ((c = scanc()) == EOF) { 273 /* End of file without newline */ 274 return (EOF); 275 } 276 } 277 278 assert(c == '\n'); 279 280 printf("NEWLINE\n"); 281 if (hadtok == 0) { 282 /* 283 * If there were no tokens on this line, 284 * then just pretend it didn't exist at all. 285 */ 286 continue; 287 } 288 289 hadtok = 0; 290 return (T_NL); 291 } 292 293 if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) { 294 /* 295 * These are all token delimiters. If there 296 * is a token already in progress, we need to 297 * process it. 298 */ 299 unscanc(c); 300 return (consume_token()); 301 } 302 303 switch (c) { 304 case '\n': 305 return (T_NL); 306 case '>': 307 hadtok = 1; 308 return (T_GT); 309 case '<': 310 /* Symbol start! */ 311 hadtok = 1; 312 return (get_symbol()); 313 case ' ': 314 case '\t': 315 /* Whitespace, just ignore */ 316 continue; 317 case '"': 318 hadtok = 1; 319 instring = 1; 320 return (T_QUOTE); 321 default: 322 hadtok = 1; 323 add_tok(c); 324 continue; 325 } 326 } 327 328 return (EOF); 329 } 330 331 void 332 yyerror(const char *msg) 333 { 334 (void) fprintf(stderr, _("%s: %d: error: %s\n"), 335 filename, lineno, msg); 336 exit(4); 337 } 338 339 void 340 errf(const char *fmt, ...) 341 { 342 /* XXX */ 343 344 exit(4); 345 }