1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
  14  * Copyright 2013 David Hoeppner.  All rights reserved.
  15  */
  16 
/*
 * Scanner (lexical analysis) functions for charmap files.
 */
  20 
#include <assert.h>
#include <stdarg.h>
#include <stdio.h>

#include "iconv.h"
#include "parser.tab.h"
  25 
  26 /*
  27  * Charmap specific.
  28  */
  29 int             com_char = '#';
  30 int             esc_char = '\\';
  31 int             mb_cur_max = 1;
  32 int             mb_cur_min = 1;
  33 
  34 int             lineno = 1;
  35 static FILE     *input = stdin;
  36 static const char *filename = "<stdin>";
  37 static int      escaped = 0;
  38 static int      instring = 0;
  39 static int      nextline;
  40 
  41 /*
  42  * Tokens.
  43  */
  44 static char     *token = NULL;
  45 static int      tokidx;
  46 static int      toksz = 0;
  47 static int      hadtok = 0;
  48 
  49 /*
  50  * Wide strings.
  51  */
  52 static wchar_t  *widestr = NULL;
  53 static int      wideidx = 0;
  54 static int      widesz = 0;
  55 
  56 /*
  57  * Keywords related.
  58  */
  59 static int      category = T_END;
  60 
  61 static struct token {
  62         int     id;
  63         const char *name;
  64 } keywords[] = {
  65         { T_COM_CHAR,           "comment_char" },
  66         { -1, NULL },
  67 };
  68 
  69 /*
  70  * Charmap reserved keywords.
  71  */
  72 static struct token symwords[] = {
  73         { T_COM_CHAR,           "comment_char" },
  74         { -1, NULL },
  75 };
  76 
  77 /*
  78  * Reset the scanner variables and open the supplied charmap file.
  79  */
  80 void
  81 reset_scanner(const char *fname)
  82 {
  83         input = fopen(fname, "r");
  84         if (input == NULL) {
  85                 perror("fopen");
  86                 exit(4);
  87         }
  88 
  89         filename = fname;
  90         com_char = '#';
  91         esc_char = '\\';
  92         instring = 0;
  93         escaped = 0;
  94         lineno = 1;
  95         nextline = 1;
  96         tokidx = 0;
  97         wideidx = 0;
  98 }
  99 
 100 static int
 101 scanc(void)
 102 {
 103         int     c;
 104 
 105         c = getc(input);
 106         lineno = nextline;
 107         if (c == '\n') {
 108                 nextline++;
 109         }
 110 
 111         return (c);
 112 }
 113 
 114 static void
 115 unscanc(int c)
 116 {
 117         if (c == '\n') {
 118                 nextline--;
 119         }
 120 
 121         if (ungetc(c, input) < 0) {
 122                 yyerror(_("ungetc failed"));
 123         }
 124 }
 125 
 126 void
 127 add_tok(int c)
 128 {
 129         if ((tokidx + 1) >= toksz) {
 130                 toksz += 64;
 131 
 132                 if ((token = realloc(token, toksz)) == NULL) {
 133                         yyerror(_("out of memory"));
 134                         tokidx = 0;
 135                         toksz = 0;
 136                         return;
 137                 }
 138         }
 139 
 140         token[tokidx++] = (char)c;
 141         token[tokidx] = 0;
 142 }
 143 
 144 int
 145 get_escaped(int c)
 146 {
 147         switch (c) {
 148         case 'n':
 149                 return ('\n');
 150         case 'r':
 151                 return ('\r');
 152         case 't':
 153                 return ('\t');
 154         case 'f':
 155                 return ('\f');
 156         case 'v':
 157                 return ('\v');
 158         case 'b':
 159                 return ('\b');
 160         case 'a':
 161                 return ('\a');
 162         default:
 163                 return (c);
 164         }
 165 }
 166 
 167 int
 168 get_symbol(void)
 169 {
 170         int     c;
 171 
 172         while ((c = scanc()) != EOF) {
 173                 if (escaped == 1) {
 174                         escaped = 0;
 175                         if (c == '\n') {
 176                                 continue;
 177                         }
 178 
 179                         add_tok(get_escaped(c));
 180                         continue;
 181                 }
 182 
 183                 if (c == esc_char) {
 184                         escaped = 1;
 185                         continue;
 186                 }
 187 
 188                 if (c == '\n') {        /* Well that's strange! */
 189                         yyerror(_("unterminated symbolic name"));
 190                         continue;
 191                 }
 192 
 193                 if (c == '>') {              /* End of symbol */
 194                         /*
 195                          * This restarts the token from the beginning
 196                          * the next time we scan a character.  (This
 197                          * token is complete.)
 198                          */
 199                         if (token == NULL) {
 200                                 yyerror(_("missing symbolic name"));
 201                                 return (T_NULL);
 202                         }
 203 
 204                         tokidx = 0;
 205 
 206                         /* XXX */
 207                         if (category == T_END) {
 208 
 209                         }
 210 
 211                         /* XXX */
 212 
 213                         /* Its an undefined symbol */
 214                         yylval.token = strdup(token);
 215                         token = NULL;
 216                         toksz = 0;
 217                         tokidx = 0;
 218 
 219                         return (T_SYMBOL);
 220                 }
 221 
 222                 add_tok(c);
 223         }
 224 
 225         yyerror(_("unterminated symbolic name"));
 226 
 227         return (EOF);
 228 }
 229 
 230 static int
 231 consume_token(void)
 232 {
 233         /* XXX */
 234 
 235         printf("XXX consume_token XXX\n");
 236 
 237         return (T_NAME);
 238 }
 239 
 240 void
 241 scan_to_eol(void)
 242 {
 243         int     c;
 244 
 245         while ((c = scanc()) != '\n') {
 246                 if (c == EOF) {
 247                         /* end of file without newline! */
 248                         errf(_("missing newline"));
 249                         return;
 250                 }
 251         }
 252 
 253         assert(c == '\n');
 254 }
 255 
 256 int
 257 yylex(void)
 258 {
 259         int     c;
 260 printf("yylex\n");
 261         while ((c = scanc()) != EOF) {
 262 
 263                 /* If it is the escape character itself note it */
 264                 if (c == esc_char) {
 265                         escaped = 1;
 266                         continue;
 267                 }
 268 
 269                 /* Remove from the comment character to end of line */
 270                 if (c == com_char) {
 271                         while (c != '\n') {
 272                                 if ((c = scanc()) == EOF) {
 273                                         /* End of file without newline */
 274                                         return (EOF);
 275                                 }
 276                         }
 277 
 278                         assert(c == '\n');
 279 
 280 printf("NEWLINE\n");
 281                         if (hadtok == 0) {
 282                                 /*
 283                                  * If there were no tokens on this line,
 284                                  * then just pretend it didn't exist at all.
 285                                  */
 286                                 continue;
 287                         }
 288 
 289                         hadtok = 0;
 290                         return (T_NL);
 291                 }
 292 
 293                 if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
 294                         /*
 295                          * These are all token delimiters.  If there
 296                          * is a token already in progress, we need to
 297                          * process it.
 298                          */
 299                         unscanc(c);
 300                         return (consume_token());
 301                 }
 302 
 303                 switch (c) {
 304                 case '\n':
 305                         return (T_NL);
 306                 case '>':
 307                         hadtok = 1;
 308                         return (T_GT);
 309                 case '<':
 310                         /* Symbol start! */
 311                         hadtok = 1;
 312                         return (get_symbol());
 313                 case ' ':
 314                 case '\t':
 315                         /* Whitespace, just ignore */
 316                         continue;
 317                 case '"':
 318                         hadtok = 1;
 319                         instring = 1;
 320                         return (T_QUOTE);
 321                 default:
 322                         hadtok = 1;
 323                         add_tok(c);
 324                         continue;
 325                 }
 326         }
 327 
 328         return (EOF);
 329 }
 330 
 331 void
 332 yyerror(const char *msg)
 333 {
 334         (void) fprintf(stderr, _("%s: %d: error: %s\n"),
 335             filename, lineno, msg);
 336         exit(4);
 337 }
 338 
 339 void
 340 errf(const char *fmt, ...)
 341 {
 342         /* XXX */
 343 
 344         exit(4);
 345 }