1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  14  * Copyright 2013 David Hoeppner.  All rights reserved.
  15  */
  16 
  17 /*
  18  * POSIX iconv.
  19  */
  20 
  21 #include <sys/list.h>
  22 
  23 #include <errno.h>
  24 #include <glob.h>
  25 #include <iconv.h>
  26 #include <langinfo.h>
  27 #include <libnvpair.h>
  28 #include <locale.h>
  29 #include <stddef.h>
  30 #include <string.h>
  31 #include <unistd.h>
  32 
  33 #include "iconv.h"
  34 
/* Program name; not referenced in the visible part of this file. */
static const char *g_progname = "iconv";

/*
 * Conversion state shared between main() and do_iconv().
 *
 * g_from_cs / g_to_cs hold the source and target codeset names; both
 * default to "UTF-8" and are overridden by the -f and -t options.  A
 * value containing a '/' is treated by main() as a charmap file path.
 */
static char     *g_from_cs = "UTF-8";
static char     *g_to_cs = "UTF-8";
static iconv_t  g_ich;                  /* iconv(3c) lib handle */
/* Incremented by do_iconv() each time iconv() reports a failure. */
static int      g_errcnt;
/*
 * Option flags set by getopt() processing in main().  In the visible
 * code only g_lflag is acted upon; -c and -s are recorded but not yet
 * consulted by the conversion loop.
 */
static boolean_t g_cflag = B_FALSE;     /* Skip invalid characters */
static boolean_t g_sflag = B_FALSE;     /* Silent */
static boolean_t g_lflag = B_FALSE;     /* List conversions */
  44 
  45 
/*
 * Forward declarations.
 *
 * usage(), do_iconv() and list_codesets() are defined later in this file.
 * yyparse() is defined elsewhere (presumably the yacc-generated charmap
 * parser -- confirm); main() calls it once per charmap file.
 */
static void     usage(void) __NORETURN;
static void     do_iconv(FILE *, const char *);
static void     list_codesets(void);
int             yyparse(void);
  53 
/*
 * One codeset name in the sorted list built by list_codesets().
 */
typedef struct _iconv_item {
        list_node_t     ii_next;        /* linkage in the codeset list */
        list_t          ii_alias_list;  /* iconv_alias_t entries for this name */
        char            *ii_name;       /* strdup()ed codeset name */
} iconv_item_t;

/*
 * One alias attached to an iconv_item_t through ii_alias_list.
 */
typedef struct _iconv_alias {
        list_node_t     ia_next;        /* linkage in ii_alias_list */
        char            *ia_name;       /* strdup()ed alias name */
} iconv_alias_t;
  64 
  65 /*
  66  * Print usage.
  67  */
  68 static void
  69 usage(void)
  70 {
  71         (void) fprintf(stderr, _(
  72             "usage:"
  73             "\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n"
  74             "\ticonv [-cs] -f frommap -t tomap [file ...]\n"
  75             "\ticonv -l\n"));
  76         exit(1);
  77 }
  78 
  79 
  80 int
  81 main(int argc, char **argv)
  82 {
  83         char    *fname;
  84         FILE    *fp;
  85         int     c;
  86 
  87         init_charmap();
  88 
  89         /* XXX */
  90         yydebug = 1;
  91 
  92         (void) setlocale(LC_ALL, "");
  93 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
  94 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
  95 #endif
  96         (void) textdomain(TEXT_DOMAIN);
  97 
  98         while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) {
  99                 switch (c) {
 100                 case 'c':
 101                         g_cflag = B_TRUE;
 102                         break;
 103                 case 'l':
 104                         g_lflag = B_TRUE;
 105                         break;
 106                 case 's':
 107                         g_sflag = B_TRUE;
 108                         break;
 109                 case 'f':
 110                         g_from_cs = optarg;
 111                         break;
 112                 case 't':
 113                         g_to_cs = optarg;
 114                         break;
 115                 case '?':
 116                         usage();
 117                 }
 118         }
 119 
 120         if (g_lflag) {
 121                 if (optind != argc)
 122                         usage();
 123                 list_codesets();
 124                 exit(0);
 125         }
 126 
 127         /* Charmaps and codesets can't be mixed */
 128         if ((strchr(g_from_cs, '/') == NULL) !=
 129             (strchr(g_to_cs, '/') == NULL)) {
 130                 usage();
 131         }
 132 
 133         /* XXX form_cs not only codeset */
 134         if (strchr(g_from_cs, '/') != NULL) {
 135                 reset_scanner(g_from_cs);
 136                 (void) yyparse();
 137 
 138                 switch_charmap();
 139 
 140                 reset_scanner(g_to_cs);
 141                 (void) yyparse();
 142         }
 143 
 144         /* XXX empty string "" current encoding */
 145         if (g_from_cs == NULL) {
 146                 g_from_cs = nl_langinfo(CODESET);
 147                 printf("%s\n", g_from_cs);
 148         }
 149         if (g_to_cs == NULL)
 150                 g_to_cs = nl_langinfo(CODESET);
 151 
 152         /*
 153          * XXX todo: deal with charmap files (/paths)
 154          */
 155 
 156         g_ich = iconv_open(g_to_cs, g_from_cs);
 157         if (g_ich == ((iconv_t)-1)) {
 158                 if (errno == EINVAL) {
 159                         (void) fprintf(stderr, gettext("Not supported %s to %s\n"),
 160                             g_from_cs, g_to_cs);
 161                 } else {
 162                         (void) fprintf(stderr, "iconv_open failed\n");
 163                 }
 164                 exit(1);
 165         }
 166 
 167         if (optind == argc || (optind == argc - 1 &&
 168             0 == strcmp(argv[optind], "-"))) {
 169                 do_iconv(stdin, "stdin");
 170                 exit(0);
 171         }
 172 
 173         for (; optind < argc; optind++) {
 174                 fp = fopen(argv[optind], "r");
 175                 if (fp == NULL) {
 176                         perror(argv[optind]);
 177                         exit(1);
 178                 }
 179                 do_iconv(fp, argv[optind]);
 180                 (void) fclose(fp);
 181         }
 182 
 183         return (EXIT_SUCCESS);
 184 }
 185 
 186 /*
 187  * Do actual conversion, copying *fp to stdout.
 188  *
 189  * Conversions may grow or shrink data, so using a larger output buffer
 190  * to reduce the likelihood of leftover input buffer data in each pass.
 191  */
 192 
 193 #define IBUFSIZ 1024
 194 #define OBUFSIZ (2*IBUFSIZ)
 195 
 196 void
 197 do_iconv(FILE *fp, const char *fname)
 198 {
 199         const char *iptr;
 200         char    ibuf[IBUFSIZ];
 201         char    obuf[OBUFSIZ];
 202         char    *optr;
 203         size_t  ileft, icnt, oleft, ocnt;
 204         int     nr, nw, rc;
 205 
 206         while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) {
 207 
 208                 iptr = ibuf;
 209                 ileft = nr;
 210 
 211                 while (ileft > 0) {
 212                         optr = obuf;
 213                         oleft = OBUFSIZ;
 214                         rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft);
 215                         if (rc == (size_t)-1) {
 216                                 /*
 217                                  * XXX todo: deal with skipping invalid
 218                                  * input characters and continue...
 219                                  */
 220                                 g_errcnt++;
 221                                 break;
 222                         }
 223                         ocnt = OBUFSIZ - oleft;
 224                         nw = fwrite(obuf, 1, ocnt, stdout);
 225                         if (nw != ocnt) {
 226                                 perror("write");
 227                                 exit(1);
 228                         }
 229                 }
 230         }
 231 
 232         /*
 233          * End of file. Flush any shift encodings.
 234          */
 235         iptr = NULL;
 236         ileft = 0;
 237         optr = obuf;
 238         oleft = OBUFSIZ;
 239         iconv(g_ich, &iptr, &ileft, &optr, &oleft);
 240         ocnt = OBUFSIZ - oleft;
 241         fwrite(obuf, 1, ocnt, stdout);
 242 }
 243 
 244 /*
 245  * Item is in the list?
 246  */
 247 static boolean_t
 248 iconv_find(list_t *list, const char *name)
 249 {
 250         iconv_item_t    *head;
 251         boolean_t       found = B_FALSE;
 252 
 253         head = list_head(list);
 254         while (head != NULL) {
 255                 if (strcmp(head->ii_name, name) == 0) {
 256                         found = B_TRUE;
 257                         break;
 258                 }
 259                 head = list_next(list, head);
 260         }
 261 
 262         return (found);
 263 }
 264 
 265 /*
 266  * Insert into a sorted list.
 267  */
 268 static void
 269 iconv_insert(list_t *list, const char *name)
 270 {
 271         iconv_item_t    *head;
 272         iconv_item_t    *item;
 273 
 274         head = list_head(list);
 275         while (head != NULL && strcmp(head->ii_name, name) < 0)
 276                 head = list_next(list, head);
 277 
 278         item = (iconv_item_t *)malloc(sizeof (iconv_item_t));
 279 
 280         list_link_init(&item->ii_next);
 281         list_create(&item->ii_alias_list, sizeof (iconv_alias_t),
 282             offsetof(iconv_alias_t, ia_next));
 283 
 284         item->ii_name = strdup(name);
 285 
 286         list_insert_before(list, head, item);
 287 }
 288 
 289 static void
 290 iconv_insert_create(list_t *list, const char *name)
 291 {
 292         if (!iconv_find(list, name))
 293                 iconv_insert(list, name);
 294 }
 295 
 296 static void
 297 iconv_print(list_t *list)
 298 {
 299         iconv_item_t    *head;
 300         iconv_alias_t   *alias_head;
 301 
 302         (void) fprintf(stdout, gettext(
 303             "The following are all supported code set names.  All combinations\n"
 304             "of those names are not necessarily available for the pair of the\n"
 305             "fromcode-tocode.  Some of those code set names have aliases, which\n"
 306             "are case-insensitive and shown after the canonical name:\n"));
 307 
 308         head = list_head(list);
 309         while (head != NULL) {
 310                 (void) fprintf(stdout, "%s", head->ii_name);
 311 
 312                 if (!list_is_empty(&head->ii_alias_list)) {
 313                         printf(" (");
 314                         alias_head = list_head(&head->ii_alias_list);
 315                         while (alias_head != NULL) {
 316                                 (void) fprintf(stdout, "%s",
 317                                     alias_head->ia_name);
 318 
 319                                 alias_head = list_next(&head->ii_alias_list,
 320                                     alias_head);
 321 
 322                                 if (alias_head != NULL)
 323                                         (void) fprintf(stdout, ", ");
 324                         }
 325                         (void) fprintf(stdout, ")");
 326                 }
 327 
 328                 (void) fprintf(stdout, ",\n");
 329 
 330                 head = list_next(list, head);
 331         }
 332 }
 333 
 334 /*
 335  * List all codesets available.
 336  */
 337 static void
 338 list_codesets(void)
 339 {
 340         list_t  item_list;
 341         glob_t  globbuf;
 342         FILE    *fp;
 343         char    *alias, *ptr, *chomp;
 344         char    buf[1024];
 345         int     i;
 346 
 347         list_create(&item_list, sizeof (iconv_item_t),
 348             offsetof(iconv_item_t, ii_next));
 349 
 350 #define _ICONV_PATH     "/usr/lib/iconv/"
 351 
 352         /* XXX search path depends on arch amd64 etc */
 353         (void) chdir(_ICONV_PATH);
 354         (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf);
 355         (void) chdir("geniconvtbl/binarytables");
 356         (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf);
 357 
 358         for (i = 0; i < globbuf.gl_pathc; i++) {
 359 
 360                 ptr = globbuf.gl_pathv[i];
 361                 alias = strsep(&ptr, "%");
 362 
 363                 chomp = ptr;
 364                 for (; *chomp; chomp++) {
 365                         if (*chomp == '.')
 366                                 *chomp = '\0';
 367                 }
 368 
 369                 iconv_insert_create(&item_list, ptr);
 370                 iconv_insert_create(&item_list, alias);
 371         }
 372 
 373         globfree(&globbuf);
 374 
 375         (void) chdir(_ICONV_PATH);
 376         (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf);
 377 
 378         for (i = 0; i < globbuf.gl_pathc; i++) {
 379 
 380                 ptr = globbuf.gl_pathv[i];
 381                 alias = strsep(&ptr, ".");
 382 printf("%s\n", ptr);
 383                 chomp = ptr;
 384                 for (; *chomp; chomp++) {
 385                         if (*chomp == '.')
 386                                 *chomp = '\0';
 387                 }
 388 
 389                 iconv_insert_create(&item_list, ptr);
 390                 iconv_insert_create(&item_list, alias);
 391         }
 392 
 393         globfree(&globbuf);
 394 
 395         /*
 396          * Read in the alias file and build up a list of
 397          * encoding aliases.
 398          */
 399         fp = fopen("alias", "r");
 400         if (fp == NULL) {
 401                 fprintf(stderr, gettext(
 402                     "Failed to open the conversion alias file: %s\n"),
 403                     "XXX");
 404 
 405                 /* XXX free list */
 406                 return;
 407         }
 408 
 409         while (fgets(buf, sizeof (buf), fp) != NULL) {
 410                 iconv_item_t *head;
 411                 iconv_alias_t *alias_head;
 412 
 413                 /* Skip comments */
 414                 if (buf[0] == '#')
 415                         continue;
 416 
 417                 ptr = buf;
 418                 alias = strsep(&ptr, " \t");
 419 
 420                 chomp = ptr;
 421                 for (; *chomp; chomp++) {
 422                         if (*chomp == '\n')
 423                                 *chomp = '\0';
 424                 }
 425 
 426                 head = list_head(&item_list);
 427                 while (head != NULL &&
 428                     strcmp(head->ii_name, ptr) < 0)
 429                         head = list_next(&item_list, head);
 430 
 431                 if (head != NULL) {
 432                         alias_head = (iconv_alias_t *)malloc(
 433                             sizeof (iconv_alias_t));
 434                         list_link_init(&alias_head->ia_next);
 435                         alias_head->ia_name = strdup(alias);
 436 
 437                         list_insert_tail(&head->ii_alias_list, alias_head);
 438                 }
 439         }
 440 
 441         iconv_print(&item_list);
 442 
 443         /* XXX free list */
 444 
 445         (void) fclose(fp);
 446 }