1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  14  * Copyright 2013 David Hoeppner.  All rights reserved.
  15  */
  16 
  17 /*
  18  * POSIX iconv.
  19  */
  20 
#include <sys/list.h>

#include <errno.h>
#include <glob.h>
#include <iconv.h>
#include <langinfo.h>
#include <libintl.h>
#include <libnvpair.h>
#include <locale.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "iconv.h"
  34 
  35 static const char *g_progname = "iconv";
  36 
  37 static char     *g_from_cs = "UTF-8";
  38 static char     *g_to_cs = "UTF-8";
  39 static iconv_t  g_ich;                  /* iconv(3c) lib handle */
  40 static int      g_errcnt;
  41 static boolean_t g_cflag = B_FALSE;     /* Skip invalid characters */
  42 static boolean_t g_sflag = B_FALSE;     /* Silent */
  43 static boolean_t g_lflag = B_FALSE;     /* List conversions */
  44 
  45 
  46 /*
  47  * Forward declarations.
  48  */
  49 static void     usage(void) __NORETURN;
  50 static void     do_iconv(FILE *, const char *);
  51 static void     list_codesets(void);
  52 int             yyparse(void);
  53 
  54 typedef struct _iconv_item {
  55         list_node_t     ii_next;
  56         list_t          ii_alias_list;
  57         char            *ii_name;
  58 } iconv_item_t;
  59 
  60 typedef struct _iconv_alias {
  61         list_node_t     ia_next;
  62         char            *ia_name;
  63 } iconv_alias_t;
  64 
  65 /*
  66  * Print usage.
  67  */
  68 static void
  69 usage(void)
  70 {
  71         /* XXX g_progname */
  72         (void) fprintf(stderr, _(
  73             "usage:\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n"
  74             "\ticonv [-cs] -f frommap -t tomap [file ...]\n"));
  75         (void) fprintf(stderr, _("\t%s -l\n"), g_progname);
  76         exit(1);
  77 }
  78 
  79 
  80 int
  81 main(int argc, char **argv)
  82 {
  83         char    *fname;
  84         FILE    *fp;
  85         int     c;
  86 
  87         init_charmap();
  88 
  89         /* XXX */
  90         yydebug = 1;
  91 
  92         (void) setlocale(LC_ALL, "");
  93 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
  94 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
  95 #endif
  96         (void) textdomain(TEXT_DOMAIN);
  97 
  98         while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) {
  99                 switch (c) {
 100                 case 'c':
 101                         g_cflag = B_TRUE;
 102                         break;
 103                 case 'l':
 104                         g_lflag = B_TRUE;
 105                         break;
 106                 case 's':
 107                         g_sflag = B_TRUE;
 108                         break;
 109                 case 'f':
 110                         g_from_cs = optarg;
 111                         break;
 112                 case 't':
 113                         g_to_cs = optarg;
 114                         break;
 115                 case '?':
 116                         usage();
 117                 }
 118         }
 119 
 120         if (g_lflag) {
 121                 if (optind != argc)
 122                         usage();
 123                 list_codesets();
 124                 exit(0);
 125         }
 126 
 127         /* XXX form_cs not only codeset */
 128         if (strstr(g_from_cs, "/") != NULL) {
 129                 reset_scanner(g_from_cs);
 130                 (void) yyparse();
 131         }
 132 
 133         /* XXX empty string "" current encoding */
 134         if (g_from_cs == NULL) {
 135                 g_from_cs = nl_langinfo(CODESET);
 136                 printf("%s\n", g_from_cs);
 137         }
 138         if (g_to_cs == NULL)
 139                 g_to_cs = nl_langinfo(CODESET);
 140 
 141         /*
 142          * XXX todo: deal with charmap files (/paths)
 143          */
 144 
 145         g_ich = iconv_open(g_to_cs, g_from_cs);
 146         if (g_ich == ((iconv_t)-1)) {
 147                 if (errno == EINVAL) {
 148                         (void) fprintf(stderr, gettext("Not supported %s to %s\n"),
 149                             g_from_cs, g_to_cs);
 150                 } else {
 151                         (void) fprintf(stderr, "iconv_open failed\n");
 152                 }
 153                 exit(1);
 154         }
 155 
 156         if (optind == argc || (optind == argc - 1 &&
 157             0 == strcmp(argv[optind], "-"))) {
 158                 do_iconv(stdin, "stdin");
 159                 exit(0);
 160         }
 161 
 162         for (; optind < argc; optind++) {
 163                 fp = fopen(argv[optind], "r");
 164                 if (fp == NULL) {
 165                         perror(argv[optind]);
 166                         exit(1);
 167                 }
 168                 do_iconv(fp, argv[optind]);
 169                 (void) fclose(fp);
 170         }
 171 
 172         return (EXIT_SUCCESS);
 173 }
 174 
 175 /*
 176  * Do actual conversion, copying *fp to stdout.
 177  *
 178  * Conversions may grow or shrink data, so using a larger output buffer
 179  * to reduce the likelihood of leftover input buffer data in each pass.
 180  */
 181 
 182 #define IBUFSIZ 1024
 183 #define OBUFSIZ (2*IBUFSIZ)
 184 
 185 void
 186 do_iconv(FILE *fp, const char *fname)
 187 {
 188         const char *iptr;
 189         char    ibuf[IBUFSIZ];
 190         char    obuf[OBUFSIZ];
 191         char    *optr;
 192         size_t  ileft, icnt, oleft, ocnt;
 193         int     nr, nw, rc;
 194 
 195         while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) {
 196 
 197                 iptr = ibuf;
 198                 ileft = nr;
 199 
 200                 while (ileft > 0) {
 201                         optr = obuf;
 202                         oleft = OBUFSIZ;
 203                         rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft);
 204                         if (rc == (size_t)-1) {
 205                                 /*
 206                                  * XXX todo: deal with skipping invalid
 207                                  * input characters and continue...
 208                                  */
 209                                 g_errcnt++;
 210                                 break;
 211                         }
 212                         ocnt = OBUFSIZ - oleft;
 213                         nw = fwrite(obuf, 1, ocnt, stdout);
 214                         if (nw != ocnt) {
 215                                 perror("write");
 216                                 exit(1);
 217                         }
 218                 }
 219         }
 220 
 221         /*
 222          * End of file. Flush any shift encodings.
 223          */
 224         iptr = NULL;
 225         ileft = 0;
 226         optr = obuf;
 227         oleft = OBUFSIZ;
 228         iconv(g_ich, &iptr, &ileft, &optr, &oleft);
 229         ocnt = OBUFSIZ - oleft;
 230         fwrite(obuf, 1, ocnt, stdout);
 231 }
 232 
 233 /*
 234  * Item is in the list?
 235  */
 236 static boolean_t
 237 iconv_find(list_t *list, const char *name)
 238 {
 239         iconv_item_t    *head;
 240         boolean_t       found = B_FALSE;
 241 
 242         head = list_head(list);
 243         while (head != NULL) {
 244                 if (strcmp(head->ii_name, name) == 0) {
 245                         found = B_TRUE;
 246                         break;
 247                 }
 248                 head = list_next(list, head);
 249         }
 250 
 251         return (found);
 252 }
 253 
 254 /*
 255  * Insert into a sorted list.
 256  */
 257 static void
 258 iconv_insert(list_t *list, const char *name)
 259 {
 260         iconv_item_t    *head;
 261         iconv_item_t    *item;
 262 
 263         head = list_head(list);
 264         while (head != NULL && strcmp(head->ii_name, name) < 0)
 265                 head = list_next(list, head);
 266 
 267         item = (iconv_item_t *)malloc(sizeof (iconv_item_t));
 268 
 269         list_link_init(&item->ii_next);
 270         list_create(&item->ii_alias_list, sizeof (iconv_alias_t),
 271             offsetof(iconv_alias_t, ia_next));
 272 
 273         item->ii_name = strdup(name);
 274 
 275         list_insert_before(list, head, item);
 276 }
 277 
 278 static void
 279 iconv_insert_create(list_t *list, const char *name)
 280 {
 281         if (!iconv_find(list, name))
 282                 iconv_insert(list, name);
 283 }
 284 
 285 static void
 286 iconv_print(list_t *list)
 287 {
 288         iconv_item_t    *head;
 289         iconv_alias_t   *alias_head;
 290 
 291         (void) fprintf(stdout, gettext(
 292             "The following are all supported code set names.  All combinations\n"
 293             "of those names are not necessarily available for the pair of the\n"
 294             "fromcode-tocode.  Some of those code set names have aliases, which\n"
 295             "are case-insensitive and shown after the canonical name:\n"));
 296 
 297         head = list_head(list);
 298         while (head != NULL) {
 299                 (void) fprintf(stdout, "%s", head->ii_name);
 300 
 301                 if (!list_is_empty(&head->ii_alias_list)) {
 302                         printf(" (");
 303                         alias_head = list_head(&head->ii_alias_list);
 304                         while (alias_head != NULL) {
 305                                 (void) fprintf(stdout, "%s",
 306                                     alias_head->ia_name);
 307 
 308                                 alias_head = list_next(&head->ii_alias_list,
 309                                     alias_head);
 310 
 311                                 if (alias_head != NULL)
 312                                         (void) fprintf(stdout, ", ");
 313                         }
 314                         (void) fprintf(stdout, ")");
 315                 }
 316 
 317                 (void) fprintf(stdout, ",\n");
 318 
 319                 head = list_next(list, head);
 320         }
 321 }
 322 
 323 /*
 324  * List all codesets available.
 325  */
 326 static void
 327 list_codesets(void)
 328 {
 329         list_t  item_list;
 330         glob_t  globbuf;
 331         FILE    *fp;
 332         char    *alias, *ptr, *chomp;
 333         char    buf[1024];
 334         int     i;
 335 
 336         list_create(&item_list, sizeof (iconv_item_t),
 337             offsetof(iconv_item_t, ii_next));
 338 
 339 #define _ICONV_PATH     "/usr/lib/iconv/"
 340 
 341         /* XXX search path depends on arch amd64 etc */
 342         (void) chdir(_ICONV_PATH);
 343         (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf);
 344         (void) chdir("geniconvtbl/binarytables");
 345         (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf);
 346 
 347         for (i = 0; i < globbuf.gl_pathc; i++) {
 348 
 349                 ptr = globbuf.gl_pathv[i];
 350                 alias = strsep(&ptr, "%");
 351 
 352                 chomp = ptr;
 353                 for (; *chomp; chomp++) {
 354                         if (*chomp == '.')
 355                                 *chomp = '\0';
 356                 }
 357 
 358                 iconv_insert_create(&item_list, ptr);
 359                 iconv_insert_create(&item_list, alias);
 360         }
 361 
 362         globfree(&globbuf);
 363 
 364         (void) chdir(_ICONV_PATH);
 365         (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf);
 366 
 367         for (i = 0; i < globbuf.gl_pathc; i++) {
 368 
 369                 ptr = globbuf.gl_pathv[i];
 370                 alias = strsep(&ptr, ".");
 371 printf("%s\n", ptr);
 372                 chomp = ptr;
 373                 for (; *chomp; chomp++) {
 374                         if (*chomp == '.')
 375                                 *chomp = '\0';
 376                 }
 377 
 378                 iconv_insert_create(&item_list, ptr);
 379                 iconv_insert_create(&item_list, alias);
 380         }
 381 
 382         globfree(&globbuf);
 383 
 384         /*
 385          * Read in the alias file and build up a list of
 386          * encoding aliases.
 387          */
 388         fp = fopen("alias", "r");
 389         if (fp == NULL) {
 390                 fprintf(stderr, gettext(
 391                     "Failed to open the conversion alias file: %s\n"),
 392                     "XXX");
 393 
 394                 /* XXX free list */
 395                 return;
 396         }
 397 
 398         while (fgets(buf, sizeof (buf), fp) != NULL) {
 399                 iconv_item_t *head;
 400                 iconv_alias_t *alias_head;
 401 
 402                 /* Skip comments */
 403                 if (buf[0] == '#')
 404                         continue;
 405 
 406                 ptr = buf;
 407                 alias = strsep(&ptr, " \t");
 408 
 409                 chomp = ptr;
 410                 for (; *chomp; chomp++) {
 411                         if (*chomp == '\n')
 412                                 *chomp = '\0';
 413                 }
 414 
 415                 head = list_head(&item_list);
 416                 while (head != NULL &&
 417                     strcmp(head->ii_name, ptr) < 0)
 418                         head = list_next(&item_list, head);
 419 
 420                 if (head != NULL) {
 421                         alias_head = (iconv_alias_t *)malloc(
 422                             sizeof (iconv_alias_t));
 423                         list_link_init(&alias_head->ia_next);
 424                         alias_head->ia_name = strdup(alias);
 425 
 426                         list_insert_tail(&head->ii_alias_list, alias_head);
 427                 }
 428         }
 429 
 430         iconv_print(&item_list);
 431 
 432         /* XXX free list */
 433 
 434         (void) fclose(fp);
 435 }