1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  14  */
  15 
  16 /*
  17  * iconv(1) command.
  18  */
  19 
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <errno.h>
  24 #include <limits.h>
  25 #include <iconv.h>
  26 #include <libintl.h>
  27 #include <langinfo.h>
  28 #include <locale.h>
  29 #include "charmap.h"
  30 
  31 #include <assert.h>
  32 
  33 const char *progname = "iconv";
  34 
  35 char *from_cs;
  36 char *to_cs;
  37 int debug;
  38 int cflag;      /* skip invalid characters */
  39 int sflag;      /* silent */
  40 int lflag;      /* list conversions */
  41 
  42 void iconv_file(FILE *, const char *);
  43 void list_codesets(void);
  44 
  45 iconv_t ich;    /* iconv(3c) lib handle */
  46 size_t (*pconv)(const char **iptr, size_t *ileft,
  47                 char **optr, size_t *oleft);
  48 
  49 size_t
  50 lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
  51 {
  52         return (iconv(ich, iptr, ileft, optr, oleft));
  53 }
  54 
  55 void
  56 usage(void)
  57 {
  58         (void) fprintf(stderr, gettext(
  59             "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
  60             "[file ...]\n"), progname);
  61         (void) fprintf(stderr, gettext("\t%s -l\n"), progname);
  62         exit(1);
  63 }
  64 
  65 int
  66 main(int argc, char **argv)
  67 {
  68         FILE *fp;
  69         char *fslash, *tslash;
  70         int c;
  71 
  72         (void) setlocale(LC_ALL, "");
  73 
  74 #if !defined(TEXT_DOMAIN)
  75 #define TEXT_DOMAIN     "SYS_TEST"
  76 #endif
  77         (void) textdomain(TEXT_DOMAIN);
  78 
  79         while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
  80                 switch (c) {
  81                 case 'c':
  82                         cflag++;
  83                         break;
  84                 case 'd':
  85                         debug++;
  86                         break;
  87                 case 'l':
  88                         lflag++;
  89                         break;
  90                 case 's':
  91                         sflag++;
  92                         break;
  93                 case 'f':
  94                         from_cs = optarg;
  95                         break;
  96                 case 't':
  97                         to_cs = optarg;
  98                         break;
  99                 case '?':
 100                         usage();
 101                 }
 102         }
 103 
 104         if (lflag) {
 105                 if (from_cs != NULL || to_cs != NULL || optind != argc)
 106                         usage();
 107                 list_codesets();
 108                 exit(0);
 109         }
 110 
 111         if (from_cs == NULL)
 112                 from_cs = nl_langinfo(CODESET);
 113         if (to_cs == NULL)
 114                 to_cs = nl_langinfo(CODESET);
 115 
 116         /*
 117          * If either "from" or "to" contains a slash,
 118          * then we're using charmaps.
 119          */
 120         fslash = strchr(from_cs, '/');
 121         tslash = strchr(to_cs, '/');
 122         if (fslash != NULL || tslash != NULL) {
 123                 charmap_init(to_cs, from_cs);
 124                 pconv = cm_iconv;
 125                 if (debug)
 126                         charmap_dump();
 127         } else {
 128                 ich = iconv_open(to_cs, from_cs);
 129                 if (ich == ((iconv_t)-1)) {
 130                         (void) fprintf(stderr, gettext("iconv_open failed\n"));
 131                         exit(1);
 132                 }
 133                 pconv = lib_iconv;
 134         }
 135 
 136         if (optind == argc || optind == argc - 1 &&
 137             0 == strcmp(argv[optind], "-")) {
 138                 iconv_file(stdin, "stdin");
 139                 exit(0);
 140         }
 141 
 142         for (; optind < argc; optind++) {
 143                 fp = fopen(argv[optind], "r");
 144                 if (fp == NULL) {
 145                         perror(argv[optind]);
 146                         exit(1);
 147                 }
 148                 iconv_file(fp, argv[optind]);
 149                 (void) fclose(fp);
 150         }
 151         exit(0);
 152 }
 153 
 154 /*
 155  * Conversion buffer sizes:
 156  *
 157  * The input buffer has room to prepend one mbs character if needed for
 158  * handling a left-over at the end of a previous conversion buffer.
 159  *
 160  * Conversions may grow or shrink data, so using a larger output buffer
 161  * to reduce the likelihood of leftover input buffer data in each pass.
 162  */
 163 #define IBUFSIZ (MB_LEN_MAX + BUFSIZ)
 164 #define OBUFSIZ (2 * BUFSIZ)
 165 
 166 void
 167 iconv_file(FILE *fp, const char *fname)
 168 {
 169         static char ibuf[IBUFSIZ];
 170         static char obuf[OBUFSIZ];
 171         const char *iptr;
 172         char *optr;
 173         off64_t offset;
 174         size_t ileft, oleft, ocnt;
 175         int iconv_errno;
 176         int nr, nw, rc;
 177 
 178         offset = 0;
 179         ileft = 0;
 180         iptr = ibuf + MB_LEN_MAX;
 181 
 182         while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
 183 
 184                 assert(iptr <= ibuf+MB_LEN_MAX);
 185                 assert(ileft <= MB_LEN_MAX);
 186                 ileft += nr;
 187                 offset += nr;
 188 
 189                 optr = obuf;
 190                 oleft = OBUFSIZ;
 191 
 192         iconv_again:
 193                 rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
 194                 iconv_errno = errno;
 195 
 196                 ocnt = OBUFSIZ - oleft;
 197                 if (ocnt > 0) {
 198                         nw = fwrite(obuf, 1, ocnt, stdout);
 199                         if (nw != ocnt) {
 200                                 perror("fwrite");
 201                                 exit(1);
 202                         }
 203                 }
 204                 optr = obuf;
 205                 oleft = OBUFSIZ;
 206 
 207                 if (rc == (size_t)-1) {
 208                         switch (iconv_errno) {
 209 
 210                         case E2BIG:     /* no room in output buffer */
 211                                 goto iconv_again;
 212 
 213                         case EINVAL:    /* incomplete sequence on input */
 214                                 if (debug) {
 215                                         (void) fprintf(stderr,
 216                         _("Incomplete sequence in %s at offset %lld\n"),
 217                                             fname, offset - ileft);
 218                                 }
 219                                 /*
 220                                  * Copy the reminder to the space reserved
 221                                  * at the start of the input buffer.
 222                                  */
 223                                 assert(ileft > 0);
 224                                 if (ileft <= MB_LEN_MAX) {
 225                                         char *p = ibuf+MB_LEN_MAX-ileft;
 226                                         (void) memcpy(p, iptr, ileft);
 227                                         iptr = p;
 228                                         continue; /* read again */
 229                                 }
 230                                 /*
 231                                  * Should not see ileft > MB_LEN_MAX,
 232                                  * but if we do, handle as EILSEQ.
 233                                  */
 234                                 /* FALLTHROUGH */
 235 
 236                         case EILSEQ:    /* invalid sequence on input */
 237                                 if (!sflag) {
 238                                         (void) fprintf(stderr,
 239                         _("Illegal sequence in %s at offset %lld\n"),
 240                                             fname, offset - ileft);
 241                                         (void) fprintf(stderr,
 242                         _("bad seq: \\x%02x\\x%02x\\x%02x\n"),
 243                                             iptr[0] & 0xff,
 244                                             iptr[1] & 0xff,
 245                                             iptr[2] & 0xff);
 246                                 }
 247                                 assert(ileft > 0);
 248                                 /* skip one */
 249                                 iptr++;
 250                                 ileft--;
 251                                 assert(oleft > 0);
 252                                 if (!cflag) {
 253                                         *optr++ = '?';
 254                                         oleft--;
 255                                 }
 256                                 goto iconv_again;
 257 
 258                         default:
 259                                 (void) fprintf(stderr,
 260                         _("iconv error (%s) in file $s at offset %lld\n"),
 261                                     strerror(errno), fname, offset - ileft);
 262                                 perror("iconv");
 263                                 break;
 264                         }
 265                 }
 266 
 267                 /* normal iconv return */
 268                 ileft = 0;
 269                 iptr = ibuf + MB_LEN_MAX;
 270         }
 271 
 272         /*
 273          * End of file
 274          * Flush any shift encodings.
 275          */
 276         iptr = NULL;
 277         ileft = 0;
 278         optr = obuf;
 279         oleft = OBUFSIZ;
 280         (*pconv)(&iptr, &ileft, &optr, &oleft);
 281         ocnt = OBUFSIZ - oleft;
 282         if (ocnt > 0) {
 283                 (void) fwrite(obuf, 1, ocnt, stdout);
 284         }
 285 }
 286 
 287 /*
 288  * scan the /usr/lib/iconv directory...
 289  * A script for this seems appropriate.
 290  */
 291 void
 292 list_codesets(void)
 293 {
 294         (void) system("/usr/lib/iconv/iconv_list");
 295 }