/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2013 David Hoeppner. All rights reserved.
 */

/*
 * POSIX iconv.
 *
 * Converts the named files (or standard input) from one codeset to
 * another with iconv(3C) and writes the result to standard output.
 * With -l the known codeset names are listed instead.
 */

#include <sys/list.h>

#include <errno.h>
#include <glob.h>
#include <iconv.h>
#include <langinfo.h>
#include <libnvpair.h>
#include <locale.h>
#include <stddef.h>
#include <string.h>
#include <unistd.h>

#include "iconv.h"

static const char *g_progname = "iconv";

/* Source (-f) and target (-t) codeset names; both start out as "UTF-8". */
static char *g_from_cs = "UTF-8";
static char *g_to_cs = "UTF-8";
static iconv_t g_ich;	/* iconv(3c) lib handle */
static int g_errcnt;	/* Incremented by do_iconv() on conversion errors */
static boolean_t g_cflag = B_FALSE;	/* Skip invalid characters */
static boolean_t g_sflag = B_FALSE;	/* Silent */
static boolean_t g_lflag = B_FALSE;	/* List conversions */


/*
 * Forward declarations.
 */
static void usage(void) __NORETURN;
static void do_iconv(FILE *, const char *);
static void list_codesets(void);
int yyparse(void);

/*
 * One codeset name in the sorted list built by list_codesets().
 */
typedef struct _iconv_item {
	list_node_t ii_next;		/* linkage in the codeset list */
	list_t ii_alias_list;		/* list of iconv_alias_t entries */
	char *ii_name;			/* codeset name (strdup'ed) */
} iconv_item_t;

/*
 * One alias attached to an iconv_item_t.
 */
typedef struct _iconv_alias {
	list_node_t ia_next;		/* linkage in ii_alias_list */
	char *ia_name;			/* alias name (strdup'ed) */
} iconv_alias_t;

/*
 * Print usage.
67 */ 68 static void 69 usage(void) 70 { 71 (void) fprintf(stderr, _( 72 "usage:" 73 "\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n" 74 "\ticonv [-cs] -f frommap -t tomap [file ...]\n" 75 "\ticonv -l\n")); 76 exit(1); 77 } 78 79 80 int 81 main(int argc, char **argv) 82 { 83 char *fname; 84 FILE *fp; 85 int c; 86 87 init_charmap(); 88 89 /* XXX */ 90 yydebug = 1; 91 92 (void) setlocale(LC_ALL, ""); 93 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 94 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 95 #endif 96 (void) textdomain(TEXT_DOMAIN); 97 98 while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) { 99 switch (c) { 100 case 'c': 101 g_cflag = B_TRUE; 102 break; 103 case 'l': 104 g_lflag = B_TRUE; 105 break; 106 case 's': 107 g_sflag = B_TRUE; 108 break; 109 case 'f': 110 g_from_cs = optarg; 111 break; 112 case 't': 113 g_to_cs = optarg; 114 break; 115 case '?': 116 usage(); 117 } 118 } 119 120 if (g_lflag) { 121 if (optind != argc) 122 usage(); 123 list_codesets(); 124 exit(0); 125 } 126 127 /* Charmaps and codesets can't be mixed */ 128 if ((strchr(g_from_cs, '/') == NULL) != 129 (strchr(g_to_cs, '/') == NULL)) { 130 usage(); 131 } 132 133 /* XXX form_cs not only codeset */ 134 if (strchr(g_from_cs, '/') != NULL) { 135 reset_scanner(g_from_cs); 136 (void) yyparse(); 137 138 switch_charmap(); 139 140 reset_scanner(g_to_cs); 141 (void) yyparse(); 142 } 143 144 /* XXX empty string "" current encoding */ 145 if (g_from_cs == NULL) { 146 g_from_cs = nl_langinfo(CODESET); 147 printf("%s\n", g_from_cs); 148 } 149 if (g_to_cs == NULL) 150 g_to_cs = nl_langinfo(CODESET); 151 152 /* 153 * XXX todo: deal with charmap files (/paths) 154 */ 155 156 g_ich = iconv_open(g_to_cs, g_from_cs); 157 if (g_ich == ((iconv_t)-1)) { 158 if (errno == EINVAL) { 159 (void) fprintf(stderr, gettext("Not supported %s to %s\n"), 160 g_from_cs, g_to_cs); 161 } else { 162 (void) fprintf(stderr, "iconv_open failed\n"); 163 } 164 exit(1); 165 } 166 167 if (optind 
== argc || (optind == argc - 1 && 168 0 == strcmp(argv[optind], "-"))) { 169 do_iconv(stdin, "stdin"); 170 exit(0); 171 } 172 173 for (; optind < argc; optind++) { 174 fp = fopen(argv[optind], "r"); 175 if (fp == NULL) { 176 perror(argv[optind]); 177 exit(1); 178 } 179 do_iconv(fp, argv[optind]); 180 (void) fclose(fp); 181 } 182 183 return (EXIT_SUCCESS); 184 } 185 186 /* 187 * Do actual conversion, copying *fp to stdout. 188 * 189 * Conversions may grow or shrink data, so using a larger output buffer 190 * to reduce the likelihood of leftover input buffer data in each pass. 191 */ 192 193 #define IBUFSIZ 1024 194 #define OBUFSIZ (2*IBUFSIZ) 195 196 void 197 do_iconv(FILE *fp, const char *fname) 198 { 199 const char *iptr; 200 char ibuf[IBUFSIZ]; 201 char obuf[OBUFSIZ]; 202 char *optr; 203 size_t ileft, icnt, oleft, ocnt; 204 int nr, nw, rc; 205 206 while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) { 207 208 iptr = ibuf; 209 ileft = nr; 210 211 while (ileft > 0) { 212 optr = obuf; 213 oleft = OBUFSIZ; 214 rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft); 215 if (rc == (size_t)-1) { 216 /* 217 * XXX todo: deal with skipping invalid 218 * input characters and continue... 219 */ 220 g_errcnt++; 221 break; 222 } 223 ocnt = OBUFSIZ - oleft; 224 nw = fwrite(obuf, 1, ocnt, stdout); 225 if (nw != ocnt) { 226 perror("write"); 227 exit(1); 228 } 229 } 230 } 231 232 /* 233 * End of file. Flush any shift encodings. 234 */ 235 iptr = NULL; 236 ileft = 0; 237 optr = obuf; 238 oleft = OBUFSIZ; 239 iconv(g_ich, &iptr, &ileft, &optr, &oleft); 240 ocnt = OBUFSIZ - oleft; 241 fwrite(obuf, 1, ocnt, stdout); 242 } 243 244 /* 245 * Item is in the list? 
246 */ 247 static boolean_t 248 iconv_find(list_t *list, const char *name) 249 { 250 iconv_item_t *head; 251 boolean_t found = B_FALSE; 252 253 head = list_head(list); 254 while (head != NULL) { 255 if (strcmp(head->ii_name, name) == 0) { 256 found = B_TRUE; 257 break; 258 } 259 head = list_next(list, head); 260 } 261 262 return (found); 263 } 264 265 /* 266 * Insert into a sorted list. 267 */ 268 static void 269 iconv_insert(list_t *list, const char *name) 270 { 271 iconv_item_t *head; 272 iconv_item_t *item; 273 274 head = list_head(list); 275 while (head != NULL && strcmp(head->ii_name, name) < 0) 276 head = list_next(list, head); 277 278 item = (iconv_item_t *)malloc(sizeof (iconv_item_t)); 279 280 list_link_init(&item->ii_next); 281 list_create(&item->ii_alias_list, sizeof (iconv_alias_t), 282 offsetof(iconv_alias_t, ia_next)); 283 284 item->ii_name = strdup(name); 285 286 list_insert_before(list, head, item); 287 } 288 289 static void 290 iconv_insert_create(list_t *list, const char *name) 291 { 292 if (!iconv_find(list, name)) 293 iconv_insert(list, name); 294 } 295 296 static void 297 iconv_print(list_t *list) 298 { 299 iconv_item_t *head; 300 iconv_alias_t *alias_head; 301 302 (void) fprintf(stdout, gettext( 303 "The following are all supported code set names. All combinations\n" 304 "of those names are not necessarily available for the pair of the\n" 305 "fromcode-tocode. 
Some of those code set names have aliases, which\n" 306 "are case-insensitive and shown after the canonical name:\n")); 307 308 head = list_head(list); 309 while (head != NULL) { 310 (void) fprintf(stdout, "%s", head->ii_name); 311 312 if (!list_is_empty(&head->ii_alias_list)) { 313 printf(" ("); 314 alias_head = list_head(&head->ii_alias_list); 315 while (alias_head != NULL) { 316 (void) fprintf(stdout, "%s", 317 alias_head->ia_name); 318 319 alias_head = list_next(&head->ii_alias_list, 320 alias_head); 321 322 if (alias_head != NULL) 323 (void) fprintf(stdout, ", "); 324 } 325 (void) fprintf(stdout, ")"); 326 } 327 328 (void) fprintf(stdout, ",\n"); 329 330 head = list_next(list, head); 331 } 332 } 333 334 /* 335 * List all codesets available. 336 */ 337 static void 338 list_codesets(void) 339 { 340 list_t item_list; 341 glob_t globbuf; 342 FILE *fp; 343 char *alias, *ptr, *chomp; 344 char buf[1024]; 345 int i; 346 347 list_create(&item_list, sizeof (iconv_item_t), 348 offsetof(iconv_item_t, ii_next)); 349 350 #define _ICONV_PATH "/usr/lib/iconv/" 351 352 /* XXX search path depends on arch amd64 etc */ 353 (void) chdir(_ICONV_PATH); 354 (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf); 355 (void) chdir("geniconvtbl/binarytables"); 356 (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf); 357 358 for (i = 0; i < globbuf.gl_pathc; i++) { 359 360 ptr = globbuf.gl_pathv[i]; 361 alias = strsep(&ptr, "%"); 362 363 chomp = ptr; 364 for (; *chomp; chomp++) { 365 if (*chomp == '.') 366 *chomp = '\0'; 367 } 368 369 iconv_insert_create(&item_list, ptr); 370 iconv_insert_create(&item_list, alias); 371 } 372 373 globfree(&globbuf); 374 375 (void) chdir(_ICONV_PATH); 376 (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf); 377 378 for (i = 0; i < globbuf.gl_pathc; i++) { 379 380 ptr = globbuf.gl_pathv[i]; 381 alias = strsep(&ptr, "."); 382 printf("%s\n", ptr); 383 chomp = ptr; 384 for (; *chomp; chomp++) { 385 if (*chomp == '.') 386 *chomp = '\0'; 387 } 388 389 
iconv_insert_create(&item_list, ptr); 390 iconv_insert_create(&item_list, alias); 391 } 392 393 globfree(&globbuf); 394 395 /* 396 * Read in the alias file and build up a list of 397 * encoding aliases. 398 */ 399 fp = fopen("alias", "r"); 400 if (fp == NULL) { 401 fprintf(stderr, gettext( 402 "Failed to open the conversion alias file: %s\n"), 403 "XXX"); 404 405 /* XXX free list */ 406 return; 407 } 408 409 while (fgets(buf, sizeof (buf), fp) != NULL) { 410 iconv_item_t *head; 411 iconv_alias_t *alias_head; 412 413 /* Skip comments */ 414 if (buf[0] == '#') 415 continue; 416 417 ptr = buf; 418 alias = strsep(&ptr, " \t"); 419 420 chomp = ptr; 421 for (; *chomp; chomp++) { 422 if (*chomp == '\n') 423 *chomp = '\0'; 424 } 425 426 head = list_head(&item_list); 427 while (head != NULL && 428 strcmp(head->ii_name, ptr) < 0) 429 head = list_next(&item_list, head); 430 431 if (head != NULL) { 432 alias_head = (iconv_alias_t *)malloc( 433 sizeof (iconv_alias_t)); 434 list_link_init(&alias_head->ia_next); 435 alias_head->ia_name = strdup(alias); 436 437 list_insert_tail(&head->ii_alias_list, alias_head); 438 } 439 } 440 441 iconv_print(&item_list); 442 443 /* XXX free list */ 444 445 (void) fclose(fp); 446 }