1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 14 * Copyright 2013 David Hoeppner. All rights reserved. 15 */ 16 17 /* 18 * POSIX iconv. 19 */ 20 21 #include <sys/list.h> 22 23 #include <errno.h> 24 #include <glob.h> 25 #include <iconv.h> 26 #include <langinfo.h> 27 #include <libnvpair.h> 28 #include <locale.h> 29 #include <stddef.h> 30 #include <string.h> 31 #include <unistd.h> 32 33 #include "iconv.h" 34 35 static const char *g_progname = "iconv"; 36 37 static char *g_from_cs = "UTF-8"; 38 static char *g_to_cs = "UTF-8"; 39 static iconv_t g_ich; /* iconv(3c) lib handle */ 40 static int g_errcnt; 41 static boolean_t g_cflag = B_FALSE; /* Skip invalid characters */ 42 static boolean_t g_sflag = B_FALSE; /* Silent */ 43 static boolean_t g_lflag = B_FALSE; /* List conversions */ 44 45 46 /* 47 * Forward declarations. 48 */ 49 static void usage(void) __NORETURN; 50 static void do_iconv(FILE *, const char *); 51 static void list_codesets(void); 52 int yyparse(void); 53 54 typedef struct _iconv_item { 55 list_node_t ii_next; 56 list_t ii_alias_list; 57 char *ii_name; 58 } iconv_item_t; 59 60 typedef struct _iconv_alias { 61 list_node_t ia_next; 62 char *ia_name; 63 } iconv_alias_t; 64 65 /* 66 * Print usage. 67 */ 68 static void 69 usage(void) 70 { 71 /* XXX g_progname */ 72 (void) fprintf(stderr, _( 73 "usage:\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n" 74 "\ticonv [-cs] -f frommap -t tomap [file ...]\n")); 75 (void) fprintf(stderr, _("\t%s -l\n"), g_progname); 76 exit(1); 77 } 78 79 80 int 81 main(int argc, char **argv) 82 { 83 char *fname; 84 FILE *fp; 85 int c; 86 87 init_charmap(); 88 89 /* XXX */ 90 yydebug = 1; 91 92 (void) setlocale(LC_ALL, ""); 93 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 94 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 95 #endif 96 (void) textdomain(TEXT_DOMAIN); 97 98 while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) { 99 switch (c) { 100 case 'c': 101 g_cflag = B_TRUE; 102 break; 103 case 'l': 104 g_lflag = B_TRUE; 105 break; 106 case 's': 107 g_sflag = B_TRUE; 108 break; 109 case 'f': 110 g_from_cs = optarg; 111 break; 112 case 't': 113 g_to_cs = optarg; 114 break; 115 case '?': 116 usage(); 117 } 118 } 119 120 if (g_lflag) { 121 if (optind != argc) 122 usage(); 123 list_codesets(); 124 exit(0); 125 } 126 127 /* XXX form_cs not only codeset */ 128 if (strstr(g_from_cs, "/") != NULL) { 129 reset_scanner(g_from_cs); 130 (void) yyparse(); 131 } 132 133 /* XXX empty string "" current encoding */ 134 if (g_from_cs == NULL) { 135 g_from_cs = nl_langinfo(CODESET); 136 printf("%s\n", g_from_cs); 137 } 138 if (g_to_cs == NULL) 139 g_to_cs = nl_langinfo(CODESET); 140 141 /* 142 * XXX todo: deal with charmap files (/paths) 143 */ 144 145 g_ich = iconv_open(g_to_cs, g_from_cs); 146 if (g_ich == ((iconv_t)-1)) { 147 if (errno == EINVAL) { 148 (void) fprintf(stderr, gettext("Not supported %s to %s\n"), 149 g_from_cs, g_to_cs); 150 } else { 151 (void) fprintf(stderr, "iconv_open failed\n"); 152 } 153 exit(1); 154 } 155 156 if (optind == argc || (optind == argc - 1 && 157 0 == strcmp(argv[optind], "-"))) { 158 do_iconv(stdin, "stdin"); 159 exit(0); 160 } 161 162 for (; optind < argc; optind++) { 163 fp = fopen(argv[optind], "r"); 164 if (fp == NULL) { 165 perror(argv[optind]); 166 exit(1); 167 } 168 do_iconv(fp, argv[optind]); 169 (void) fclose(fp); 170 } 171 172 return (EXIT_SUCCESS); 173 } 174 175 /* 176 * Do actual conversion, copying *fp to stdout. 177 * 178 * Conversions may grow or shrink data, so using a larger output buffer 179 * to reduce the likelihood of leftover input buffer data in each pass. 180 */ 181 182 #define IBUFSIZ 1024 183 #define OBUFSIZ (2*IBUFSIZ) 184 185 void 186 do_iconv(FILE *fp, const char *fname) 187 { 188 const char *iptr; 189 char ibuf[IBUFSIZ]; 190 char obuf[OBUFSIZ]; 191 char *optr; 192 size_t ileft, icnt, oleft, ocnt; 193 int nr, nw, rc; 194 195 while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) { 196 197 iptr = ibuf; 198 ileft = nr; 199 200 while (ileft > 0) { 201 optr = obuf; 202 oleft = OBUFSIZ; 203 rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft); 204 if (rc == (size_t)-1) { 205 /* 206 * XXX todo: deal with skipping invalid 207 * input characters and continue... 208 */ 209 g_errcnt++; 210 break; 211 } 212 ocnt = OBUFSIZ - oleft; 213 nw = fwrite(obuf, 1, ocnt, stdout); 214 if (nw != ocnt) { 215 perror("write"); 216 exit(1); 217 } 218 } 219 } 220 221 /* 222 * End of file. Flush any shift encodings. 223 */ 224 iptr = NULL; 225 ileft = 0; 226 optr = obuf; 227 oleft = OBUFSIZ; 228 iconv(g_ich, &iptr, &ileft, &optr, &oleft); 229 ocnt = OBUFSIZ - oleft; 230 fwrite(obuf, 1, ocnt, stdout); 231 } 232 233 /* 234 * Item is in the list? 235 */ 236 static boolean_t 237 iconv_find(list_t *list, const char *name) 238 { 239 iconv_item_t *head; 240 boolean_t found = B_FALSE; 241 242 head = list_head(list); 243 while (head != NULL) { 244 if (strcmp(head->ii_name, name) == 0) { 245 found = B_TRUE; 246 break; 247 } 248 head = list_next(list, head); 249 } 250 251 return (found); 252 } 253 254 /* 255 * Insert into a sorted list. 256 */ 257 static void 258 iconv_insert(list_t *list, const char *name) 259 { 260 iconv_item_t *head; 261 iconv_item_t *item; 262 263 head = list_head(list); 264 while (head != NULL && strcmp(head->ii_name, name) < 0) 265 head = list_next(list, head); 266 267 item = (iconv_item_t *)malloc(sizeof (iconv_item_t)); 268 269 list_link_init(&item->ii_next); 270 list_create(&item->ii_alias_list, sizeof (iconv_alias_t), 271 offsetof(iconv_alias_t, ia_next)); 272 273 item->ii_name = strdup(name); 274 275 list_insert_before(list, head, item); 276 } 277 278 static void 279 iconv_insert_create(list_t *list, const char *name) 280 { 281 if (!iconv_find(list, name)) 282 iconv_insert(list, name); 283 } 284 285 static void 286 iconv_print(list_t *list) 287 { 288 iconv_item_t *head; 289 iconv_alias_t *alias_head; 290 291 (void) fprintf(stdout, gettext( 292 "The following are all supported code set names. All combinations\n" 293 "of those names are not necessarily available for the pair of the\n" 294 "fromcode-tocode. Some of those code set names have aliases, which\n" 295 "are case-insensitive and shown after the canonical name:\n")); 296 297 head = list_head(list); 298 while (head != NULL) { 299 (void) fprintf(stdout, "%s", head->ii_name); 300 301 if (!list_is_empty(&head->ii_alias_list)) { 302 printf(" ("); 303 alias_head = list_head(&head->ii_alias_list); 304 while (alias_head != NULL) { 305 (void) fprintf(stdout, "%s", 306 alias_head->ia_name); 307 308 alias_head = list_next(&head->ii_alias_list, 309 alias_head); 310 311 if (alias_head != NULL) 312 (void) fprintf(stdout, ", "); 313 } 314 (void) fprintf(stdout, ")"); 315 } 316 317 (void) fprintf(stdout, ",\n"); 318 319 head = list_next(list, head); 320 } 321 } 322 323 /* 324 * List all codesets available. 325 */ 326 static void 327 list_codesets(void) 328 { 329 list_t item_list; 330 glob_t globbuf; 331 FILE *fp; 332 char *alias, *ptr, *chomp; 333 char buf[1024]; 334 int i; 335 336 list_create(&item_list, sizeof (iconv_item_t), 337 offsetof(iconv_item_t, ii_next)); 338 339 #define _ICONV_PATH "/usr/lib/iconv/" 340 341 /* XXX search path depends on arch amd64 etc */ 342 (void) chdir(_ICONV_PATH); 343 (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf); 344 (void) chdir("geniconvtbl/binarytables"); 345 (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf); 346 347 for (i = 0; i < globbuf.gl_pathc; i++) { 348 349 ptr = globbuf.gl_pathv[i]; 350 alias = strsep(&ptr, "%"); 351 352 chomp = ptr; 353 for (; *chomp; chomp++) { 354 if (*chomp == '.') 355 *chomp = '\0'; 356 } 357 358 iconv_insert_create(&item_list, ptr); 359 iconv_insert_create(&item_list, alias); 360 } 361 362 globfree(&globbuf); 363 364 (void) chdir(_ICONV_PATH); 365 (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf); 366 367 for (i = 0; i < globbuf.gl_pathc; i++) { 368 369 ptr = globbuf.gl_pathv[i]; 370 alias = strsep(&ptr, "."); 371 printf("%s\n", ptr); 372 chomp = ptr; 373 for (; *chomp; chomp++) { 374 if (*chomp == '.') 375 *chomp = '\0'; 376 } 377 378 iconv_insert_create(&item_list, ptr); 379 iconv_insert_create(&item_list, alias); 380 } 381 382 globfree(&globbuf); 383 384 /* 385 * Read in the alias file and build up a list of 386 * encoding aliases. 387 */ 388 fp = fopen("alias", "r"); 389 if (fp == NULL) { 390 fprintf(stderr, gettext( 391 "Failed to open the conversion alias file: %s\n"), 392 "XXX"); 393 394 /* XXX free list */ 395 return; 396 } 397 398 while (fgets(buf, sizeof (buf), fp) != NULL) { 399 iconv_item_t *head; 400 iconv_alias_t *alias_head; 401 402 /* Skip comments */ 403 if (buf[0] == '#') 404 continue; 405 406 ptr = buf; 407 alias = strsep(&ptr, " \t"); 408 409 chomp = ptr; 410 for (; *chomp; chomp++) { 411 if (*chomp == '\n') 412 *chomp = '\0'; 413 } 414 415 head = list_head(&item_list); 416 while (head != NULL && 417 strcmp(head->ii_name, ptr) < 0) 418 head = list_next(&item_list, head); 419 420 if (head != NULL) { 421 alias_head = (iconv_alias_t *)malloc( 422 sizeof (iconv_alias_t)); 423 list_link_init(&alias_head->ia_next); 424 alias_head->ia_name = strdup(alias); 425 426 list_insert_tail(&head->ii_alias_list, alias_head); 427 } 428 } 429 430 iconv_print(&item_list); 431 432 /* XXX free list */ 433 434 (void) fclose(fp); 435 }