1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
14 * Copyright 2013 David Hoeppner. All rights reserved.
15 */
16
17 /*
18 * POSIX iconv.
19 */
20
21 #include <sys/list.h>
22
23 #include <errno.h>
24 #include <glob.h>
25 #include <iconv.h>
26 #include <langinfo.h>
27 #include <libnvpair.h>
28 #include <locale.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "iconv.h"
34
/* Program name; not referenced by the code shown in this file. */
static const char *g_progname = "iconv";

/* Conversion endpoints; main() replaces them with the -f / -t arguments. */
static char *g_from_cs = "UTF-8";
static char *g_to_cs = "UTF-8";
static iconv_t g_ich; /* iconv(3c) lib handle */
/* Count of conversion errors, incremented by do_iconv(). */
static int g_errcnt;
static boolean_t g_cflag = B_FALSE; /* Skip invalid characters */
static boolean_t g_sflag = B_FALSE; /* Silent */
static boolean_t g_lflag = B_FALSE; /* List conversions */
44
45
/*
 * Forward declarations.
 *
 * usage(), do_iconv() and list_codesets() are defined later in this file.
 * yyparse() is not defined here; main() calls it after reset_scanner() to
 * parse charmap files (presumably the generated parser -- confirm against
 * the build).
 */
static void usage(void) __NORETURN;
static void do_iconv(FILE *, const char *);
static void list_codesets(void);
int yyparse(void);
53
/*
 * One codeset name in the sorted list built by list_codesets().
 */
typedef struct _iconv_item {
	list_node_t ii_next;		/* linkage in the codeset list */
	list_t ii_alias_list;		/* list of iconv_alias_t for this name */
	char *ii_name;			/* strdup()ed codeset name */
} iconv_item_t;

/*
 * One alias attached to an iconv_item_t through ii_alias_list.
 */
typedef struct _iconv_alias {
	list_node_t ia_next;		/* linkage in ii_alias_list */
	char *ia_name;			/* strdup()ed alias name */
} iconv_alias_t;
64
/*
 * Write the command synopsis to stderr and terminate with exit status 1.
 * Never returns.
 */
static void
usage(void)
{
	const char *synopsis;

	synopsis = _(
	    "usage:"
	    "\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n"
	    "\ticonv [-cs] -f frommap -t tomap [file ...]\n"
	    "\ticonv -l\n");

	(void) fprintf(stderr, synopsis);
	exit(1);
}
78
79
80 int
81 main(int argc, char **argv)
82 {
83 char *fname;
84 FILE *fp;
85 int c;
86
87 init_charmap();
88
89 /* XXX */
90 yydebug = 1;
91
92 (void) setlocale(LC_ALL, "");
93 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
94 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
95 #endif
96 (void) textdomain(TEXT_DOMAIN);
97
98 while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) {
99 switch (c) {
100 case 'c':
101 g_cflag = B_TRUE;
102 break;
103 case 'l':
104 g_lflag = B_TRUE;
105 break;
106 case 's':
107 g_sflag = B_TRUE;
108 break;
109 case 'f':
110 g_from_cs = optarg;
111 break;
112 case 't':
113 g_to_cs = optarg;
114 break;
115 case '?':
116 usage();
117 }
118 }
119
120 if (g_lflag) {
121 if (optind != argc)
122 usage();
123 list_codesets();
124 exit(0);
125 }
126
127 /* Charmaps and codesets can't be mixed */
128 if ((strchr(g_from_cs, '/') == NULL) !=
129 (strchr(g_to_cs, '/') == NULL)) {
130 usage();
131 }
132
133 /* XXX form_cs not only codeset */
134 if (strchr(g_from_cs, '/') != NULL) {
135 reset_scanner(g_from_cs);
136 (void) yyparse();
137
138 switch_charmap();
139
140 reset_scanner(g_to_cs);
141 (void) yyparse();
142 }
143
144 /* XXX empty string "" current encoding */
145 if (g_from_cs == NULL) {
146 g_from_cs = nl_langinfo(CODESET);
147 printf("%s\n", g_from_cs);
148 }
149 if (g_to_cs == NULL)
150 g_to_cs = nl_langinfo(CODESET);
151
152 /*
153 * XXX todo: deal with charmap files (/paths)
154 */
155
156 g_ich = iconv_open(g_to_cs, g_from_cs);
157 if (g_ich == ((iconv_t)-1)) {
158 if (errno == EINVAL) {
159 (void) fprintf(stderr, gettext("Not supported %s to %s\n"),
160 g_from_cs, g_to_cs);
161 } else {
162 (void) fprintf(stderr, "iconv_open failed\n");
163 }
164 exit(1);
165 }
166
167 if (optind == argc || (optind == argc - 1 &&
168 0 == strcmp(argv[optind], "-"))) {
169 do_iconv(stdin, "stdin");
170 exit(0);
171 }
172
173 for (; optind < argc; optind++) {
174 fp = fopen(argv[optind], "r");
175 if (fp == NULL) {
176 perror(argv[optind]);
177 exit(1);
178 }
179 do_iconv(fp, argv[optind]);
180 (void) fclose(fp);
181 }
182
183 return (EXIT_SUCCESS);
184 }
185
186 /*
187 * Do actual conversion, copying *fp to stdout.
188 *
189 * Conversions may grow or shrink data, so using a larger output buffer
190 * to reduce the likelihood of leftover input buffer data in each pass.
191 */
192
193 #define IBUFSIZ 1024
194 #define OBUFSIZ (2*IBUFSIZ)
195
196 void
197 do_iconv(FILE *fp, const char *fname)
198 {
199 const char *iptr;
200 char ibuf[IBUFSIZ];
201 char obuf[OBUFSIZ];
202 char *optr;
203 size_t ileft, icnt, oleft, ocnt;
204 int nr, nw, rc;
205
206 while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) {
207
208 iptr = ibuf;
209 ileft = nr;
210
211 while (ileft > 0) {
212 optr = obuf;
213 oleft = OBUFSIZ;
214 rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft);
215 if (rc == (size_t)-1) {
216 /*
217 * XXX todo: deal with skipping invalid
218 * input characters and continue...
219 */
220 g_errcnt++;
221 break;
222 }
223 ocnt = OBUFSIZ - oleft;
224 nw = fwrite(obuf, 1, ocnt, stdout);
225 if (nw != ocnt) {
226 perror("write");
227 exit(1);
228 }
229 }
230 }
231
232 /*
233 * End of file. Flush any shift encodings.
234 */
235 iptr = NULL;
236 ileft = 0;
237 optr = obuf;
238 oleft = OBUFSIZ;
239 iconv(g_ich, &iptr, &ileft, &optr, &oleft);
240 ocnt = OBUFSIZ - oleft;
241 fwrite(obuf, 1, ocnt, stdout);
242 }
243
244 /*
245 * Item is in the list?
246 */
247 static boolean_t
248 iconv_find(list_t *list, const char *name)
249 {
250 iconv_item_t *head;
251 boolean_t found = B_FALSE;
252
253 head = list_head(list);
254 while (head != NULL) {
255 if (strcmp(head->ii_name, name) == 0) {
256 found = B_TRUE;
257 break;
258 }
259 head = list_next(list, head);
260 }
261
262 return (found);
263 }
264
265 /*
266 * Insert into a sorted list.
267 */
268 static void
269 iconv_insert(list_t *list, const char *name)
270 {
271 iconv_item_t *head;
272 iconv_item_t *item;
273
274 head = list_head(list);
275 while (head != NULL && strcmp(head->ii_name, name) < 0)
276 head = list_next(list, head);
277
278 item = (iconv_item_t *)malloc(sizeof (iconv_item_t));
279
280 list_link_init(&item->ii_next);
281 list_create(&item->ii_alias_list, sizeof (iconv_alias_t),
282 offsetof(iconv_alias_t, ia_next));
283
284 item->ii_name = strdup(name);
285
286 list_insert_before(list, head, item);
287 }
288
289 static void
290 iconv_insert_create(list_t *list, const char *name)
291 {
292 if (!iconv_find(list, name))
293 iconv_insert(list, name);
294 }
295
296 static void
297 iconv_print(list_t *list)
298 {
299 iconv_item_t *head;
300 iconv_alias_t *alias_head;
301
302 (void) fprintf(stdout, gettext(
303 "The following are all supported code set names. All combinations\n"
304 "of those names are not necessarily available for the pair of the\n"
305 "fromcode-tocode. Some of those code set names have aliases, which\n"
306 "are case-insensitive and shown after the canonical name:\n"));
307
308 head = list_head(list);
309 while (head != NULL) {
310 (void) fprintf(stdout, "%s", head->ii_name);
311
312 if (!list_is_empty(&head->ii_alias_list)) {
313 printf(" (");
314 alias_head = list_head(&head->ii_alias_list);
315 while (alias_head != NULL) {
316 (void) fprintf(stdout, "%s",
317 alias_head->ia_name);
318
319 alias_head = list_next(&head->ii_alias_list,
320 alias_head);
321
322 if (alias_head != NULL)
323 (void) fprintf(stdout, ", ");
324 }
325 (void) fprintf(stdout, ")");
326 }
327
328 (void) fprintf(stdout, ",\n");
329
330 head = list_next(list, head);
331 }
332 }
333
334 /*
335 * List all codesets available.
336 */
337 static void
338 list_codesets(void)
339 {
340 list_t item_list;
341 glob_t globbuf;
342 FILE *fp;
343 char *alias, *ptr, *chomp;
344 char buf[1024];
345 int i;
346
347 list_create(&item_list, sizeof (iconv_item_t),
348 offsetof(iconv_item_t, ii_next));
349
350 #define _ICONV_PATH "/usr/lib/iconv/"
351
352 /* XXX search path depends on arch amd64 etc */
353 (void) chdir(_ICONV_PATH);
354 (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf);
355 (void) chdir("geniconvtbl/binarytables");
356 (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf);
357
358 for (i = 0; i < globbuf.gl_pathc; i++) {
359
360 ptr = globbuf.gl_pathv[i];
361 alias = strsep(&ptr, "%");
362
363 chomp = ptr;
364 for (; *chomp; chomp++) {
365 if (*chomp == '.')
366 *chomp = '\0';
367 }
368
369 iconv_insert_create(&item_list, ptr);
370 iconv_insert_create(&item_list, alias);
371 }
372
373 globfree(&globbuf);
374
375 (void) chdir(_ICONV_PATH);
376 (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf);
377
378 for (i = 0; i < globbuf.gl_pathc; i++) {
379
380 ptr = globbuf.gl_pathv[i];
381 alias = strsep(&ptr, ".");
382 printf("%s\n", ptr);
383 chomp = ptr;
384 for (; *chomp; chomp++) {
385 if (*chomp == '.')
386 *chomp = '\0';
387 }
388
389 iconv_insert_create(&item_list, ptr);
390 iconv_insert_create(&item_list, alias);
391 }
392
393 globfree(&globbuf);
394
395 /*
396 * Read in the alias file and build up a list of
397 * encoding aliases.
398 */
399 fp = fopen("alias", "r");
400 if (fp == NULL) {
401 fprintf(stderr, gettext(
402 "Failed to open the conversion alias file: %s\n"),
403 "XXX");
404
405 /* XXX free list */
406 return;
407 }
408
409 while (fgets(buf, sizeof (buf), fp) != NULL) {
410 iconv_item_t *head;
411 iconv_alias_t *alias_head;
412
413 /* Skip comments */
414 if (buf[0] == '#')
415 continue;
416
417 ptr = buf;
418 alias = strsep(&ptr, " \t");
419
420 chomp = ptr;
421 for (; *chomp; chomp++) {
422 if (*chomp == '\n')
423 *chomp = '\0';
424 }
425
426 head = list_head(&item_list);
427 while (head != NULL &&
428 strcmp(head->ii_name, ptr) < 0)
429 head = list_next(&item_list, head);
430
431 if (head != NULL) {
432 alias_head = (iconv_alias_t *)malloc(
433 sizeof (iconv_alias_t));
434 list_link_init(&alias_head->ia_next);
435 alias_head->ia_name = strdup(alias);
436
437 list_insert_tail(&head->ii_alias_list, alias_head);
438 }
439 }
440
441 iconv_print(&item_list);
442
443 /* XXX free list */
444
445 (void) fclose(fp);
446 }