1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
14 * Copyright 2013 David Hoeppner. All rights reserved.
15 */
16
17 /*
18 * POSIX iconv.
19 */
20
21 #include <sys/list.h>
22
23 #include <errno.h>
24 #include <glob.h>
25 #include <iconv.h>
26 #include <langinfo.h>
27 #include <libnvpair.h>
28 #include <locale.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <unistd.h>
32
33 #include "iconv.h"
34
35 static const char *g_progname = "iconv";
36
37 static char *g_from_cs = "UTF-8";
38 static char *g_to_cs = "UTF-8";
39 static iconv_t g_ich; /* iconv(3c) lib handle */
40 static int g_errcnt;
41 static boolean_t g_cflag = B_FALSE; /* Skip invalid characters */
42 static boolean_t g_sflag = B_FALSE; /* Silent */
43 static boolean_t g_lflag = B_FALSE; /* List conversions */
44
45
46 /*
47 * Forward declarations.
48 */
49 static void usage(void) __NORETURN;
50 static void do_iconv(FILE *, const char *);
51 static void list_codesets(void);
52 int yyparse(void);
53
54 typedef struct _iconv_item {
55 list_node_t ii_next;
56 list_t ii_alias_list;
57 char *ii_name;
58 } iconv_item_t;
59
60 typedef struct _iconv_alias {
61 list_node_t ia_next;
62 char *ia_name;
63 } iconv_alias_t;
64
65 /*
66 * Print usage.
67 */
68 static void
69 usage(void)
70 {
71 /* XXX g_progname */
72 (void) fprintf(stderr, _(
73 "usage:\ticonv [-cs] [-f fromcode] [-t tocode] [file ...]\n"
74 "\ticonv [-cs] -f frommap -t tomap [file ...]\n"));
75 (void) fprintf(stderr, _("\t%s -l\n"), g_progname);
76 exit(1);
77 }
78
79
80 int
81 main(int argc, char **argv)
82 {
83 char *fname;
84 FILE *fp;
85 int c;
86
87 init_charmap();
88
89 /* XXX */
90 yydebug = 1;
91
92 (void) setlocale(LC_ALL, "");
93 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
94 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */
95 #endif
96 (void) textdomain(TEXT_DOMAIN);
97
98 while ((c = getopt(argc, argv, "clsf:t:?")) != EOF) {
99 switch (c) {
100 case 'c':
101 g_cflag = B_TRUE;
102 break;
103 case 'l':
104 g_lflag = B_TRUE;
105 break;
106 case 's':
107 g_sflag = B_TRUE;
108 break;
109 case 'f':
110 g_from_cs = optarg;
111 break;
112 case 't':
113 g_to_cs = optarg;
114 break;
115 case '?':
116 usage();
117 }
118 }
119
120 if (g_lflag) {
121 if (optind != argc)
122 usage();
123 list_codesets();
124 exit(0);
125 }
126
127 /* XXX form_cs not only codeset */
128 if (strstr(g_from_cs, "/") != NULL) {
129 reset_scanner(g_from_cs);
130 (void) yyparse();
131 }
132
133 /* XXX empty string "" current encoding */
134 if (g_from_cs == NULL) {
135 g_from_cs = nl_langinfo(CODESET);
136 printf("%s\n", g_from_cs);
137 }
138 if (g_to_cs == NULL)
139 g_to_cs = nl_langinfo(CODESET);
140
141 /*
142 * XXX todo: deal with charmap files (/paths)
143 */
144
145 g_ich = iconv_open(g_to_cs, g_from_cs);
146 if (g_ich == ((iconv_t)-1)) {
147 if (errno == EINVAL) {
148 (void) fprintf(stderr, gettext("Not supported %s to %s\n"),
149 g_from_cs, g_to_cs);
150 } else {
151 (void) fprintf(stderr, "iconv_open failed\n");
152 }
153 exit(1);
154 }
155
156 if (optind == argc || (optind == argc - 1 &&
157 0 == strcmp(argv[optind], "-"))) {
158 do_iconv(stdin, "stdin");
159 exit(0);
160 }
161
162 for (; optind < argc; optind++) {
163 fp = fopen(argv[optind], "r");
164 if (fp == NULL) {
165 perror(argv[optind]);
166 exit(1);
167 }
168 do_iconv(fp, argv[optind]);
169 (void) fclose(fp);
170 }
171
172 return (EXIT_SUCCESS);
173 }
174
175 /*
176 * Do actual conversion, copying *fp to stdout.
177 *
178 * Conversions may grow or shrink data, so using a larger output buffer
179 * to reduce the likelihood of leftover input buffer data in each pass.
180 */
181
182 #define IBUFSIZ 1024
183 #define OBUFSIZ (2*IBUFSIZ)
184
185 void
186 do_iconv(FILE *fp, const char *fname)
187 {
188 const char *iptr;
189 char ibuf[IBUFSIZ];
190 char obuf[OBUFSIZ];
191 char *optr;
192 size_t ileft, icnt, oleft, ocnt;
193 int nr, nw, rc;
194
195 while ((nr = fread(ibuf, 1, IBUFSIZ, fp)) > 0) {
196
197 iptr = ibuf;
198 ileft = nr;
199
200 while (ileft > 0) {
201 optr = obuf;
202 oleft = OBUFSIZ;
203 rc = iconv(g_ich, &iptr, &ileft, &optr, &oleft);
204 if (rc == (size_t)-1) {
205 /*
206 * XXX todo: deal with skipping invalid
207 * input characters and continue...
208 */
209 g_errcnt++;
210 break;
211 }
212 ocnt = OBUFSIZ - oleft;
213 nw = fwrite(obuf, 1, ocnt, stdout);
214 if (nw != ocnt) {
215 perror("write");
216 exit(1);
217 }
218 }
219 }
220
221 /*
222 * End of file. Flush any shift encodings.
223 */
224 iptr = NULL;
225 ileft = 0;
226 optr = obuf;
227 oleft = OBUFSIZ;
228 iconv(g_ich, &iptr, &ileft, &optr, &oleft);
229 ocnt = OBUFSIZ - oleft;
230 fwrite(obuf, 1, ocnt, stdout);
231 }
232
233 /*
234 * Item is in the list?
235 */
236 static boolean_t
237 iconv_find(list_t *list, const char *name)
238 {
239 iconv_item_t *head;
240 boolean_t found = B_FALSE;
241
242 head = list_head(list);
243 while (head != NULL) {
244 if (strcmp(head->ii_name, name) == 0) {
245 found = B_TRUE;
246 break;
247 }
248 head = list_next(list, head);
249 }
250
251 return (found);
252 }
253
254 /*
255 * Insert into a sorted list.
256 */
257 static void
258 iconv_insert(list_t *list, const char *name)
259 {
260 iconv_item_t *head;
261 iconv_item_t *item;
262
263 head = list_head(list);
264 while (head != NULL && strcmp(head->ii_name, name) < 0)
265 head = list_next(list, head);
266
267 item = (iconv_item_t *)malloc(sizeof (iconv_item_t));
268
269 list_link_init(&item->ii_next);
270 list_create(&item->ii_alias_list, sizeof (iconv_alias_t),
271 offsetof(iconv_alias_t, ia_next));
272
273 item->ii_name = strdup(name);
274
275 list_insert_before(list, head, item);
276 }
277
278 static void
279 iconv_insert_create(list_t *list, const char *name)
280 {
281 if (!iconv_find(list, name))
282 iconv_insert(list, name);
283 }
284
285 static void
286 iconv_print(list_t *list)
287 {
288 iconv_item_t *head;
289 iconv_alias_t *alias_head;
290
291 (void) fprintf(stdout, gettext(
292 "The following are all supported code set names. All combinations\n"
293 "of those names are not necessarily available for the pair of the\n"
294 "fromcode-tocode. Some of those code set names have aliases, which\n"
295 "are case-insensitive and shown after the canonical name:\n"));
296
297 head = list_head(list);
298 while (head != NULL) {
299 (void) fprintf(stdout, "%s", head->ii_name);
300
301 if (!list_is_empty(&head->ii_alias_list)) {
302 printf(" (");
303 alias_head = list_head(&head->ii_alias_list);
304 while (alias_head != NULL) {
305 (void) fprintf(stdout, "%s",
306 alias_head->ia_name);
307
308 alias_head = list_next(&head->ii_alias_list,
309 alias_head);
310
311 if (alias_head != NULL)
312 (void) fprintf(stdout, ", ");
313 }
314 (void) fprintf(stdout, ")");
315 }
316
317 (void) fprintf(stdout, ",\n");
318
319 head = list_next(list, head);
320 }
321 }
322
323 /*
324 * List all codesets available.
325 */
326 static void
327 list_codesets(void)
328 {
329 list_t item_list;
330 glob_t globbuf;
331 FILE *fp;
332 char *alias, *ptr, *chomp;
333 char buf[1024];
334 int i;
335
336 list_create(&item_list, sizeof (iconv_item_t),
337 offsetof(iconv_item_t, ii_next));
338
339 #define _ICONV_PATH "/usr/lib/iconv/"
340
341 /* XXX search path depends on arch amd64 etc */
342 (void) chdir(_ICONV_PATH);
343 (void) glob("*%*.so", GLOB_NOSORT, NULL, &globbuf);
344 (void) chdir("geniconvtbl/binarytables");
345 (void) glob("*%*.bt", GLOB_NOSORT|GLOB_APPEND, NULL, &globbuf);
346
347 for (i = 0; i < globbuf.gl_pathc; i++) {
348
349 ptr = globbuf.gl_pathv[i];
350 alias = strsep(&ptr, "%");
351
352 chomp = ptr;
353 for (; *chomp; chomp++) {
354 if (*chomp == '.')
355 *chomp = '\0';
356 }
357
358 iconv_insert_create(&item_list, ptr);
359 iconv_insert_create(&item_list, alias);
360 }
361
362 globfree(&globbuf);
363
364 (void) chdir(_ICONV_PATH);
365 (void) glob("*.*.t", GLOB_NOSORT, NULL, &globbuf);
366
367 for (i = 0; i < globbuf.gl_pathc; i++) {
368
369 ptr = globbuf.gl_pathv[i];
370 alias = strsep(&ptr, ".");
371 printf("%s\n", ptr);
372 chomp = ptr;
373 for (; *chomp; chomp++) {
374 if (*chomp == '.')
375 *chomp = '\0';
376 }
377
378 iconv_insert_create(&item_list, ptr);
379 iconv_insert_create(&item_list, alias);
380 }
381
382 globfree(&globbuf);
383
384 /*
385 * Read in the alias file and build up a list of
386 * encoding aliases.
387 */
388 fp = fopen("alias", "r");
389 if (fp == NULL) {
390 fprintf(stderr, gettext(
391 "Failed to open the conversion alias file: %s\n"),
392 "XXX");
393
394 /* XXX free list */
395 return;
396 }
397
398 while (fgets(buf, sizeof (buf), fp) != NULL) {
399 iconv_item_t *head;
400 iconv_alias_t *alias_head;
401
402 /* Skip comments */
403 if (buf[0] == '#')
404 continue;
405
406 ptr = buf;
407 alias = strsep(&ptr, " \t");
408
409 chomp = ptr;
410 for (; *chomp; chomp++) {
411 if (*chomp == '\n')
412 *chomp = '\0';
413 }
414
415 head = list_head(&item_list);
416 while (head != NULL &&
417 strcmp(head->ii_name, ptr) < 0)
418 head = list_next(&item_list, head);
419
420 if (head != NULL) {
421 alias_head = (iconv_alias_t *)malloc(
422 sizeof (iconv_alias_t));
423 list_link_init(&alias_head->ia_next);
424 alias_head->ia_name = strdup(alias);
425
426 list_insert_tail(&head->ii_alias_list, alias_head);
427 }
428 }
429
430 iconv_print(&item_list);
431
432 /* XXX free list */
433
434 (void) fclose(fp);
435 }