1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
14 */
15
16 /*
17 * iconv(1) command.
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <errno.h>
24 #include <limits.h>
25 #include <iconv.h>
26 #include <libintl.h>
27 #include <langinfo.h>
28 #include <locale.h>
29 #include "charmap.h"
30
31 #include <assert.h>
32
/*
 * File-scope state shared by the functions in this file.
 */

/* Name printed in the usage messages. */
const char *progname = "iconv";

/* Functions defined further down in this file. */
void list_codesets(void);
void iconv_file(FILE *, const char *);

/*
 * Conversion endpoints.  Set from -f / -t in main(), or from
 * nl_langinfo(CODESET) when the option is absent.  A value containing
 * a '/' makes main() use the charmap code path.
 */
char *from_cs;
char *to_cs;

/* Option counters, incremented by main() for each occurrence. */
int debug;	/* -d */
int cflag;	/* -c: skip invalid characters */
int sflag;	/* -s: silent */
int lflag;	/* -l: list conversions */

/* iconv(3c) lib handle; opened in main() when charmaps are not in use. */
iconv_t ich;

/*
 * Conversion routine chosen by main(): either lib_iconv() or cm_iconv().
 * Called with the same argument layout as iconv(3C), minus the handle.
 */
size_t (*pconv)(const char **iptr, size_t *ileft,
    char **optr, size_t *oleft);
48
49 size_t
50 lib_iconv(const char **iptr, size_t *ileft, char **optr, size_t *oleft)
51 {
52 return (iconv(ich, iptr, ileft, optr, oleft));
53 }
54
55 void
56 usage(void)
57 {
58 (void) fprintf(stderr, gettext(
59 "usage: %s [-cs] [-f from-codeset] [-t to-codeset] "
60 "[file ...]\n"), progname);
61 (void) fprintf(stderr, gettext("\t%s -l\n"), progname);
62 exit(1);
63 }
64
65 int
66 main(int argc, char **argv)
67 {
68 FILE *fp;
69 char *fslash, *tslash;
70 int c;
71
72 (void) setlocale(LC_ALL, "");
73
74 #if !defined(TEXT_DOMAIN)
75 #define TEXT_DOMAIN "SYS_TEST"
76 #endif
77 (void) textdomain(TEXT_DOMAIN);
78
79 while ((c = getopt(argc, argv, "cdlsf:t:")) != EOF) {
80 switch (c) {
81 case 'c':
82 cflag++;
83 break;
84 case 'd':
85 debug++;
86 break;
87 case 'l':
88 lflag++;
89 break;
90 case 's':
91 sflag++;
92 break;
93 case 'f':
94 from_cs = optarg;
95 break;
96 case 't':
97 to_cs = optarg;
98 break;
99 case '?':
100 usage();
101 }
102 }
103
104 if (lflag) {
105 if (from_cs != NULL || to_cs != NULL || optind != argc)
106 usage();
107 list_codesets();
108 exit(0);
109 }
110
111 if (from_cs == NULL)
112 from_cs = nl_langinfo(CODESET);
113 if (to_cs == NULL)
114 to_cs = nl_langinfo(CODESET);
115
116 /*
117 * If either "from" or "to" contains a slash,
118 * then we're using charmaps.
119 */
120 fslash = strchr(from_cs, '/');
121 tslash = strchr(to_cs, '/');
122 if (fslash != NULL || tslash != NULL) {
123 charmap_init(to_cs, from_cs);
124 pconv = cm_iconv;
125 if (debug)
126 charmap_dump();
127 } else {
128 ich = iconv_open(to_cs, from_cs);
129 if (ich == ((iconv_t)-1)) {
130 (void) fprintf(stderr, gettext("iconv_open failed\n"));
131 exit(1);
132 }
133 pconv = lib_iconv;
134 }
135
136 if (optind == argc || optind == argc - 1 &&
137 0 == strcmp(argv[optind], "-")) {
138 iconv_file(stdin, "stdin");
139 exit(0);
140 }
141
142 for (; optind < argc; optind++) {
143 fp = fopen(argv[optind], "r");
144 if (fp == NULL) {
145 perror(argv[optind]);
146 exit(1);
147 }
148 iconv_file(fp, argv[optind]);
149 (void) fclose(fp);
150 }
151 exit(0);
152 }
153
/*
 * Conversion buffer sizes:
 *
 * The input buffer reserves MB_LEN_MAX bytes in front of the BUFSIZ
 * read area, so that a partial multibyte character left over at the
 * end of one conversion pass can be prepended to the next read.
 *
 * Conversions may grow or shrink data, so the output buffer is twice
 * the size of the read area to reduce the likelihood of leftover
 * input buffer data in each pass.
 */
#define	IBUFSIZ	(BUFSIZ + MB_LEN_MAX)
#define	OBUFSIZ	(BUFSIZ * 2)
165
166 void
167 iconv_file(FILE *fp, const char *fname)
168 {
169 static char ibuf[IBUFSIZ];
170 static char obuf[OBUFSIZ];
171 const char *iptr;
172 char *optr;
173 off64_t offset;
174 size_t ileft, oleft, ocnt;
175 int iconv_errno;
176 int nr, nw, rc;
177
178 offset = 0;
179 ileft = 0;
180 iptr = ibuf + MB_LEN_MAX;
181
182 while ((nr = fread(ibuf+MB_LEN_MAX, 1, BUFSIZ, fp)) > 0) {
183
184 assert(iptr <= ibuf+MB_LEN_MAX);
185 assert(ileft <= MB_LEN_MAX);
186 ileft += nr;
187 offset += nr;
188
189 optr = obuf;
190 oleft = OBUFSIZ;
191
192 iconv_again:
193 rc = (*pconv)(&iptr, &ileft, &optr, &oleft);
194 iconv_errno = errno;
195
196 ocnt = OBUFSIZ - oleft;
197 if (ocnt > 0) {
198 nw = fwrite(obuf, 1, ocnt, stdout);
199 if (nw != ocnt) {
200 perror("fwrite");
201 exit(1);
202 }
203 }
204 optr = obuf;
205 oleft = OBUFSIZ;
206
207 if (rc == (size_t)-1) {
208 switch (iconv_errno) {
209
210 case E2BIG: /* no room in output buffer */
211 goto iconv_again;
212
213 case EINVAL: /* incomplete sequence on input */
214 if (debug) {
215 (void) fprintf(stderr,
216 _("Incomplete sequence in %s at offset %lld\n"),
217 fname, offset - ileft);
218 }
219 /*
220 * Copy the reminder to the space reserved
221 * at the start of the input buffer.
222 */
223 assert(ileft > 0);
224 if (ileft <= MB_LEN_MAX) {
225 char *p = ibuf+MB_LEN_MAX-ileft;
226 (void) memcpy(p, iptr, ileft);
227 iptr = p;
228 continue; /* read again */
229 }
230 /*
231 * Should not see ileft > MB_LEN_MAX,
232 * but if we do, handle as EILSEQ.
233 */
234 /* FALLTHROUGH */
235
236 case EILSEQ: /* invalid sequence on input */
237 if (!sflag) {
238 (void) fprintf(stderr,
239 _("Illegal sequence in %s at offset %lld\n"),
240 fname, offset - ileft);
241 (void) fprintf(stderr,
242 _("bad seq: \\x%02x\\x%02x\\x%02x\n"),
243 iptr[0] & 0xff,
244 iptr[1] & 0xff,
245 iptr[2] & 0xff);
246 }
247 assert(ileft > 0);
248 /* skip one */
249 iptr++;
250 ileft--;
251 assert(oleft > 0);
252 if (!cflag) {
253 *optr++ = '?';
254 oleft--;
255 }
256 goto iconv_again;
257
258 default:
259 (void) fprintf(stderr,
260 _("iconv error (%s) in file $s at offset %lld\n"),
261 strerror(errno), fname, offset - ileft);
262 perror("iconv");
263 break;
264 }
265 }
266
267 /* normal iconv return */
268 ileft = 0;
269 iptr = ibuf + MB_LEN_MAX;
270 }
271
272 /*
273 * End of file
274 * Flush any shift encodings.
275 */
276 iptr = NULL;
277 ileft = 0;
278 optr = obuf;
279 oleft = OBUFSIZ;
280 (*pconv)(&iptr, &ileft, &optr, &oleft);
281 ocnt = OBUFSIZ - oleft;
282 if (ocnt > 0) {
283 (void) fwrite(obuf, 1, ocnt, stdout);
284 }
285 }
286
/*
 * List the available conversions.
 *
 * Rather than scanning the /usr/lib/iconv directory here, the work is
 * delegated to an external program run through system(3C).  Its exit
 * status is deliberately ignored.
 */
void
list_codesets(void)
{
	static const char lister[] = "/usr/lib/iconv/iconv_list";

	(void) system(lister);
}