Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/kiconv/kiconv_ja/kiconv_ja.c
+++ new/usr/src/uts/common/kiconv/kiconv_ja/kiconv_ja.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 -#pragma ident "%Z%%M% %I% %E% SMI"
28 -
29 27 #include <sys/types.h>
30 28 #include <sys/param.h>
31 29 #include <sys/sysmacros.h>
32 30 #include <sys/systm.h>
33 31 #include <sys/debug.h>
34 32 #include <sys/kmem.h>
35 33 #include <sys/sunddi.h>
36 34 #include <sys/byteorder.h>
37 35 #include <sys/errno.h>
38 36 #include <sys/euc.h>
39 37 #include <sys/modctl.h>
40 38 #include <sys/kiconv.h>
41 39
42 40 #include <sys/kiconv_ja.h>
43 41 #include <sys/kiconv_ja_jis_to_unicode.h>
44 42 #include <sys/kiconv_ja_unicode_to_jis.h>
45 43
46 44 /*
47 45 * The following vector gives the total number of bytes in a UTF-8
48 46 * character (or a special value such as U8_ILLEGAL_CHAR or
49 47 * U8_OUT_OF_RANGE_CHAR), indexed by the first byte of the character.
49 47 * This is defined in u8_textprep.c.
50 48 */
51 49 extern const int8_t u8_number_of_bytes[];
52 50
53 51 /*
53 51 * The following is a vector of bit-masks used to extract the bits in use
54 52 * in the first byte of a UTF-8 character. The index is the number of bytes
55 53 * that remain after the first byte. This is defined in uconv.c.
57 55 */
58 56 extern const uchar_t u8_masks_tbl[];
59 57
60 58 /*
61 59 * The following two vectors are to provide valid minimum and
62 60 * maximum values for the 2'nd byte of a multibyte UTF-8 character for
63 61 * better illegal sequence checking. The index value must be the value of
64 62 * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
65 63 */
66 64 extern const uint8_t u8_valid_min_2nd_byte[];
67 65 extern const uint8_t u8_valid_max_2nd_byte[];
68 66
69 67 static kiconv_ja_euc16_t
70 68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
71 69 {
72 70 const kiconv_ja_euc16_t *p;
73 71
74 72 if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
75 73 return (p[ucs2 & 0xff]);
76 74
77 75 return (KICONV_JA_NODEST);
78 76 }
79 77
/*
 * utf8_ucs()
 * Decode one UTF-8 character starting at *pip and store its value in *p.
 *
 * The first byte is looked up in u8_number_of_bytes[] to obtain the size of
 * the character; U8_ILLEGAL_CHAR and U8_OUT_OF_RANGE_CHAR are reported as
 * EILSEQ and ERANGE respectively.  The second byte is checked against
 * u8_valid_min_2nd_byte[]/u8_valid_max_2nd_byte[] and every later byte
 * against the range 0x80-0xbf.
 *
 * When rv is still 0 at "ret", the function returns the number of bytes
 * consumed and writes the advanced pointer and remaining count back to
 * *pip and *pileft; otherwise the caller's values are left untouched.
 *
 * NOTE(review): KICONV_JA_NGET() and KICONV_JA_RETERROR() are defined
 * outside this file.  They presumably use the locals ip, ileft, rv and
 * errno and jump to "ret" on failure -- confirm against kiconv_ja.h.
 */
80 78 static size_t
81 79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
82 80 {
83 81 uint_t l; /* to be copied to *p on successful return */
84 82 uchar_t ic; /* current byte */
85 83 uchar_t ic1; /* 1st byte */
86 84 uchar_t *ip = *pip; /* next byte to read */
87 85 size_t ileft = *pileft; /* number of bytes available */
88 86 size_t rv = 0; /* return value of this function */
89 87 int remaining_bytes;
90 88 int u8_size;
91 89
92 90 KICONV_JA_NGET(ic1); /* read 1st byte */
93 91
94 92 if (ic1 < 0x80) {
95 93 /* successfully converted */
96 94 *p = (uint_t)ic1;
97 95 goto ret;
98 96 }
99 97
100 98 u8_size = u8_number_of_bytes[ic1];
101 99 if (u8_size == U8_ILLEGAL_CHAR) {
102 100 KICONV_JA_RETERROR(EILSEQ)
103 101 } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
104 102 KICONV_JA_RETERROR(ERANGE)
105 103 }
106 104
107 105 remaining_bytes = u8_size - 1;
108 106 if (remaining_bytes != 0) {
/* keep only the value bits of the leading byte */
109 107 l = ic1 & u8_masks_tbl[remaining_bytes];
110 108
111 109 for (; remaining_bytes > 0; remaining_bytes--) {
112 110 KICONV_JA_NGET(ic);
/* ic1 is non-zero only while the 2nd byte is still unchecked */
113 111 if (ic1 != 0U) {
114 112 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
115 113 (ic > u8_valid_max_2nd_byte[ic1])) {
116 114 KICONV_JA_RETERROR(EILSEQ)
117 115 }
118 116 ic1 = 0U; /* 2nd byte check done */
119 117 } else {
120 118 if ((ic < 0x80) || (ic > 0xbf)) {
121 119 KICONV_JA_RETERROR(EILSEQ)
122 120 }
123 121 }
/* append the low six bits of this continuation byte */
124 122 l = (l << 6) | (ic & 0x3f);
125 123 }
126 124
127 125 /* successfully converted */
128 126 *p = l;
129 127 } else {
130 128 KICONV_JA_RETERROR(EILSEQ)
131 129 }
132 130
133 131 ret:
134 132 if (rv == 0) {
135 133 /*
136 134 * Update rv, *pip, and *pileft on successful return.
137 135 */
138 136 rv = *pileft - ileft;
139 137 *pip = ip;
140 138 *pileft = ileft;
141 139 }
142 140
143 141 return (rv);
144 142 }
145 143
146 144 static size_t
147 145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
148 146 {
149 147 uint_t l; /* to be copied to *p on successful return */
150 148 uchar_t ic; /* current byte */
151 149 uchar_t ic1; /* 1st byte */
152 150 uchar_t *ip = *pip; /* next byte to read */
153 151 size_t ileft = *pileft; /* number of bytes available */
154 152 size_t rv = 0; /* return value of this function */
155 153 int remaining_bytes;
156 154 int u8_size;
157 155
158 156 KICONV_JA_NGET_REP_TO_MB(ic1); /* read 1st byte */
159 157
160 158 if (ic1 < 0x80) {
161 159 /* successfully converted */
162 160 l = (uint_t)ic1;
163 161 goto ret;
164 162 }
165 163
166 164 u8_size = u8_number_of_bytes[ic1];
167 165 if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
168 166 l = KICONV_JA_DEF_SINGLE;
169 167 (*repnum)++;
170 168 goto ret;
171 169 }
172 170
173 171 remaining_bytes = u8_size - 1;
174 172
175 173 if (remaining_bytes != 0) {
176 174 l = ic1 & u8_masks_tbl[remaining_bytes];
177 175
178 176 for (; remaining_bytes > 0; remaining_bytes--) {
179 177 KICONV_JA_NGET_REP_TO_MB(ic);
180 178 if (ic1 != 0U) {
181 179 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
182 180 (ic > u8_valid_max_2nd_byte[ic1])) {
183 181 l = KICONV_JA_DEF_SINGLE;
184 182 (*repnum)++;
185 183 ileft -= (remaining_bytes - 1);
186 184 ip += (remaining_bytes - 1);
187 185 break;
188 186 }
189 187 ic1 = 0U; /* 2nd byte check done */
190 188 } else {
191 189 if ((ic < 0x80) || (ic > 0xbf)) {
192 190 l = KICONV_JA_DEF_SINGLE;
193 191 (*repnum)++;
194 192 ileft -= (remaining_bytes - 1);
195 193 ip += (remaining_bytes - 1);
196 194 break;
197 195 }
198 196 }
199 197 l = (l << 6) | (ic & 0x3f);
200 198 }
201 199 } else {
202 200 l = KICONV_JA_DEF_SINGLE;
203 201 (*repnum)++;
204 202 }
205 203
206 204 ret:
207 205 /* successfully converted */
208 206 *p = l;
209 207 rv = *pileft - ileft;
210 208
211 209 *pip = ip;
212 210 *pileft = ileft;
213 211
214 212 return (rv);
215 213 }
216 214
217 215 static size_t /* return #bytes read, or -1 */
218 216 read_unicode(
219 217 uint_t *p, /* point variable to store UTF-32 */
220 218 uchar_t **pip, /* point pointer to input buf */
221 219 size_t *pileft, /* point #bytes left in input buf */
222 220 int *errno, /* point variable to errno */
223 221 int flag, /* kiconvstr flag */
224 222 size_t *rv) /* point return valuse */
225 223 {
226 224 if (flag & KICONV_REPLACE_INVALID)
227 225 return (utf8_ucs_replace(p, pip, pileft, rv));
228 226 else
229 227 return (utf8_ucs(p, pip, pileft, errno));
230 228 }
231 229
/*
 * write_unicode()
 * Encode the value u32 as UTF-8 into the output buffer at *pop.
 *
 * Values up to 0x7f, 0x7ff, 0xffff and 0x10ffff are written as 1, 2, 3
 * and 4 bytes respectively, and rv is set to that byte count.  Values in
 * the surrogate range 0xd800-0xdfff and values above 0x10ffff are
 * reported as EILSEQ.  *pop and *poleft are written back only when rv is
 * not (size_t)-1.
 *
 * NOTE(review): KICONV_JA_NPUT() and KICONV_JA_RETERROR() are defined
 * outside this file.  They presumably use the locals op, oleft, rv and
 * errno and jump to "ret" on failure -- confirm against kiconv_ja.h.
 */
232 230 static size_t
233 231 write_unicode(
234 232 uint_t u32, /* UTF-32 to write */
235 233 char **pop, /* point pointer to output buf */
236 234 size_t *poleft, /* point #bytes left in output buf */
237 235 int *errno) /* point variable to errno */
238 236 {
239 237 char *op = *pop;
240 238 size_t oleft = *poleft;
241 239 size_t rv = 0; /* return value */
242 240
243 241 if (u32 <= 0x7f) {
244 242 KICONV_JA_NPUT((uchar_t)(u32));
245 243 rv = 1;
246 244 } else if (u32 <= 0x7ff) {
247 245 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
248 246 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
249 247 rv = 2;
/* surrogates are tested before the general 3-byte range below */
250 248 } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
251 249 KICONV_JA_RETERROR(EILSEQ)
252 250 } else if (u32 <= 0xffff) {
253 251 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
254 252 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
255 253 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
256 254 rv = 3;
257 255 } else if (u32 <= 0x10ffff) {
258 256 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
259 257 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
260 258 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
261 259 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
262 260 rv = 4;
263 261 } else {
264 262 KICONV_JA_RETERROR(EILSEQ)
265 263 }
266 264
267 265 ret:
268 266 if (rv != (size_t)-1) {
269 267 /* update *pop and *poleft only on successful return */
270 268 *pop = op;
271 269 *poleft = oleft;
272 270 }
273 271
274 272 return (rv);
275 273 }
276 274
277 275 static void *
278 276 _kiconv_ja_open_unicode(uint8_t id)
279 277 {
280 278 kiconv_state_t kcd;
281 279
282 280 kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
283 281 KM_SLEEP);
284 282 kcd->id = id;
285 283 kcd->bom_processed = 0;
286 284 return ((void *)kcd);
287 285 }
288 286
289 287 static void *
290 288 open_eucjp(void)
291 289 {
292 290 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
293 291 }
294 292
295 293 static void *
296 294 open_eucjpms(void)
297 295 {
298 296 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
299 297 }
300 298
301 299 static void *
302 300 open_sjis(void)
303 301 {
304 302 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
305 303 }
306 304
307 305 static void *
308 306 open_cp932(void)
309 307 {
310 308 return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
311 309 }
312 310
313 311 int
314 312 close_ja(void *kcd)
315 313 {
316 314 if (! kcd || kcd == (void *)-1)
317 315 return (EBADF);
318 316
319 317 kmem_free(kcd, sizeof (kiconv_state_data_t));
320 318
321 319 return (0);
322 320 }
323 321
/*
 * _do_kiconv_fr_eucjp()
 * Convert EUC-JP input at *inbuf to Unicode, emitting each character with
 * KICONV_JA_PUTU().  The table id is read from the state object kcd.
 *
 * Each input character is classified by its first byte:
 *   - ASCII:  one byte, mapped through kiconv_ja_jisx0201roman_to_ucs2[]
 *   - CS1:    two bytes, mapped by KICONV_JA_CNV_JISMS_TO_U2() and, if
 *             that yields KICONV_JA_NODEST, by kiconv_ja_jisx0208_to_ucs2[]
 *   - SS2:    two bytes, mapped through kiconv_ja_jisx0201kana_to_ucs2[]
 *   - SS3:    three bytes, mapped by KICONV_JA_CNV_JIS0212MS_TO_U2() and,
 *             if needed, by kiconv_ja_jisx0212_to_ucs2[]
 *   - C1 control (KICONV_JA_ISC1CTRLEUC): passed through unchanged
 * Any other first byte, or a failed 2nd/3rd byte check, is EILSEQ.
 *
 * Returns 0 at once if inbuf or *inbuf is NULL.  Otherwise rv counts the
 * characters whose mapping is KICONV_JA_REPLACE.  The caller's pointers
 * and counts are updated after every fully converted character.
 *
 * NOTE(review): KICONV_JA_NGET(), KICONV_JA_PUTU() and
 * KICONV_JA_RETERROR() are defined outside this file and presumably use
 * ip, ileft, op, oleft, rv and errno and jump to "ret" on failure --
 * confirm against kiconv_ja.h.
 */
324 322 static size_t
325 323 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
326 324 char **outbuf, size_t *outbytesleft, int *errno)
327 325 {
328 326 uint_t u32; /* UTF-32 */
329 327 uint_t index; /* index for table lookup */
330 328 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
331 329 size_t rv = 0; /* return value of this function */
332 330
333 331 uchar_t *ip;
334 332 size_t ileft;
335 333 char *op;
336 334 size_t oleft;
337 335 size_t id = ((kiconv_state_t)kcd)->id;
338 336
339 337 if ((inbuf == NULL) || (*inbuf == NULL)) {
340 338 return (0);
341 339 }
342 340
343 341 ip = (uchar_t *)*inbuf;
344 342 ileft = *inbytesleft;
345 343 op = *outbuf;
346 344 oleft = *outbytesleft;
347 345
348 346 while (ileft != 0) {
349 347 KICONV_JA_NGET(ic1); /* get 1st byte */
350 348
351 349 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
352 350 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
353 351 KICONV_JA_PUTU(u32);
354 352 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
355 353 KICONV_JA_NGET(ic2);
356 354 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
357 355 ic1 &= KICONV_JA_CMASK;
358 356 ic2 &= KICONV_JA_CMASK;
359 357 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
360 358 if (u32 == KICONV_JA_NODEST) {
/* 94 entries per row; rows and columns both start at 0x21 */
361 359 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
362 360 u32 = kiconv_ja_jisx0208_to_ucs2[index];
363 361 }
364 362 if (u32 == KICONV_JA_REPLACE)
365 363 rv++;
366 364 KICONV_JA_PUTU(u32);
367 365 } else { /* 2nd byte check failed */
368 366 KICONV_JA_RETERROR(EILSEQ)
369 367 }
370 368 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
371 369 KICONV_JA_NGET(ic2);
372 370 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
373 371 index = (ic2 - 0xa1);
374 372 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
375 373 KICONV_JA_PUTU(u32);
376 374 } else { /* 2nd byte check failed */
377 375 KICONV_JA_RETERROR(EILSEQ)
378 376 }
379 377 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
380 378 KICONV_JA_NGET(ic2);
381 379 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
382 380 KICONV_JA_NGET(ic3);
383 381 if (KICONV_JA_ISCS3(ic3)) {
384 382 /* 3rd byte check passed */
385 383 ic2 &= KICONV_JA_CMASK;
386 384 ic3 &= KICONV_JA_CMASK;
387 385 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
388 386 ic2, ic3);
389 387 if (u32 == KICONV_JA_NODEST) {
390 388 index = ((ic2 - 0x21) * 94 +
391 389 (ic3 - 0x21));
392 390 u32 = kiconv_ja_jisx0212_to_ucs2
393 391 [index];
394 392 }
395 393 if (u32 == KICONV_JA_REPLACE)
396 394 rv++;
397 395 KICONV_JA_PUTU(u32);
398 396 } else { /* 3rd byte check failed */
399 397 KICONV_JA_RETERROR(EILSEQ)
400 398 }
401 399 } else { /* 2nd byte check failed */
402 400 KICONV_JA_RETERROR(EILSEQ)
403 401 }
404 402 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
405 403 /* C1 control; 1 byte */
406 404 u32 = ic1;
407 405 KICONV_JA_PUTU(u32);
408 406 } else { /* 1st byte check failed */
409 407 KICONV_JA_RETERROR(EILSEQ)
410 408 }
411 409
412 410 /*
413 411 * One character successfully converted so update
414 412 * values outside of this function's stack.
415 413 */
416 414 *inbuf = (char *)ip;
417 415 *inbytesleft = ileft;
418 416 *outbuf = op;
419 417 *outbytesleft = oleft;
420 418 }
421 419
422 420 ret:
423 421 return (rv);
424 422 }
425 423
/*
 * _do_kiconv_to_eucjp()
 * Convert Unicode input at *inbuf (read with KICONV_JA_GETU()) to EUC-JP.
 * The table id is read from the state object kcd.
 *
 * Each character is mapped to a 16-bit EUC value, first by
 * KICONV_JA_CNV_U2_TO_EUCJPMS() and, if that yields KICONV_JA_NODEST, by
 * kiconv_ja_ucs2_to_euc16().  Bits 0x8080 of that value select the output
 * form: CS0 is one byte, CS1 is two bytes, CS2 is SS2 plus one byte, and
 * CS3 is SS3 plus two bytes with KICONV_JA_CMSB set on the last one.
 *
 * Characters above 0xffff and characters with no mapping are written as
 * KICONV_JA_DEF_SINGLE and counted in rv, which is the return value.
 * Returns 0 at once if inbuf or *inbuf is NULL.  The caller's pointers and
 * counts are updated after every converted character.
 *
 * NOTE(review): KICONV_JA_CHECK_UTF8_BOM(), KICONV_JA_GETU(),
 * KICONV_JA_NPUT() are defined outside this file and presumably use the
 * locals declared here (including read_len and kcd) and jump to "ret" on
 * failure -- confirm against kiconv_ja.h.
 */
426 424 static size_t
427 425 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
428 426 char **outbuf, size_t *outbytesleft, int *errno)
429 427 {
430 428 uchar_t ic;
431 429 size_t rv = 0;
432 430 uint_t ucs4;
433 431 ushort_t euc16;
434 432
435 433 uchar_t *ip;
436 434 size_t ileft;
437 435 char *op;
438 436 size_t oleft;
439 437 size_t read_len;
440 438
441 439 size_t id = ((kiconv_state_t)kcd)->id;
442 440
443 441 if ((inbuf == NULL) || (*inbuf == NULL)) {
444 442 return (0);
445 443 }
446 444
447 445 ip = (uchar_t *)*inbuf;
448 446 ileft = *inbytesleft;
449 447 op = *outbuf;
450 448 oleft = *outbytesleft;
451 449
452 450 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
453 451
454 452 while (ileft != 0) {
455 453 KICONV_JA_GETU(&ucs4, 0);
456 454
457 455 if (ucs4 > 0xffff) {
458 456 /* non-BMP */
459 457 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
460 458 rv++;
461 459 goto next;
462 460 }
463 461
464 462 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
465 463 if (euc16 == KICONV_JA_NODEST) {
466 464 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
467 465 }
468 466 if (euc16 == KICONV_JA_NODEST) {
469 467 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
470 468 rv++;
471 469 goto next;
472 470 }
473 471
/* the top bit of each byte of euc16 selects the code set */
474 472 switch (euc16 & 0x8080) {
475 473 case 0x0000: /* CS0 */
476 474 ic = (uchar_t)euc16;
477 475 KICONV_JA_NPUT(ic);
478 476 break;
479 477 case 0x8080: /* CS1 */
480 478 ic = (uchar_t)((euc16 >> 8) & 0xff);
481 479 KICONV_JA_NPUT(ic);
482 480 ic = (uchar_t)(euc16 & 0xff);
483 481 KICONV_JA_NPUT(ic);
484 482 break;
485 483 case 0x0080: /* CS2 */
486 484 KICONV_JA_NPUT(SS2);
487 485 ic = (uchar_t)euc16;
488 486 KICONV_JA_NPUT(ic);
489 487 break;
490 488 case 0x8000: /* CS3 */
491 489 KICONV_JA_NPUT(SS3);
492 490 ic = (uchar_t)((euc16 >> 8) & 0xff);
493 491 KICONV_JA_NPUT(ic);
494 492 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
495 493 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
496 494 break;
497 495 }
498 496 next:
499 497 /*
500 498 * One character successfully converted so update
501 499 * values outside of this function's stack.
502 500 */
503 501 *inbuf = (char *)ip;
504 502 *inbytesleft = ileft;
505 503 *outbuf = op;
506 504 *outbytesleft = oleft;
507 505 }
508 506
509 507 ret:
510 508 return (rv);
511 509 }
512 510
513 511 static size_t
514 512 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
515 513 size_t *outbytesleft, int flag, int *errno, uint8_t id)
516 514 {
517 515 uint_t u32; /* UTF-32 */
518 516 uint_t index; /* index for table lookup */
519 517 uchar_t ic1, ic2, ic3; /* 1st, 2nd, and 3rd bytes of a char */
520 518 size_t rv = 0; /* return value of this function */
521 519
522 520 uchar_t *ip;
523 521 size_t ileft;
524 522 char *op;
525 523 size_t oleft;
526 524
527 525 boolean_t do_not_ignore_null;
528 526
529 527 if ((inbuf == NULL) || (*inbuf == NULL)) {
530 528 return (0);
531 529 }
532 530
533 531 ip = (uchar_t *)inbuf;
534 532 ileft = *inbytesleft;
535 533 op = outbuf;
536 534 oleft = *outbytesleft;
537 535
538 536 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
539 537
540 538 while (ileft != 0) {
541 539 KICONV_JA_NGET(ic1); /* get 1st byte */
542 540
543 541 if (KICONV_JA_ISASC(ic1)) { /* ASCII; 1 byte */
544 542 if (ic1 == '\0' && do_not_ignore_null) {
545 543 return (0);
546 544 }
547 545 u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
548 546 KICONV_JA_PUTU(u32);
549 547 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
550 548 if (flag & KICONV_REPLACE_INVALID) {
551 549 KICONV_JA_NGET_REP_FR_MB(ic2);
552 550 } else {
553 551 KICONV_JA_NGET(ic2);
554 552 }
555 553 if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
556 554 ic1 &= KICONV_JA_CMASK;
557 555 ic2 &= KICONV_JA_CMASK;
558 556 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
559 557 if (u32 == KICONV_JA_NODEST) {
560 558 index = (ic1 - 0x21) * 94 + ic2 - 0x21;
561 559 u32 = kiconv_ja_jisx0208_to_ucs2[index];
562 560 }
563 561 if (u32 == KICONV_JA_REPLACE)
564 562 rv++;
565 563 KICONV_JA_PUTU(u32);
566 564 } else { /* 2nd byte check failed */
567 565 if (flag & KICONV_REPLACE_INVALID) {
568 566 KICONV_JA_PUTU(KICONV_JA_REPLACE);
569 567 rv++;
570 568 } else {
571 569 KICONV_JA_RETERROR(EILSEQ)
572 570 }
573 571 }
574 572 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
575 573 if (flag & KICONV_REPLACE_INVALID) {
576 574 KICONV_JA_NGET_REP_FR_MB(ic2);
577 575 } else {
578 576 KICONV_JA_NGET(ic2);
579 577 }
580 578 if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
581 579 index = (ic2 - 0xa1);
582 580 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
583 581 KICONV_JA_PUTU(u32);
584 582 } else { /* 2nd byte check failed */
585 583 if (flag & KICONV_REPLACE_INVALID) {
586 584 KICONV_JA_PUTU(KICONV_JA_REPLACE);
587 585 rv++;
588 586 } else {
589 587 KICONV_JA_RETERROR(EILSEQ)
590 588 }
591 589 }
592 590 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
593 591 if (flag & KICONV_REPLACE_INVALID) {
594 592 KICONV_JA_NGET_REP_FR_MB(ic2);
595 593 } else {
596 594 KICONV_JA_NGET(ic2);
597 595 }
598 596 if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
599 597 if (flag & KICONV_REPLACE_INVALID) {
600 598 KICONV_JA_NGET_REP_FR_MB(ic3);
601 599 } else {
602 600 KICONV_JA_NGET(ic3);
603 601 }
604 602 if (KICONV_JA_ISCS3(ic3)) {
605 603 /* 3rd byte check passed */
606 604 ic2 &= KICONV_JA_CMASK;
607 605 ic3 &= KICONV_JA_CMASK;
608 606 KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
609 607 ic2, ic3);
610 608 if (u32 == KICONV_JA_NODEST) {
611 609 index = ((ic2 - 0x21) * 94 +
612 610 (ic3 - 0x21));
613 611 u32 = kiconv_ja_jisx0212_to_ucs2
614 612 [index];
615 613 }
616 614 if (u32 == KICONV_JA_REPLACE)
617 615 rv++;
618 616 KICONV_JA_PUTU(u32);
619 617 } else { /* 3rd byte check failed */
620 618 if (flag & KICONV_REPLACE_INVALID) {
621 619 KICONV_JA_PUTU(
622 620 KICONV_JA_REPLACE);
623 621 rv++;
624 622 } else {
625 623 KICONV_JA_RETERROR(EILSEQ)
626 624 }
627 625 }
628 626 } else { /* 2nd byte check failed */
629 627 if (flag & KICONV_REPLACE_INVALID) {
630 628 KICONV_JA_PUTU(KICONV_JA_REPLACE);
631 629 rv++;
632 630 } else {
633 631 KICONV_JA_RETERROR(EILSEQ)
634 632 }
635 633 }
636 634 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
637 635 /* C1 control; 1 byte */
638 636 u32 = ic1;
639 637 KICONV_JA_PUTU(u32);
640 638 } else { /* 1st byte check failed */
641 639 if (flag & KICONV_REPLACE_INVALID) {
642 640 KICONV_JA_PUTU(KICONV_JA_REPLACE);
643 641 rv++;
644 642 } else {
645 643 KICONV_JA_RETERROR(EILSEQ)
646 644 }
647 645 }
648 646
649 647 next:
650 648 /*
651 649 * One character successfully converted so update
652 650 * values outside of this function's stack.
653 651 */
654 652 *inbytesleft = ileft;
655 653 *outbytesleft = oleft;
656 654 }
657 655
658 656 ret:
659 657 return (rv);
660 658 }
661 659
662 660 static size_t
663 661 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
664 662 size_t *outbytesleft, int flag, int *errno, uint8_t id)
665 663 {
666 664 uchar_t ic;
667 665 size_t rv = 0;
668 666 uint_t ucs4;
669 667 ushort_t euc16;
670 668
671 669 uchar_t *ip;
672 670 size_t ileft;
673 671 char *op;
674 672 size_t oleft;
675 673 size_t read_len;
676 674
677 675 boolean_t do_not_ignore_null;
678 676
679 677 if ((inbuf == NULL) || (*inbuf == NULL)) {
680 678 return (0);
681 679 }
682 680
683 681 ip = (uchar_t *)inbuf;
684 682 ileft = *inbytesleft;
685 683 op = outbuf;
686 684 oleft = *outbytesleft;
687 685
688 686 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
689 687
690 688 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
691 689
692 690 while (ileft != 0) {
693 691 KICONV_JA_GETU(&ucs4, flag);
694 692
695 693 if (ucs4 == 0x0 && do_not_ignore_null) {
696 694 return (0);
697 695 }
698 696
699 697 if (ucs4 > 0xffff) {
700 698 /* non-BMP */
701 699 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
702 700 rv++;
703 701 goto next;
704 702 }
705 703
706 704 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
707 705 if (euc16 == KICONV_JA_NODEST) {
708 706 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
709 707 }
710 708 if (euc16 == KICONV_JA_NODEST) {
711 709 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
712 710 rv++;
713 711 goto next;
714 712 }
715 713
716 714 switch (euc16 & 0x8080) {
717 715 case 0x0000: /* CS0 */
718 716 ic = (uchar_t)euc16;
719 717 KICONV_JA_NPUT(ic);
720 718 break;
721 719 case 0x8080: /* CS1 */
722 720 ic = (uchar_t)((euc16 >> 8) & 0xff);
723 721 KICONV_JA_NPUT(ic);
724 722 ic = (uchar_t)(euc16 & 0xff);
725 723 KICONV_JA_NPUT(ic);
726 724 break;
727 725 case 0x0080: /* CS2 */
728 726 KICONV_JA_NPUT(SS2);
729 727 ic = (uchar_t)euc16;
730 728 KICONV_JA_NPUT(ic);
731 729 break;
732 730 case 0x8000: /* CS3 */
733 731 KICONV_JA_NPUT(SS3);
734 732 ic = (uchar_t)((euc16 >> 8) & 0xff);
735 733 KICONV_JA_NPUT(ic);
736 734 ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
737 735 KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
738 736 break;
739 737 }
740 738 next:
741 739 /*
742 740 * One character successfully converted so update
743 741 * values outside of this function's stack.
744 742 */
745 743 *inbytesleft = ileft;
746 744 *outbytesleft = oleft;
747 745 }
748 746
749 747 ret:
750 748 return (rv);
751 749 }
752 750
753 751 static size_t
754 752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
755 753 char **outbuf, size_t *outbytesleft, int *errno)
756 754 {
757 755 if (! kcd || kcd == (void *)-1) {
758 756 *errno = EBADF;
759 757 return ((size_t)-1);
760 758 }
761 759
762 760 return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
763 761 outbuf, outbytesleft, errno));
764 762 }
765 763
766 764 static size_t
767 765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
768 766 char **outbuf, size_t *outbytesleft, int *errno)
769 767 {
770 768 if (! kcd || kcd == (void *)-1) {
771 769 *errno = EBADF;
772 770 return ((size_t)-1);
773 771 }
774 772
775 773 return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
776 774 outbuf, outbytesleft, errno));
777 775 }
778 776
779 777 static size_t
780 778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
781 779 size_t *outbytesleft, int flag, int *errno)
782 780 {
783 781 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
784 782 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
785 783 }
786 784
787 785 static size_t
788 786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
789 787 size_t *outbytesleft, int flag, int *errno)
790 788 {
791 789 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
792 790 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
793 791 }
794 792
795 793 static size_t
796 794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
797 795 size_t *outbytesleft, int flag, int *errno)
798 796 {
799 797 return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
800 798 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
801 799 }
802 800
803 801 static size_t
804 802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
805 803 size_t *outbytesleft, int flag, int *errno)
806 804 {
807 805 return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
808 806 outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
809 807 }
810 808
/*
 * _do_kiconv_fr_sjis()
 * Convert Shift_JIS input at *inbuf to Unicode, emitting each character
 * with KICONV_JA_PUTU().  The table id is read from the state object kcd.
 *
 * Each input character is classified by its first byte:
 *   - ASCII:  mapped through kiconv_ja_jisx0201roman_to_ucs2[]
 *   - KICONV_JA_ISSJKANA:  mapped through kiconv_ja_jisx0201kana_to_ucs2[]
 *   - KICONV_JA_ISSJKANJI1:  two bytes, translated to JIS row/column with
 *     kiconv_ja_sjtojis1[]/kiconv_ja_sjtojis2[] and mapped by
 *     KICONV_JA_CNV_JISMS_TO_U2() or kiconv_ja_jisx0208_to_ucs2[]
 *   - KICONV_JA_ISSJSUPKANJI1:  two bytes, mapped through
 *     kiconv_ja_jisx0212_to_ucs2[]
 *   - KICONV_JA_ISSJIBM / KICONV_JA_ISSJNECIBM:  two bytes, remapped (see
 *     the comments in the body) and then mapped through the JIS X 0208 or
 *     JIS X 0212 tables
 *   - 0xeb-0xec:  two bytes, emitted as 0xfffd
 * Any other first byte, or a failed 2nd byte check, is EILSEQ.
 *
 * Returns 0 at once if inbuf or *inbuf is NULL.  Otherwise rv counts the
 * characters whose mapping is KICONV_JA_REPLACE.  The caller's pointers
 * and counts are updated after every fully converted character.
 *
 * NOTE(review): KICONV_JA_NGET(), KICONV_JA_PUTU() and
 * KICONV_JA_RETERROR() are defined outside this file and presumably use
 * ip, ileft, op, oleft, rv and errno and jump to "ret" on failure --
 * confirm against kiconv_ja.h.
 */
811 809 static size_t
812 810 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
813 811 char **outbuf, size_t *outbytesleft, int *errno)
814 812 {
815 813 uint_t uni; /* UTF-32 */
816 814 uint_t index; /* index for table lookup */
817 815 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
818 816 size_t rv = 0; /* return value of this function */
819 817
820 818 uchar_t *ip;
821 819 size_t ileft;
822 820 char *op;
823 821 size_t oleft;
824 822 size_t id = ((kiconv_state_t)kcd)->id;
825 823
826 824 if ((inbuf == NULL) || (*inbuf == NULL)) {
827 825 return (0);
828 826 }
829 827
830 828 ip = (uchar_t *)*inbuf;
831 829 ileft = *inbytesleft;
832 830 op = *outbuf;
833 831 oleft = *outbytesleft;
834 832
835 833 while (ileft != 0) {
836 834 KICONV_JA_NGET(ic1); /* get 1st byte */
837 835
838 836 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
839 837 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
840 838 KICONV_JA_PUTU(uni);
841 839 } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
842 840 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
843 841 KICONV_JA_PUTU(uni);
844 842 } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
845 843 KICONV_JA_NGET(ic2);
846 844 if (KICONV_JA_ISSJKANJI2(ic2)) {
847 845 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
/* a 2nd byte of 0x9f or above selects the following row */
848 846 if (ic2 >= 0x9f) {
849 847 ic1++;
850 848 }
851 849 ic2 = kiconv_ja_sjtojis2[ic2];
852 850 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
853 851 if (uni == KICONV_JA_NODEST) {
854 852 index = ((ic1 - 0x21) * 94)
855 853 + (ic2 - 0x21);
856 854 uni = kiconv_ja_jisx0208_to_ucs2[index];
857 855 }
858 856 if (uni == KICONV_JA_REPLACE)
859 857 rv++;
860 858 KICONV_JA_PUTU(uni);
861 859 } else { /* 2nd byte check failed */
862 860 KICONV_JA_RETERROR(EILSEQ)
863 861 /* NOTREACHED */
864 862 }
865 863 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
866 864 KICONV_JA_NGET(ic2);
867 865 if (KICONV_JA_ISSJKANJI2(ic2)) {
868 866 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
869 867 if (ic2 >= 0x9f) {
870 868 ic1++;
871 869 }
872 870 index = ((ic1 - 0x21) * 94)
873 871 + (kiconv_ja_sjtojis2[ic2] - 0x21);
874 872 uni = kiconv_ja_jisx0212_to_ucs2[index];
875 873 if (uni == KICONV_JA_REPLACE)
876 874 rv++;
877 875 KICONV_JA_PUTU(uni);
878 876 } else { /* 2nd byte check failed */
879 877 KICONV_JA_RETERROR(EILSEQ)
880 878 }
881 879 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
882 880 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
883 881 /*
884 882 * We need a special treatment for each codes.
885 883 * By adding some offset number for them, we
886 884 * can process them as the same way of that of
887 885 * extended IBM chars.
888 886 */
889 887 KICONV_JA_NGET(ic2);
890 888 if (KICONV_JA_ISSJKANJI2(ic2)) {
891 889 ushort_t dest, upper, lower;
892 890 dest = (ic1 << 8) + ic2;
893 891 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
894 892 KICONV_JA_REMAP_NEC(dest);
895 893 if (dest == 0xffff) {
896 894 KICONV_JA_RETERROR(EILSEQ)
897 895 }
898 896 }
899 897 /*
900 898 * XXX: 0xfa54 and 0xfa5b must be mapped
901 899 * to JIS0208 area. Therefore we
902 900 * have to do special treatment.
903 901 */
904 902 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
905 903 if (dest == 0xfa54) {
906 904 upper = 0x22;
907 905 lower = 0x4c;
908 906 } else {
909 907 upper = 0x22;
910 908 lower = 0x68;
911 909 }
912 910 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
913 911 upper, lower);
914 912 if (uni == KICONV_JA_NODEST) {
915 913 index = (uint_t)((upper - 0x21)
916 914 * 94 + (lower - 0x21));
917 915 uni = kiconv_ja_jisx0208_to_ucs2
918 916 [index];
919 917 }
920 918 if (uni == KICONV_JA_REPLACE)
921 919 rv++;
922 920 KICONV_JA_PUTU(uni);
923 921 } else {
/* turn dest into an index into kiconv_ja_sjtoibmext[] */
924 922 dest = dest - 0xfa40 -
925 923 (((dest>>8) - 0xfa) * 0x40);
926 924 dest = kiconv_ja_sjtoibmext[dest];
927 925 if (dest == 0xffff) {
928 926 KICONV_JA_RETERROR(EILSEQ)
929 927 }
930 928 upper = (dest >> 8) & KICONV_JA_CMASK;
931 929 lower = dest & KICONV_JA_CMASK;
932 930 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
933 931 upper, lower);
934 932 if (uni == KICONV_JA_NODEST) {
935 933 index = (uint_t)((upper - 0x21)
936 934 * 94 + (lower - 0x21));
937 935 uni = kiconv_ja_jisx0212_to_ucs2
938 936 [index];
939 937 }
940 938 if (uni == KICONV_JA_REPLACE)
941 939 rv++;
942 940 KICONV_JA_PUTU(uni);
943 941 }
944 942 } else { /* 2nd byte check failed */
945 943 KICONV_JA_RETERROR(EILSEQ)
946 944 }
947 945 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
948 946 /*
949 947 * Based on the draft convention of OSF-JVC CDEWG,
950 948 * characters in this area will be mapped to
951 949 * "CHIKAN-MOJI." (convertible character)
952 950 * We use U+FFFD in this case.
953 951 */
954 952 KICONV_JA_NGET(ic2);
955 953 if (KICONV_JA_ISSJKANJI2(ic2)) {
956 954 uni = 0xfffd;
957 955 KICONV_JA_PUTU(uni);
958 956 } else { /* 2nd byte check failed */
959 957 KICONV_JA_RETERROR(EILSEQ)
960 958 }
961 959 } else { /* 1st byte check failed */
962 960 KICONV_JA_RETERROR(EILSEQ)
963 961 }
964 962
965 963 /*
966 964 * One character successfully converted so update
967 965 * values outside of this function's stack.
968 966 */
969 967 *inbuf = (char *)ip;
970 968 *inbytesleft = ileft;
971 969 *outbuf = op;
972 970 *outbytesleft = oleft;
973 971 }
974 972
975 973 ret:
976 974 return (rv);
977 975 }
978 976
979 977 /*
980 978 * _kiconv_ja_lookuptbl()
981 979 * Return the index number if its index-ed number
982 980 * is the same as dest value.
983 981 */
984 982 static ushort_t
985 983 _kiconv_ja_lookuptbl(ushort_t dest)
986 984 {
987 985 ushort_t tmp;
988 986 int i;
989 987 int sz = (sizeof (kiconv_ja_sjtoibmext) /
990 988 sizeof (kiconv_ja_sjtoibmext[0]));
991 989
992 990 for (i = 0; i < sz; i++) {
993 991 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
994 992 if (tmp == dest)
995 993 return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
996 994 }
997 995 return (0x3f);
998 996 }
999 997
/*
 * _do_kiconv_to_sjis()
 * Convert Unicode input at *inbuf (read with KICONV_JA_GETU()) to
 * Shift_JIS.  The table id is read from the state object kcd.
 *
 * Each character is first mapped to a 16-bit EUC value, by
 * KICONV_JA_CNV_U2_TO_EUCJPMS() and, if that yields KICONV_JA_NODEST, by
 * kiconv_ja_ucs2_to_euc16().  Bits 0x8080 of that value select the output
 * form:
 *   - CS0:  one byte; a C1 control (KICONV_JA_ISC1CTRL) is written as
 *           KICONV_JA_DEF_SINGLE and counted in rv
 *   - CS1:  two bytes via kiconv_ja_jis208tosj1[] and kiconv_ja_jistosj2[]
 *   - CS2:  one byte
 *   - CS3:  0xa271 is written as 0x87 0x82; rows below 0x75 are looked up
 *           with _kiconv_ja_lookuptbl(); other rows use
 *           kiconv_ja_jis212tosj1[] and kiconv_ja_jistosj2[]
 *
 * Characters above 0xffff and characters with no mapping are written as
 * KICONV_JA_DEF_SINGLE and counted in rv, which is the return value.
 * Returns 0 at once if inbuf or *inbuf is NULL.  The caller's pointers and
 * counts are updated after every converted character.
 *
 * NOTE(review): KICONV_JA_CHECK_UTF8_BOM(), KICONV_JA_GETU() and
 * KICONV_JA_NPUT() are defined outside this file and presumably use the
 * locals declared here (including read_len and kcd) and jump to "ret" on
 * failure -- confirm against kiconv_ja.h.
 */
1000 998 static size_t
1001 999 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1002 1000 char **outbuf, size_t *outbytesleft, int *errno)
1003 1001 {
1004 1002 uchar_t ic;
1005 1003 size_t rv = 0;
1006 1004 uint_t ucs4;
1007 1005 ushort_t euc16;
1008 1006 ushort_t dest;
1009 1007
1010 1008 uchar_t *ip;
1011 1009 size_t ileft;
1012 1010 char *op;
1013 1011 size_t oleft;
1014 1012 size_t read_len;
1015 1013
1016 1014 size_t id = ((kiconv_state_t)kcd)->id;
1017 1015
1018 1016 if ((inbuf == NULL) || (*inbuf == NULL)) {
1019 1017 return (0);
1020 1018 }
1021 1019
1022 1020 ip = (uchar_t *)*inbuf;
1023 1021 ileft = *inbytesleft;
1024 1022 op = *outbuf;
1025 1023 oleft = *outbytesleft;
1026 1024
1027 1025 KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
1028 1026
1029 1027 while (ileft != 0) {
1030 1028 KICONV_JA_GETU(&ucs4, 0);
1031 1029
1032 1030 if (ucs4 > 0xffff) {
1033 1031 /* non-BMP */
1034 1032 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1035 1033 rv++;
1036 1034 goto next;
1037 1035 }
1038 1036
1039 1037 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1040 1038 if (euc16 == KICONV_JA_NODEST) {
1041 1039 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1042 1040 }
1043 1041 if (euc16 == KICONV_JA_NODEST) {
1044 1042 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1045 1043 rv++;
1046 1044 goto next;
1047 1045 }
1048 1046
/* the top bit of each byte of euc16 selects the code set */
1049 1047 switch (euc16 & 0x8080) {
1050 1048 case 0x0000: /* CS0 */
1051 1049 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1052 1050 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1053 1051 rv++;
1054 1052 } else {
1055 1053 ic = (uchar_t)euc16;
1056 1054 KICONV_JA_NPUT(ic);
1057 1055 }
1058 1056 break;
1059 1057 case 0x8080: /* CS1 */
1060 1058 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1061 1059 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1062 1060 /*
1063 1061 * for even number row (Ku), add 0x80 to
1064 1062 * look latter half of kiconv_ja_jistosj2[] array
1065 1063 */
1066 1064 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1067 1065 + (((ic % 2) == 0) ? 0x80 : 0x00));
1068 1066 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1069 1067 break;
1070 1068 case 0x0080: /* CS2 */
1071 1069 ic = (uchar_t)euc16;
1072 1070 KICONV_JA_NPUT(ic);
1073 1071 break;
1074 1072 case 0x8000: /* CS3 */
1075 1073 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1076 1074 if (euc16 == 0xa271) {
1077 1075 /* NUMERO SIGN */
1078 1076 KICONV_JA_NPUT(0x87);
1079 1077 KICONV_JA_NPUT(0x82);
1080 1078 } else if (ic < 0x75) { /* check if IBM VDC */
1081 1079 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
/*
 * NOTE(review): _kiconv_ja_lookuptbl() returns 0x3f, not 0xffff, when
 * no entry matches, so a miss appears to take the single-byte branch
 * below without incrementing rv -- confirm that this is intended.
 */
1082 1080 if (dest == 0xffff) {
1083 1081 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1084 1082 } else {
1085 1083 /* avoid putting NUL ('\0') */
1086 1084 if (dest > 0xff) {
1087 1085 KICONV_JA_NPUT(
1088 1086 (dest >> 8) & 0xff);
1089 1087 KICONV_JA_NPUT(dest & 0xff);
1090 1088 } else {
1091 1089 KICONV_JA_NPUT(dest & 0xff);
1092 1090 }
1093 1091 }
1094 1092 } else {
1095 1093 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1096 1094 /*
1097 1095 * for even number row (Ku), add 0x80 to
1098 1096 * look latter half of kiconv_ja_jistosj2[]
1099 1097 */
1100 1098 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1101 1099 + (((ic % 2) == 0) ? 0x80 : 0x00));
1102 1100 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1103 1101 }
1104 1102 break;
1105 1103 }
1106 1104
1107 1105 next:
1108 1106 /*
1109 1107 * One character successfully converted so update
1110 1108 * values outside of this function's stack.
1111 1109 */
1112 1110 *inbuf = (char *)ip;
1113 1111 *inbytesleft = ileft;
1114 1112 *outbuf = op;
1115 1113 *outbytesleft = oleft;
1116 1114 }
1117 1115
1118 1116 ret:
1119 1117 return (rv);
1120 1118 }
1121 1119
1122 1120 static size_t
1123 1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1124 1122 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1125 1123 {
1126 1124 uint_t uni; /* UTF-32 */
1127 1125 uint_t index; /* index for table lookup */
1128 1126 uchar_t ic1, ic2; /* 1st and 2nd bytes of a char */
1129 1127 size_t rv = 0; /* return value of this function */
1130 1128
1131 1129 uchar_t *ip;
1132 1130 size_t ileft;
1133 1131 char *op;
1134 1132 size_t oleft;
1135 1133
1136 1134 boolean_t do_not_ignore_null;
1137 1135
1138 1136 if ((inbuf == NULL) || (*inbuf == NULL)) {
1139 1137 return (0);
1140 1138 }
1141 1139
1142 1140 ip = (uchar_t *)inbuf;
1143 1141 ileft = *inbytesleft;
1144 1142 op = outbuf;
1145 1143 oleft = *outbytesleft;
1146 1144
1147 1145 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1148 1146
1149 1147 while (ileft != 0) {
1150 1148 KICONV_JA_NGET(ic1); /* get 1st byte */
1151 1149
1152 1150 if (KICONV_JA_ISASC((int)ic1)) { /* ASCII; 1 byte */
1153 1151 if (ic1 == '\0' && do_not_ignore_null) {
1154 1152 return (0);
1155 1153 }
1156 1154 uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1157 1155 KICONV_JA_PUTU(uni);
1158 1156 } else if (KICONV_JA_ISSJKANA(ic1)) {
1159 1157 /* JIS X 0201 Kana; 1 byte */
1160 1158 uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1161 1159 KICONV_JA_PUTU(uni);
1162 1160 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1163 1161 /* JIS X 0208 or UDC; 2 bytes */
1164 1162 if (flag & KICONV_REPLACE_INVALID) {
1165 1163 KICONV_JA_NGET_REP_FR_MB(ic2);
1166 1164 } else {
1167 1165 KICONV_JA_NGET(ic2);
1168 1166 }
1169 1167 if (KICONV_JA_ISSJKANJI2(ic2)) {
1170 1168 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1171 1169 if (ic2 >= 0x9f) {
1172 1170 ic1++;
1173 1171 }
1174 1172 ic2 = kiconv_ja_sjtojis2[ic2];
1175 1173 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1176 1174 if (uni == KICONV_JA_NODEST) {
1177 1175 index = ((ic1 - 0x21) * 94)
1178 1176 + (ic2 - 0x21);
1179 1177 uni = kiconv_ja_jisx0208_to_ucs2[index];
1180 1178 }
1181 1179 if (uni == KICONV_JA_REPLACE)
1182 1180 rv++;
1183 1181 KICONV_JA_PUTU(uni);
1184 1182 } else { /* 2nd byte check failed */
1185 1183 if (flag & KICONV_REPLACE_INVALID) {
1186 1184 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1187 1185 rv++;
1188 1186 } else {
1189 1187 KICONV_JA_RETERROR(EILSEQ)
1190 1188 }
1191 1189 /* NOTREACHED */
1192 1190 }
1193 1191 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1194 1192 if (flag & KICONV_REPLACE_INVALID) {
1195 1193 KICONV_JA_NGET_REP_FR_MB(ic2);
1196 1194 } else {
1197 1195 KICONV_JA_NGET(ic2);
1198 1196 }
1199 1197 if (KICONV_JA_ISSJKANJI2(ic2)) {
1200 1198 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1201 1199 if (ic2 >= 0x9f) {
1202 1200 ic1++;
1203 1201 }
1204 1202 index = ((ic1 - 0x21) * 94)
1205 1203 + (kiconv_ja_sjtojis2[ic2] - 0x21);
1206 1204 uni = kiconv_ja_jisx0212_to_ucs2[index];
1207 1205 if (uni == KICONV_JA_REPLACE)
1208 1206 rv++;
1209 1207 KICONV_JA_PUTU(uni);
1210 1208 } else { /* 2nd byte check failed */
1211 1209 if (flag & KICONV_REPLACE_INVALID) {
1212 1210 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1213 1211 rv++;
1214 1212 } else {
1215 1213 KICONV_JA_RETERROR(EILSEQ)
1216 1214 }
1217 1215 }
1218 1216 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1219 1217 KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1220 1218 /*
1221 1219 * We need a special treatment for each codes.
1222 1220 * By adding some offset number for them, we
1223 1221 * can process them as the same way of that of
1224 1222 * extended IBM chars.
1225 1223 */
1226 1224 if (flag & KICONV_REPLACE_INVALID) {
1227 1225 KICONV_JA_NGET_REP_FR_MB(ic2);
1228 1226 } else {
1229 1227 KICONV_JA_NGET(ic2);
1230 1228 }
1231 1229 if (KICONV_JA_ISSJKANJI2(ic2)) {
1232 1230 ushort_t dest, upper, lower;
1233 1231 dest = (ic1 << 8) + ic2;
1234 1232 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1235 1233 KICONV_JA_REMAP_NEC(dest);
1236 1234 if (dest == 0xffff) {
1237 1235 if (flag &
1238 1236 KICONV_REPLACE_INVALID) {
1239 1237 KICONV_JA_PUTU(
1240 1238 KICONV_JA_REPLACE);
1241 1239 rv++;
1242 1240 } else {
1243 1241 KICONV_JA_RETERROR(
1244 1242 EILSEQ)
1245 1243 }
1246 1244 }
1247 1245 }
1248 1246 /*
1249 1247 * XXX: 0xfa54 and 0xfa5b must be mapped
1250 1248 * to JIS0208 area. Therefore we
1251 1249 * have to do special treatment.
1252 1250 */
1253 1251 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1254 1252 if (dest == 0xfa54) {
1255 1253 upper = 0x22;
1256 1254 lower = 0x4c;
1257 1255 } else {
1258 1256 upper = 0x22;
1259 1257 lower = 0x68;
1260 1258 }
1261 1259 KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1262 1260 upper, lower);
1263 1261 if (uni == KICONV_JA_NODEST) {
1264 1262 index = (uint_t)((upper - 0x21)
1265 1263 * 94 + (lower - 0x21));
1266 1264 uni = kiconv_ja_jisx0208_to_ucs2
1267 1265 [index];
1268 1266 }
1269 1267 if (uni == KICONV_JA_REPLACE)
1270 1268 rv++;
1271 1269 KICONV_JA_PUTU(uni);
1272 1270 } else {
1273 1271 dest = dest - 0xfa40 -
1274 1272 (((dest>>8) - 0xfa) * 0x40);
1275 1273 dest = kiconv_ja_sjtoibmext[dest];
1276 1274 if (dest == 0xffff) {
1277 1275 if (flag &
1278 1276 KICONV_REPLACE_INVALID) {
1279 1277 KICONV_JA_PUTU(
1280 1278 KICONV_JA_REPLACE);
1281 1279 rv++;
1282 1280 } else {
1283 1281 KICONV_JA_RETERROR(
1284 1282 EILSEQ)
1285 1283 }
1286 1284 }
1287 1285 upper = (dest >> 8) & KICONV_JA_CMASK;
1288 1286 lower = dest & KICONV_JA_CMASK;
1289 1287 KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1290 1288 upper, lower);
1291 1289 if (uni == KICONV_JA_NODEST) {
1292 1290 index = (uint_t)((upper - 0x21)
1293 1291 * 94 + (lower - 0x21));
1294 1292 uni = kiconv_ja_jisx0212_to_ucs2
1295 1293 [index];
1296 1294 }
1297 1295 if (uni == KICONV_JA_REPLACE)
1298 1296 rv++;
1299 1297 KICONV_JA_PUTU(uni);
1300 1298 }
1301 1299 } else { /* 2nd byte check failed */
1302 1300 if (flag & KICONV_REPLACE_INVALID) {
1303 1301 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1304 1302 rv++;
1305 1303 } else {
1306 1304 KICONV_JA_RETERROR(EILSEQ)
1307 1305 }
1308 1306 }
1309 1307 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1310 1308 /*
1311 1309 * Based on the draft convention of OSF-JVC CDEWG,
1312 1310 * characters in this area will be mapped to
1313 1311 * "CHIKAN-MOJI." (convertible character)
1314 1312 * We use U+FFFD in this case.
1315 1313 */
1316 1314 if (flag & KICONV_REPLACE_INVALID) {
1317 1315 KICONV_JA_NGET_REP_FR_MB(ic2);
1318 1316 } else {
1319 1317 KICONV_JA_NGET(ic2);
1320 1318 }
1321 1319 if (KICONV_JA_ISSJKANJI2(ic2)) {
1322 1320 uni = 0xfffd;
1323 1321 KICONV_JA_PUTU(uni);
1324 1322 } else { /* 2nd byte check failed */
1325 1323 if (flag & KICONV_REPLACE_INVALID) {
1326 1324 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1327 1325 rv++;
1328 1326 } else {
1329 1327 KICONV_JA_RETERROR(EILSEQ)
1330 1328 }
1331 1329 }
1332 1330 } else { /* 1st byte check failed */
1333 1331 if (flag & KICONV_REPLACE_INVALID) {
1334 1332 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1335 1333 rv++;
1336 1334 } else {
1337 1335 KICONV_JA_RETERROR(EILSEQ)
1338 1336 }
1339 1337 }
1340 1338
1341 1339 next:
1342 1340 /*
1343 1341 * One character successfully converted so update
1344 1342 * values outside of this function's stack.
1345 1343 */
1346 1344 *inbytesleft = ileft;
1347 1345 *outbytesleft = oleft;
1348 1346 }
1349 1347
1350 1348 ret:
1351 1349 return (rv);
1352 1350 }
1353 1351
1354 1352 static size_t
1355 1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1356 1354 size_t *outbytesleft, int flag, int *errno, uint8_t id)
1357 1355 {
1358 1356 uchar_t ic;
1359 1357 size_t rv = 0;
1360 1358 uint_t ucs4;
1361 1359 ushort_t euc16;
1362 1360 ushort_t dest;
1363 1361
1364 1362 uchar_t *ip;
1365 1363 size_t ileft;
1366 1364 char *op;
1367 1365 size_t oleft;
1368 1366 size_t read_len;
1369 1367
1370 1368 boolean_t do_not_ignore_null;
1371 1369
1372 1370 if ((inbuf == NULL) || (*inbuf == NULL)) {
1373 1371 return (0);
1374 1372 }
1375 1373
1376 1374 ip = (uchar_t *)inbuf;
1377 1375 ileft = *inbytesleft;
1378 1376 op = outbuf;
1379 1377 oleft = *outbytesleft;
1380 1378
1381 1379 KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1382 1380
1383 1381 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1384 1382
1385 1383 while (ileft != 0) {
1386 1384 KICONV_JA_GETU(&ucs4, flag);
1387 1385
1388 1386 if (ucs4 == 0x0 && do_not_ignore_null) {
1389 1387 return (0);
1390 1388 }
1391 1389
1392 1390 if (ucs4 > 0xffff) {
1393 1391 /* non-BMP */
1394 1392 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1395 1393 rv++;
1396 1394 goto next;
1397 1395 }
1398 1396
1399 1397 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1400 1398 if (euc16 == KICONV_JA_NODEST) {
1401 1399 euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1402 1400 }
1403 1401 if (euc16 == KICONV_JA_NODEST) {
1404 1402 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1405 1403 rv++;
1406 1404 goto next;
1407 1405 }
1408 1406
1409 1407 switch (euc16 & 0x8080) {
1410 1408 case 0x0000: /* CS0 */
1411 1409 if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1412 1410 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1413 1411 rv++;
1414 1412 } else {
1415 1413 ic = (uchar_t)euc16;
1416 1414 KICONV_JA_NPUT(ic);
1417 1415 }
1418 1416 break;
1419 1417 case 0x8080: /* CS1 */
1420 1418 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1421 1419 KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1422 1420 /*
1423 1421 * for even number row (Ku), add 0x80 to
1424 1422 * look latter half of kiconv_ja_jistosj2[] array
1425 1423 */
1426 1424 ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1427 1425 + (((ic % 2) == 0) ? 0x80 : 0x00));
1428 1426 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1429 1427 break;
1430 1428 case 0x0080: /* CS2 */
1431 1429 ic = (uchar_t)euc16;
1432 1430 KICONV_JA_NPUT(ic);
1433 1431 break;
1434 1432 case 0x8000: /* CS3 */
1435 1433 ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1436 1434 if (euc16 == 0xa271) {
1437 1435 /* NUMERO SIGN */
1438 1436 KICONV_JA_NPUT(0x87);
1439 1437 KICONV_JA_NPUT(0x82);
1440 1438 } else if (ic < 0x75) { /* check if IBM VDC */
1441 1439 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1442 1440 if (dest == 0xffff) {
1443 1441 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1444 1442 } else {
1445 1443 /* avoid putting NUL ('\0') */
1446 1444 if (dest > 0xff) {
1447 1445 KICONV_JA_NPUT(
1448 1446 (dest >> 8) & 0xff);
1449 1447 KICONV_JA_NPUT(dest & 0xff);
1450 1448 } else {
1451 1449 KICONV_JA_NPUT(dest & 0xff);
1452 1450 }
1453 1451 }
1454 1452 } else {
1455 1453 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1456 1454 /*
1457 1455 * for even number row (Ku), add 0x80 to
1458 1456 * look latter half of kiconv_ja_jistosj2[]
1459 1457 */
1460 1458 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1461 1459 + (((ic % 2) == 0) ? 0x80 : 0x00));
1462 1460 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1463 1461 }
1464 1462 break;
1465 1463 }
1466 1464
1467 1465 next:
1468 1466 /*
1469 1467 * One character successfully converted so update
1470 1468 * values outside of this function's stack.
1471 1469 */
1472 1470 *inbytesleft = ileft;
1473 1471 *outbytesleft = oleft;
1474 1472 }
1475 1473
1476 1474 ret:
1477 1475 return (rv);
1478 1476 }
1479 1477
1480 1478 static size_t
1481 1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1482 1480 char **outbuf, size_t *outbytesleft, int *errno)
1483 1481 {
1484 1482 if (! kcd || kcd == (void *)-1) {
1485 1483 *errno = EBADF;
1486 1484 return ((size_t)-1);
1487 1485 }
1488 1486
1489 1487 return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1490 1488 outbuf, outbytesleft, errno));
1491 1489 }
1492 1490
1493 1491 static size_t
1494 1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1495 1493 char **outbuf, size_t *outbytesleft, int *errno)
1496 1494 {
1497 1495 if (! kcd || kcd == (void *)-1) {
1498 1496 *errno = EBADF;
1499 1497 return ((size_t)-1);
1500 1498 }
1501 1499
1502 1500 return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1503 1501 outbuf, outbytesleft, errno));
1504 1502 }
1505 1503
1506 1504 static size_t
1507 1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1508 1506 size_t *outbytesleft, int flag, int *errno)
1509 1507 {
1510 1508 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1511 1509 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512 1510 }
1513 1511
1514 1512 static size_t
1515 1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1516 1514 size_t *outbytesleft, int flag, int *errno)
1517 1515 {
1518 1516 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1519 1517 outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520 1518 }
1521 1519
1522 1520 static size_t
1523 1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1524 1522 size_t *outbytesleft, int flag, int *errno)
1525 1523 {
1526 1524 return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1527 1525 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528 1526 }
1529 1527
1530 1528 static size_t
1531 1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1532 1530 size_t *outbytesleft, int flag, int *errno)
1533 1531 {
1534 1532 return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1535 1533 outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536 1534 }
1537 1535
1538 1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1539 1537 {
1540 1538 "eucjp", "utf-8", open_eucjp,
1541 1539 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1542 1540 },
1543 1541 {
1544 1542 "utf-8", "eucjp", open_eucjp,
1545 1543 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1546 1544 },
1547 1545 {
1548 1546 "eucjpms", "utf-8", open_eucjpms,
1549 1547 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1550 1548 },
1551 1549 {
1552 1550 "utf-8", "eucjpms", open_eucjpms,
1553 1551 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1554 1552 },
1555 1553 {
1556 1554 "sjis", "utf-8", open_sjis,
1557 1555 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1558 1556 },
1559 1557 {
1560 1558 "utf-8", "sjis", open_sjis,
1561 1559 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1562 1560 },
1563 1561 {
1564 1562 "cp932", "utf-8", open_cp932,
1565 1563 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1566 1564 },
1567 1565 {
1568 1566 "utf-8", "cp932", open_cp932,
1569 1567 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1570 1568 }
1571 1569 };
1572 1570
/*
 * Alternative code names and the names they stand for.  The two arrays
 * have the same length; presumably entry i of kiconv_ja_canonicals is the
 * canonical name for entry i of kiconv_ja_aliases (verify against the
 * consumer of kiconv_module_info_t).
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};

/* Element counts of the ops table and of the alias table. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1580 1578
1581 1579 static kiconv_module_info_t kiconv_ja_info = {
1582 1580 "kiconv_ja", /* module name */
1583 1581 KICONV_JA_MAX_JA_OPS, /* number of conversion in kiconv_ja */
1584 1582 kiconv_ja_ops_tbl, /* kiconv_ja ops table */
1585 1583 KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1586 1584 kiconv_ja_aliases, /* kiconv_ja aliases */
1587 1585 kiconv_ja_canonicals, /* kiconv_ja canonicals */
1588 1586 0
↓ open down ↓ |
1550 lines elided |
↑ open up ↑ |
1589 1587 };
1590 1588
1591 1589 static struct modlkiconv modlkiconv_ja = {
1592 1590 &mod_kiconvops,
1593 1591 "kiconv module for Japanese",
1594 1592 &kiconv_ja_info
1595 1593 };
1596 1594
1597 1595 static struct modlinkage modlinkage = {
1598 1596 MODREV_1,
1599 - (void *)&modlkiconv_ja,
1600 - NULL
1597 + { (void *)&modlkiconv_ja, NULL }
1601 1598 };
1602 1599
1603 1600 int
1604 1601 _init(void)
1605 1602 {
1606 1603 int err;
1607 1604
1608 1605 err = mod_install(&modlinkage);
1609 1606 if (err)
1610 1607 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1611 1608
1612 1609 return (err);
1613 1610 }
1614 1611
1615 1612 int
1616 1613 _info(struct modinfo *modinfop)
1617 1614 {
1618 1615 return (mod_info(&modlinkage, modinfop));
1619 1616 }
1620 1617
1621 1618 int
1622 1619 _fini(void)
1623 1620 {
1624 1621 int err;
1625 1622
1626 1623 /*
1627 1624 * If this module is being used, then, we cannot remove the module.
1628 1625 * The following checking will catch pretty much all usual cases.
1629 1626 *
1630 1627 * Any remaining will be catached by the kiconv_unregister_module()
1631 1628 * during mod_remove() at below.
1632 1629 */
1633 1630 if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1634 1631 return (EBUSY);
1635 1632
1636 1633 err = mod_remove(&modlinkage);
1637 1634 if (err)
1638 1635 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1639 1636
1640 1637 return (err);
1641 1638 }
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX