Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/kiconv/kiconv_tc/kiconv_tc.c
+++ new/usr/src/uts/common/kiconv/kiconv_tc/kiconv_tc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 -#pragma ident "%Z%%M% %I% %E% SMI"
27 -
28 26 #include <sys/types.h>
29 27 #include <sys/param.h>
30 28 #include <sys/sysmacros.h>
31 29 #include <sys/systm.h>
32 30 #include <sys/debug.h>
33 31 #include <sys/kmem.h>
34 32 #include <sys/sunddi.h>
35 33 #include <sys/byteorder.h>
36 34 #include <sys/errno.h>
37 35 #include <sys/modctl.h>
38 36 #include <sys/u8_textprep.h>
39 37 #include <sys/kiconv.h>
40 38 #include <sys/kiconv_cck_common.h>
41 39 #include <sys/kiconv_tc.h>
42 40 #include <sys/kiconv_big5_utf8.h>
43 41 #include <sys/kiconv_euctw_utf8.h>
44 42 #include <sys/kiconv_hkscs_utf8.h>
45 43 #include <sys/kiconv_cp950hkscs_utf8.h>
46 44 #include <sys/kiconv_utf8_big5.h>
47 45 #include <sys/kiconv_utf8_euctw.h>
48 46 #include <sys/kiconv_utf8_cp950hkscs.h>
49 47 #include <sys/kiconv_utf8_hkscs.h>
50 48
51 49 /* Four HKSCS-2004 code points each map to a pair of Unicode code points. */
/*
 * Each row is a 4-byte UTF-8 sequence: a two-byte base letter (0xc3 0x8a or
 * 0xc3 0xaa) followed by a two-byte combining mark (0xcc 0x84 or 0xcc 0x8c).
 * big5hkscs_to_utf8() selects a row with u8[1] when the looked-up table
 * entry starts with the 0xFF marker byte.
 */
52 50 static uchar_t hkscs_special_sequence[][4] = {
53 51 { 0xc3, 0x8a, 0xcc, 0x84 }, /* 0x8862 */
54 52 { 0xc3, 0x8a, 0xcc, 0x8c }, /* 0x8864 */
55 53 { 0xc3, 0xaa, 0xcc, 0x84 }, /* 0x88a3 */
56 54 { 0xc3, 0xaa, 0xcc, 0x8c } /* 0x88a5 */
57 55 };
58 56
59 57 /* Four Unicode code point pairs each map to one HKSCS-2004 code point. */
/*
 * Laid out in groups of three: the base letter on its own, then the base
 * letter followed by U+0304, then followed by U+030c.  utf8_to_big5hkscs()
 * derives the group from a negative table value, (-value - 1) * 3, and adds
 * 1 or 2 depending on the combining mark that follows in the input.
 */
60 58 static uint32_t ucs_special_sequence[] = {
61 59 0x8866, /* U+00ca */
62 60 0x8862, /* U+00ca U+0304 */
63 61 0x8864, /* U+00ca U+030c */
64 62 0x88a7, /* U+00ea */
65 63 0x88a3, /* U+00ea U+0304 */
66 64 0x88a5 /* U+00ea U+030c */
67 65 };
68 66
/*
 * Signature shared by big5_to_utf8(), big5hkscs_to_utf8() and
 * cp950hkscs_to_utf8(): take one packed two-byte character value and write
 * its UTF-8 form at ob without going past obtail.  The common BIG5
 * convertors receive one of these as their last argument.
 */
69 67 typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
70 68 uchar_t *obtail, size_t *ret_val);
71 69
/*
 * Forward declarations of the single-character converters; all of them are
 * defined further down in this file.
 */
72 70 static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
73 71 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
74 72 static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
75 73 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
76 74 static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
77 75 uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
78 76 static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
79 77 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
80 78 static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
81 79 size_t *ret_val);
82 80 static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
83 81 uchar_t *obtail, size_t *ret_val);
84 82 static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
85 83 uchar_t *obtail, size_t *ret_val);
86 84 static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
87 85 uchar_t *ob, uchar_t *obtail, size_t *ret_val);
88 86 static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
89 87 uchar_t byte2);
90 88
/*
 * Magic descriptor values: the open_fr_*() routines return one of these cast
 * to a pointer instead of allocating state, and close_fr_tc() rejects any
 * descriptor above KICONV_TC_MAX_MAGIC_ID.
 */
91 89 #define KICONV_TC_BIG5 (0x01)
92 90 #define KICONV_TC_BIG5HKSCS (0x02)
93 91 #define KICONV_TC_CP950HKSCS (0x03)
94 92 #define KICONV_TC_EUCTW (0x04)
95 93 #define KICONV_TC_MAX_MAGIC_ID (0x04)
96 94
97 95 static void *
98 96 open_fr_big5()
99 97 {
100 98 return ((void *)KICONV_TC_BIG5);
101 99 }
102 100
103 101 static void *
104 102 open_fr_big5hkscs()
105 103 {
106 104 return ((void *)KICONV_TC_BIG5HKSCS);
107 105 }
108 106
109 107 static void *
110 108 open_fr_cp950hkscs()
111 109 {
112 110 return ((void *)KICONV_TC_CP950HKSCS);
113 111 }
114 112
115 113 static void *
116 114 open_fr_euctw()
117 115 {
118 116 return ((void *)KICONV_TC_EUCTW);
119 117 }
120 118
121 119 static int
122 120 close_fr_tc(void *s)
123 121 {
124 122 if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
125 123 return (EBADF);
126 124
127 125 return (0);
128 126 }
129 127
130 128 /*
131 129 * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
132 130 */
/*
 * Walks the input one character at a time.  ASCII bytes are copied straight
 * through; a two-byte character is packed as (lead << 8 | trail) and handed
 * to ptr_big5touf8, which writes the UTF-8 form at ob and returns its length
 * (a negative value is treated here as "output buffer too small").
 *
 * kcd is only validated: NULL and (void *)-1 are rejected with EBADF.  A NULL
 * inbuf or *inbuf is treated as a state reset request and returns 0 at once.
 *
 * On the way out *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
 * to describe what is left unconverted.  The value returned is ret_val,
 * which starts at 0 and is also passed to ptr_big5touf8 for updating.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK is defined outside this file;
 * presumably it stores the error in *errno, sets ret_val to (size_t)-1 and
 * leaves the loop -- confirm against its definition.
 */
133 131 static size_t
134 132 kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
135 133 char **outbuf, size_t *outbytesleft, int *errno,
136 134 kiconv_big5toutf8_t ptr_big5touf8)
137 135 {
138 136 uchar_t *ib;
139 137 uchar_t *ob;
140 138 uchar_t *ibtail;
141 139 uchar_t *obtail;
142 140 size_t ret_val;
143 141 int8_t sz;
144 142 uint32_t big5_val;
145 143
146 144 /* Check on the kiconv code conversion descriptor. */
147 145 if (kcd == NULL || kcd == (void *)-1) {
148 146 *errno = EBADF;
149 147 return ((size_t)-1);
150 148 }
151 149
152 150 /* If this is a state reset request, process and return. */
153 151 if (inbuf == NULL || *inbuf == NULL) {
154 152 return (0);
155 153 }
156 154
157 155 ret_val = 0;
158 156 ib = (uchar_t *)*inbuf;
159 157 ob = (uchar_t *)*outbuf;
/* One-past-the-end markers for the input and output buffers. */
160 158 ibtail = ib + *inbytesleft;
161 159 obtail = ob + *outbytesleft;
162 160
163 161 while (ib < ibtail) {
/* ASCII is passed through unchanged, one byte in and one byte out. */
164 162 if (KICONV_IS_ASCII(*ib)) {
165 163 if (ob >= obtail) {
166 164 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
167 165 }
168 166
169 167 *ob++ = *ib++;
170 168 continue;
171 169 }
172 170
173 171 /*
174 172 * Issue EILSEQ error if the first byte is not a
175 173 * valid BIG5/HKSCS leading byte.
176 174 */
177 175 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
178 176 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
179 177 }
180 178
181 179 /*
182 180 * Issue EINVAL error if input buffer has an incomplete
183 181 * character at the end of the buffer.
184 182 */
185 183 if (ibtail - ib < 2) {
186 184 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
187 185 }
188 186
189 187 /*
190 188 * Issue EILSEQ error if the remaining bytes is not
191 189 * a valid BIG5/HKSCS byte.
192 190 */
193 191 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
194 192 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
195 193 }
196 194
197 195 /* Now we have a valid BIG5/HKSCS character. */
198 196 big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
199 197 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
200 198
201 199 if (sz < 0) {
202 200 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
203 201 }
204 202
/*
 * Input is consumed only after the output was produced, so a failed
 * conversion leaves ib pointing at the start of the character.
 */
205 203 ib += 2;
206 204 ob += sz;
207 205 }
208 206
/* Report how far we got, whether the loop ended normally or on error. */
209 207 *inbuf = (char *)ib;
210 208 *inbytesleft = ibtail - ib;
211 209 *outbuf = (char *)ob;
212 210 *outbytesleft = obtail - ob;
213 211
214 212 return (ret_val);
215 213 }
216 214
217 215 /*
218 216 * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
219 217 * to UTF-8.
220 218 */
/*
 * Converts up to *inlen bytes from ib into ob (capacity *outlen) using
 * ptr_big5touf8 for each two-byte character.  Unless KICONV_IGNORE_NULL is
 * set in flag, conversion stops at the first NUL byte in the input.
 *
 * On return *inlen and *outlen hold the number of bytes left in each
 * buffer.  The value returned is ret_val, which starts at 0, is bumped once
 * for every replacement character emitted, and is also passed to
 * ptr_big5touf8 for updating.
 *
 * NOTE(review): KICONV_SET_ERRNO_WITH_FLAG and KICONV_SET_ERRNO_AND_BREAK
 * are defined outside this file.  Presumably the former, when flag has
 * KICONV_REPLACE_INVALID set, skips the given number of input bytes and
 * jumps to the REPLACE_INVALID label below, and otherwise records the error
 * and breaks out of the loop -- confirm against their definitions.
 */
221 219 static size_t
222 220 kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
223 221 size_t *outlen, int flag, int *errno,
224 222 kiconv_big5toutf8_t ptr_big5touf8)
225 223 {
226 224 uchar_t *oldib;
227 225 uchar_t *ibtail;
228 226 uchar_t *obtail;
229 227 size_t ret_val;
230 228 int8_t sz;
231 229 uint32_t big5_val;
232 230 boolean_t do_not_ignore_null;
233 231
234 232 ret_val = 0;
235 233 ibtail = ib + *inlen;
236 234 obtail = ob + *outlen;
237 235 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
238 236
239 237 while (ib < ibtail) {
/* A NUL terminates the conversion unless the caller asked to ignore it. */
240 238 if (*ib == '\0' && do_not_ignore_null)
241 239 break;
242 240
243 241 if (KICONV_IS_ASCII(*ib)) {
244 242 if (ob >= obtail) {
245 243 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
246 244 }
247 245
248 246 *ob++ = *ib++;
249 247 continue;
250 248 }
251 249
/* Remember where this character starts so E2BIG can rewind to it. */
252 250 oldib = ib;
253 251
254 252 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
255 253 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
256 254 }
257 255
258 256 if (ibtail - ib < 2) {
259 257 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
260 258 }
261 259
262 260 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
263 261 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
264 262 }
265 263
/* Pack lead and trail byte into one value, consuming both. */
266 264 big5_val = *ib++;
267 265 big5_val = (big5_val << 8) | *ib++;
268 266 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
269 267
270 268 if (sz < 0) {
271 269 ib = oldib;
272 270 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
273 271 }
274 272
275 273 ob += sz;
276 274 continue;
277 275
/*
 * Only reached by a jump: emit the three-byte UTF-8 replacement character
 * for the invalid input, or rewind and fail with E2BIG if it does not fit.
 */
278 276 REPLACE_INVALID:
279 277 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
280 278 ib = oldib;
281 279 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
282 280 }
283 281
284 282 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
285 283 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
286 284 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
287 285 ret_val++;
288 286 }
289 287
290 288 *inlen = ibtail - ib;
291 289 *outlen = obtail - ob;
292 290
293 291 return (ret_val);
294 292 }
295 293
296 294 /*
297 295 * Encoding convertor from BIG5 to UTF-8.
298 296 */
299 297 static size_t
300 298 kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
301 299 size_t *outbytesleft, int *errno)
302 300 {
303 301 return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
304 302 outbytesleft, errno, big5_to_utf8));
305 303 }
306 304
307 305 /*
308 306 * String based encoding convertor from BIG5 to UTF-8.
309 307 */
310 308 static size_t
311 309 kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
312 310 size_t *outlen, int flag, int *errno)
313 311 {
314 312 return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
315 313 (uchar_t *)outarray, outlen, flag, errno,
316 314 big5_to_utf8));
317 315 }
318 316
319 317 /*
320 318 * Encoding convertor from BIG5-HKSCS to UTF-8.
321 319 */
322 320 static size_t
323 321 kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
324 322 char **outbuf, size_t *outbytesleft, int *errno)
325 323 {
326 324 return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
327 325 outbytesleft, errno, big5hkscs_to_utf8);
328 326 }
329 327
330 328 /*
331 329 * String based encoding convertor from BIG5-HKSCS to UTF-8.
332 330 */
333 331 static size_t
334 332 kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
335 333 size_t *outlen, int flag, int *errno)
336 334 {
337 335 return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
338 336 (uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
339 337 }
340 338
341 339 /*
342 340 * Encoding convertor from CP950-HKSCS to UTF-8.
343 341 */
344 342 static size_t
345 343 kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
346 344 char **outbuf, size_t *outbytesleft, int *errno)
347 345 {
348 346 return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
349 347 outbytesleft, errno, cp950hkscs_to_utf8);
350 348 }
351 349
352 350 /*
353 351 * String based encoding convertor from CP950-HKSCS to UTF-8.
354 352 */
355 353 static size_t
356 354 kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
357 355 size_t *outlen, int flag, int *errno)
358 356 {
359 357 return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
360 358 (uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
361 359 }
362 360
363 361 /*
364 362 * Encoding convertor from EUC-TW to UTF-8.
365 363 */
/*
 * Walks the input one character at a time.  ASCII bytes are copied straight
 * through.  A character whose lead byte equals KICONV_TC_EUCTW_MBYTE is four
 * bytes long (marker, plane byte, two code bytes); any other valid lead byte
 * starts a two-byte plane 1 character.  The two code bytes are packed into
 * one value and converted by euctw_to_utf8().
 *
 * kcd is only validated: NULL and (void *)-1 are rejected with EBADF.  A NULL
 * inbuf or *inbuf is treated as a state reset request and returns 0 at once.
 *
 * On the way out *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
 * to describe what is left unconverted.  The value returned is ret_val,
 * which starts at 0 and is also passed to euctw_to_utf8() for updating.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK is defined outside this file;
 * presumably it stores the error in *errno, sets ret_val to (size_t)-1 and
 * leaves the loop -- confirm against its definition.
 */
366 364 static size_t
367 365 kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
368 366 char **outbuf, size_t *outbytesleft, int *errno)
369 367 {
370 368 uchar_t *ib;
371 369 uchar_t *ob;
372 370 uchar_t *ibtail;
373 371 uchar_t *obtail;
374 372 uchar_t *oldib;
375 373 size_t ret_val;
376 374 size_t plane_no;
377 375 int8_t sz;
378 376 uint32_t euctw_val;
379 377 boolean_t isplane1;
380 378
381 379 /* Check on the kiconv code conversion descriptor. */
382 380 if (kcd == NULL || kcd == (void *)-1) {
383 381 *errno = EBADF;
384 382 return ((size_t)-1);
385 383 }
386 384
387 385 /* If this is a state reset request, process and return. */
388 386 if (inbuf == NULL || *inbuf == NULL) {
389 387 return (0);
390 388 }
391 389
392 390 ret_val = 0;
393 391 ib = (uchar_t *)*inbuf;
394 392 ob = (uchar_t *)*outbuf;
395 393 ibtail = ib + *inbytesleft;
396 394 obtail = ob + *outbytesleft;
397 395
398 396 while (ib < ibtail) {
399 397 if (KICONV_IS_ASCII(*ib)) {
400 398 if (ob >= obtail) {
401 399 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
402 400 }
403 401
404 402 *ob++ = *ib++;
405 403 continue;
406 404 }
407 405
408 406 /*
409 407 * Issue EILSEQ error if the first byte is not a
410 408 * valid EUC-TW leading byte.
411 409 */
412 410 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
413 411 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
414 412 }
415 413
/* The multi-byte marker means "not plane 1": a four-byte sequence. */
416 414 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
417 415 B_FALSE : B_TRUE;
418 416
419 417 /*
420 418 * Issue EINVAL error if input buffer has an incomplete
421 419 * character at the end of the buffer.
422 420 */
423 421 if (ibtail - ib < (isplane1 ? 2 : 4)) {
424 422 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
425 423 }
426 424
/* Remember where this character starts so E2BIG can rewind to it. */
427 425 oldib = ib;
/* For four-byte sequences the plane is encoded in the second byte. */
428 426 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
429 427
430 428 /*
431 429 * Issue EILSEQ error if the remaining bytes are not
432 430 * valid EUC-TW bytes.
433 431 */
434 432 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
435 433 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
436 434 }
437 435
/* Skip the marker and plane bytes so ib points at the two code bytes. */
438 436 if (! isplane1)
439 437 ib += 2;
440 438
441 439 /* Now we have a valid EUC-TW character. */
442 440 euctw_val = *ib++;
443 441 euctw_val = (euctw_val << 8) | *ib++;
444 442 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
445 443
446 444 if (sz < 0) {
447 445 ib = oldib;
448 446 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
449 447 }
450 448
451 449 ob += sz;
452 450 }
453 451
454 452 *inbuf = (char *)ib;
455 453 *inbytesleft = ibtail - ib;
456 454 *outbuf = (char *)ob;
457 455 *outbytesleft = obtail - ob;
458 456
459 457 return (ret_val);
460 458 }
461 459
462 460 /*
463 461 * String based encoding convertor from EUC-TW to UTF-8.
464 462 */
/*
 * Converts up to *inlen bytes from inarray into outarray (capacity *outlen).
 * Unless KICONV_IGNORE_NULL is set in flag, conversion stops at the first
 * NUL byte in the input.  A lead byte equal to KICONV_TC_EUCTW_MBYTE starts
 * a four-byte character (marker, plane byte, two code bytes); any other
 * valid lead byte starts a two-byte plane 1 character.
 *
 * On return *inlen and *outlen hold the number of bytes left in each
 * buffer.  The value returned is ret_val, which starts at 0, is bumped once
 * for every replacement character emitted, and is also passed to
 * euctw_to_utf8() for updating.
 *
 * NOTE(review): KICONV_SET_ERRNO_WITH_FLAG and KICONV_SET_ERRNO_AND_BREAK
 * are defined outside this file.  Presumably the former, when flag has
 * KICONV_REPLACE_INVALID set, skips the given number of input bytes and
 * jumps to the REPLACE_INVALID label below, and otherwise records the error
 * and breaks out of the loop -- confirm against their definitions.
 */
465 463 static size_t
466 464 kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
467 465 size_t *outlen, int flag, int *errno)
468 466 {
469 467 uchar_t *ib;
470 468 uchar_t *ob;
471 469 uchar_t *ibtail;
472 470 uchar_t *obtail;
473 471 uchar_t *oldib;
474 472 size_t ret_val;
475 473 size_t plane_no;
476 474 int8_t sz;
477 475 uint32_t euctw_val;
478 476 boolean_t isplane1;
479 477 boolean_t do_not_ignore_null;
480 478
481 479 ret_val = 0;
482 480 ib = (uchar_t *)inarray;
483 481 ob = (uchar_t *)outarray;
484 482 ibtail = ib + *inlen;
485 483 obtail = ob + *outlen;
486 484 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
487 485
488 486 while (ib < ibtail) {
/* A NUL terminates the conversion unless the caller asked to ignore it. */
489 487 if (*ib == '\0' && do_not_ignore_null)
490 488 break;
491 489
492 490 if (KICONV_IS_ASCII(*ib)) {
493 491 if (ob >= obtail) {
494 492 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495 493 }
496 494
497 495 *ob++ = *ib++;
498 496 continue;
499 497 }
500 498
/* Remember where this character starts so E2BIG can rewind to it. */
501 499 oldib = ib;
502 500
503 501 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
504 502 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
505 503 }
506 504
507 505 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
508 506 B_FALSE : B_TRUE;
509 507
/*
 * Truncated character at the end of the input: in replace mode the whole
 * remainder is consumed and turned into a single replacement character,
 * otherwise the conversion stops with EINVAL.
 */
510 508 if (ibtail - ib < (isplane1 ? 2 : 4)) {
511 509 if (flag & KICONV_REPLACE_INVALID) {
512 510 ib = ibtail;
513 511 goto REPLACE_INVALID;
514 512 }
515 513
516 514 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
517 515 }
518 516
/* For four-byte sequences the plane is encoded in the second byte. */
519 517 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
520 518
521 519 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
522 520 KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
523 521 }
524 522
/* Skip the marker and plane bytes so ib points at the two code bytes. */
525 523 if (! isplane1)
526 524 ib += 2;
527 525
528 526 euctw_val = *ib++;
529 527 euctw_val = (euctw_val << 8) | *ib++;
530 528 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
531 529
532 530 if (sz < 0) {
533 531 ib = oldib;
534 532 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
535 533 }
536 534
537 535 ob += sz;
538 536 continue;
539 537
/*
 * Only reached by a jump: emit the three-byte UTF-8 replacement character
 * for the invalid input, or rewind and fail with E2BIG if it does not fit.
 */
540 538 REPLACE_INVALID:
541 539 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
542 540 ib = oldib;
543 541 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
544 542 }
545 543
546 544 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
547 545 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
548 546 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
549 547 ret_val++;
550 548 }
551 549
552 550 *inlen = ibtail - ib;
553 551 *outlen = obtail - ob;
554 552
555 553 return (ret_val);
556 554 }
557 555
558 556 /*
559 557 * Encoding convertor from UTF-8 to BIG5.
560 558 */
561 559 static size_t
562 560 kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
563 561 char **outbuf, size_t *outbytesleft, int *errno)
564 562 {
565 563 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
566 564 outbytesleft, errno, utf8_to_big5);
567 565 }
568 566
569 567 /*
570 568 * String based encoding convertor from UTF-8 to BIG5.
571 569 */
572 570 static size_t
573 571 kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
574 572 size_t *outlen, int flag, int *errno)
575 573 {
576 574 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
577 575 (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
578 576 }
579 577
580 578 /*
581 579 * Encoding convertor from UTF-8 to EUC-TW.
582 580 */
583 581 static size_t
584 582 kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
585 583 char **outbuf, size_t *outbytesleft, int *errno)
586 584 {
587 585 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
588 586 outbytesleft, errno, utf8_to_euctw);
589 587 }
590 588
591 589 /*
592 590 * String based encoding convertor from UTF-8 to EUC-TW.
593 591 */
594 592 static size_t
595 593 kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
596 594 size_t *outlen, int flag, int *errno)
597 595 {
598 596 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
599 597 (uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
600 598 }
601 599
602 600 /*
603 601 * Encoding convertor from UTF-8 to CP950HKSCS.
604 602 */
605 603 static size_t
606 604 kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
607 605 char **outbuf, size_t *outbytesleft, int *errno)
608 606 {
609 607 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
610 608 outbytesleft, errno, utf8_to_cp950hkscs);
611 609 }
612 610
613 611 /*
614 612 * String based encoding convertor from UTF-8 to CP950HKSCS.
615 613 */
616 614 static size_t
617 615 kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
618 616 size_t *outlen, int flag, int *errno)
619 617 {
620 618 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
621 619 (uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
622 620 }
623 621
624 622 /*
625 623 * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
626 624 */
627 625 static size_t
628 626 kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
629 627 char **outbuf, size_t *outbytesleft, int *errno)
630 628 {
631 629 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
632 630 outbytesleft, errno, utf8_to_big5hkscs);
633 631 }
634 632
635 633 /*
636 634 * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
637 635 */
638 636 static size_t
639 637 kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
640 638 size_t *outlen, int flag, int *errno)
641 639 {
642 640 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
643 641 (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
644 642 }
645 643
646 644 /*
647 645 * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
648 646 * Return: > 0 - Converted successfully
649 647 * = -1 - E2BIG
650 648 */
651 649 static int8_t
652 650 big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
653 651 size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
654 652 {
655 653 size_t index;
656 654 int8_t sz;
657 655 uchar_t *u8;
658 656
659 657 index = kiconv_binsearch(big5_val, table, nitems);
660 658 u8 = table[index].u8;
661 659 sz = u8_number_of_bytes[u8[0]];
662 660
663 661 if (obtail - ob < sz) {
664 662 *ret_val = (size_t)-1;
665 663 return (-1);
666 664 }
667 665
668 666 if (index == 0)
669 667 (*ret_val)++; /* Non-identical conversion */
670 668
671 669 for (index = 0; index < sz; index++)
672 670 *ob++ = u8[index];
673 671
674 672 return (sz);
675 673 }
676 674
677 675 /*
678 676 * Convert single BIG5 character to UTF-8.
679 677 */
680 678 static int8_t
681 679 big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
682 680 {
683 681 return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
684 682 kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
685 683 }
686 684
687 685 /*
688 686 * Convert single CP950-HKSCS character to UTF-8.
689 687 */
690 688 static int8_t
691 689 cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
692 690 size_t *ret_val)
693 691 {
694 692 return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
695 693 kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
696 694 }
697 695
698 696 /*
699 697 * Calculate unicode value for some CNS planes which fall in Unicode
700 698 * UDA range.
701 699 */
702 700 static uint32_t
703 701 get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
704 702 {
705 703 /*
706 704 * CNS Plane 15 is pre-allocated, so need move Plane 16 to back 15
707 705 * to compute the Unicode value.
708 706 */
709 707 if (plane_no == 16)
710 708 --plane_no;
711 709
712 710 /* 0xF0000 + (plane_no - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1) */
713 711 return (8836 * plane_no + 94 * b1 + b2 + 0xD2611);
714 712 }
715 713
716 714 /*
717 715 * Convert single EUC-TW character to UTF-8.
718 716 * Return: > 0 - Converted successfully
719 717 * = -1 - E2BIG
720 718 */
721 719 static int8_t
722 720 euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
723 721 uchar_t *obtail, size_t *ret_val)
724 722 {
725 723 uint32_t u32;
726 724 size_t index;
727 725 int8_t sz;
728 726 uchar_t udc[4];
729 727 uchar_t *u8;
730 728
731 729 switch (plane_no) {
732 730 case 1:
733 731 index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
734 732 KICONV_CNS1_UTF8_MAX);
735 733 u8 = kiconv_cns1_utf8[index].u8;
736 734 break;
737 735 case 2:
738 736 index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
739 737 KICONV_CNS2_UTF8_MAX);
740 738 u8 = kiconv_cns2_utf8[index].u8;
741 739 break;
742 740 case 3:
743 741 index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
744 742 KICONV_CNS3_UTF8_MAX);
745 743 u8 = kiconv_cns3_utf8[index].u8;
746 744 break;
747 745 case 4:
748 746 index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
749 747 KICONV_CNS4_UTF8_MAX);
750 748 u8 = kiconv_cns4_utf8[index].u8;
751 749 break;
752 750 case 5:
753 751 index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
754 752 KICONV_CNS5_UTF8_MAX);
755 753 u8 = kiconv_cns5_utf8[index].u8;
756 754 break;
757 755 case 6:
758 756 index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
759 757 KICONV_CNS6_UTF8_MAX);
760 758 u8 = kiconv_cns6_utf8[index].u8;
761 759 break;
762 760 case 7:
763 761 index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
764 762 KICONV_CNS7_UTF8_MAX);
765 763 u8 = kiconv_cns7_utf8[index].u8;
766 764 break;
767 765 case 12:
768 766 case 13:
769 767 case 14:
770 768 case 16:
771 769 u32 = get_unicode_from_UDA(plane_no,
772 770 (euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
773 771 /*
774 772 * As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
775 773 * will occupy 4 bytes.
776 774 */
777 775 udc[0] = 0xF3;
778 776 udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
779 777 udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
780 778 udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
781 779 u8 = udc;
782 780 index = 1;
783 781 break;
784 782 case 15:
785 783 index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
786 784 KICONV_CNS15_UTF8_MAX);
787 785 u8 = kiconv_cns15_utf8[index].u8;
788 786 break;
789 787 default:
790 788 index = 0;
791 789 u8 = kiconv_cns1_utf8[index].u8;
792 790 }
793 791
794 792 sz = u8_number_of_bytes[u8[0]];
795 793 if (obtail - ob < sz) {
796 794 *ret_val = (size_t)-1;
797 795 return (-1);
798 796 }
799 797
800 798 if (index == 0)
801 799 (*ret_val)++;
802 800
803 801 for (index = 0; index < sz; index++)
804 802 *ob++ = u8[index];
805 803
806 804 return (sz);
807 805 }
808 806
809 807 /*
810 808 * Convert single HKSCS character to UTF-8.
811 809 * Return: > 0 - Converted successfully
812 810 * = -1 - E2BIG
813 811 */
814 812 static int8_t
815 813 big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
816 814 size_t *ret_val)
817 815 {
818 816 size_t index;
819 817 int8_t sz;
820 818 uchar_t *u8;
821 819
822 820 index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
823 821 KICONV_HKSCS_UTF8_MAX);
824 822 u8 = kiconv_hkscs_utf8[index].u8;
825 823
826 824 /*
827 825 * Single HKSCS-2004 character may map to 2 Unicode
828 826 * code points.
829 827 */
830 828 if (u8[0] == 0xFF) {
831 829 u8 = hkscs_special_sequence[u8[1]];
832 830 sz = 4;
833 831 } else {
834 832 sz = u8_number_of_bytes[u8[0]];
835 833 }
836 834
837 835 if (obtail - ob < sz) {
838 836 *ret_val = (size_t)-1;
839 837 return (-1);
840 838 }
841 839
842 840 if (index == 0)
843 841 (*ret_val)++; /* Non-identical conversion. */
844 842
845 843 for (index = 0; index < sz; index++)
846 844 *ob++ = u8[index];
847 845
848 846 return (sz);
849 847 }
850 848
851 849 /*
852 850 * Convert single UTF-8 character to EUC-TW.
853 851 * Return: > 0 - Converted successfully
854 852 * = -1 - E2BIG
855 853 */
856 854 /* ARGSUSED */
857 855 static int8_t
858 856 utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
859 857 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
860 858 {
861 859 size_t index;
862 860 size_t plane_no;
863 861 uchar_t byte1;
864 862 uchar_t byte2;
865 863
866 864 if (utf8 >= KICONV_TC_UDA_UTF8_START &&
867 865 utf8 <= KICONV_TC_UDA_UTF8_END) {
868 866 /*
869 867 * Calculate EUC-TW code if utf8 is in Unicode
870 868 * Private Plane 15.
871 869 */
872 870 index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
873 871 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
874 872 KICONV_TC_UDA_UCS4_START;
875 873 plane_no = 12 + index / 8836;
876 874 byte1 = 0xA1 + (index % 8836) / 94;
877 875 byte2 = 0xA1 + index % 94;
878 876
879 877 /* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
880 878 if (plane_no == 15)
881 879 plane_no = 16;
882 880 } else {
883 881 uint32_t euctw_val;
884 882
885 883 index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
886 884 KICONV_UTF8_EUCTW_MAX);
887 885
888 886 if (index == 0) {
889 887 if (ob >= obtail) {
890 888 *ret_val = (size_t)-1;
891 889 return (-1);
892 890 }
893 891
894 892 *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
895 893 (*ret_val)++;
896 894
897 895 return (1);
898 896 }
899 897
900 898 euctw_val = kiconv_utf8_euctw[index].value;
901 899 byte1 = (euctw_val & 0xFF00) >> 8;
902 900 byte2 = euctw_val & 0xFF;
903 901 plane_no = euctw_val >> 16;
904 902 }
905 903
906 904 if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
907 905 *ret_val = (size_t)-1;
908 906 return (-1);
909 907 }
910 908
911 909 if (plane_no != 1) {
912 910 *ob++ = KICONV_TC_EUCTW_MBYTE;
913 911 *ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
914 912 }
915 913
916 914 *ob++ = byte1;
917 915 *ob = byte2;
918 916
919 917 return (plane_no == 1 ? 2 : 4);
920 918 }
921 919
922 920 /*
923 921 * Convert single UTF-8 character to BIG5-HKSCS
924 922 * Return: > 0 - Converted successfully
925 923 * = -1 - E2BIG
926 924 */
927 925 static int8_t
928 926 utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
929 927 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
930 928 {
931 929 size_t index;
932 930 int8_t hkscslen;
933 931 uint32_t hkscscode;
934 932 boolean_t special_sequence = B_FALSE;
935 933
936 934 index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
937 935 KICONV_UTF8_HKSCS_MAX);
938 936 hkscscode = kiconv_utf8_hkscs[index].value;
939 937
940 938 /*
941 939 * There are 4 special code points in HKSCS-2004 which mapped
942 940 * to 2 UNICODE code points.
943 941 */
944 942 if ((int32_t)hkscscode < 0) {
945 943 size_t special_index = (-(int32_t)hkscscode - 1) * 3;
946 944
947 945 /* Check the following 2 bytes. */
948 946 if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
949 947 (*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
950 948 special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
951 949 special_sequence = B_TRUE;
952 950 }
953 951
954 952 hkscscode = ucs_special_sequence[special_index];
955 953 }
956 954
957 955 hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
958 956 if (obtail - ob < hkscslen) {
959 957 *ret_val = (size_t)-1;
960 958 return (-1);
961 959 }
962 960
963 961 if (index == 0)
964 962 (*ret_val)++;
965 963
966 964 if (hkscslen > 1)
967 965 *ob++ = (uchar_t)(hkscscode >> 8);
968 966 *ob = (uchar_t)(hkscscode & 0xFF);
969 967
970 968 if (special_sequence) { /* Advance for special sequence */
971 969 (*inbuf) += 2;
972 970 }
973 971
974 972 return (hkscslen);
975 973 }
976 974
977 975 /*
978 976 * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
979 977 * Return: > 0 - Converted successfully
980 978 * = -1 - E2BIG
981 979 */
982 980 static int8_t
983 981 utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
984 982 size_t *ret_val, kiconv_table_t *table, size_t nitems)
985 983 {
986 984 size_t index;
987 985 int8_t big5len;
988 986 uint32_t big5code;
989 987
990 988 index = kiconv_binsearch(utf8, table, nitems);
991 989 big5code = table[index].value;
992 990 big5len = (big5code <= 0xFF) ? 1 : 2;
993 991
994 992 if (obtail - ob < big5len) {
995 993 *ret_val = (size_t)-1;
996 994 return (-1);
997 995 }
998 996
999 997 if (index == 0)
1000 998 (*ret_val)++;
1001 999
1002 1000 if (big5len > 1)
1003 1001 *ob++ = (uchar_t)(big5code >> 8);
1004 1002 *ob = (uchar_t)(big5code & 0xFF);
1005 1003
1006 1004 return (big5len);
1007 1005 }
1008 1006
1009 1007 /*
1010 1008 * Convert single UTF-8 character to BIG5.
1011 1009 */
1012 1010 /* ARGSUSED */
1013 1011 static int8_t
1014 1012 utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1015 1013 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1016 1014 {
1017 1015 return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1018 1016 kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
1019 1017 }
1020 1018
1021 1019 /*
1022 1020 * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1023 1021 */
1024 1022 /* ARGSUSED */
1025 1023 static int8_t
1026 1024 utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1027 1025 uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1028 1026 {
1029 1027 return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1030 1028 kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
1031 1029 }
1032 1030
/*
 * Conversion table for this module: one entry per direction, two entries per
 * encoding.  In every entry the first string names the target encoding and
 * the second the source encoding (the "big5", "utf-8" entry uses the UTF-8
 * to BIG5 routines), followed by the open, buffer-convert, close and
 * string-convert routines for that direction.
 * NOTE(review): field names of kiconv_ops_t are not visible here -- confirm
 * the order against its declaration.
 */
1033 1031 static kiconv_ops_t kiconv_tc_ops_tbl[] = {
1034 1032 {
1035 1033 "big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
1036 1034 kiconv_close_to_cck, kiconvstr_to_big5
1037 1035 },
1038 1036 {
1039 1037 "utf-8", "big5", open_fr_big5, kiconv_fr_big5,
1040 1038 close_fr_tc, kiconvstr_fr_big5
1041 1039 },
1042 1040
1043 1041 {
1044 1042 "big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
1045 1043 kiconv_close_to_cck, kiconvstr_to_big5hkscs
1046 1044 },
1047 1045 {
1048 1046 "utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
1049 1047 close_fr_tc, kiconvstr_fr_big5hkscs
1050 1048 },
1051 1049
1052 1050 {
1053 1051 "euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
1054 1052 kiconv_close_to_cck, kiconvstr_to_euctw
1055 1053 },
1056 1054 {
1057 1055 "utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
1058 1056 close_fr_tc, kiconvstr_fr_euctw
1059 1057 },
1060 1058
1061 1059 {
1062 1060 "cp950-hkscs", "utf-8", kiconv_open_to_cck,
1063 1061 kiconv_to_cp950hkscs, kiconv_close_to_cck,
1064 1062 kiconvstr_to_cp950hkscs
1065 1063 },
1066 1064 {
1067 1065 "utf-8", "cp950-hkscs", open_fr_cp950hkscs,
1068 1066 kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
1069 1067 },
1070 1068 };
1071 1069
/*
 * Module description referenced by modlkiconv_tc: the module name, the
 * number of entries in kiconv_tc_ops_tbl (computed with sizeof so it tracks
 * the table) and the table itself.
 * NOTE(review): the meaning of the four trailing 0/NULL initializers is not
 * visible in this file -- confirm against the kiconv_module_info_t
 * declaration.
 */
1072 1070 static kiconv_module_info_t kiconv_tc_info = {
1073 1071 "kiconv_tc", /* module name */
1074 1072 sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
1075 1073 kiconv_tc_ops_tbl,
1076 1074 0,
1077 1075 NULL,
1078 1076 NULL,
1079 1077 0
↓ open down ↓ |
1042 lines elided |
↑ open up ↑ |
1080 1078 };
1081 1079
/*
 * Linkage element for this module: ties the mod_kiconvops operations to a
 * human-readable description string and to kiconv_tc_info.  Its address is
 * what the modlinkage structure below points at.
 */
1082 1080 static struct modlkiconv modlkiconv_tc = {
1083 1081 &mod_kiconvops,
1084 1082 "kiconv Traditional Chinese module 1.0",
1085 1083 &kiconv_tc_info
1086 1084 };
1087 1085
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info() below.
 * This is the change under review ('-' rows are the old text, the '+' row is
 * the new text): the linkage pointers and their NULL terminator are wrapped
 * in their own braces so the nested initializer is fully braced.
 */
1088 1086 static struct modlinkage modlinkage = {
1089 1087 MODREV_1,
1090 - (void *)&modlkiconv_tc,
1091 - NULL
1088 + { (void *)&modlkiconv_tc, NULL }
1092 1089 };
1093 1090
1094 1091 int
1095 1092 _init(void)
1096 1093 {
1097 1094 int err;
1098 1095
1099 1096 err = mod_install(&modlinkage);
1100 1097 if (err)
1101 1098 cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
1102 1099
1103 1100 return (err);
1104 1101 }
1105 1102
1106 1103 int
1107 1104 _fini(void)
1108 1105 {
1109 1106 int err;
1110 1107
1111 1108 /*
1112 1109 * If this module is being used, then, we cannot remove the module.
1113 1110 * The following checking will catch pretty much all usual cases.
1114 1111 *
1115 1112 * Any remaining will be catached by the kiconv_unregister_module()
1116 1113 * during mod_remove() at below.
1117 1114 */
1118 1115 if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
1119 1116 return (EBUSY);
1120 1117
1121 1118 err = mod_remove(&modlinkage);
1122 1119 if (err)
1123 1120 cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
1124 1121
1125 1122 return (err);
1126 1123 }
1127 1124
1128 1125 int
1129 1126 _info(struct modinfo *modinfop)
1130 1127 {
1131 1128 return (mod_info(&modlinkage, modinfop));
1132 1129 }
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX