Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/kiconv/kiconv_sc/kiconv_sc.c
+++ new/usr/src/uts/common/kiconv/kiconv_sc/kiconv_sc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 -#pragma ident "%Z%%M% %I% %E% SMI"
27 -
28 26 #include <sys/types.h>
29 27 #include <sys/param.h>
30 28 #include <sys/sysmacros.h>
31 29 #include <sys/systm.h>
32 30 #include <sys/debug.h>
33 31 #include <sys/kmem.h>
34 32 #include <sys/sunddi.h>
35 33 #include <sys/byteorder.h>
36 34 #include <sys/errno.h>
37 35 #include <sys/modctl.h>
38 36 #include <sys/kiconv.h>
39 37 #include <sys/u8_textprep.h>
40 38 #include <sys/kiconv_cck_common.h>
41 39 #include <sys/kiconv_sc.h>
42 40 #include <sys/kiconv_gb18030_utf8.h>
43 41 #include <sys/kiconv_gb2312_utf8.h>
44 42 #include <sys/kiconv_utf8_gb18030.h>
45 43 #include <sys/kiconv_utf8_gb2312.h>
46 44
47 45 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
48 46 uchar_t *obtail, size_t *ret_val);
49 47 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
50 48 size_t *ret_val, boolean_t isgbk4);
51 49 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
52 50 uchar_t *ob, uchar_t *obtail, size_t *ret);
53 51 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
54 52 uchar_t *ob, uchar_t *obtail, size_t *ret);
55 53 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
56 54 uchar_t *ob, uchar_t *obtail, size_t *ret);
57 55
58 56 #define KICONV_SC_GB18030 (0x01)
59 57 #define KICONV_SC_GBK (0x02)
60 58 #define KICONV_SC_EUCCN (0x03)
61 59 #define KICONV_SC_MAX_MAGIC_ID (0x03)
62 60
63 61 static void *
64 62 open_fr_gb18030()
65 63 {
66 64 return ((void *)KICONV_SC_GB18030);
67 65 }
68 66
69 67 static void *
70 68 open_fr_gbk()
71 69 {
72 70 return ((void *)KICONV_SC_GBK);
73 71 }
74 72
75 73 static void *
76 74 open_fr_euccn()
77 75 {
78 76 return ((void *)KICONV_SC_EUCCN);
79 77 }
80 78
81 79 static int
82 80 close_fr_sc(void *s)
83 81 {
84 82 if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
85 83 return (EBADF);
86 84
87 85 return (0);
88 86 }
89 87
90 88 /*
91 89 * Encoding convertor from UTF-8 to GB18030.
92 90 */
93 91 size_t
94 92 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
95 93 char **outbuf, size_t *outbytesleft, int *errno)
96 94 {
97 95
98 96 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
99 97 outbytesleft, errno, utf8_to_gb18030);
100 98 }
101 99
102 100 /*
103 101 * String based encoding convertor from UTF-8 to GB18030.
104 102 */
105 103 size_t
106 104 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
107 105 size_t *outlen, int flag, int *errno)
108 106 {
109 107 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
110 108 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
111 109 }
112 110
113 111 /*
114 112 * Encoding convertor from GB18030 to UTF-8.
115 113 */
116 114 size_t
117 115 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
118 116 char **outbuf, size_t *outbytesleft, int *errno)
119 117 {
120 118 uchar_t *ib;
121 119 uchar_t *ob;
122 120 uchar_t *ibtail;
123 121 uchar_t *obtail;
124 122 size_t ret_val;
125 123 int8_t sz;
126 124 uint32_t gb_val;
127 125 boolean_t isgbk4;
128 126
129 127 /* Check on the kiconv code conversion descriptor. */
130 128 if (kcd == NULL || kcd == (void *)-1) {
131 129 *errno = EBADF;
132 130 return ((size_t)-1);
133 131 }
134 132
135 133 /* If this is a state reset request, process and return. */
136 134 if (inbuf == NULL || *inbuf == NULL) {
137 135 return (0);
138 136 }
139 137
140 138 ret_val = 0;
141 139 ib = (uchar_t *)*inbuf;
142 140 ob = (uchar_t *)*outbuf;
143 141 ibtail = ib + *inbytesleft;
144 142 obtail = ob + *outbytesleft;
145 143
146 144 while (ib < ibtail) {
147 145 if (KICONV_IS_ASCII(*ib)) {
148 146 if (ob >= obtail) {
149 147 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
150 148 }
151 149
152 150 *ob++ = *ib++;
153 151 continue;
154 152 }
155 153
156 154 /*
157 155 * Issue EILSEQ error if the first byte is not a
158 156 * valid GB18030 leading byte.
159 157 */
160 158 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
161 159 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
162 160 }
163 161
164 162 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
165 163 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
166 164
167 165 if (isgbk4) {
168 166 if (ibtail - ib < 4) {
169 167 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
170 168 }
171 169
172 170 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
173 171 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
174 172 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
175 173 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
176 174 }
177 175
178 176 gb_val = (uint32_t)(*ib) << 24 |
179 177 (uint32_t)(*(ib + 1)) << 16 |
180 178 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
181 179 } else {
182 180 if (ibtail - ib < 2) {
183 181 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
184 182 }
185 183
186 184 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
187 185 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
188 186 }
189 187
190 188 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
191 189 }
192 190
193 191 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
194 192 if (sz < 0) {
195 193 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
196 194 }
197 195
198 196 ib += isgbk4 ? 4 : 2;
199 197 ob += sz;
200 198 }
201 199
202 200 *inbuf = (char *)ib;
203 201 *inbytesleft = ibtail - ib;
204 202 *outbuf = (char *)ob;
205 203 *outbytesleft = obtail - ob;
206 204
207 205 return (ret_val);
208 206 }
209 207
210 208 /*
211 209 * String based encoding convertor from GB18030 to UTF-8.
212 210 */
213 211 size_t
214 212 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
215 213 size_t *outlen, int flag, int *errno)
216 214 {
217 215 uchar_t *ib;
218 216 uchar_t *ob;
219 217 uchar_t *ibtail;
220 218 uchar_t *obtail;
221 219 uchar_t *oldib;
222 220 size_t ret_val;
223 221 int8_t sz;
224 222 uint32_t gb_val;
225 223 boolean_t isgbk4;
226 224 boolean_t do_not_ignore_null;
227 225
228 226 ret_val = 0;
229 227 ib = (uchar_t *)inarray;
230 228 ob = (uchar_t *)outarray;
231 229 ibtail = ib + *inlen;
232 230 obtail = ob + *outlen;
233 231 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
234 232
235 233 while (ib < ibtail) {
236 234 if (*ib == '\0' && do_not_ignore_null)
237 235 break;
238 236
239 237 if (KICONV_IS_ASCII(*ib)) {
240 238 if (ob >= obtail) {
241 239 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
242 240 }
243 241
244 242 *ob++ = *ib++;
245 243 continue;
246 244 }
247 245
248 246 oldib = ib;
249 247
250 248 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
251 249 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
252 250 }
253 251
254 252 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
255 253 KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
256 254
257 255 if (isgbk4) {
258 256 if (ibtail - ib < 4) {
259 257 if (flag & KICONV_REPLACE_INVALID) {
260 258 ib = ibtail;
261 259 goto REPLACE_INVALID;
262 260 }
263 261
264 262 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
265 263 }
266 264
267 265 if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
268 266 KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
269 267 KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
270 268 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
271 269 }
272 270
273 271 gb_val = (uint32_t)(*ib) << 24 |
274 272 (uint32_t)(*(ib + 1)) << 16 |
275 273 (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
276 274 } else {
277 275 if (ibtail - ib < 2) {
278 276 if (flag & KICONV_REPLACE_INVALID) {
279 277 ib = ibtail;
280 278 goto REPLACE_INVALID;
281 279 }
282 280
283 281 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
284 282 }
285 283
286 284 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
287 285 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
288 286 }
289 287
290 288 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
291 289 }
292 290
293 291 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
294 292 if (sz < 0) {
295 293 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
296 294 }
297 295
298 296 ib += isgbk4 ? 4 : 2;
299 297 ob += sz;
300 298 continue;
301 299
302 300 REPLACE_INVALID:
303 301 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
304 302 ib = oldib;
305 303 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
306 304 }
307 305
308 306 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
309 307 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
310 308 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
311 309 ret_val++;
312 310 }
313 311
314 312 *inlen = ibtail - ib;
315 313 *outlen = obtail - ob;
316 314
317 315 return (ret_val);
318 316 }
319 317
320 318 /*
321 319 * Encoding convertor from UTF-8 to GBK.
322 320 */
323 321 size_t
324 322 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
325 323 char **outbuf, size_t *outbytesleft, int *errno)
326 324 {
327 325
328 326 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
329 327 outbytesleft, errno, utf8_to_gbk);
330 328 }
331 329
332 330 /*
333 331 * String based encoding convertor from UTF-8 to GBK.
334 332 */
335 333 size_t
336 334 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
337 335 size_t *outlen, int flag, int *errno)
338 336 {
339 337 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
340 338 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
341 339 }
342 340
343 341 /*
344 342 * Encoding convertor from GBK to UTF-8.
345 343 */
346 344 size_t
347 345 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
348 346 char **outbuf, size_t *outbytesleft, int *errno)
349 347 {
350 348 uchar_t *ib;
351 349 uchar_t *ob;
352 350 uchar_t *ibtail;
353 351 uchar_t *obtail;
354 352 size_t ret_val;
355 353 int8_t sz;
356 354 uint32_t gb_val;
357 355
358 356 /* Check on the kiconv code conversion descriptor. */
359 357 if (kcd == NULL || kcd == (void *)-1) {
360 358 *errno = EBADF;
361 359 return ((size_t)-1);
362 360 }
363 361
364 362 /* If this is a state reset request, process and return. */
365 363 if (inbuf == NULL || *inbuf == NULL) {
366 364 return (0);
367 365 }
368 366
369 367 ret_val = 0;
370 368 ib = (uchar_t *)*inbuf;
371 369 ob = (uchar_t *)*outbuf;
372 370 ibtail = ib + *inbytesleft;
373 371 obtail = ob + *outbytesleft;
374 372
375 373 while (ib < ibtail) {
376 374 if (KICONV_IS_ASCII(*ib)) {
377 375 if (ob >= obtail) {
378 376 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
379 377 }
380 378
381 379 *ob++ = *ib++;
382 380 continue;
383 381 }
384 382
385 383 /*
386 384 * Issue EILSEQ error if the first byte is not a
387 385 * valid GBK leading byte.
388 386 */
389 387 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
390 388 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
391 389 }
392 390
393 391 /*
394 392 * Issue EINVAL error if input buffer has an incomplete
395 393 * character at the end of the buffer.
396 394 */
397 395 if (ibtail - ib < 2) {
398 396 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
399 397 }
400 398
401 399 /*
402 400 * Issue EILSEQ error if the remaining byte is not
403 401 * a valid GBK byte.
404 402 */
405 403 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
406 404 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
407 405 }
408 406
409 407 /* Now we have a valid GBK character. */
410 408 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
411 409 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
412 410
413 411 if (sz < 0) {
414 412 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
415 413 }
416 414
417 415 ib += 2;
418 416 ob += sz;
419 417 }
420 418
421 419 *inbuf = (char *)ib;
422 420 *inbytesleft = ibtail - ib;
423 421 *outbuf = (char *)ob;
424 422 *outbytesleft = obtail - ob;
425 423
426 424 return (ret_val);
427 425 }
428 426
429 427 /*
430 428 * String based encoding convertor from GBK to UTF-8.
431 429 */
432 430 size_t
433 431 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
434 432 size_t *outlen, int flag, int *errno)
435 433 {
436 434 uchar_t *ib;
437 435 uchar_t *ob;
438 436 uchar_t *ibtail;
439 437 uchar_t *obtail;
440 438 uchar_t *oldib;
441 439 size_t ret_val;
442 440 int8_t sz;
443 441 uint32_t gb_val;
444 442 boolean_t do_not_ignore_null;
445 443
446 444 ret_val = 0;
447 445 ib = (uchar_t *)inarray;
448 446 ob = (uchar_t *)outarray;
449 447 ibtail = ib + *inlen;
450 448 obtail = ob + *outlen;
451 449 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
452 450
453 451 while (ib < ibtail) {
454 452 if (*ib == '\0' && do_not_ignore_null)
455 453 break;
456 454
457 455 if (KICONV_IS_ASCII(*ib)) {
458 456 if (ob >= obtail) {
459 457 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
460 458 }
461 459
462 460 *ob++ = *ib++;
463 461 continue;
464 462 }
465 463
466 464 oldib = ib;
467 465
468 466 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
469 467 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
470 468 }
471 469
472 470 if (ibtail - ib < 2) {
473 471 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
474 472 }
475 473
476 474 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
477 475 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
478 476 }
479 477
480 478 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
481 479 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
482 480
483 481 if (sz < 0) {
484 482 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
485 483 }
486 484
487 485 ib += 2;
488 486 ob += sz;
489 487 continue;
490 488
491 489 REPLACE_INVALID:
492 490 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
493 491 ib = oldib;
494 492 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
495 493 }
496 494
497 495 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
498 496 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
499 497 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
500 498 ret_val++;
501 499 }
502 500
503 501 *inlen = ibtail - ib;
504 502 *outlen = obtail - ob;
505 503
506 504 return (ret_val);
507 505 }
508 506
509 507 /*
510 508 * Encoding convertor from UTF-8 to EUC-CN.
511 509 */
512 510 size_t
513 511 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
514 512 char **outbuf, size_t *outbytesleft, int *errno)
515 513 {
516 514 return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
517 515 outbytesleft, errno, utf8_to_gb2312);
518 516 }
519 517
520 518 /*
521 519 * String based encoding convertor from UTF-8 to EUC-CN.
522 520 */
523 521 size_t
524 522 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
525 523 size_t *outlen, int flag, int *errno)
526 524 {
527 525 return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
528 526 (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
529 527 }
530 528
531 529 /*
532 530 * Encoding converto from EUC-CN to UTF-8 code.
533 531 */
534 532 size_t
535 533 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
536 534 char **outbuf, size_t *outbytesleft, int *errno)
537 535 {
538 536 uchar_t *ib;
539 537 uchar_t *ob;
540 538 uchar_t *ibtail;
541 539 uchar_t *obtail;
542 540 size_t ret_val;
543 541 int8_t sz;
544 542
545 543 /* Check on the kiconv code conversion descriptor. */
546 544 if (kcd == NULL || kcd == (void *)-1) {
547 545 *errno = EBADF;
548 546 return ((size_t)-1);
549 547 }
550 548
551 549 /* If this is a state reset request, process and return. */
552 550 if (inbuf == NULL || *inbuf == NULL) {
553 551 return (0);
554 552 }
555 553
556 554 ret_val = 0;
557 555 ib = (uchar_t *)*inbuf;
558 556 ob = (uchar_t *)*outbuf;
559 557 ibtail = ib + *inbytesleft;
560 558 obtail = ob + *outbytesleft;
561 559
562 560 while (ib < ibtail) {
563 561 if (KICONV_IS_ASCII(*ib)) {
564 562 if (ob >= obtail) {
565 563 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
566 564 }
567 565
568 566 *ob++ = *ib++;
569 567 continue;
570 568 }
571 569
572 570 /*
573 571 * Issue EILSEQ error if the first byte is not a
574 572 * valid GB2312 leading byte.
575 573 */
576 574 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
577 575 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
578 576 }
579 577
580 578 /*
581 579 * Issue EINVAL error if input buffer has an incomplete
582 580 * character at the end of the buffer.
583 581 */
584 582 if (ibtail - ib < 2) {
585 583 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
586 584 }
587 585
588 586 /*
589 587 * Issue EILSEQ error if the remaining byte is not
590 588 * a valid GB2312 byte.
591 589 */
592 590 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
593 591 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
594 592 }
595 593
596 594 /* Now we have a valid GB2312 character */
597 595 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
598 596 if (sz < 0) {
599 597 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
600 598 }
601 599
602 600 ib += 2;
603 601 ob += sz;
604 602 }
605 603
606 604 *inbuf = (char *)ib;
607 605 *inbytesleft = ibtail - ib;
608 606 *outbuf = (char *)ob;
609 607 *outbytesleft = obtail - ob;
610 608
611 609 return (ret_val);
612 610 }
613 611
614 612 /*
615 613 * String based encoding convertor from EUC-CN to UTF-8.
616 614 */
617 615 size_t
618 616 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
619 617 size_t *outlen, int flag, int *errno)
620 618 {
621 619 uchar_t *ib;
622 620 uchar_t *ob;
623 621 uchar_t *ibtail;
624 622 uchar_t *obtail;
625 623 uchar_t *oldib;
626 624 size_t ret_val;
627 625 int8_t sz;
628 626 boolean_t do_not_ignore_null;
629 627
630 628 ret_val = 0;
631 629 ib = (uchar_t *)inarray;
632 630 ob = (uchar_t *)outarray;
633 631 ibtail = ib + *inlen;
634 632 obtail = ob + *outlen;
635 633 do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
636 634
637 635 while (ib < ibtail) {
638 636 if (*ib == '\0' && do_not_ignore_null)
639 637 break;
640 638
641 639 if (KICONV_IS_ASCII(*ib)) {
642 640 if (ob >= obtail) {
643 641 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
644 642 }
645 643
646 644 *ob++ = *ib++;
647 645 continue;
648 646 }
649 647
650 648 oldib = ib;
651 649
652 650 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
653 651 KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
654 652 }
655 653
656 654 if (ibtail - ib < 2) {
657 655 KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
658 656 }
659 657
660 658 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
661 659 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
662 660 }
663 661
664 662 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
665 663 if (sz < 0) {
666 664 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
667 665 }
668 666
669 667 ib += 2;
670 668 ob += sz;
671 669 continue;
672 670
673 671 REPLACE_INVALID:
674 672 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
675 673 ib = oldib;
676 674 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
677 675 }
678 676
679 677 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
680 678 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
681 679 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
682 680 ret_val++;
683 681 }
684 682
685 683 *inlen = ibtail - ib;
686 684 *outlen = obtail - ob;
687 685
688 686 return (ret_val);
689 687 }
690 688
691 689 /*
692 690 * Convert single GB2312 character to UTF-8.
693 691 * Return: > 0 - Converted successfully
694 692 * = -1 - E2BIG
695 693 */
696 694 static int8_t
697 695 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
698 696 size_t *ret_val)
699 697 {
700 698 size_t index;
701 699 int8_t sz;
702 700 uchar_t *u8;
703 701
704 702 /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
705 703 index = b1 * 94 + b2 - 0x3BBF;
706 704
707 705 if (index >= KICONV_GB2312_UTF8_MAX)
708 706 index = KICONV_GB2312_UTF8_MAX - 1; /* Map to 0xEFBFBD */
709 707
710 708 u8 = kiconv_gb2312_utf8[index];
711 709 sz = u8_number_of_bytes[u8[0]];
712 710
713 711 if (obtail - ob < sz) {
714 712 *ret_val = (size_t)-1;
715 713 return (-1);
716 714 }
717 715
718 716 for (index = 0; index < sz; index++)
719 717 *ob++ = u8[index];
720 718
721 719 /*
722 720 * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
723 721 * elements, so need to ckeck more.
724 722 */
725 723 if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
726 724 u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
727 725 u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
728 726 u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
729 727 (*ret_val)++;
730 728
731 729 return (sz);
732 730 }
733 731
734 732 /*
735 733 * Convert single GB18030 or GBK character to UTF-8.
736 734 * Return: > 0 - Converted successfully
737 735 * = -1 - E2BIG
738 736 */
739 737 static int8_t
740 738 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
741 739 boolean_t isgbk4)
742 740 {
743 741 size_t index;
744 742 int8_t sz;
745 743 uchar_t u8array[4];
746 744 uchar_t *u8;
747 745
748 746 if (isgbk4) {
749 747 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
750 748 uint32_t u32;
751 749
752 750 /*
753 751 * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
754 752 * (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
755 753 * (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
756 754 * (gbk_val & 0xFF - 0x30)+
757 755 * KICONV_SC_PLANE1_UCS4_START;
758 756 */
759 757 u32 = (gbk_val >> 24) * 12600 +
760 758 ((gbk_val & 0xFF0000) >> 16) * 1260 +
761 759 ((gbk_val & 0xFF00) >> 8) * 10 +
762 760 (gbk_val & 0xFF) - 0x1BA0FA;
763 761 u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
764 762 u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
765 763 u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
766 764 u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
767 765 u8 = u8array;
768 766 index = 1;
769 767 } else {
770 768 index = kiconv_binsearch(gbk_val,
771 769 kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
772 770 u8 = kiconv_gbk4_utf8[index].u8;
773 771 }
774 772 } else {
775 773 index = kiconv_binsearch(gbk_val,
776 774 kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
777 775 u8 = kiconv_gbk_utf8[index].u8;
778 776 }
779 777
780 778 sz = u8_number_of_bytes[u8[0]];
781 779 if (obtail - ob < sz) {
782 780 *ret_val = (size_t)-1;
783 781 return (-1);
784 782 }
785 783
786 784 if (index == 0)
787 785 (*ret_val)++; /* Non-identical conversion */
788 786
789 787 for (index = 0; index < sz; index++)
790 788 *ob++ = u8[index];
791 789
792 790 return (sz);
793 791 }
794 792
795 793 /*
796 794 * Convert single UTF-8 character to GB18030.
797 795 * Return: > 0 - Converted successfully
798 796 * = -1 - E2BIG
799 797 */
800 798 /* ARGSUSED */
801 799 static int8_t
802 800 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
803 801 uchar_t *ob, uchar_t *obtail, size_t *ret)
804 802 {
805 803 size_t index;
806 804 int8_t gbklen;
807 805 uint32_t gbkcode;
808 806
809 807 if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
810 808 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
811 809 uint32_t u32;
812 810
813 811 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
814 812 ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
815 813 KICONV_SC_PLANE1_UCS4_START;
816 814 gbkcode = ((u32 / 12600 + 0x90) << 24) |
817 815 (((u32 % 12600) / 1260 + 0x30) << 16) |
818 816 (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
819 817 gbklen = 4;
820 818 index = 1;
821 819 } else {
822 820 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
823 821 KICONV_UTF8_GB18030_MAX);
824 822 gbkcode = kiconv_utf8_gb18030[index].value;
825 823 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
826 824 }
827 825
828 826 if (obtail - ob < gbklen) {
829 827 *ret = (size_t)-1;
830 828 return (-1);
831 829 }
832 830
833 831 if (index == 0)
834 832 (*ret)++; /* Non-identical conversion */
835 833
836 834 if (gbklen == 2) {
837 835 *ob++ = (uchar_t)(gbkcode >> 8);
838 836 } else if (gbklen == 4) {
839 837 *ob++ = (uchar_t)(gbkcode >> 24);
840 838 *ob++ = (uchar_t)(gbkcode >> 16);
841 839 *ob++ = (uchar_t)(gbkcode >> 8);
842 840 }
843 841 *ob = (uchar_t)(gbkcode & 0xFF);
844 842
845 843 return (gbklen);
846 844 }
847 845
848 846 /*
849 847 * Convert single UTF-8 character to GBK.
850 848 * Return: > 0 - Converted successfully
851 849 * = -1 - E2BIG
852 850 */
853 851 /* ARGSUSED */
854 852 static int8_t
855 853 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
856 854 uchar_t *ob, uchar_t *obtail, size_t *ret)
857 855 {
858 856 size_t index;
859 857 int8_t gbklen;
860 858 uint32_t gbkcode;
861 859
862 860 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
863 861 KICONV_UTF8_GB18030_MAX);
864 862 gbkcode = kiconv_utf8_gb18030[index].value;
865 863 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
866 864
867 865 /* GBK and GB18030 share the same table, so check the length. */
868 866 if (gbklen == 4) {
869 867 index = 0;
870 868 gbkcode = kiconv_utf8_gb18030[index].value;
871 869 gbklen = 1;
872 870 }
873 871
874 872 if (obtail - ob < gbklen) {
875 873 *ret = (size_t)-1;
876 874 return (-1);
877 875 }
878 876
879 877 if (index == 0)
880 878 (*ret)++; /* Non-identical conversion */
881 879
882 880 if (gbklen > 1)
883 881 *ob++ = (uchar_t)(gbkcode >> 8);
884 882 *ob = (uchar_t)(gbkcode & 0xFF);
885 883
886 884 return (gbklen);
887 885 }
888 886
889 887 /*
890 888 * Convert single UTF-8 character to GB2312.
891 889 * Return: > 0 - Converted successfully
892 890 * = -1 - E2BIG
893 891 */
894 892 /* ARGSUSED */
895 893 static int8_t
896 894 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
897 895 uchar_t *ob, uchar_t *obtail, size_t *ret)
898 896 {
899 897 size_t index;
900 898 int8_t gblen;
901 899 uint32_t gbcode;
902 900
903 901 index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
904 902 KICONV_UTF8_GB2312_MAX);
905 903 gbcode = kiconv_utf8_gb2312[index].value;
906 904 gblen = (gbcode <= 0xFF) ? 1 : 2;
907 905
908 906 if (obtail - ob < gblen) {
909 907 *ret = (size_t)-1;
910 908 return (-1);
911 909 }
912 910
913 911 if (index == 0)
914 912 (*ret)++;
915 913
916 914 if (gblen > 1)
917 915 *ob++ = (uchar_t)(gbcode >> 8);
918 916 *ob = (uchar_t)(gbcode & 0xFF);
919 917
920 918 return (gblen);
921 919 }
922 920
923 921 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
924 922 {
925 923 "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
926 924 kiconv_close_to_cck, kiconvstr_to_gb18030
927 925 },
928 926 {
929 927 "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
930 928 close_fr_sc, kiconvstr_fr_gb18030
931 929 },
932 930 {
933 931 "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
934 932 kiconv_close_to_cck, kiconvstr_to_gbk
935 933 },
936 934 {
937 935 "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
938 936 close_fr_sc, kiconvstr_fr_gbk
939 937 },
940 938 {
941 939 "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
942 940 kiconv_close_to_cck, kiconvstr_to_euccn
943 941 },
944 942 {
945 943 "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
946 944 close_fr_sc, kiconvstr_fr_euccn
947 945 },
948 946 };
949 947
950 948 static kiconv_module_info_t kiconv_sc_info = {
951 949 "kiconv_sc", /* module name */
952 950 sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
953 951 kiconv_sc_ops_tbl,
954 952 0,
955 953 NULL,
956 954 NULL,
957 955 0
↓ open down ↓ |
920 lines elided |
↑ open up ↑ |
958 956 };
959 957
960 958 static struct modlkiconv modlkiconv_sc = {
961 959 &mod_kiconvops,
962 960 "kiconv Simplified Chinese module 1.0",
963 961 &kiconv_sc_info
964 962 };
965 963
966 964 static struct modlinkage modlinkage = {
967 965 MODREV_1,
968 - (void *)&modlkiconv_sc,
969 - NULL
966 + { (void *)&modlkiconv_sc, NULL }
970 967 };
971 968
972 969 int
973 970 _init(void)
974 971 {
975 972 int err;
976 973
977 974 err = mod_install(&modlinkage);
978 975 if (err)
979 976 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
980 977
981 978 return (err);
982 979 }
983 980
984 981 int
985 982 _fini(void)
986 983 {
987 984 int err;
988 985
989 986 /*
990 987 * If this module is being used, then, we cannot remove the module.
991 988 * The following checking will catch pretty much all usual cases.
992 989 *
993 990 * Any remaining will be catached by the kiconv_unregister_module()
994 991 * during mod_remove() at below.
995 992 */
996 993 if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
997 994 return (EBUSY);
998 995
999 996 err = mod_remove(&modlinkage);
1000 997 if (err)
1001 998 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1002 999
1003 1000 return (err);
1004 1001 }
1005 1002
1006 1003 int
1007 1004 _info(struct modinfo *modinfop)
1008 1005 {
1009 1006 return (mod_info(&modlinkage, modinfop));
1010 1007 }
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX