1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/systm.h>
  32 #include <sys/debug.h>
  33 #include <sys/kmem.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/byteorder.h>
  36 #include <sys/errno.h>
  37 #include <sys/modctl.h>
  38 #include <sys/u8_textprep.h>
  39 #include <sys/kiconv.h>
  40 #include <sys/kiconv_cck_common.h>
  41 #include <sys/kiconv_tc.h>
  42 #include <sys/kiconv_big5_utf8.h>
  43 #include <sys/kiconv_euctw_utf8.h>
  44 #include <sys/kiconv_hkscs_utf8.h>
  45 #include <sys/kiconv_cp950hkscs_utf8.h>
  46 #include <sys/kiconv_utf8_big5.h>
  47 #include <sys/kiconv_utf8_euctw.h>
  48 #include <sys/kiconv_utf8_cp950hkscs.h>
  49 #include <sys/kiconv_utf8_hkscs.h>
  50 
  51 /* 4 HKSCS-2004 code points map to 2 Unicode code points separately. */
  52 static uchar_t hkscs_special_sequence[][4] = {
  53         { 0xc3, 0x8a, 0xcc, 0x84 },     /* 0x8862 */
  54         { 0xc3, 0x8a, 0xcc, 0x8c },     /* 0x8864 */
  55         { 0xc3, 0xaa, 0xcc, 0x84 },     /* 0x88a3 */
  56         { 0xc3, 0xaa, 0xcc, 0x8c }      /* 0x88a5 */
  57 };
  58 
  59 /* 4 Unicode code point pair map to 1 HKSCS-2004 code point. */
  60 static uint32_t ucs_special_sequence[] = {
  61         0x8866,         /* U+00ca */
  62         0x8862,         /* U+00ca U+0304 */
  63         0x8864,         /* U+00ca U+030c */
  64         0x88a7,         /* U+00ea */
  65         0x88a3,         /* U+00ea U+0304 */
  66         0x88a5          /* U+00ea U+030c */
  67 };
  68 
  69 typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
  70         uchar_t *obtail, size_t *ret_val);
  71 
  72 static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  73         uchar_t *ob, uchar_t *obtail, size_t *ret_val);
  74 static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  75         uchar_t *ob, uchar_t *obtail, size_t *ret_val);
  76 static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
  77         uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
  78 static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  79         uchar_t *ob, uchar_t *obtail, size_t *ret_val);
  80 static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
  81         size_t *ret_val);
  82 static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
  83         uchar_t *obtail, size_t *ret_val);
  84 static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
  85         uchar_t *obtail, size_t *ret_val);
  86 static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
  87         uchar_t *ob, uchar_t *obtail, size_t *ret_val);
  88 static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
  89         uchar_t byte2);
  90 
  91 #define KICONV_TC_BIG5          (0x01)
  92 #define KICONV_TC_BIG5HKSCS     (0x02)
  93 #define KICONV_TC_CP950HKSCS    (0x03)
  94 #define KICONV_TC_EUCTW         (0x04)
  95 #define KICONV_TC_MAX_MAGIC_ID  (0x04)
  96 
  97 static void *
  98 open_fr_big5()
  99 {
 100         return ((void *)KICONV_TC_BIG5);
 101 }
 102 
 103 static void *
 104 open_fr_big5hkscs()
 105 {
 106         return ((void *)KICONV_TC_BIG5HKSCS);
 107 }
 108 
 109 static void *
 110 open_fr_cp950hkscs()
 111 {
 112         return ((void *)KICONV_TC_CP950HKSCS);
 113 }
 114 
 115 static void *
 116 open_fr_euctw()
 117 {
 118         return ((void *)KICONV_TC_EUCTW);
 119 }
 120 
 121 static int
 122 close_fr_tc(void *s)
 123 {
 124         if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
 125                 return (EBADF);
 126 
 127         return (0);
 128 }
 129 
 130 /*
 131  * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
 132  */
 133 static size_t
 134 kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
 135         char **outbuf, size_t *outbytesleft, int *errno,
 136         kiconv_big5toutf8_t ptr_big5touf8)
 137 {
 138         uchar_t         *ib;
 139         uchar_t         *ob;
 140         uchar_t         *ibtail;
 141         uchar_t         *obtail;
 142         size_t          ret_val;
 143         int8_t          sz;
 144         uint32_t        big5_val;
 145 
 146         /* Check on the kiconv code conversion descriptor. */
 147         if (kcd == NULL || kcd == (void *)-1) {
 148                 *errno = EBADF;
 149                 return ((size_t)-1);
 150         }
 151 
 152         /* If this is a state reset request, process and return. */
 153         if (inbuf == NULL || *inbuf == NULL) {
 154                 return (0);
 155         }
 156 
 157         ret_val = 0;
 158         ib = (uchar_t *)*inbuf;
 159         ob = (uchar_t *)*outbuf;
 160         ibtail = ib + *inbytesleft;
 161         obtail = ob + *outbytesleft;
 162 
 163         while (ib < ibtail) {
 164                 if (KICONV_IS_ASCII(*ib)) {
 165                         if (ob >= obtail) {
 166                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 167                         }
 168 
 169                         *ob++ = *ib++;
 170                         continue;
 171                 }
 172 
 173                 /*
 174                  * Issue EILSEQ error if the first byte is not a
 175                  * valid BIG5/HKSCS leading byte.
 176                  */
 177                 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
 178                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 179                 }
 180 
 181                 /*
 182                  * Issue EINVAL error if input buffer has an incomplete
 183                  * character at the end of the buffer.
 184                  */
 185                 if (ibtail - ib < 2) {
 186                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 187                 }
 188 
 189                 /*
 190                  * Issue EILSEQ error if the remaining bytes is not
 191                  * a valid BIG5/HKSCS byte.
 192                  */
 193                 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
 194                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 195                 }
 196 
 197                 /* Now we have a valid BIG5/HKSCS character. */
 198                 big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 199                 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
 200 
 201                 if (sz < 0) {
 202                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 203                 }
 204 
 205                 ib += 2;
 206                 ob += sz;
 207         }
 208 
 209         *inbuf = (char *)ib;
 210         *inbytesleft = ibtail - ib;
 211         *outbuf = (char *)ob;
 212         *outbytesleft = obtail - ob;
 213 
 214         return (ret_val);
 215 }
 216 
 217 /*
 218  * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
 219  * to UTF-8.
 220  */
 221 static size_t
 222 kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
 223     size_t *outlen, int flag, int *errno,
 224     kiconv_big5toutf8_t ptr_big5touf8)
 225 {
 226         uchar_t         *oldib;
 227         uchar_t         *ibtail;
 228         uchar_t         *obtail;
 229         size_t          ret_val;
 230         int8_t          sz;
 231         uint32_t        big5_val;
 232         boolean_t       do_not_ignore_null;
 233 
 234         ret_val = 0;
 235         ibtail = ib + *inlen;
 236         obtail = ob + *outlen;
 237         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 238 
 239         while (ib < ibtail) {
 240                 if (*ib == '\0' && do_not_ignore_null)
 241                         break;
 242 
 243                 if (KICONV_IS_ASCII(*ib)) {
 244                         if (ob >= obtail) {
 245                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 246                         }
 247 
 248                         *ob++ = *ib++;
 249                         continue;
 250                 }
 251 
 252                 oldib = ib;
 253 
 254                 if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
 255                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 256                 }
 257 
 258                 if (ibtail - ib < 2) {
 259                         KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
 260                 }
 261 
 262                 if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
 263                         KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 264                 }
 265 
 266                 big5_val = *ib++;
 267                 big5_val = (big5_val << 8) | *ib++;
 268                 sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);
 269 
 270                 if (sz < 0) {
 271                         ib = oldib;
 272                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 273                 }
 274 
 275                 ob += sz;
 276                 continue;
 277 
 278 REPLACE_INVALID:
 279                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 280                         ib = oldib;
 281                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 282                 }
 283 
 284                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 285                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 286                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 287                 ret_val++;
 288         }
 289 
 290         *inlen = ibtail - ib;
 291         *outlen = obtail - ob;
 292 
 293         return (ret_val);
 294 }
 295 
 296 /*
 297  * Encoding convertor from BIG5 to UTF-8.
 298  */
 299 static size_t
 300 kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
 301         size_t *outbytesleft, int *errno)
 302 {
 303         return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 304             outbytesleft, errno, big5_to_utf8));
 305 }
 306 
 307 /*
 308  * String based encoding convertor from BIG5 to UTF-8.
 309  */
 310 static size_t
 311 kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
 312     size_t *outlen, int flag, int *errno)
 313 {
 314         return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 315             (uchar_t *)outarray, outlen, flag, errno,
 316             big5_to_utf8));
 317 }
 318 
 319 /*
 320  * Encoding convertor from BIG5-HKSCS to UTF-8.
 321  */
 322 static size_t
 323 kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 324     char **outbuf, size_t *outbytesleft, int *errno)
 325 {
 326         return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 327             outbytesleft, errno, big5hkscs_to_utf8);
 328 }
 329 
 330 /*
 331  * String based encoding convertor from BIG5-HKSCS to UTF-8.
 332  */
 333 static size_t
 334 kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
 335         size_t *outlen, int flag, int *errno)
 336 {
 337         return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 338             (uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
 339 }
 340 
 341 /*
 342  * Encoding convertor from CP950-HKSCS to UTF-8.
 343  */
 344 static size_t
 345 kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 346     char **outbuf, size_t *outbytesleft, int *errno)
 347 {
 348         return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 349             outbytesleft, errno, cp950hkscs_to_utf8);
 350 }
 351 
 352 /*
 353  * String based encoding convertor from CP950-HKSCS to UTF-8.
 354  */
 355 static size_t
 356 kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
 357         size_t *outlen, int flag, int *errno)
 358 {
 359         return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 360             (uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
 361 }
 362 
 363 /*
 364  * Encoding convertor from EUC-TW to UTF-8.
 365  */
 366 static size_t
 367 kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
 368         char **outbuf, size_t *outbytesleft, int *errno)
 369 {
 370         uchar_t         *ib;
 371         uchar_t         *ob;
 372         uchar_t         *ibtail;
 373         uchar_t         *obtail;
 374         uchar_t         *oldib;
 375         size_t          ret_val;
 376         size_t          plane_no;
 377         int8_t          sz;
 378         uint32_t        euctw_val;
 379         boolean_t       isplane1;
 380 
 381         /* Check on the kiconv code conversion descriptor. */
 382         if (kcd == NULL || kcd == (void *)-1) {
 383                 *errno = EBADF;
 384                 return ((size_t)-1);
 385         }
 386 
 387         /* If this is a state reset request, process and return. */
 388         if (inbuf == NULL || *inbuf == NULL) {
 389                 return (0);
 390         }
 391 
 392         ret_val = 0;
 393         ib = (uchar_t *)*inbuf;
 394         ob = (uchar_t *)*outbuf;
 395         ibtail = ib + *inbytesleft;
 396         obtail = ob + *outbytesleft;
 397 
 398         while (ib < ibtail) {
 399                 if (KICONV_IS_ASCII(*ib)) {
 400                         if (ob >= obtail) {
 401                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 402                         }
 403 
 404                         *ob++ = *ib++;
 405                         continue;
 406                 }
 407 
 408                 /*
 409                  * Issue EILSEQ error if the first byte is not a
 410                  * valid EUC-TW leading byte.
 411                  */
 412                 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
 413                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 414                 }
 415 
 416                 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
 417                     B_FALSE : B_TRUE;
 418 
 419                 /*
 420                  * Issue EINVAL error if input buffer has an incomplete
 421                  * character at the end of the buffer.
 422                  */
 423                 if (ibtail - ib < (isplane1 ? 2 : 4)) {
 424                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 425                 }
 426 
 427                 oldib = ib;
 428                 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
 429 
 430                 /*
 431                  * Issue EILSEQ error if the remaining bytes are not
 432                  * valid EUC-TW bytes.
 433                  */
 434                 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
 435                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 436                 }
 437 
 438                 if (! isplane1)
 439                         ib += 2;
 440 
 441                 /* Now we have a valid EUC-TW character. */
 442                 euctw_val = *ib++;
 443                 euctw_val = (euctw_val << 8) | *ib++;
 444                 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
 445 
 446                 if (sz < 0) {
 447                         ib = oldib;
 448                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 449                 }
 450 
 451                 ob += sz;
 452         }
 453 
 454         *inbuf = (char *)ib;
 455         *inbytesleft = ibtail - ib;
 456         *outbuf = (char *)ob;
 457         *outbytesleft = obtail - ob;
 458 
 459         return (ret_val);
 460 }
 461 
 462 /*
 463  * String based encoding convertor from EUC-TW to UTF-8.
 464  */
 465 static size_t
 466 kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
 467         size_t *outlen, int flag, int *errno)
 468 {
 469         uchar_t         *ib;
 470         uchar_t         *ob;
 471         uchar_t         *ibtail;
 472         uchar_t         *obtail;
 473         uchar_t         *oldib;
 474         size_t          ret_val;
 475         size_t          plane_no;
 476         int8_t          sz;
 477         uint32_t        euctw_val;
 478         boolean_t       isplane1;
 479         boolean_t       do_not_ignore_null;
 480 
 481         ret_val = 0;
 482         ib = (uchar_t *)inarray;
 483         ob = (uchar_t *)outarray;
 484         ibtail = ib + *inlen;
 485         obtail = ob + *outlen;
 486         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 487 
 488         while (ib < ibtail) {
 489                 if (*ib == '\0' && do_not_ignore_null)
 490                         break;
 491 
 492                 if (KICONV_IS_ASCII(*ib)) {
 493                         if (ob >= obtail) {
 494                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 495                         }
 496 
 497                         *ob++ = *ib++;
 498                         continue;
 499                 }
 500 
 501                 oldib = ib;
 502 
 503                 if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
 504                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 505                 }
 506 
 507                 isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
 508                     B_FALSE : B_TRUE;
 509 
 510                 if (ibtail - ib < (isplane1 ? 2 : 4)) {
 511                         if (flag & KICONV_REPLACE_INVALID) {
 512                                 ib = ibtail;
 513                                 goto REPLACE_INVALID;
 514                         }
 515 
 516                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 517                 }
 518 
 519                 plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;
 520 
 521                 if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
 522                         KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
 523                 }
 524 
 525                 if (! isplane1)
 526                         ib += 2;
 527 
 528                 euctw_val = *ib++;
 529                 euctw_val = (euctw_val << 8) | *ib++;
 530                 sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);
 531 
 532                 if (sz < 0) {
 533                         ib = oldib;
 534                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 535                 }
 536 
 537                 ob += sz;
 538                 continue;
 539 
 540 REPLACE_INVALID:
 541                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 542                         ib = oldib;
 543                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 544                 }
 545 
 546                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 547                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 548                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 549                 ret_val++;
 550         }
 551 
 552         *inlen = ibtail - ib;
 553         *outlen = obtail - ob;
 554 
 555         return (ret_val);
 556 }
 557 
 558 /*
 559  * Encoding convertor from UTF-8 to BIG5.
 560  */
 561 static size_t
 562 kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
 563         char **outbuf, size_t *outbytesleft, int *errno)
 564 {
 565         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 566             outbytesleft, errno, utf8_to_big5);
 567 }
 568 
 569 /*
 570  * String based encoding convertor from UTF-8 to BIG5.
 571  */
 572 static size_t
 573 kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
 574         size_t *outlen, int flag, int *errno)
 575 {
 576         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 577             (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
 578 }
 579 
 580 /*
 581  * Encoding convertor from UTF-8 to EUC-TW.
 582  */
 583 static size_t
 584 kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
 585         char **outbuf, size_t *outbytesleft, int *errno)
 586 {
 587         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 588             outbytesleft, errno, utf8_to_euctw);
 589 }
 590 
 591 /*
 592  * String based encoding convertor from UTF-8 to EUC-TW.
 593  */
 594 static size_t
 595 kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
 596         size_t *outlen, int flag, int *errno)
 597 {
 598         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 599             (uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
 600 }
 601 
 602 /*
 603  * Encoding convertor from UTF-8 to CP950HKSCS.
 604  */
 605 static size_t
 606 kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 607         char **outbuf, size_t *outbytesleft, int *errno)
 608 {
 609         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 610             outbytesleft, errno, utf8_to_cp950hkscs);
 611 }
 612 
 613 /*
 614  * String based encoding convertor from UTF-8 to CP950HKSCS.
 615  */
 616 static size_t
 617 kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
 618         size_t *outlen, int flag, int *errno)
 619 {
 620         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 621             (uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
 622 }
 623 
 624 /*
 625  * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
 626  */
 627 static size_t
 628 kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 629         char **outbuf, size_t *outbytesleft, int *errno)
 630 {
 631         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 632             outbytesleft, errno, utf8_to_big5hkscs);
 633 }
 634 
 635 /*
 636  * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
 637  */
 638 static size_t
 639 kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
 640     size_t *outlen, int flag, int *errno)
 641 {
 642         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 643             (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
 644 }
 645 
 646 /*
 647  * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
 648  * Return: > 0  - Converted successfully
 649  *         = -1 - E2BIG
 650  */
 651 static int8_t
 652 big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
 653         size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
 654 {
 655         size_t  index;
 656         int8_t  sz;
 657         uchar_t *u8;
 658 
 659         index = kiconv_binsearch(big5_val, table, nitems);
 660         u8 = table[index].u8;
 661         sz = u8_number_of_bytes[u8[0]];
 662 
 663         if (obtail - ob < sz) {
 664                 *ret_val = (size_t)-1;
 665                 return (-1);
 666         }
 667 
 668         if (index == 0)
 669                 (*ret_val)++;   /* Non-identical conversion */
 670 
 671         for (index = 0; index < sz; index++)
 672                 *ob++ = u8[index];
 673 
 674         return (sz);
 675 }
 676 
 677 /*
 678  * Convert single BIG5 character to UTF-8.
 679  */
 680 static int8_t
 681 big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 682 {
 683         return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
 684             kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
 685 }
 686 
 687 /*
 688  * Convert single CP950-HKSCS character to UTF-8.
 689  */
 690 static int8_t
 691 cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
 692         size_t *ret_val)
 693 {
 694         return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
 695             kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
 696 }
 697 
 698 /*
 699  * Calculate unicode value for some CNS planes which fall in Unicode
 700  * UDA range.
 701  */
 702 static uint32_t
 703 get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
 704 {
 705         /*
 706          * CNS Plane 15 is pre-allocated, so need move Plane 16 to back 15
 707          * to compute the Unicode value.
 708          */
 709         if (plane_no == 16)
 710                 --plane_no;
 711 
 712         /* 0xF0000 + (plane_no - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1) */
 713         return (8836 * plane_no + 94 * b1 + b2 + 0xD2611);
 714 }
 715 
 716 /*
 717  * Convert single EUC-TW character to UTF-8.
 718  * Return: > 0  - Converted successfully
 719  *         = -1 - E2BIG
 720  */
 721 static int8_t
 722 euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
 723         uchar_t *obtail, size_t *ret_val)
 724 {
 725         uint32_t u32;
 726         size_t  index;
 727         int8_t  sz;
 728         uchar_t udc[4];
 729         uchar_t *u8;
 730 
 731         switch (plane_no) {
 732         case 1:
 733                 index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
 734                     KICONV_CNS1_UTF8_MAX);
 735                 u8 = kiconv_cns1_utf8[index].u8;
 736                 break;
 737         case 2:
 738                 index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
 739                     KICONV_CNS2_UTF8_MAX);
 740                 u8 = kiconv_cns2_utf8[index].u8;
 741                 break;
 742         case 3:
 743                 index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
 744                     KICONV_CNS3_UTF8_MAX);
 745                 u8 = kiconv_cns3_utf8[index].u8;
 746                 break;
 747         case 4:
 748                 index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
 749                     KICONV_CNS4_UTF8_MAX);
 750                 u8 = kiconv_cns4_utf8[index].u8;
 751                 break;
 752         case 5:
 753                 index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
 754                     KICONV_CNS5_UTF8_MAX);
 755                 u8 = kiconv_cns5_utf8[index].u8;
 756                 break;
 757         case 6:
 758                 index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
 759                     KICONV_CNS6_UTF8_MAX);
 760                 u8 = kiconv_cns6_utf8[index].u8;
 761                 break;
 762         case 7:
 763                 index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
 764                     KICONV_CNS7_UTF8_MAX);
 765                 u8 = kiconv_cns7_utf8[index].u8;
 766                 break;
 767         case 12:
 768         case 13:
 769         case 14:
 770         case 16:
 771                 u32 = get_unicode_from_UDA(plane_no,
 772                     (euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
 773                 /*
 774                  * As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
 775                  * will occupy 4 bytes.
 776                  */
 777                 udc[0] = 0xF3;
 778                 udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
 779                 udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
 780                 udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
 781                 u8 = udc;
 782                 index = 1;
 783                 break;
 784         case 15:
 785                 index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
 786                     KICONV_CNS15_UTF8_MAX);
 787                 u8 = kiconv_cns15_utf8[index].u8;
 788                 break;
 789         default:
 790                 index = 0;
 791                 u8 = kiconv_cns1_utf8[index].u8;
 792         }
 793 
 794         sz = u8_number_of_bytes[u8[0]];
 795         if (obtail - ob < sz) {
 796                 *ret_val = (size_t)-1;
 797                 return (-1);
 798         }
 799 
 800         if (index == 0)
 801                 (*ret_val)++;
 802 
 803         for (index = 0; index < sz; index++)
 804                 *ob++ = u8[index];
 805 
 806         return (sz);
 807 }
 808 
 809 /*
 810  * Convert single HKSCS character to UTF-8.
 811  * Return: > 0  - Converted successfully
 812  *         = -1 - E2BIG
 813  */
 814 static int8_t
 815 big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
 816         size_t *ret_val)
 817 {
 818         size_t  index;
 819         int8_t  sz;
 820         uchar_t *u8;
 821 
 822         index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
 823             KICONV_HKSCS_UTF8_MAX);
 824         u8 = kiconv_hkscs_utf8[index].u8;
 825 
 826         /*
 827          * Single HKSCS-2004 character may map to 2 Unicode
 828          * code points.
 829          */
 830         if (u8[0] == 0xFF) {
 831                 u8 = hkscs_special_sequence[u8[1]];
 832                 sz = 4;
 833         } else {
 834                 sz = u8_number_of_bytes[u8[0]];
 835         }
 836 
 837         if (obtail - ob < sz) {
 838                 *ret_val = (size_t)-1;
 839                 return (-1);
 840         }
 841 
 842         if (index == 0)
 843                 (*ret_val)++;   /* Non-identical conversion. */
 844 
 845         for (index = 0; index < sz; index++)
 846                 *ob++ = u8[index];
 847 
 848         return (sz);
 849 }
 850 
 851 /*
 852  * Convert single UTF-8 character to EUC-TW.
 853  * Return: > 0  - Converted successfully
 854  *         = -1 - E2BIG
 855  */
 856 /* ARGSUSED */
 857 static int8_t
 858 utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 859         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 860 {
 861         size_t          index;
 862         size_t          plane_no;
 863         uchar_t         byte1;
 864         uchar_t         byte2;
 865 
 866         if (utf8 >= KICONV_TC_UDA_UTF8_START &&
 867             utf8 <= KICONV_TC_UDA_UTF8_END) {
 868                 /*
 869                  * Calculate EUC-TW code if utf8 is in Unicode
 870                  * Private Plane 15.
 871                  */
 872                 index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
 873                     ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
 874                     KICONV_TC_UDA_UCS4_START;
 875                 plane_no = 12 + index / 8836;
 876                 byte1 = 0xA1 + (index % 8836) / 94;
 877                 byte2 = 0xA1 + index % 94;
 878 
 879                 /* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
 880                 if (plane_no == 15)
 881                         plane_no = 16;
 882         } else {
 883                 uint32_t        euctw_val;
 884 
 885                 index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
 886                     KICONV_UTF8_EUCTW_MAX);
 887 
 888                 if (index == 0) {
 889                         if (ob >= obtail) {
 890                                 *ret_val = (size_t)-1;
 891                                 return (-1);
 892                         }
 893 
 894                         *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
 895                         (*ret_val)++;
 896 
 897                         return (1);
 898                 }
 899 
 900                 euctw_val = kiconv_utf8_euctw[index].value;
 901                 byte1 = (euctw_val & 0xFF00) >> 8;
 902                 byte2 = euctw_val & 0xFF;
 903                 plane_no = euctw_val >> 16;
 904         }
 905 
 906         if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
 907                 *ret_val = (size_t)-1;
 908                 return (-1);
 909         }
 910 
 911         if (plane_no != 1) {
 912                 *ob++ = KICONV_TC_EUCTW_MBYTE;
 913                 *ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
 914         }
 915 
 916         *ob++ = byte1;
 917         *ob = byte2;
 918 
 919         return (plane_no == 1 ? 2 : 4);
 920 }
 921 
 922 /*
 923  * Convert single UTF-8 character to BIG5-HKSCS
 924  * Return: > 0  - Converted successfully
 925  *         = -1 - E2BIG
 926  */
 927 static int8_t
 928 utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 929     uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 930 {
 931         size_t          index;
 932         int8_t          hkscslen;
 933         uint32_t        hkscscode;
 934         boolean_t       special_sequence = B_FALSE;
 935 
 936         index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
 937             KICONV_UTF8_HKSCS_MAX);
 938         hkscscode = kiconv_utf8_hkscs[index].value;
 939 
 940         /*
 941          * There are 4 special code points in HKSCS-2004 which mapped
 942          * to 2 UNICODE code points.
 943          */
 944         if ((int32_t)hkscscode < 0) {
 945                 size_t special_index = (-(int32_t)hkscscode - 1) * 3;
 946 
 947                 /* Check the following 2 bytes. */
 948                 if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
 949                     (*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
 950                         special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
 951                         special_sequence = B_TRUE;
 952                 }
 953 
 954                 hkscscode = ucs_special_sequence[special_index];
 955         }
 956 
 957         hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
 958         if (obtail - ob < hkscslen) {
 959                 *ret_val = (size_t)-1;
 960                 return (-1);
 961         }
 962 
 963         if (index == 0)
 964                 (*ret_val)++;
 965 
 966         if (hkscslen > 1)
 967                 *ob++ = (uchar_t)(hkscscode >> 8);
 968         *ob = (uchar_t)(hkscscode & 0xFF);
 969 
 970         if (special_sequence) {         /* Advance for special sequence */
 971                 (*inbuf) += 2;
 972         }
 973 
 974         return (hkscslen);
 975 }
 976 
 977 /*
 978  * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
 979  * Return: > 0  - Converted successfully
 980  *         = -1 - E2BIG
 981  */
 982 static int8_t
 983 utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
 984         size_t *ret_val, kiconv_table_t *table, size_t nitems)
 985 {
 986         size_t          index;
 987         int8_t          big5len;
 988         uint32_t        big5code;
 989 
 990         index = kiconv_binsearch(utf8, table, nitems);
 991         big5code = table[index].value;
 992         big5len = (big5code <= 0xFF) ? 1 : 2;
 993 
 994         if (obtail - ob < big5len) {
 995                 *ret_val = (size_t)-1;
 996                 return (-1);
 997         }
 998 
 999         if (index == 0)
1000                 (*ret_val)++;
1001 
1002         if (big5len > 1)
1003                 *ob++ = (uchar_t)(big5code >> 8);
1004         *ob = (uchar_t)(big5code & 0xFF);
1005 
1006         return (big5len);
1007 }
1008 
1009 /*
1010  * Convert single UTF-8 character to BIG5.
1011  */
1012 /* ARGSUSED */
1013 static int8_t
1014 utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1015         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1016 {
1017         return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1018             kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
1019 }
1020 
1021 /*
1022  * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1023  */
1024 /* ARGSUSED */
1025 static int8_t
1026 utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1027         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1028 {
1029         return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1030             kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
1031 }
1032 
1033 static kiconv_ops_t kiconv_tc_ops_tbl[] = {
1034         {
1035                 "big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
1036                 kiconv_close_to_cck, kiconvstr_to_big5
1037         },
1038         {
1039                 "utf-8", "big5", open_fr_big5, kiconv_fr_big5,
1040                 close_fr_tc, kiconvstr_fr_big5
1041         },
1042 
1043         {
1044                 "big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
1045                 kiconv_close_to_cck, kiconvstr_to_big5hkscs
1046         },
1047         {
1048                 "utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
1049                 close_fr_tc, kiconvstr_fr_big5hkscs
1050         },
1051 
1052         {
1053                 "euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
1054                 kiconv_close_to_cck, kiconvstr_to_euctw
1055         },
1056         {
1057                 "utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
1058                 close_fr_tc, kiconvstr_fr_euctw
1059         },
1060 
1061         {
1062                 "cp950-hkscs", "utf-8", kiconv_open_to_cck,
1063                 kiconv_to_cp950hkscs, kiconv_close_to_cck,
1064                 kiconvstr_to_cp950hkscs
1065         },
1066         {
1067                 "utf-8", "cp950-hkscs", open_fr_cp950hkscs,
1068                 kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
1069         },
1070 };
1071 
1072 static kiconv_module_info_t kiconv_tc_info = {
1073         "kiconv_tc",            /* module name */
1074         sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
1075         kiconv_tc_ops_tbl,
1076         0,
1077         NULL,
1078         NULL,
1079         0
1080 };
1081 
1082 static struct modlkiconv modlkiconv_tc = {
1083         &mod_kiconvops,
1084         "kiconv Traditional Chinese module 1.0",
1085         &kiconv_tc_info
1086 };
1087 
1088 static struct modlinkage modlinkage = {
1089         MODREV_1,
1090         (void *)&modlkiconv_tc,
1091         NULL
1092 };
1093 
1094 int
1095 _init(void)
1096 {
1097         int err;
1098 
1099         err = mod_install(&modlinkage);
1100         if (err)
1101                 cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
1102 
1103         return (err);
1104 }
1105 
1106 int
1107 _fini(void)
1108 {
1109         int err;
1110 
1111         /*
1112          * If this module is being used, then, we cannot remove the module.
1113          * The following checking will catch pretty much all usual cases.
1114          *
1115          * Any remaining will be catached by the kiconv_unregister_module()
1116          * during mod_remove() at below.
1117          */
1118         if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
1119                 return (EBUSY);
1120 
1121         err = mod_remove(&modlinkage);
1122         if (err)
1123                 cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
1124 
1125         return (err);
1126 }
1127 
1128 int
1129 _info(struct modinfo *modinfop)
1130 {
1131         return (mod_info(&modlinkage, modinfop));
1132 }