1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/param.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/systm.h>
  30 #include <sys/debug.h>
  31 #include <sys/kmem.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/byteorder.h>
  34 #include <sys/errno.h>
  35 #include <sys/modctl.h>
  36 #include <sys/u8_textprep.h>
  37 #include <sys/kiconv.h>
  38 #include <sys/kiconv_cck_common.h>
  39 #include <sys/kiconv_tc.h>
  40 #include <sys/kiconv_big5_utf8.h>
  41 #include <sys/kiconv_euctw_utf8.h>
  42 #include <sys/kiconv_hkscs_utf8.h>
  43 #include <sys/kiconv_cp950hkscs_utf8.h>
  44 #include <sys/kiconv_utf8_big5.h>
  45 #include <sys/kiconv_utf8_euctw.h>
  46 #include <sys/kiconv_utf8_cp950hkscs.h>
  47 #include <sys/kiconv_utf8_hkscs.h>
  48 
/*
 * UTF-8 byte sequences for the 4 HKSCS-2004 code points that each map to
 * a sequence of 2 Unicode code points (U+00CA or U+00EA followed by
 * U+0304 or U+030C).  big5hkscs_to_utf8() selects a row with the second
 * byte of a lookup result whose first byte is the 0xFF marker.
 */
static uchar_t hkscs_special_sequence[][4] = {
        { 0xc3, 0x8a, 0xcc, 0x84 },     /* 0x8862 */
        { 0xc3, 0x8a, 0xcc, 0x8c },     /* 0x8864 */
        { 0xc3, 0xaa, 0xcc, 0x84 },     /* 0x88a3 */
        { 0xc3, 0xaa, 0xcc, 0x8c }      /* 0x88a5 */
};
  56 
/*
 * HKSCS-2004 codes for U+00CA and U+00EA, alone and combined.  Each base
 * letter owns 3 consecutive entries: the letter by itself, the letter
 * followed by U+0304, and the letter followed by U+030C.  The 4 Unicode
 * code point pairs therefore each map to 1 HKSCS-2004 code point.
 */
static uint32_t ucs_special_sequence[] = {
        0x8866,         /* U+00ca */
        0x8862,         /* U+00ca U+0304 */
        0x8864,         /* U+00ca U+030c */
        0x88a7,         /* U+00ea */
        0x88a3,         /* U+00ea U+0304 */
        0x88a5          /* U+00ea U+030c */
};
  66 
/*
 * Signature of the single character convertors (BIG5, BIG5-HKSCS and
 * CP950-HKSCS to UTF-8) that are handed to the common BIG5 conversion
 * loops.  The return value is the number of UTF-8 bytes written, or a
 * negative value when the output buffer is too small.
 */
typedef int8_t (*kiconv_big5toutf8_t)(uint32_t value, uchar_t *ob,
        uchar_t *obtail, size_t *ret_val);

/* Forward declarations of the single character convertors. */
static int8_t utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf,
        uchar_t *ibtail, uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static int8_t big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
        size_t *ret_val);
static int8_t big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
        uchar_t *obtail, size_t *ret_val);
static int8_t cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob,
        uchar_t *obtail, size_t *ret_val);
static int8_t euctw_to_utf8(size_t plane_no, uint32_t euctw_val,
        uchar_t *ob, uchar_t *obtail, size_t *ret_val);
static uint32_t get_unicode_from_UDA(size_t plane_no, uchar_t byte1,
        uchar_t byte2);
  88 
/*
 * Magic values handed out as conversion descriptors by the open_fr_*()
 * routines.  close_fr_tc() rejects any descriptor whose value is above
 * KICONV_TC_MAX_MAGIC_ID.
 */
#define KICONV_TC_BIG5          (0x01)
#define KICONV_TC_BIG5HKSCS     (0x02)
#define KICONV_TC_CP950HKSCS    (0x03)
#define KICONV_TC_EUCTW         (0x04)
#define KICONV_TC_MAX_MAGIC_ID  (0x04)
  94 
  95 static void *
  96 open_fr_big5()
  97 {
  98         return ((void *)KICONV_TC_BIG5);
  99 }
 100 
 101 static void *
 102 open_fr_big5hkscs()
 103 {
 104         return ((void *)KICONV_TC_BIG5HKSCS);
 105 }
 106 
 107 static void *
 108 open_fr_cp950hkscs()
 109 {
 110         return ((void *)KICONV_TC_CP950HKSCS);
 111 }
 112 
 113 static void *
 114 open_fr_euctw()
 115 {
 116         return ((void *)KICONV_TC_EUCTW);
 117 }
 118 
 119 static int
 120 close_fr_tc(void *s)
 121 {
 122         if ((uintptr_t)s > KICONV_TC_MAX_MAGIC_ID)
 123                 return (EBADF);
 124 
 125         return (0);
 126 }
 127 
/*
 * Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS) to UTF-8.
 *
 * ASCII bytes are copied through unchanged.  Every other character must
 * be a two byte sequence; it is packed into a 16 bit value and handed to
 * ptr_big5touf8, which writes the UTF-8 form into the output buffer.
 *
 * A NULL inbuf or *inbuf is a state reset request and returns 0 without
 * touching any buffer.  Otherwise the caller's buffer pointers and byte
 * counts are updated on return to show how far the conversion got, also
 * when the loop stopped early.
 *
 * Return: the ret_val counter, which the single character convertors
 *         increment for non-identical conversions, or (size_t)-1 with
 *         *errno set to EBADF for a bad conversion descriptor.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() is defined in a header that
 * is not visible here; it presumably stores the error in *errno, sets
 * ret_val to (size_t)-1 and leaves the loop -- confirm.
 */
static size_t
kiconv_fr_big5_common(void *kcd, char **inbuf, size_t *inbytesleft,
        char **outbuf, size_t *outbytesleft, int *errno,
        kiconv_big5toutf8_t ptr_big5touf8)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        big5_val;

        /* Check on the kiconv code conversion descriptor. */
        if (kcd == NULL || kcd == (void *)-1) {
                *errno = EBADF;
                return ((size_t)-1);
        }

        /* If this is a state reset request, process and return. */
        if (inbuf == NULL || *inbuf == NULL) {
                return (0);
        }

        ret_val = 0;
        ib = (uchar_t *)*inbuf;
        ob = (uchar_t *)*outbuf;
        ibtail = ib + *inbytesleft;
        obtail = ob + *outbytesleft;

        while (ib < ibtail) {
                /* ASCII is passed through as a single byte. */
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /*
                 * Issue EILSEQ error if the first byte is not a
                 * valid BIG5/HKSCS leading byte.
                 */
                if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /*
                 * Issue EINVAL error if input buffer has an incomplete
                 * character at the end of the buffer.
                 */
                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_AND_BREAK(EINVAL);
                }

                /*
                 * Issue EILSEQ error if the second byte is not
                 * a valid BIG5/HKSCS trailing byte.
                 */
                if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /* Now we have a valid BIG5/HKSCS character. */
                big5_val = (uint32_t)(*ib) << 8 | *(ib + 1);
                sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);

                /* A negative size means the output buffer is full. */
                if (sz < 0) {
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                /*
                 * The input is consumed only after the output has been
                 * written, so ib still points at the character that
                 * could not be converted when the loop stops above.
                 */
                ib += 2;
                ob += sz;
        }

        *inbuf = (char *)ib;
        *inbytesleft = ibtail - ib;
        *outbuf = (char *)ob;
        *outbytesleft = obtail - ob;

        return (ret_val);
}
 214 
/*
 * String based Common convertor from BIG5/HKSCS(BIG5-HKSCS or CP950-HKSCS)
 * to UTF-8.
 *
 * Converts at most *inlen bytes from ib into at most *outlen bytes at ob.
 * Conversion stops at the first NUL byte unless KICONV_IGNORE_NULL is set
 * in flag.  On return *inlen and *outlen hold the number of bytes left
 * unused in each buffer.
 *
 * The REPLACE_INVALID path writes the three byte UTF-8 replacement
 * character in place of an invalid input sequence and counts it in
 * ret_val.
 *
 * NOTE(review): KICONV_SET_ERRNO_WITH_FLAG() and
 * KICONV_SET_ERRNO_AND_BREAK() are defined in a header that is not visible
 * here.  Judging by the explicit equivalent in kiconvstr_fr_euctw(), the
 * former presumably skips the given number of input bytes and jumps to
 * REPLACE_INVALID when KICONV_REPLACE_INVALID is set in flag, and
 * otherwise reports the error and leaves the loop -- confirm.
 */
static size_t
kiconvstr_fr_big5_common(uchar_t *ib, size_t *inlen, uchar_t *ob,
    size_t *outlen, int flag, int *errno,
    kiconv_big5toutf8_t ptr_big5touf8)
{
        uchar_t         *oldib;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        size_t          ret_val;
        int8_t          sz;
        uint32_t        big5_val;
        boolean_t       do_not_ignore_null;

        ret_val = 0;
        ibtail = ib + *inlen;
        obtail = ob + *outlen;
        do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

        while (ib < ibtail) {
                /* A NUL byte ends the string unless told to ignore it. */
                if (*ib == '\0' && do_not_ignore_null)
                        break;

                /* ASCII is passed through as a single byte. */
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /* Remember where this character starts for E2BIG rewind. */
                oldib = ib;

                /* Invalid leading byte. */
                if (! KICONV_TC_IS_BIG5_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
                }

                /* Incomplete character at the end of the input. */
                if (ibtail - ib < 2) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
                }

                /* Invalid trailing byte. */
                if (! KICONV_TC_IS_BIG5_2nd_BYTE(*(ib + 1))) {
                        KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
                }

                /* Pack the two bytes into one value and convert it. */
                big5_val = *ib++;
                big5_val = (big5_val << 8) | *ib++;
                sz = ptr_big5touf8(big5_val, ob, obtail, &ret_val);

                /* Output buffer is full: give the character back. */
                if (sz < 0) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ob += sz;
                continue;

REPLACE_INVALID:
                /* No room for the replacement character: rewind as well. */
                if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
                ret_val++;
        }

        *inlen = ibtail - ib;
        *outlen = obtail - ob;

        return (ret_val);
}
 293 
 294 /*
 295  * Encoding convertor from BIG5 to UTF-8.
 296  */
 297 static size_t
 298 kiconv_fr_big5(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
 299         size_t *outbytesleft, int *errno)
 300 {
 301         return (kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 302             outbytesleft, errno, big5_to_utf8));
 303 }
 304 
 305 /*
 306  * String based encoding convertor from BIG5 to UTF-8.
 307  */
 308 static size_t
 309 kiconvstr_fr_big5(char *inarray, size_t *inlen, char *outarray,
 310     size_t *outlen, int flag, int *errno)
 311 {
 312         return (kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 313             (uchar_t *)outarray, outlen, flag, errno,
 314             big5_to_utf8));
 315 }
 316 
 317 /*
 318  * Encoding convertor from BIG5-HKSCS to UTF-8.
 319  */
 320 static size_t
 321 kiconv_fr_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 322     char **outbuf, size_t *outbytesleft, int *errno)
 323 {
 324         return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 325             outbytesleft, errno, big5hkscs_to_utf8);
 326 }
 327 
 328 /*
 329  * String based encoding convertor from BIG5-HKSCS to UTF-8.
 330  */
 331 static size_t
 332 kiconvstr_fr_big5hkscs(char *inarray, size_t *inlen, char *outarray,
 333         size_t *outlen, int flag, int *errno)
 334 {
 335         return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 336             (uchar_t *)outarray, outlen, flag, errno, big5hkscs_to_utf8);
 337 }
 338 
 339 /*
 340  * Encoding convertor from CP950-HKSCS to UTF-8.
 341  */
 342 static size_t
 343 kiconv_fr_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 344     char **outbuf, size_t *outbytesleft, int *errno)
 345 {
 346         return kiconv_fr_big5_common(kcd, inbuf, inbytesleft, outbuf,
 347             outbytesleft, errno, cp950hkscs_to_utf8);
 348 }
 349 
 350 /*
 351  * String based encoding convertor from CP950-HKSCS to UTF-8.
 352  */
 353 static size_t
 354 kiconvstr_fr_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
 355         size_t *outlen, int flag, int *errno)
 356 {
 357         return kiconvstr_fr_big5_common((uchar_t *)inarray, inlen,
 358             (uchar_t *)outarray, outlen, flag, errno, cp950hkscs_to_utf8);
 359 }
 360 
/*
 * Encoding convertor from EUC-TW to UTF-8.
 *
 * ASCII bytes are copied through unchanged.  A character that starts with
 * KICONV_TC_EUCTW_MBYTE is four bytes long (marker, plane byte and two
 * code bytes); any other valid leading byte starts a two byte character
 * of plane 1.  The two code bytes and the plane number are handed to
 * euctw_to_utf8().
 *
 * A NULL inbuf or *inbuf is a state reset request and returns 0 without
 * touching any buffer.  Otherwise the caller's buffer pointers and byte
 * counts are updated on return to show how far the conversion got.
 *
 * Return: the ret_val counter, which euctw_to_utf8() increments for
 *         non-identical conversions, or (size_t)-1 with *errno set to
 *         EBADF for a bad conversion descriptor.
 *
 * NOTE(review): KICONV_SET_ERRNO_AND_BREAK() is defined in a header that
 * is not visible here; it presumably stores the error in *errno, sets
 * ret_val to (size_t)-1 and leaves the loop -- confirm.
 */
static size_t
kiconv_fr_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
        char **outbuf, size_t *outbytesleft, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        uchar_t         *oldib;
        size_t          ret_val;
        size_t          plane_no;
        int8_t          sz;
        uint32_t        euctw_val;
        boolean_t       isplane1;

        /* Check on the kiconv code conversion descriptor. */
        if (kcd == NULL || kcd == (void *)-1) {
                *errno = EBADF;
                return ((size_t)-1);
        }

        /* If this is a state reset request, process and return. */
        if (inbuf == NULL || *inbuf == NULL) {
                return (0);
        }

        ret_val = 0;
        ib = (uchar_t *)*inbuf;
        ob = (uchar_t *)*outbuf;
        ibtail = ib + *inbytesleft;
        obtail = ob + *outbytesleft;

        while (ib < ibtail) {
                /* ASCII is passed through as a single byte. */
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /*
                 * Issue EILSEQ error if the first byte is not a
                 * valid EUC-TW leading byte.
                 */
                if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /* The multi-byte marker means "not plane 1". */
                isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
                    B_FALSE : B_TRUE;

                /*
                 * Issue EINVAL error if input buffer has an incomplete
                 * character at the end of the buffer.
                 */
                if (ibtail - ib < (isplane1 ? 2 : 4)) {
                        KICONV_SET_ERRNO_AND_BREAK(EINVAL);
                }

                /* Remember where this character starts for E2BIG rewind. */
                oldib = ib;
                plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;

                /*
                 * Issue EILSEQ error if the remaining bytes are not
                 * valid EUC-TW bytes.
                 */
                if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
                        KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
                }

                /* Skip the marker and the plane byte. */
                if (! isplane1)
                        ib += 2;

                /* Now we have a valid EUC-TW character. */
                euctw_val = *ib++;
                euctw_val = (euctw_val << 8) | *ib++;
                sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);

                /* Output buffer is full: give the character back. */
                if (sz < 0) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ob += sz;
        }

        *inbuf = (char *)ib;
        *inbytesleft = ibtail - ib;
        *outbuf = (char *)ob;
        *outbytesleft = obtail - ob;

        return (ret_val);
}
 459 
/*
 * String based encoding convertor from EUC-TW to UTF-8.
 *
 * Converts at most *inlen bytes from inarray into at most *outlen bytes
 * at outarray.  Conversion stops at the first NUL byte unless
 * KICONV_IGNORE_NULL is set in flag.  On return *inlen and *outlen hold
 * the number of bytes left unused in each buffer.
 *
 * The REPLACE_INVALID path writes the three byte UTF-8 replacement
 * character in place of an invalid input sequence and counts it in
 * ret_val.
 *
 * NOTE(review): KICONV_SET_ERRNO_WITH_FLAG() and
 * KICONV_SET_ERRNO_AND_BREAK() are defined in a header that is not visible
 * here.  The former presumably mirrors the explicit incomplete-character
 * handling below: skip the given number of input bytes and jump to
 * REPLACE_INVALID when KICONV_REPLACE_INVALID is set in flag, otherwise
 * report the error and leave the loop -- confirm.
 */
static size_t
kiconvstr_fr_euctw(char *inarray, size_t *inlen, char *outarray,
        size_t *outlen, int flag, int *errno)
{
        uchar_t         *ib;
        uchar_t         *ob;
        uchar_t         *ibtail;
        uchar_t         *obtail;
        uchar_t         *oldib;
        size_t          ret_val;
        size_t          plane_no;
        int8_t          sz;
        uint32_t        euctw_val;
        boolean_t       isplane1;
        boolean_t       do_not_ignore_null;

        ret_val = 0;
        ib = (uchar_t *)inarray;
        ob = (uchar_t *)outarray;
        ibtail = ib + *inlen;
        obtail = ob + *outlen;
        do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);

        while (ib < ibtail) {
                /* A NUL byte ends the string unless told to ignore it. */
                if (*ib == '\0' && do_not_ignore_null)
                        break;

                /* ASCII is passed through as a single byte. */
                if (KICONV_IS_ASCII(*ib)) {
                        if (ob >= obtail) {
                                KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                        }

                        *ob++ = *ib++;
                        continue;
                }

                /* Remember where this character starts for E2BIG rewind. */
                oldib = ib;

                /* Invalid leading byte. */
                if (! KICONV_TC_IS_EUCTW_1st_BYTE(*ib)) {
                        KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
                }

                /* The multi-byte marker means "not plane 1". */
                isplane1 = (*ib == KICONV_TC_EUCTW_MBYTE) ?
                    B_FALSE : B_TRUE;

                /*
                 * Incomplete character at the end of the input.  In
                 * replacement mode all of the remaining bytes are
                 * consumed and replaced by one replacement character.
                 */
                if (ibtail - ib < (isplane1 ? 2 : 4)) {
                        if (flag & KICONV_REPLACE_INVALID) {
                                ib = ibtail;
                                goto REPLACE_INVALID;
                        }

                        KICONV_SET_ERRNO_AND_BREAK(EINVAL);
                }

                plane_no = isplane1 ? 1 : *(ib + 1) - KICONV_TC_EUCTW_PMASK;

                /* Invalid bytes after the leading byte. */
                if (! KICONV_TC_IS_VALID_EUCTW_SEQ(ib)) {
                        KICONV_SET_ERRNO_WITH_FLAG(isplane1 ? 2 : 4, EILSEQ);
                }

                /* Skip the marker and the plane byte. */
                if (! isplane1)
                        ib += 2;

                /* Pack the two code bytes into one value and convert it. */
                euctw_val = *ib++;
                euctw_val = (euctw_val << 8) | *ib++;
                sz = euctw_to_utf8(plane_no, euctw_val, ob, obtail, &ret_val);

                /* Output buffer is full: give the character back. */
                if (sz < 0) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                ob += sz;
                continue;

REPLACE_INVALID:
                /* No room for the replacement character: rewind as well. */
                if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
                        ib = oldib;
                        KICONV_SET_ERRNO_AND_BREAK(E2BIG);
                }

                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
                *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
                ret_val++;
        }

        *inlen = ibtail - ib;
        *outlen = obtail - ob;

        return (ret_val);
}
 555 
 556 /*
 557  * Encoding convertor from UTF-8 to BIG5.
 558  */
 559 static size_t
 560 kiconv_to_big5(void *kcd, char **inbuf, size_t *inbytesleft,
 561         char **outbuf, size_t *outbytesleft, int *errno)
 562 {
 563         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 564             outbytesleft, errno, utf8_to_big5);
 565 }
 566 
 567 /*
 568  * String based encoding convertor from UTF-8 to BIG5.
 569  */
 570 static size_t
 571 kiconvstr_to_big5(char *inarray, size_t *inlen, char *outarray,
 572         size_t *outlen, int flag, int *errno)
 573 {
 574         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 575             (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5);
 576 }
 577 
 578 /*
 579  * Encoding convertor from UTF-8 to EUC-TW.
 580  */
 581 static size_t
 582 kiconv_to_euctw(void *kcd, char **inbuf, size_t *inbytesleft,
 583         char **outbuf, size_t *outbytesleft, int *errno)
 584 {
 585         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 586             outbytesleft, errno, utf8_to_euctw);
 587 }
 588 
 589 /*
 590  * String based encoding convertor from UTF-8 to EUC-TW.
 591  */
 592 static size_t
 593 kiconvstr_to_euctw(char *inarray, size_t *inlen, char *outarray,
 594         size_t *outlen, int flag, int *errno)
 595 {
 596         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 597             (uchar_t *)outarray, outlen, flag, errno, utf8_to_euctw);
 598 }
 599 
 600 /*
 601  * Encoding convertor from UTF-8 to CP950HKSCS.
 602  */
 603 static size_t
 604 kiconv_to_cp950hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 605         char **outbuf, size_t *outbytesleft, int *errno)
 606 {
 607         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 608             outbytesleft, errno, utf8_to_cp950hkscs);
 609 }
 610 
 611 /*
 612  * String based encoding convertor from UTF-8 to CP950HKSCS.
 613  */
 614 static size_t
 615 kiconvstr_to_cp950hkscs(char *inarray, size_t *inlen, char *outarray,
 616         size_t *outlen, int flag, int *errno)
 617 {
 618         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 619             (uchar_t *)outarray, outlen, flag, errno, utf8_to_cp950hkscs);
 620 }
 621 
 622 /*
 623  * Encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
 624  */
 625 static size_t
 626 kiconv_to_big5hkscs(void *kcd, char **inbuf, size_t *inbytesleft,
 627         char **outbuf, size_t *outbytesleft, int *errno)
 628 {
 629         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 630             outbytesleft, errno, utf8_to_big5hkscs);
 631 }
 632 
 633 /*
 634  * String based encoding convertor from UTF-8 to BIG5HKSCS(HKSCS-2004).
 635  */
 636 static size_t
 637 kiconvstr_to_big5hkscs(char *inarray, size_t *inlen, char *outarray,
 638     size_t *outlen, int flag, int *errno)
 639 {
 640         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 641             (uchar_t *)outarray, outlen, flag, errno, utf8_to_big5hkscs);
 642 }
 643 
 644 /*
 645  * Common convertor from single BIG5/CP950-HKSCS character to UTF-8.
 646  * Return: > 0  - Converted successfully
 647  *         = -1 - E2BIG
 648  */
 649 static int8_t
 650 big5_to_utf8_common(uint32_t big5_val, uchar_t *ob, uchar_t *obtail,
 651         size_t *ret_val, kiconv_table_array_t *table, size_t nitems)
 652 {
 653         size_t  index;
 654         int8_t  sz;
 655         uchar_t *u8;
 656 
 657         index = kiconv_binsearch(big5_val, table, nitems);
 658         u8 = table[index].u8;
 659         sz = u8_number_of_bytes[u8[0]];
 660 
 661         if (obtail - ob < sz) {
 662                 *ret_val = (size_t)-1;
 663                 return (-1);
 664         }
 665 
 666         if (index == 0)
 667                 (*ret_val)++;   /* Non-identical conversion */
 668 
 669         for (index = 0; index < sz; index++)
 670                 *ob++ = u8[index];
 671 
 672         return (sz);
 673 }
 674 
 675 /*
 676  * Convert single BIG5 character to UTF-8.
 677  */
 678 static int8_t
 679 big5_to_utf8(uint32_t big5_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 680 {
 681         return (big5_to_utf8_common(big5_val, ob, obtail, ret_val,
 682             kiconv_big5_utf8, KICONV_BIG5_UTF8_MAX));
 683 }
 684 
 685 /*
 686  * Convert single CP950-HKSCS character to UTF-8.
 687  */
 688 static int8_t
 689 cp950hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
 690         size_t *ret_val)
 691 {
 692         return (big5_to_utf8_common(hkscs_val, ob, obtail, ret_val,
 693             kiconv_cp950hkscs_utf8, KICONV_CP950HKSCS_UTF8_MAX));
 694 }
 695 
 696 /*
 697  * Calculate unicode value for some CNS planes which fall in Unicode
 698  * UDA range.
 699  */
 700 static uint32_t
 701 get_unicode_from_UDA(size_t plane_no, uchar_t b1, uchar_t b2)
 702 {
 703         /*
 704          * CNS Plane 15 is pre-allocated, so need move Plane 16 to back 15
 705          * to compute the Unicode value.
 706          */
 707         if (plane_no == 16)
 708                 --plane_no;
 709 
 710         /* 0xF0000 + (plane_no - 12) * 8836 + (b1 - 0xA1) * 94 + (b2 - 0xA1) */
 711         return (8836 * plane_no + 94 * b1 + b2 + 0xD2611);
 712 }
 713 
 714 /*
 715  * Convert single EUC-TW character to UTF-8.
 716  * Return: > 0  - Converted successfully
 717  *         = -1 - E2BIG
 718  */
 719 static int8_t
 720 euctw_to_utf8(size_t plane_no, uint32_t euctw_val, uchar_t *ob,
 721         uchar_t *obtail, size_t *ret_val)
 722 {
 723         uint32_t u32;
 724         size_t  index;
 725         int8_t  sz;
 726         uchar_t udc[4];
 727         uchar_t *u8;
 728 
 729         switch (plane_no) {
 730         case 1:
 731                 index = kiconv_binsearch(euctw_val, kiconv_cns1_utf8,
 732                     KICONV_CNS1_UTF8_MAX);
 733                 u8 = kiconv_cns1_utf8[index].u8;
 734                 break;
 735         case 2:
 736                 index = kiconv_binsearch(euctw_val, kiconv_cns2_utf8,
 737                     KICONV_CNS2_UTF8_MAX);
 738                 u8 = kiconv_cns2_utf8[index].u8;
 739                 break;
 740         case 3:
 741                 index = kiconv_binsearch(euctw_val, kiconv_cns3_utf8,
 742                     KICONV_CNS3_UTF8_MAX);
 743                 u8 = kiconv_cns3_utf8[index].u8;
 744                 break;
 745         case 4:
 746                 index = kiconv_binsearch(euctw_val, kiconv_cns4_utf8,
 747                     KICONV_CNS4_UTF8_MAX);
 748                 u8 = kiconv_cns4_utf8[index].u8;
 749                 break;
 750         case 5:
 751                 index = kiconv_binsearch(euctw_val, kiconv_cns5_utf8,
 752                     KICONV_CNS5_UTF8_MAX);
 753                 u8 = kiconv_cns5_utf8[index].u8;
 754                 break;
 755         case 6:
 756                 index = kiconv_binsearch(euctw_val, kiconv_cns6_utf8,
 757                     KICONV_CNS6_UTF8_MAX);
 758                 u8 = kiconv_cns6_utf8[index].u8;
 759                 break;
 760         case 7:
 761                 index = kiconv_binsearch(euctw_val, kiconv_cns7_utf8,
 762                     KICONV_CNS7_UTF8_MAX);
 763                 u8 = kiconv_cns7_utf8[index].u8;
 764                 break;
 765         case 12:
 766         case 13:
 767         case 14:
 768         case 16:
 769                 u32 = get_unicode_from_UDA(plane_no,
 770                     (euctw_val & 0xFF00) >> 8, euctw_val & 0xFF);
 771                 /*
 772                  * As U+F0000 <= u32 <= U+F8A0F, so its UTF-8 sequence
 773                  * will occupy 4 bytes.
 774                  */
 775                 udc[0] = 0xF3;
 776                 udc[1] = (uchar_t)(0x80 | (u32 & 0x03F000) >> 12);
 777                 udc[2] = (uchar_t)(0x80 | (u32 & 0x000FC0) >> 6);
 778                 udc[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
 779                 u8 = udc;
 780                 index = 1;
 781                 break;
 782         case 15:
 783                 index = kiconv_binsearch(euctw_val, kiconv_cns15_utf8,
 784                     KICONV_CNS15_UTF8_MAX);
 785                 u8 = kiconv_cns15_utf8[index].u8;
 786                 break;
 787         default:
 788                 index = 0;
 789                 u8 = kiconv_cns1_utf8[index].u8;
 790         }
 791 
 792         sz = u8_number_of_bytes[u8[0]];
 793         if (obtail - ob < sz) {
 794                 *ret_val = (size_t)-1;
 795                 return (-1);
 796         }
 797 
 798         if (index == 0)
 799                 (*ret_val)++;
 800 
 801         for (index = 0; index < sz; index++)
 802                 *ob++ = u8[index];
 803 
 804         return (sz);
 805 }
 806 
 807 /*
 808  * Convert single HKSCS character to UTF-8.
 809  * Return: > 0  - Converted successfully
 810  *         = -1 - E2BIG
 811  */
 812 static int8_t
 813 big5hkscs_to_utf8(uint32_t hkscs_val, uchar_t *ob, uchar_t *obtail,
 814         size_t *ret_val)
 815 {
 816         size_t  index;
 817         int8_t  sz;
 818         uchar_t *u8;
 819 
 820         index = kiconv_binsearch(hkscs_val, kiconv_hkscs_utf8,
 821             KICONV_HKSCS_UTF8_MAX);
 822         u8 = kiconv_hkscs_utf8[index].u8;
 823 
 824         /*
 825          * Single HKSCS-2004 character may map to 2 Unicode
 826          * code points.
 827          */
 828         if (u8[0] == 0xFF) {
 829                 u8 = hkscs_special_sequence[u8[1]];
 830                 sz = 4;
 831         } else {
 832                 sz = u8_number_of_bytes[u8[0]];
 833         }
 834 
 835         if (obtail - ob < sz) {
 836                 *ret_val = (size_t)-1;
 837                 return (-1);
 838         }
 839 
 840         if (index == 0)
 841                 (*ret_val)++;   /* Non-identical conversion. */
 842 
 843         for (index = 0; index < sz; index++)
 844                 *ob++ = u8[index];
 845 
 846         return (sz);
 847 }
 848 
 849 /*
 850  * Convert single UTF-8 character to EUC-TW.
 851  * Return: > 0  - Converted successfully
 852  *         = -1 - E2BIG
 853  */
 854 /* ARGSUSED */
 855 static int8_t
 856 utf8_to_euctw(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 857         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 858 {
 859         size_t          index;
 860         size_t          plane_no;
 861         uchar_t         byte1;
 862         uchar_t         byte2;
 863 
 864         if (utf8 >= KICONV_TC_UDA_UTF8_START &&
 865             utf8 <= KICONV_TC_UDA_UTF8_END) {
 866                 /*
 867                  * Calculate EUC-TW code if utf8 is in Unicode
 868                  * Private Plane 15.
 869                  */
 870                 index = (((utf8 & 0x7000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
 871                     ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
 872                     KICONV_TC_UDA_UCS4_START;
 873                 plane_no = 12 + index / 8836;
 874                 byte1 = 0xA1 + (index % 8836) / 94;
 875                 byte2 = 0xA1 + index % 94;
 876 
 877                 /* CNS Plane 15 is pre-allocated, so place it into Plane 16. */
 878                 if (plane_no == 15)
 879                         plane_no = 16;
 880         } else {
 881                 uint32_t        euctw_val;
 882 
 883                 index = kiconv_binsearch(utf8, kiconv_utf8_euctw,
 884                     KICONV_UTF8_EUCTW_MAX);
 885 
 886                 if (index == 0) {
 887                         if (ob >= obtail) {
 888                                 *ret_val = (size_t)-1;
 889                                 return (-1);
 890                         }
 891 
 892                         *ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
 893                         (*ret_val)++;
 894 
 895                         return (1);
 896                 }
 897 
 898                 euctw_val = kiconv_utf8_euctw[index].value;
 899                 byte1 = (euctw_val & 0xFF00) >> 8;
 900                 byte2 = euctw_val & 0xFF;
 901                 plane_no = euctw_val >> 16;
 902         }
 903 
 904         if (obtail - ob < (plane_no == 1 ? 2 : 4)) {
 905                 *ret_val = (size_t)-1;
 906                 return (-1);
 907         }
 908 
 909         if (plane_no != 1) {
 910                 *ob++ = KICONV_TC_EUCTW_MBYTE;
 911                 *ob++ = KICONV_TC_EUCTW_PMASK + plane_no;
 912         }
 913 
 914         *ob++ = byte1;
 915         *ob = byte2;
 916 
 917         return (plane_no == 1 ? 2 : 4);
 918 }
 919 
 920 /*
 921  * Convert single UTF-8 character to BIG5-HKSCS
 922  * Return: > 0  - Converted successfully
 923  *         = -1 - E2BIG
 924  */
 925 static int8_t
 926 utf8_to_big5hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 927     uchar_t *ob, uchar_t *obtail, size_t *ret_val)
 928 {
 929         size_t          index;
 930         int8_t          hkscslen;
 931         uint32_t        hkscscode;
 932         boolean_t       special_sequence = B_FALSE;
 933 
 934         index = kiconv_binsearch(utf8, kiconv_utf8_hkscs,
 935             KICONV_UTF8_HKSCS_MAX);
 936         hkscscode = kiconv_utf8_hkscs[index].value;
 937 
 938         /*
 939          * There are 4 special code points in HKSCS-2004 which mapped
 940          * to 2 UNICODE code points.
 941          */
 942         if ((int32_t)hkscscode < 0) {
 943                 size_t special_index = (-(int32_t)hkscscode - 1) * 3;
 944 
 945                 /* Check the following 2 bytes. */
 946                 if (ibtail - *inbuf >= 2 && **inbuf == 0xcc &&
 947                     (*(*inbuf + 1) == 0x84 || *(*inbuf + 1) == 0x8c)) {
 948                         special_index += (*(*inbuf + 1) == 0x84 ? 1 : 2);
 949                         special_sequence = B_TRUE;
 950                 }
 951 
 952                 hkscscode = ucs_special_sequence[special_index];
 953         }
 954 
 955         hkscslen = (hkscscode <= 0xFF) ? 1 : 2;
 956         if (obtail - ob < hkscslen) {
 957                 *ret_val = (size_t)-1;
 958                 return (-1);
 959         }
 960 
 961         if (index == 0)
 962                 (*ret_val)++;
 963 
 964         if (hkscslen > 1)
 965                 *ob++ = (uchar_t)(hkscscode >> 8);
 966         *ob = (uchar_t)(hkscscode & 0xFF);
 967 
 968         if (special_sequence) {         /* Advance for special sequence */
 969                 (*inbuf) += 2;
 970         }
 971 
 972         return (hkscslen);
 973 }
 974 
 975 /*
 976  * Common convertor for UTF-8 to BIG5/CP950-HKSCS.
 977  * Return: > 0  - Converted successfully
 978  *         = -1 - E2BIG
 979  */
 980 static int8_t
 981 utf8_to_big5_common(uint32_t utf8, uchar_t *ob, uchar_t *obtail,
 982         size_t *ret_val, kiconv_table_t *table, size_t nitems)
 983 {
 984         size_t          index;
 985         int8_t          big5len;
 986         uint32_t        big5code;
 987 
 988         index = kiconv_binsearch(utf8, table, nitems);
 989         big5code = table[index].value;
 990         big5len = (big5code <= 0xFF) ? 1 : 2;
 991 
 992         if (obtail - ob < big5len) {
 993                 *ret_val = (size_t)-1;
 994                 return (-1);
 995         }
 996 
 997         if (index == 0)
 998                 (*ret_val)++;
 999 
1000         if (big5len > 1)
1001                 *ob++ = (uchar_t)(big5code >> 8);
1002         *ob = (uchar_t)(big5code & 0xFF);
1003 
1004         return (big5len);
1005 }
1006 
1007 /*
1008  * Convert single UTF-8 character to BIG5.
1009  */
1010 /* ARGSUSED */
1011 static int8_t
1012 utf8_to_big5(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1013         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1014 {
1015         return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1016             kiconv_utf8_big5, KICONV_UTF8_BIG5_MAX));
1017 }
1018 
1019 /*
1020  * Convert single UTF-8 character to CP950-HKSCS for Windows compatibility.
1021  */
1022 /* ARGSUSED */
1023 static int8_t
1024 utf8_to_cp950hkscs(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
1025         uchar_t *ob, uchar_t *obtail, size_t *ret_val)
1026 {
1027         return (utf8_to_big5_common(utf8, ob, obtail, ret_val,
1028             kiconv_utf8_cp950hkscs, KICONV_UTF8_CP950HKSCS));
1029 }
1030 
1031 static kiconv_ops_t kiconv_tc_ops_tbl[] = {
1032         {
1033                 "big5", "utf-8", kiconv_open_to_cck, kiconv_to_big5,
1034                 kiconv_close_to_cck, kiconvstr_to_big5
1035         },
1036         {
1037                 "utf-8", "big5", open_fr_big5, kiconv_fr_big5,
1038                 close_fr_tc, kiconvstr_fr_big5
1039         },
1040 
1041         {
1042                 "big5-hkscs", "utf-8", kiconv_open_to_cck, kiconv_to_big5hkscs,
1043                 kiconv_close_to_cck, kiconvstr_to_big5hkscs
1044         },
1045         {
1046                 "utf-8", "big5-hkscs", open_fr_big5hkscs, kiconv_fr_big5hkscs,
1047                 close_fr_tc, kiconvstr_fr_big5hkscs
1048         },
1049 
1050         {
1051                 "euc-tw", "utf-8", kiconv_open_to_cck, kiconv_to_euctw,
1052                 kiconv_close_to_cck, kiconvstr_to_euctw
1053         },
1054         {
1055                 "utf-8", "euc-tw", open_fr_euctw, kiconv_fr_euctw,
1056                 close_fr_tc, kiconvstr_fr_euctw
1057         },
1058 
1059         {
1060                 "cp950-hkscs", "utf-8", kiconv_open_to_cck,
1061                 kiconv_to_cp950hkscs, kiconv_close_to_cck,
1062                 kiconvstr_to_cp950hkscs
1063         },
1064         {
1065                 "utf-8", "cp950-hkscs", open_fr_cp950hkscs,
1066                 kiconv_fr_cp950hkscs, close_fr_tc, kiconvstr_fr_cp950hkscs
1067         },
1068 };
1069 
1070 static kiconv_module_info_t kiconv_tc_info = {
1071         "kiconv_tc",            /* module name */
1072         sizeof (kiconv_tc_ops_tbl) / sizeof (kiconv_tc_ops_tbl[0]),
1073         kiconv_tc_ops_tbl,
1074         0,
1075         NULL,
1076         NULL,
1077         0
1078 };
1079 
1080 static struct modlkiconv modlkiconv_tc = {
1081         &mod_kiconvops,
1082         "kiconv Traditional Chinese module 1.0",
1083         &kiconv_tc_info
1084 };
1085 
1086 static struct modlinkage modlinkage = {
1087         MODREV_1,
1088         { (void *)&modlkiconv_tc, NULL }
1089 };
1090 
1091 int
1092 _init(void)
1093 {
1094         int err;
1095 
1096         err = mod_install(&modlinkage);
1097         if (err)
1098                 cmn_err(CE_WARN, "kiconv_tc: failed to load kernel module");
1099 
1100         return (err);
1101 }
1102 
1103 int
1104 _fini(void)
1105 {
1106         int err;
1107 
1108         /*
1109          * If this module is being used, then, we cannot remove the module.
1110          * The following checking will catch pretty much all usual cases.
1111          *
1112          * Any remaining will be catached by the kiconv_unregister_module()
1113          * during mod_remove() at below.
1114          */
1115         if (kiconv_module_ref_count(KICONV_MODULE_ID_TC))
1116                 return (EBUSY);
1117 
1118         err = mod_remove(&modlinkage);
1119         if (err)
1120                 cmn_err(CE_WARN, "kiconv_tc: failed to remove kernel module");
1121 
1122         return (err);
1123 }
1124 
1125 int
1126 _info(struct modinfo *modinfop)
1127 {
1128         return (mod_info(&modlinkage, modinfop));
1129 }