1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/param.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/systm.h>
  30 #include <sys/debug.h>
  31 #include <sys/kmem.h>
  32 #include <sys/sunddi.h>
  33 #include <sys/byteorder.h>
  34 #include <sys/errno.h>
  35 #include <sys/modctl.h>
  36 #include <sys/kiconv.h>
  37 #include <sys/u8_textprep.h>
  38 #include <sys/kiconv_cck_common.h>
  39 #include <sys/kiconv_sc.h>
  40 #include <sys/kiconv_gb18030_utf8.h>
  41 #include <sys/kiconv_gb2312_utf8.h>
  42 #include <sys/kiconv_utf8_gb18030.h>
  43 #include <sys/kiconv_utf8_gb2312.h>
  44 
  45 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
  46         uchar_t *obtail, size_t *ret_val);
  47 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
  48         size_t *ret_val, boolean_t isgbk4);
  49 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  50         uchar_t *ob, uchar_t *obtail, size_t *ret);
  51 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  52         uchar_t *ob, uchar_t *obtail, size_t *ret);
  53 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  54         uchar_t *ob, uchar_t *obtail, size_t *ret);
  55 
  56 #define KICONV_SC_GB18030               (0x01)
  57 #define KICONV_SC_GBK                   (0x02)
  58 #define KICONV_SC_EUCCN                 (0x03)
  59 #define KICONV_SC_MAX_MAGIC_ID          (0x03)
  60 
  61 static void *
  62 open_fr_gb18030()
  63 {
  64         return ((void *)KICONV_SC_GB18030);
  65 }
  66 
  67 static void *
  68 open_fr_gbk()
  69 {
  70         return ((void *)KICONV_SC_GBK);
  71 }
  72 
  73 static void *
  74 open_fr_euccn()
  75 {
  76         return ((void *)KICONV_SC_EUCCN);
  77 }
  78 
  79 static int
  80 close_fr_sc(void *s)
  81 {
  82         if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
  83                 return (EBADF);
  84 
  85         return (0);
  86 }
  87 
  88 /*
  89  * Encoding convertor from UTF-8 to GB18030.
  90  */
  91 size_t
  92 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
  93         char **outbuf, size_t *outbytesleft, int *errno)
  94 {
  95 
  96         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
  97             outbytesleft, errno, utf8_to_gb18030);
  98 }
  99 
 100 /*
 101  * String based encoding convertor from UTF-8 to GB18030.
 102  */
 103 size_t
 104 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
 105         size_t *outlen, int flag, int *errno)
 106 {
 107         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 108             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
 109 }
 110 
 111 /*
 112  * Encoding convertor from GB18030 to UTF-8.
 113  */
 114 size_t
 115 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
 116         char **outbuf, size_t *outbytesleft, int *errno)
 117 {
 118         uchar_t         *ib;
 119         uchar_t         *ob;
 120         uchar_t         *ibtail;
 121         uchar_t         *obtail;
 122         size_t          ret_val;
 123         int8_t          sz;
 124         uint32_t        gb_val;
 125         boolean_t       isgbk4;
 126 
 127         /* Check on the kiconv code conversion descriptor. */
 128         if (kcd == NULL || kcd == (void *)-1) {
 129                 *errno = EBADF;
 130                 return ((size_t)-1);
 131         }
 132 
 133         /* If this is a state reset request, process and return. */
 134         if (inbuf == NULL || *inbuf == NULL) {
 135                 return (0);
 136         }
 137 
 138         ret_val = 0;
 139         ib = (uchar_t *)*inbuf;
 140         ob = (uchar_t *)*outbuf;
 141         ibtail = ib + *inbytesleft;
 142         obtail = ob + *outbytesleft;
 143 
 144         while (ib < ibtail) {
 145                 if (KICONV_IS_ASCII(*ib)) {
 146                         if (ob >= obtail) {
 147                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 148                         }
 149 
 150                         *ob++ = *ib++;
 151                         continue;
 152                 }
 153 
 154                 /*
 155                  * Issue EILSEQ error if the first byte is not a
 156                  * valid GB18030 leading byte.
 157                  */
 158                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 159                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 160                 }
 161 
 162                 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
 163                     KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
 164 
 165                 if (isgbk4) {
 166                         if (ibtail - ib < 4) {
 167                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 168                         }
 169 
 170                         if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
 171                             KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
 172                             KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
 173                                 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 174                         }
 175 
 176                         gb_val = (uint32_t)(*ib) << 24 |
 177                             (uint32_t)(*(ib + 1)) << 16 |
 178                             (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
 179                 } else {
 180                         if (ibtail - ib < 2) {
 181                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 182                         }
 183 
 184                         if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 185                                 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 186                         }
 187 
 188                         gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 189                 }
 190 
 191                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
 192                 if (sz < 0) {
 193                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 194                 }
 195 
 196                 ib += isgbk4 ? 4 : 2;
 197                 ob += sz;
 198         }
 199 
 200         *inbuf = (char *)ib;
 201         *inbytesleft = ibtail - ib;
 202         *outbuf = (char *)ob;
 203         *outbytesleft = obtail - ob;
 204 
 205         return (ret_val);
 206 }
 207 
 208 /*
 209  * String based encoding convertor from GB18030 to UTF-8.
 210  */
 211 size_t
 212 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
 213         size_t *outlen, int flag, int *errno)
 214 {
 215         uchar_t         *ib;
 216         uchar_t         *ob;
 217         uchar_t         *ibtail;
 218         uchar_t         *obtail;
 219         uchar_t         *oldib;
 220         size_t          ret_val;
 221         int8_t          sz;
 222         uint32_t        gb_val;
 223         boolean_t       isgbk4;
 224         boolean_t       do_not_ignore_null;
 225 
 226         ret_val = 0;
 227         ib = (uchar_t *)inarray;
 228         ob = (uchar_t *)outarray;
 229         ibtail = ib + *inlen;
 230         obtail = ob + *outlen;
 231         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 232 
 233         while (ib < ibtail) {
 234                 if (*ib == '\0' && do_not_ignore_null)
 235                         break;
 236 
 237                 if (KICONV_IS_ASCII(*ib)) {
 238                         if (ob >= obtail) {
 239                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 240                         }
 241 
 242                         *ob++ = *ib++;
 243                         continue;
 244                 }
 245 
 246                 oldib = ib;
 247 
 248                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 249                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 250                 }
 251 
 252                 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
 253                     KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
 254 
 255                 if (isgbk4) {
 256                         if (ibtail - ib < 4) {
 257                                 if (flag & KICONV_REPLACE_INVALID) {
 258                                         ib = ibtail;
 259                                         goto REPLACE_INVALID;
 260                                 }
 261 
 262                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 263                         }
 264 
 265                         if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
 266                             KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
 267                             KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
 268                                 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
 269                         }
 270 
 271                         gb_val = (uint32_t)(*ib) << 24 |
 272                             (uint32_t)(*(ib + 1)) << 16 |
 273                             (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
 274                 } else {
 275                         if (ibtail - ib < 2) {
 276                                 if (flag & KICONV_REPLACE_INVALID) {
 277                                         ib = ibtail;
 278                                         goto REPLACE_INVALID;
 279                                 }
 280 
 281                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 282                         }
 283 
 284                         if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 285                                 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 286                         }
 287 
 288                         gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 289                 }
 290 
 291                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
 292                 if (sz < 0) {
 293                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 294                 }
 295 
 296                 ib += isgbk4 ? 4 : 2;
 297                 ob += sz;
 298                 continue;
 299 
 300 REPLACE_INVALID:
 301                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 302                         ib = oldib;
 303                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 304                 }
 305 
 306                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 307                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 308                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 309                 ret_val++;
 310         }
 311 
 312         *inlen = ibtail - ib;
 313         *outlen = obtail - ob;
 314 
 315         return (ret_val);
 316 }
 317 
 318 /*
 319  * Encoding convertor from UTF-8 to GBK.
 320  */
 321 size_t
 322 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
 323         char **outbuf, size_t *outbytesleft, int *errno)
 324 {
 325 
 326         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 327             outbytesleft, errno, utf8_to_gbk);
 328 }
 329 
 330 /*
 331  * String based encoding convertor from UTF-8 to GBK.
 332  */
 333 size_t
 334 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
 335         size_t *outlen, int flag, int *errno)
 336 {
 337         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 338             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
 339 }
 340 
 341 /*
 342  * Encoding convertor from GBK to UTF-8.
 343  */
 344 size_t
 345 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
 346         char **outbuf, size_t *outbytesleft, int *errno)
 347 {
 348         uchar_t         *ib;
 349         uchar_t         *ob;
 350         uchar_t         *ibtail;
 351         uchar_t         *obtail;
 352         size_t          ret_val;
 353         int8_t          sz;
 354         uint32_t        gb_val;
 355 
 356         /* Check on the kiconv code conversion descriptor. */
 357         if (kcd == NULL || kcd == (void *)-1) {
 358                 *errno = EBADF;
 359                 return ((size_t)-1);
 360         }
 361 
 362         /* If this is a state reset request, process and return. */
 363         if (inbuf == NULL || *inbuf == NULL) {
 364                 return (0);
 365         }
 366 
 367         ret_val = 0;
 368         ib = (uchar_t *)*inbuf;
 369         ob = (uchar_t *)*outbuf;
 370         ibtail = ib + *inbytesleft;
 371         obtail = ob + *outbytesleft;
 372 
 373         while (ib < ibtail) {
 374                 if (KICONV_IS_ASCII(*ib)) {
 375                         if (ob >= obtail) {
 376                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 377                         }
 378 
 379                         *ob++ = *ib++;
 380                         continue;
 381                 }
 382 
 383                 /*
 384                  * Issue EILSEQ error if the first byte is not a
 385                  * valid GBK leading byte.
 386                  */
 387                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 388                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 389                 }
 390 
 391                 /*
 392                  * Issue EINVAL error if input buffer has an incomplete
 393                  * character at the end of the buffer.
 394                  */
 395                 if (ibtail - ib < 2) {
 396                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 397                 }
 398 
 399                 /*
 400                  * Issue EILSEQ error if the remaining byte is not
 401                  * a valid GBK byte.
 402                  */
 403                 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 404                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 405                 }
 406 
 407                 /* Now we have a valid GBK character. */
 408                 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 409                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
 410 
 411                 if (sz < 0) {
 412                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 413                 }
 414 
 415                 ib += 2;
 416                 ob += sz;
 417         }
 418 
 419         *inbuf = (char *)ib;
 420         *inbytesleft = ibtail - ib;
 421         *outbuf = (char *)ob;
 422         *outbytesleft = obtail - ob;
 423 
 424         return (ret_val);
 425 }
 426 
 427 /*
 428  * String based encoding convertor from GBK to UTF-8.
 429  */
 430 size_t
 431 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
 432         size_t *outlen, int flag, int *errno)
 433 {
 434         uchar_t         *ib;
 435         uchar_t         *ob;
 436         uchar_t         *ibtail;
 437         uchar_t         *obtail;
 438         uchar_t         *oldib;
 439         size_t          ret_val;
 440         int8_t          sz;
 441         uint32_t        gb_val;
 442         boolean_t       do_not_ignore_null;
 443 
 444         ret_val = 0;
 445         ib = (uchar_t *)inarray;
 446         ob = (uchar_t *)outarray;
 447         ibtail = ib + *inlen;
 448         obtail = ob + *outlen;
 449         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 450 
 451         while (ib < ibtail) {
 452                 if (*ib == '\0' && do_not_ignore_null)
 453                         break;
 454 
 455                 if (KICONV_IS_ASCII(*ib)) {
 456                         if (ob >= obtail) {
 457                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 458                         }
 459 
 460                         *ob++ = *ib++;
 461                         continue;
 462                 }
 463 
 464                 oldib = ib;
 465 
 466                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 467                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 468                 }
 469 
 470                 if (ibtail - ib < 2) {
 471                         KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
 472                 }
 473 
 474                 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 475                         KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 476                 }
 477 
 478                 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
 479                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
 480 
 481                 if (sz < 0) {
 482                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 483                 }
 484 
 485                 ib += 2;
 486                 ob += sz;
 487                 continue;
 488 
 489 REPLACE_INVALID:
 490                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 491                         ib = oldib;
 492                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 493                 }
 494 
 495                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 496                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 497                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 498                 ret_val++;
 499         }
 500 
 501         *inlen = ibtail - ib;
 502         *outlen = obtail - ob;
 503 
 504         return (ret_val);
 505 }
 506 
 507 /*
 508  * Encoding convertor from UTF-8 to EUC-CN.
 509  */
 510 size_t
 511 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
 512         char **outbuf, size_t *outbytesleft, int *errno)
 513 {
 514         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 515             outbytesleft, errno, utf8_to_gb2312);
 516 }
 517 
 518 /*
 519  * String based encoding convertor from UTF-8 to EUC-CN.
 520  */
 521 size_t
 522 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
 523         size_t *outlen, int flag, int *errno)
 524 {
 525         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 526             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
 527 }
 528 
 529 /*
 530  * Encoding converto from EUC-CN to UTF-8 code.
 531  */
 532 size_t
 533 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
 534         char **outbuf, size_t *outbytesleft, int *errno)
 535 {
 536         uchar_t         *ib;
 537         uchar_t         *ob;
 538         uchar_t         *ibtail;
 539         uchar_t         *obtail;
 540         size_t          ret_val;
 541         int8_t          sz;
 542 
 543         /* Check on the kiconv code conversion descriptor. */
 544         if (kcd == NULL || kcd == (void *)-1) {
 545                 *errno = EBADF;
 546                 return ((size_t)-1);
 547         }
 548 
 549         /* If this is a state reset request, process and return. */
 550         if (inbuf == NULL || *inbuf == NULL) {
 551                 return (0);
 552         }
 553 
 554         ret_val = 0;
 555         ib = (uchar_t *)*inbuf;
 556         ob = (uchar_t *)*outbuf;
 557         ibtail = ib + *inbytesleft;
 558         obtail = ob + *outbytesleft;
 559 
 560         while (ib < ibtail) {
 561                 if (KICONV_IS_ASCII(*ib)) {
 562                         if (ob >= obtail) {
 563                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 564                         }
 565 
 566                         *ob++ = *ib++;
 567                         continue;
 568                 }
 569 
 570                 /*
 571                  * Issue EILSEQ error if the first byte is not a
 572                  * valid GB2312 leading byte.
 573                  */
 574                 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
 575                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 576                 }
 577 
 578                 /*
 579                  * Issue EINVAL error if input buffer has an incomplete
 580                  * character at the end of the buffer.
 581                  */
 582                 if (ibtail - ib < 2) {
 583                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 584                 }
 585 
 586                 /*
 587                  * Issue EILSEQ error if the remaining byte is not
 588                  * a valid GB2312 byte.
 589                  */
 590                 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
 591                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 592                 }
 593 
 594                 /* Now we have a valid GB2312 character */
 595                 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
 596                 if (sz < 0) {
 597                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 598                 }
 599 
 600                 ib += 2;
 601                 ob += sz;
 602         }
 603 
 604         *inbuf = (char *)ib;
 605         *inbytesleft = ibtail - ib;
 606         *outbuf = (char *)ob;
 607         *outbytesleft = obtail - ob;
 608 
 609         return (ret_val);
 610 }
 611 
 612 /*
 613  * String based encoding convertor from EUC-CN to UTF-8.
 614  */
 615 size_t
 616 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
 617     size_t *outlen, int flag, int *errno)
 618 {
 619         uchar_t         *ib;
 620         uchar_t         *ob;
 621         uchar_t         *ibtail;
 622         uchar_t         *obtail;
 623         uchar_t         *oldib;
 624         size_t          ret_val;
 625         int8_t          sz;
 626         boolean_t       do_not_ignore_null;
 627 
 628         ret_val = 0;
 629         ib = (uchar_t *)inarray;
 630         ob = (uchar_t *)outarray;
 631         ibtail = ib + *inlen;
 632         obtail = ob + *outlen;
 633         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 634 
 635         while (ib < ibtail) {
 636                 if (*ib == '\0' && do_not_ignore_null)
 637                         break;
 638 
 639                 if (KICONV_IS_ASCII(*ib)) {
 640                         if (ob >= obtail) {
 641                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 642                         }
 643 
 644                         *ob++ = *ib++;
 645                         continue;
 646                 }
 647 
 648                 oldib = ib;
 649 
 650                 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
 651                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 652                 }
 653 
 654                 if (ibtail - ib < 2) {
 655                         KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
 656                 }
 657 
 658                 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
 659                         KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 660                 }
 661 
 662                 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
 663                 if (sz < 0) {
 664                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 665                 }
 666 
 667                 ib += 2;
 668                 ob += sz;
 669                 continue;
 670 
 671 REPLACE_INVALID:
 672                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 673                         ib = oldib;
 674                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 675                 }
 676 
 677                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 678                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 679                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 680                 ret_val++;
 681         }
 682 
 683         *inlen = ibtail - ib;
 684         *outlen = obtail - ob;
 685 
 686         return (ret_val);
 687 }
 688 
 689 /*
 690  * Convert single GB2312 character to UTF-8.
 691  * Return: > 0  - Converted successfully
 692  *         = -1 - E2BIG
 693  */
 694 static int8_t
 695 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
 696         size_t *ret_val)
 697 {
 698         size_t  index;
 699         int8_t  sz;
 700         uchar_t *u8;
 701 
 702         /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
 703         index = b1 * 94 + b2 - 0x3BBF;
 704 
 705         if (index >= KICONV_GB2312_UTF8_MAX)
 706                 index = KICONV_GB2312_UTF8_MAX - 1;     /* Map to 0xEFBFBD */
 707 
 708         u8 = kiconv_gb2312_utf8[index];
 709         sz = u8_number_of_bytes[u8[0]];
 710 
 711         if (obtail - ob < sz) {
 712                 *ret_val = (size_t)-1;
 713                 return (-1);
 714         }
 715 
 716         for (index = 0; index < sz; index++)
 717                 *ob++ = u8[index];
 718 
 719         /*
 720          * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
 721          * elements, so need to ckeck more.
 722          */
 723         if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
 724             u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
 725             u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
 726             u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
 727                 (*ret_val)++;
 728 
 729         return (sz);
 730 }
 731 
 732 /*
 733  * Convert single GB18030 or GBK character to UTF-8.
 734  * Return: > 0  - Converted successfully
 735  *         = -1 - E2BIG
 736  */
 737 static int8_t
 738 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
 739         boolean_t isgbk4)
 740 {
 741         size_t  index;
 742         int8_t  sz;
 743         uchar_t u8array[4];
 744         uchar_t *u8;
 745 
 746         if (isgbk4) {
 747                 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
 748                         uint32_t        u32;
 749 
 750                         /*
 751                          * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
 752                          *   (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
 753                          *   (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
 754                          *   (gbk_val & 0xFF - 0x30)+
 755                          *   KICONV_SC_PLANE1_UCS4_START;
 756                          */
 757                         u32 = (gbk_val >> 24) * 12600 +
 758                             ((gbk_val & 0xFF0000) >> 16) * 1260 +
 759                             ((gbk_val & 0xFF00) >> 8) * 10 +
 760                             (gbk_val & 0xFF) - 0x1BA0FA;
 761                         u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
 762                         u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
 763                         u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
 764                         u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
 765                         u8 = u8array;
 766                         index = 1;
 767                 } else {
 768                         index = kiconv_binsearch(gbk_val,
 769                             kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
 770                         u8 = kiconv_gbk4_utf8[index].u8;
 771                 }
 772         } else {
 773                 index = kiconv_binsearch(gbk_val,
 774                     kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
 775                 u8 = kiconv_gbk_utf8[index].u8;
 776         }
 777 
 778         sz = u8_number_of_bytes[u8[0]];
 779         if (obtail - ob < sz) {
 780                 *ret_val = (size_t)-1;
 781                 return (-1);
 782         }
 783 
 784         if (index == 0)
 785                 (*ret_val)++;   /* Non-identical conversion */
 786 
 787         for (index = 0; index < sz; index++)
 788                 *ob++ = u8[index];
 789 
 790         return (sz);
 791 }
 792 
 793 /*
 794  * Convert single UTF-8 character to GB18030.
 795  * Return: > 0  - Converted successfully
 796  *         = -1 - E2BIG
 797  */
 798 /* ARGSUSED */
 799 static int8_t
 800 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 801         uchar_t *ob, uchar_t *obtail, size_t *ret)
 802 {
 803         size_t          index;
 804         int8_t          gbklen;
 805         uint32_t        gbkcode;
 806 
 807         if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
 808                 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
 809                 uint32_t        u32;
 810 
 811                 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
 812                     ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
 813                     KICONV_SC_PLANE1_UCS4_START;
 814                 gbkcode = ((u32 / 12600 + 0x90) << 24) |
 815                     (((u32 % 12600) / 1260 + 0x30) << 16) |
 816                     (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
 817                 gbklen = 4;
 818                 index = 1;
 819         } else {
 820                 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
 821                     KICONV_UTF8_GB18030_MAX);
 822                 gbkcode = kiconv_utf8_gb18030[index].value;
 823                 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
 824         }
 825 
 826         if (obtail - ob < gbklen) {
 827                 *ret = (size_t)-1;
 828                 return (-1);
 829         }
 830 
 831         if (index == 0)
 832                 (*ret)++;               /* Non-identical conversion */
 833 
 834         if (gbklen == 2) {
 835                 *ob++ = (uchar_t)(gbkcode >> 8);
 836         } else if (gbklen == 4) {
 837                 *ob++ = (uchar_t)(gbkcode >> 24);
 838                 *ob++ = (uchar_t)(gbkcode >> 16);
 839                 *ob++ = (uchar_t)(gbkcode >> 8);
 840         }
 841         *ob = (uchar_t)(gbkcode & 0xFF);
 842 
 843         return (gbklen);
 844 }
 845 
 846 /*
 847  * Convert single UTF-8 character to GBK.
 848  * Return: > 0  - Converted successfully
 849  *         = -1 - E2BIG
 850  */
 851 /* ARGSUSED */
 852 static int8_t
 853 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 854         uchar_t *ob, uchar_t *obtail, size_t *ret)
 855 {
 856         size_t          index;
 857         int8_t          gbklen;
 858         uint32_t        gbkcode;
 859 
 860         index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
 861             KICONV_UTF8_GB18030_MAX);
 862         gbkcode = kiconv_utf8_gb18030[index].value;
 863         KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
 864 
 865         /* GBK and GB18030 share the same table, so check the length. */
 866         if (gbklen == 4) {
 867                 index = 0;
 868                 gbkcode = kiconv_utf8_gb18030[index].value;
 869                 gbklen = 1;
 870         }
 871 
 872         if (obtail - ob < gbklen) {
 873                 *ret = (size_t)-1;
 874                 return (-1);
 875         }
 876 
 877         if (index == 0)
 878                 (*ret)++;               /* Non-identical conversion */
 879 
 880         if (gbklen > 1)
 881                 *ob++ = (uchar_t)(gbkcode >> 8);
 882         *ob = (uchar_t)(gbkcode & 0xFF);
 883 
 884         return (gbklen);
 885 }
 886 
 887 /*
 888  * Convert single UTF-8 character to GB2312.
 889  * Return: > 0  - Converted successfully
 890  *         = -1 - E2BIG
 891  */
 892 /* ARGSUSED */
 893 static int8_t
 894 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
 895         uchar_t *ob, uchar_t *obtail, size_t *ret)
 896 {
 897         size_t          index;
 898         int8_t          gblen;
 899         uint32_t        gbcode;
 900 
 901         index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
 902             KICONV_UTF8_GB2312_MAX);
 903         gbcode = kiconv_utf8_gb2312[index].value;
 904         gblen = (gbcode <= 0xFF) ? 1 : 2;
 905 
 906         if (obtail - ob < gblen) {
 907                 *ret = (size_t)-1;
 908                 return (-1);
 909         }
 910 
 911         if (index == 0)
 912                 (*ret)++;
 913 
 914         if (gblen > 1)
 915                 *ob++ = (uchar_t)(gbcode >> 8);
 916         *ob = (uchar_t)(gbcode & 0xFF);
 917 
 918         return (gblen);
 919 }
 920 
 921 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
 922         {
 923                 "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
 924                 kiconv_close_to_cck, kiconvstr_to_gb18030
 925         },
 926         {
 927                 "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
 928                 close_fr_sc, kiconvstr_fr_gb18030
 929         },
 930         {
 931                 "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
 932                 kiconv_close_to_cck, kiconvstr_to_gbk
 933         },
 934         {
 935                 "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
 936                 close_fr_sc, kiconvstr_fr_gbk
 937         },
 938         {
 939                 "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
 940                 kiconv_close_to_cck, kiconvstr_to_euccn
 941         },
 942         {
 943                 "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
 944                 close_fr_sc, kiconvstr_fr_euccn
 945         },
 946 };
 947 
 948 static kiconv_module_info_t kiconv_sc_info = {
 949         "kiconv_sc",            /* module name */
 950         sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
 951         kiconv_sc_ops_tbl,
 952         0,
 953         NULL,
 954         NULL,
 955         0
 956 };
 957 
 958 static struct modlkiconv modlkiconv_sc = {
 959         &mod_kiconvops,
 960         "kiconv Simplified Chinese module 1.0",
 961         &kiconv_sc_info
 962 };
 963 
 964 static struct modlinkage modlinkage = {
 965         MODREV_1,
 966         { (void *)&modlkiconv_sc, NULL }
 967 };
 968 
 969 int
 970 _init(void)
 971 {
 972         int err;
 973 
 974         err = mod_install(&modlinkage);
 975         if (err)
 976                 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
 977 
 978         return (err);
 979 }
 980 
 981 int
 982 _fini(void)
 983 {
 984         int err;
 985 
 986         /*
 987          * If this module is being used, then, we cannot remove the module.
 988          * The following checking will catch pretty much all usual cases.
 989          *
 990          * Any remaining will be catached by the kiconv_unregister_module()
 991          * during mod_remove() at below.
 992          */
 993         if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
 994                 return (EBUSY);
 995 
 996         err = mod_remove(&modlinkage);
 997         if (err)
 998                 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
 999 
1000         return (err);
1001 }
1002 
1003 int
1004 _info(struct modinfo *modinfop)
1005 {
1006         return (mod_info(&modlinkage, modinfop));
1007 }