1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/sysmacros.h>
  31 #include <sys/systm.h>
  32 #include <sys/debug.h>
  33 #include <sys/kmem.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/byteorder.h>
  36 #include <sys/errno.h>
  37 #include <sys/modctl.h>
  38 #include <sys/kiconv.h>
  39 #include <sys/u8_textprep.h>
  40 #include <sys/kiconv_cck_common.h>
  41 #include <sys/kiconv_sc.h>
  42 #include <sys/kiconv_gb18030_utf8.h>
  43 #include <sys/kiconv_gb2312_utf8.h>
  44 #include <sys/kiconv_utf8_gb18030.h>
  45 #include <sys/kiconv_utf8_gb2312.h>
  46 
  47 static int8_t gb2312_to_utf8(uchar_t byte1, uchar_t byte2, uchar_t *ob,
  48         uchar_t *obtail, size_t *ret_val);
  49 static int8_t gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail,
  50         size_t *ret_val, boolean_t isgbk4);
  51 static int8_t utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  52         uchar_t *ob, uchar_t *obtail, size_t *ret);
  53 static int8_t utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  54         uchar_t *ob, uchar_t *obtail, size_t *ret);
  55 static int8_t utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
  56         uchar_t *ob, uchar_t *obtail, size_t *ret);
  57 
  58 #define KICONV_SC_GB18030               (0x01)
  59 #define KICONV_SC_GBK                   (0x02)
  60 #define KICONV_SC_EUCCN                 (0x03)
  61 #define KICONV_SC_MAX_MAGIC_ID          (0x03)
  62 
  63 static void *
  64 open_fr_gb18030()
  65 {
  66         return ((void *)KICONV_SC_GB18030);
  67 }
  68 
  69 static void *
  70 open_fr_gbk()
  71 {
  72         return ((void *)KICONV_SC_GBK);
  73 }
  74 
  75 static void *
  76 open_fr_euccn()
  77 {
  78         return ((void *)KICONV_SC_EUCCN);
  79 }
  80 
  81 static int
  82 close_fr_sc(void *s)
  83 {
  84         if ((uintptr_t)s > KICONV_SC_MAX_MAGIC_ID)
  85                 return (EBADF);
  86 
  87         return (0);
  88 }
  89 
  90 /*
  91  * Encoding convertor from UTF-8 to GB18030.
  92  */
  93 size_t
  94 kiconv_to_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
  95         char **outbuf, size_t *outbytesleft, int *errno)
  96 {
  97 
  98         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
  99             outbytesleft, errno, utf8_to_gb18030);
 100 }
 101 
 102 /*
 103  * String based encoding convertor from UTF-8 to GB18030.
 104  */
 105 size_t
 106 kiconvstr_to_gb18030(char *inarray, size_t *inlen, char *outarray,
 107         size_t *outlen, int flag, int *errno)
 108 {
 109         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 110             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb18030);
 111 }
 112 
 113 /*
 114  * Encoding convertor from GB18030 to UTF-8.
 115  */
 116 size_t
 117 kiconv_fr_gb18030(void *kcd, char **inbuf, size_t *inbytesleft,
 118         char **outbuf, size_t *outbytesleft, int *errno)
 119 {
 120         uchar_t         *ib;
 121         uchar_t         *ob;
 122         uchar_t         *ibtail;
 123         uchar_t         *obtail;
 124         size_t          ret_val;
 125         int8_t          sz;
 126         uint32_t        gb_val;
 127         boolean_t       isgbk4;
 128 
 129         /* Check on the kiconv code conversion descriptor. */
 130         if (kcd == NULL || kcd == (void *)-1) {
 131                 *errno = EBADF;
 132                 return ((size_t)-1);
 133         }
 134 
 135         /* If this is a state reset request, process and return. */
 136         if (inbuf == NULL || *inbuf == NULL) {
 137                 return (0);
 138         }
 139 
 140         ret_val = 0;
 141         ib = (uchar_t *)*inbuf;
 142         ob = (uchar_t *)*outbuf;
 143         ibtail = ib + *inbytesleft;
 144         obtail = ob + *outbytesleft;
 145 
 146         while (ib < ibtail) {
 147                 if (KICONV_IS_ASCII(*ib)) {
 148                         if (ob >= obtail) {
 149                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 150                         }
 151 
 152                         *ob++ = *ib++;
 153                         continue;
 154                 }
 155 
 156                 /*
 157                  * Issue EILSEQ error if the first byte is not a
 158                  * valid GB18030 leading byte.
 159                  */
 160                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 161                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 162                 }
 163 
 164                 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
 165                     KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
 166 
 167                 if (isgbk4) {
 168                         if (ibtail - ib < 4) {
 169                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 170                         }
 171 
 172                         if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
 173                             KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
 174                             KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
 175                                 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 176                         }
 177 
 178                         gb_val = (uint32_t)(*ib) << 24 |
 179                             (uint32_t)(*(ib + 1)) << 16 |
 180                             (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
 181                 } else {
 182                         if (ibtail - ib < 2) {
 183                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 184                         }
 185 
 186                         if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 187                                 KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 188                         }
 189 
 190                         gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 191                 }
 192 
 193                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
 194                 if (sz < 0) {
 195                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 196                 }
 197 
 198                 ib += isgbk4 ? 4 : 2;
 199                 ob += sz;
 200         }
 201 
 202         *inbuf = (char *)ib;
 203         *inbytesleft = ibtail - ib;
 204         *outbuf = (char *)ob;
 205         *outbytesleft = obtail - ob;
 206 
 207         return (ret_val);
 208 }
 209 
 210 /*
 211  * String based encoding convertor from GB18030 to UTF-8.
 212  */
 213 size_t
 214 kiconvstr_fr_gb18030(char *inarray, size_t *inlen, char *outarray,
 215         size_t *outlen, int flag, int *errno)
 216 {
 217         uchar_t         *ib;
 218         uchar_t         *ob;
 219         uchar_t         *ibtail;
 220         uchar_t         *obtail;
 221         uchar_t         *oldib;
 222         size_t          ret_val;
 223         int8_t          sz;
 224         uint32_t        gb_val;
 225         boolean_t       isgbk4;
 226         boolean_t       do_not_ignore_null;
 227 
 228         ret_val = 0;
 229         ib = (uchar_t *)inarray;
 230         ob = (uchar_t *)outarray;
 231         ibtail = ib + *inlen;
 232         obtail = ob + *outlen;
 233         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 234 
 235         while (ib < ibtail) {
 236                 if (*ib == '\0' && do_not_ignore_null)
 237                         break;
 238 
 239                 if (KICONV_IS_ASCII(*ib)) {
 240                         if (ob >= obtail) {
 241                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 242                         }
 243 
 244                         *ob++ = *ib++;
 245                         continue;
 246                 }
 247 
 248                 oldib = ib;
 249 
 250                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 251                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 252                 }
 253 
 254                 isgbk4 = (ibtail - ib < 2) ? B_FALSE :
 255                     KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1));
 256 
 257                 if (isgbk4) {
 258                         if (ibtail - ib < 4) {
 259                                 if (flag & KICONV_REPLACE_INVALID) {
 260                                         ib = ibtail;
 261                                         goto REPLACE_INVALID;
 262                                 }
 263 
 264                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 265                         }
 266 
 267                         if (! (KICONV_SC_IS_GB18030_2nd_BYTE(*(ib + 1)) &&
 268                             KICONV_SC_IS_GB18030_3rd_BYTE(*(ib + 2)) &&
 269                             KICONV_SC_IS_GB18030_4th_BYTE(*(ib + 3)))) {
 270                                 KICONV_SET_ERRNO_WITH_FLAG(4, EILSEQ);
 271                         }
 272 
 273                         gb_val = (uint32_t)(*ib) << 24 |
 274                             (uint32_t)(*(ib + 1)) << 16 |
 275                             (uint32_t)(*(ib + 2)) << 8 | *(ib + 3);
 276                 } else {
 277                         if (ibtail - ib < 2) {
 278                                 if (flag & KICONV_REPLACE_INVALID) {
 279                                         ib = ibtail;
 280                                         goto REPLACE_INVALID;
 281                                 }
 282 
 283                                 KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 284                         }
 285 
 286                         if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 287                                 KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 288                         }
 289 
 290                         gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 291                 }
 292 
 293                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, isgbk4);
 294                 if (sz < 0) {
 295                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 296                 }
 297 
 298                 ib += isgbk4 ? 4 : 2;
 299                 ob += sz;
 300                 continue;
 301 
 302 REPLACE_INVALID:
 303                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 304                         ib = oldib;
 305                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 306                 }
 307 
 308                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 309                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 310                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 311                 ret_val++;
 312         }
 313 
 314         *inlen = ibtail - ib;
 315         *outlen = obtail - ob;
 316 
 317         return (ret_val);
 318 }
 319 
 320 /*
 321  * Encoding convertor from UTF-8 to GBK.
 322  */
 323 size_t
 324 kiconv_to_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
 325         char **outbuf, size_t *outbytesleft, int *errno)
 326 {
 327 
 328         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 329             outbytesleft, errno, utf8_to_gbk);
 330 }
 331 
 332 /*
 333  * String based encoding convertor from UTF-8 to GBK.
 334  */
 335 size_t
 336 kiconvstr_to_gbk(char *inarray, size_t *inlen, char *outarray,
 337         size_t *outlen, int flag, int *errno)
 338 {
 339         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 340             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gbk);
 341 }
 342 
 343 /*
 344  * Encoding convertor from GBK to UTF-8.
 345  */
 346 size_t
 347 kiconv_fr_gbk(void *kcd, char **inbuf, size_t *inbytesleft,
 348         char **outbuf, size_t *outbytesleft, int *errno)
 349 {
 350         uchar_t         *ib;
 351         uchar_t         *ob;
 352         uchar_t         *ibtail;
 353         uchar_t         *obtail;
 354         size_t          ret_val;
 355         int8_t          sz;
 356         uint32_t        gb_val;
 357 
 358         /* Check on the kiconv code conversion descriptor. */
 359         if (kcd == NULL || kcd == (void *)-1) {
 360                 *errno = EBADF;
 361                 return ((size_t)-1);
 362         }
 363 
 364         /* If this is a state reset request, process and return. */
 365         if (inbuf == NULL || *inbuf == NULL) {
 366                 return (0);
 367         }
 368 
 369         ret_val = 0;
 370         ib = (uchar_t *)*inbuf;
 371         ob = (uchar_t *)*outbuf;
 372         ibtail = ib + *inbytesleft;
 373         obtail = ob + *outbytesleft;
 374 
 375         while (ib < ibtail) {
 376                 if (KICONV_IS_ASCII(*ib)) {
 377                         if (ob >= obtail) {
 378                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 379                         }
 380 
 381                         *ob++ = *ib++;
 382                         continue;
 383                 }
 384 
 385                 /*
 386                  * Issue EILSEQ error if the first byte is not a
 387                  * valid GBK leading byte.
 388                  */
 389                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 390                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 391                 }
 392 
 393                 /*
 394                  * Issue EINVAL error if input buffer has an incomplete
 395                  * character at the end of the buffer.
 396                  */
 397                 if (ibtail - ib < 2) {
 398                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 399                 }
 400 
 401                 /*
 402                  * Issue EILSEQ error if the remaining byte is not
 403                  * a valid GBK byte.
 404                  */
 405                 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 406                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 407                 }
 408 
 409                 /* Now we have a valid GBK character. */
 410                 gb_val = (uint32_t)(*ib) << 8 | *(ib + 1);
 411                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
 412 
 413                 if (sz < 0) {
 414                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 415                 }
 416 
 417                 ib += 2;
 418                 ob += sz;
 419         }
 420 
 421         *inbuf = (char *)ib;
 422         *inbytesleft = ibtail - ib;
 423         *outbuf = (char *)ob;
 424         *outbytesleft = obtail - ob;
 425 
 426         return (ret_val);
 427 }
 428 
 429 /*
 430  * String based encoding convertor from GBK to UTF-8.
 431  */
 432 size_t
 433 kiconvstr_fr_gbk(char *inarray, size_t *inlen, char *outarray,
 434         size_t *outlen, int flag, int *errno)
 435 {
 436         uchar_t         *ib;
 437         uchar_t         *ob;
 438         uchar_t         *ibtail;
 439         uchar_t         *obtail;
 440         uchar_t         *oldib;
 441         size_t          ret_val;
 442         int8_t          sz;
 443         uint32_t        gb_val;
 444         boolean_t       do_not_ignore_null;
 445 
 446         ret_val = 0;
 447         ib = (uchar_t *)inarray;
 448         ob = (uchar_t *)outarray;
 449         ibtail = ib + *inlen;
 450         obtail = ob + *outlen;
 451         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 452 
 453         while (ib < ibtail) {
 454                 if (*ib == '\0' && do_not_ignore_null)
 455                         break;
 456 
 457                 if (KICONV_IS_ASCII(*ib)) {
 458                         if (ob >= obtail) {
 459                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 460                         }
 461 
 462                         *ob++ = *ib++;
 463                         continue;
 464                 }
 465 
 466                 oldib = ib;
 467 
 468                 if (! KICONV_SC_IS_GBK_1st_BYTE(*ib)) {
 469                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 470                 }
 471 
 472                 if (ibtail - ib < 2) {
 473                         KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
 474                 }
 475 
 476                 if (! KICONV_SC_IS_GBK_2nd_BYTE(*(ib + 1))) {
 477                         KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 478                 }
 479 
 480                 gb_val = (uint32_t)(*ib << 8) | *(ib + 1);
 481                 sz = gbk_to_utf8(gb_val, ob, obtail, &ret_val, B_FALSE);
 482 
 483                 if (sz < 0) {
 484                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 485                 }
 486 
 487                 ib += 2;
 488                 ob += sz;
 489                 continue;
 490 
 491 REPLACE_INVALID:
 492                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 493                         ib = oldib;
 494                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 495                 }
 496 
 497                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 498                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 499                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 500                 ret_val++;
 501         }
 502 
 503         *inlen = ibtail - ib;
 504         *outlen = obtail - ob;
 505 
 506         return (ret_val);
 507 }
 508 
 509 /*
 510  * Encoding convertor from UTF-8 to EUC-CN.
 511  */
 512 size_t
 513 kiconv_to_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
 514         char **outbuf, size_t *outbytesleft, int *errno)
 515 {
 516         return kiconv_utf8_to_cck(kcd, inbuf, inbytesleft, outbuf,
 517             outbytesleft, errno, utf8_to_gb2312);
 518 }
 519 
 520 /*
 521  * String based encoding convertor from UTF-8 to EUC-CN.
 522  */
 523 size_t
 524 kiconvstr_to_euccn(char *inarray, size_t *inlen, char *outarray,
 525         size_t *outlen, int flag, int *errno)
 526 {
 527         return kiconvstr_utf8_to_cck((uchar_t *)inarray, inlen,
 528             (uchar_t *)outarray, outlen, flag, errno, utf8_to_gb2312);
 529 }
 530 
 531 /*
 532  * Encoding converto from EUC-CN to UTF-8 code.
 533  */
 534 size_t
 535 kiconv_fr_euccn(void *kcd, char **inbuf, size_t *inbytesleft,
 536         char **outbuf, size_t *outbytesleft, int *errno)
 537 {
 538         uchar_t         *ib;
 539         uchar_t         *ob;
 540         uchar_t         *ibtail;
 541         uchar_t         *obtail;
 542         size_t          ret_val;
 543         int8_t          sz;
 544 
 545         /* Check on the kiconv code conversion descriptor. */
 546         if (kcd == NULL || kcd == (void *)-1) {
 547                 *errno = EBADF;
 548                 return ((size_t)-1);
 549         }
 550 
 551         /* If this is a state reset request, process and return. */
 552         if (inbuf == NULL || *inbuf == NULL) {
 553                 return (0);
 554         }
 555 
 556         ret_val = 0;
 557         ib = (uchar_t *)*inbuf;
 558         ob = (uchar_t *)*outbuf;
 559         ibtail = ib + *inbytesleft;
 560         obtail = ob + *outbytesleft;
 561 
 562         while (ib < ibtail) {
 563                 if (KICONV_IS_ASCII(*ib)) {
 564                         if (ob >= obtail) {
 565                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 566                         }
 567 
 568                         *ob++ = *ib++;
 569                         continue;
 570                 }
 571 
 572                 /*
 573                  * Issue EILSEQ error if the first byte is not a
 574                  * valid GB2312 leading byte.
 575                  */
 576                 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
 577                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 578                 }
 579 
 580                 /*
 581                  * Issue EINVAL error if input buffer has an incomplete
 582                  * character at the end of the buffer.
 583                  */
 584                 if (ibtail - ib < 2) {
 585                         KICONV_SET_ERRNO_AND_BREAK(EINVAL);
 586                 }
 587 
 588                 /*
 589                  * Issue EILSEQ error if the remaining byte is not
 590                  * a valid GB2312 byte.
 591                  */
 592                 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
 593                         KICONV_SET_ERRNO_AND_BREAK(EILSEQ);
 594                 }
 595 
 596                 /* Now we have a valid GB2312 character */
 597                 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
 598                 if (sz < 0) {
 599                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 600                 }
 601 
 602                 ib += 2;
 603                 ob += sz;
 604         }
 605 
 606         *inbuf = (char *)ib;
 607         *inbytesleft = ibtail - ib;
 608         *outbuf = (char *)ob;
 609         *outbytesleft = obtail - ob;
 610 
 611         return (ret_val);
 612 }
 613 
 614 /*
 615  * String based encoding convertor from EUC-CN to UTF-8.
 616  */
 617 size_t
 618 kiconvstr_fr_euccn(char *inarray, size_t *inlen, char *outarray,
 619     size_t *outlen, int flag, int *errno)
 620 {
 621         uchar_t         *ib;
 622         uchar_t         *ob;
 623         uchar_t         *ibtail;
 624         uchar_t         *obtail;
 625         uchar_t         *oldib;
 626         size_t          ret_val;
 627         int8_t          sz;
 628         boolean_t       do_not_ignore_null;
 629 
 630         ret_val = 0;
 631         ib = (uchar_t *)inarray;
 632         ob = (uchar_t *)outarray;
 633         ibtail = ib + *inlen;
 634         obtail = ob + *outlen;
 635         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 636 
 637         while (ib < ibtail) {
 638                 if (*ib == '\0' && do_not_ignore_null)
 639                         break;
 640 
 641                 if (KICONV_IS_ASCII(*ib)) {
 642                         if (ob >= obtail) {
 643                                 KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 644                         }
 645 
 646                         *ob++ = *ib++;
 647                         continue;
 648                 }
 649 
 650                 oldib = ib;
 651 
 652                 if (! KICONV_SC_IS_GB2312_BYTE(*ib)) {
 653                         KICONV_SET_ERRNO_WITH_FLAG(1, EILSEQ);
 654                 }
 655 
 656                 if (ibtail - ib < 2) {
 657                         KICONV_SET_ERRNO_WITH_FLAG(1, EINVAL);
 658                 }
 659 
 660                 if (! KICONV_SC_IS_GB2312_BYTE(*(ib + 1))) {
 661                         KICONV_SET_ERRNO_WITH_FLAG(2, EILSEQ);
 662                 }
 663 
 664                 sz = gb2312_to_utf8(*ib, *(ib + 1), ob, obtail, &ret_val);
 665                 if (sz < 0) {
 666                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 667                 }
 668 
 669                 ib += 2;
 670                 ob += sz;
 671                 continue;
 672 
 673 REPLACE_INVALID:
 674                 if (obtail - ob < KICONV_UTF8_REPLACEMENT_CHAR_LEN) {
 675                         ib = oldib;
 676                         KICONV_SET_ERRNO_AND_BREAK(E2BIG);
 677                 }
 678 
 679                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR1;
 680                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR2;
 681                 *ob++ = KICONV_UTF8_REPLACEMENT_CHAR3;
 682                 ret_val++;
 683         }
 684 
 685         *inlen = ibtail - ib;
 686         *outlen = obtail - ob;
 687 
 688         return (ret_val);
 689 }
 690 
 691 /*
 692  * Convert single GB2312 character to UTF-8.
 693  * Return: > 0  - Converted successfully
 694  *         = -1 - E2BIG
 695  */
 696 static int8_t
 697 gb2312_to_utf8(uchar_t b1, uchar_t b2, uchar_t *ob, uchar_t *obtail,
 698         size_t *ret_val)
 699 {
 700         size_t  index;
 701         int8_t  sz;
 702         uchar_t *u8;
 703 
 704         /* index = (b1 - KICONV_EUC_START) * 94 + b2 - KICONV_EUC_START; */
 705         index = b1 * 94 + b2 - 0x3BBF;
 706 
 707         if (index >= KICONV_GB2312_UTF8_MAX)
 708                 index = KICONV_GB2312_UTF8_MAX - 1;     /* Map to 0xEFBFBD */
 709 
 710         u8 = kiconv_gb2312_utf8[index];
 711         sz = u8_number_of_bytes[u8[0]];
 712 
 713         if (obtail - ob < sz) {
 714                 *ret_val = (size_t)-1;
 715                 return (-1);
 716         }
 717 
 718         for (index = 0; index < sz; index++)
 719                 *ob++ = u8[index];
 720 
 721         /*
 722          * As kiconv_gb2312_utf8 contain muliple KICONV_UTF8_REPLACEMENT_CHAR
 723          * elements, so need to ckeck more.
 724          */
 725         if (sz == KICONV_UTF8_REPLACEMENT_CHAR_LEN &&
 726             u8[0] == KICONV_UTF8_REPLACEMENT_CHAR1 &&
 727             u8[1] == KICONV_UTF8_REPLACEMENT_CHAR2 &&
 728             u8[2] == KICONV_UTF8_REPLACEMENT_CHAR3)
 729                 (*ret_val)++;
 730 
 731         return (sz);
 732 }
 733 
 734 /*
 735  * Convert single GB18030 or GBK character to UTF-8.
 736  * Return: > 0  - Converted successfully
 737  *         = -1 - E2BIG
 738  */
 739 static int8_t
 740 gbk_to_utf8(uint32_t gbk_val, uchar_t *ob, uchar_t *obtail, size_t *ret_val,
 741         boolean_t isgbk4)
 742 {
 743         size_t  index;
 744         int8_t  sz;
 745         uchar_t u8array[4];
 746         uchar_t *u8;
 747 
 748         if (isgbk4) {
 749                 if (gbk_val >= KICONV_SC_PLANE1_GB18030_START) {
 750                         uint32_t        u32;
 751 
 752                         /*
 753                          * u32 = ((gbk_val >> 24) - 0x90) * 12600 +
 754                          *   (((gbk_val & 0xFF0000) >> 16) - 0x30) * 1260 +
 755                          *   (((gbk_val & 0xFF00) >> 8) - 0x81) * 10 +
 756                          *   (gbk_val & 0xFF - 0x30)+
 757                          *   KICONV_SC_PLANE1_UCS4_START;
 758                          */
 759                         u32 = (gbk_val >> 24) * 12600 +
 760                             ((gbk_val & 0xFF0000) >> 16) * 1260 +
 761                             ((gbk_val & 0xFF00) >> 8) * 10 +
 762                             (gbk_val & 0xFF) - 0x1BA0FA;
 763                         u8array[0] = (uchar_t)(0xF0 | ((u32 & 0x1C0000) >> 18));
 764                         u8array[1] = (uchar_t)(0x80 | ((u32 & 0x03F000) >> 12));
 765                         u8array[2] = (uchar_t)(0x80 | ((u32 & 0x000FC0) >> 6));
 766                         u8array[3] = (uchar_t)(0x80 | (u32 & 0x00003F));
 767                         u8 = u8array;
 768                         index = 1;
 769                 } else {
 770                         index = kiconv_binsearch(gbk_val,
 771                             kiconv_gbk4_utf8, KICONV_GBK4_UTF8_MAX);
 772                         u8 = kiconv_gbk4_utf8[index].u8;
 773                 }
 774         } else {
 775                 index = kiconv_binsearch(gbk_val,
 776                     kiconv_gbk_utf8, KICONV_GBK_UTF8_MAX);
 777                 u8 = kiconv_gbk_utf8[index].u8;
 778         }
 779 
 780         sz = u8_number_of_bytes[u8[0]];
 781         if (obtail - ob < sz) {
 782                 *ret_val = (size_t)-1;
 783                 return (-1);
 784         }
 785 
 786         if (index == 0)
 787                 (*ret_val)++;   /* Non-identical conversion */
 788 
 789         for (index = 0; index < sz; index++)
 790                 *ob++ = u8[index];
 791 
 792         return (sz);
 793 }
 794 
 795 /*
 796  * Convert single UTF-8 character to GB18030.
 797  * Return: > 0  - Converted successfully
 798  *         = -1 - E2BIG
 799  */
 800 /* ARGSUSED */
 801 static int8_t
 802 utf8_to_gb18030(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 803         uchar_t *ob, uchar_t *obtail, size_t *ret)
 804 {
 805         size_t          index;
 806         int8_t          gbklen;
 807         uint32_t        gbkcode;
 808 
 809         if (utf8 >= KICONV_SC_PLANE1_UTF8_START) {
 810                 /* Four bytes GB18030 [0x90308130, 0xe339fe39] handling. */
 811                 uint32_t        u32;
 812 
 813                 u32 = (((utf8 & 0x07000000) >> 6) | ((utf8 & 0x3F0000) >> 4) |
 814                     ((utf8 & 0x3F00) >> 2) | (utf8 & 0x3F)) -
 815                     KICONV_SC_PLANE1_UCS4_START;
 816                 gbkcode = ((u32 / 12600 + 0x90) << 24) |
 817                     (((u32 % 12600) / 1260 + 0x30) << 16) |
 818                     (((u32 % 1260) / 10 + 0x81) << 8) | (u32 % 10 + 0x30);
 819                 gbklen = 4;
 820                 index = 1;
 821         } else {
 822                 index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
 823                     KICONV_UTF8_GB18030_MAX);
 824                 gbkcode = kiconv_utf8_gb18030[index].value;
 825                 KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
 826         }
 827 
 828         if (obtail - ob < gbklen) {
 829                 *ret = (size_t)-1;
 830                 return (-1);
 831         }
 832 
 833         if (index == 0)
 834                 (*ret)++;               /* Non-identical conversion */
 835 
 836         if (gbklen == 2) {
 837                 *ob++ = (uchar_t)(gbkcode >> 8);
 838         } else if (gbklen == 4) {
 839                 *ob++ = (uchar_t)(gbkcode >> 24);
 840                 *ob++ = (uchar_t)(gbkcode >> 16);
 841                 *ob++ = (uchar_t)(gbkcode >> 8);
 842         }
 843         *ob = (uchar_t)(gbkcode & 0xFF);
 844 
 845         return (gbklen);
 846 }
 847 
 848 /*
 849  * Convert single UTF-8 character to GBK.
 850  * Return: > 0  - Converted successfully
 851  *         = -1 - E2BIG
 852  */
 853 /* ARGSUSED */
 854 static int8_t
 855 utf8_to_gbk(uint32_t utf8, uchar_t **inbuf, uchar_t *ibtail,
 856         uchar_t *ob, uchar_t *obtail, size_t *ret)
 857 {
 858         size_t          index;
 859         int8_t          gbklen;
 860         uint32_t        gbkcode;
 861 
 862         index = kiconv_binsearch(utf8, kiconv_utf8_gb18030,
 863             KICONV_UTF8_GB18030_MAX);
 864         gbkcode = kiconv_utf8_gb18030[index].value;
 865         KICONV_SC_GET_GB_LEN(gbkcode, gbklen);
 866 
 867         /* GBK and GB18030 share the same table, so check the length. */
 868         if (gbklen == 4) {
 869                 index = 0;
 870                 gbkcode = kiconv_utf8_gb18030[index].value;
 871                 gbklen = 1;
 872         }
 873 
 874         if (obtail - ob < gbklen) {
 875                 *ret = (size_t)-1;
 876                 return (-1);
 877         }
 878 
 879         if (index == 0)
 880                 (*ret)++;               /* Non-identical conversion */
 881 
 882         if (gbklen > 1)
 883                 *ob++ = (uchar_t)(gbkcode >> 8);
 884         *ob = (uchar_t)(gbkcode & 0xFF);
 885 
 886         return (gbklen);
 887 }
 888 
 889 /*
 890  * Convert single UTF-8 character to GB2312.
 891  * Return: > 0  - Converted successfully
 892  *         = -1 - E2BIG
 893  */
 894 /* ARGSUSED */
 895 static int8_t
 896 utf8_to_gb2312(uint32_t utf8, uchar_t **inbuf, uchar_t *intail,
 897         uchar_t *ob, uchar_t *obtail, size_t *ret)
 898 {
 899         size_t          index;
 900         int8_t          gblen;
 901         uint32_t        gbcode;
 902 
 903         index = kiconv_binsearch(utf8, kiconv_utf8_gb2312,
 904             KICONV_UTF8_GB2312_MAX);
 905         gbcode = kiconv_utf8_gb2312[index].value;
 906         gblen = (gbcode <= 0xFF) ? 1 : 2;
 907 
 908         if (obtail - ob < gblen) {
 909                 *ret = (size_t)-1;
 910                 return (-1);
 911         }
 912 
 913         if (index == 0)
 914                 (*ret)++;
 915 
 916         if (gblen > 1)
 917                 *ob++ = (uchar_t)(gbcode >> 8);
 918         *ob = (uchar_t)(gbcode & 0xFF);
 919 
 920         return (gblen);
 921 }
 922 
 923 static kiconv_ops_t kiconv_sc_ops_tbl[] = {
 924         {
 925                 "gb18030", "utf-8", kiconv_open_to_cck, kiconv_to_gb18030,
 926                 kiconv_close_to_cck, kiconvstr_to_gb18030
 927         },
 928         {
 929                 "utf-8", "gb18030", open_fr_gb18030, kiconv_fr_gb18030,
 930                 close_fr_sc, kiconvstr_fr_gb18030
 931         },
 932         {
 933                 "gbk", "utf-8", kiconv_open_to_cck, kiconv_to_gbk,
 934                 kiconv_close_to_cck, kiconvstr_to_gbk
 935         },
 936         {
 937                 "utf-8", "gbk", open_fr_gbk, kiconv_fr_gbk,
 938                 close_fr_sc, kiconvstr_fr_gbk
 939         },
 940         {
 941                 "euccn", "utf-8", kiconv_open_to_cck, kiconv_to_euccn,
 942                 kiconv_close_to_cck, kiconvstr_to_euccn
 943         },
 944         {
 945                 "utf-8", "euccn", open_fr_euccn, kiconv_fr_euccn,
 946                 close_fr_sc, kiconvstr_fr_euccn
 947         },
 948 };
 949 
 950 static kiconv_module_info_t kiconv_sc_info = {
 951         "kiconv_sc",            /* module name */
 952         sizeof (kiconv_sc_ops_tbl) / sizeof (kiconv_sc_ops_tbl[0]),
 953         kiconv_sc_ops_tbl,
 954         0,
 955         NULL,
 956         NULL,
 957         0
 958 };
 959 
 960 static struct modlkiconv modlkiconv_sc = {
 961         &mod_kiconvops,
 962         "kiconv Simplified Chinese module 1.0",
 963         &kiconv_sc_info
 964 };
 965 
 966 static struct modlinkage modlinkage = {
 967         MODREV_1,
 968         (void *)&modlkiconv_sc,
 969         NULL
 970 };
 971 
 972 int
 973 _init(void)
 974 {
 975         int err;
 976 
 977         err = mod_install(&modlinkage);
 978         if (err)
 979                 cmn_err(CE_WARN, "kiconv_sc: failed to load kernel module");
 980 
 981         return (err);
 982 }
 983 
 984 int
 985 _fini(void)
 986 {
 987         int err;
 988 
 989         /*
 990          * If this module is being used, then, we cannot remove the module.
 991          * The following checking will catch pretty much all usual cases.
 992          *
 993          * Any remaining will be catached by the kiconv_unregister_module()
 994          * during mod_remove() at below.
 995          */
 996         if (kiconv_module_ref_count(KICONV_MODULE_ID_SC))
 997                 return (EBUSY);
 998 
 999         err = mod_remove(&modlinkage);
1000         if (err)
1001                 cmn_err(CE_WARN, "kiconv_sc: failed to remove kernel module");
1002 
1003         return (err);
1004 }
1005 
1006 int
1007 _info(struct modinfo *modinfop)
1008 {
1009         return (mod_info(&modlinkage, modinfop));
1010 }