1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/systm.h>
  33 #include <sys/debug.h>
  34 #include <sys/kmem.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/byteorder.h>
  37 #include <sys/errno.h>
  38 #include <sys/euc.h>
  39 #include <sys/modctl.h>
  40 #include <sys/kiconv.h>
  41 
  42 #include <sys/kiconv_ja.h>
  43 #include <sys/kiconv_ja_jis_to_unicode.h>
  44 #include <sys/kiconv_ja_unicode_to_jis.h>
  45 
/*
 * The following vector gives the total number of bytes in a UTF-8
 * character, or U8_ILLEGAL_CHAR / U8_OUT_OF_RANGE_CHAR when the byte cannot
 * start a character. The index is the first byte of the character. This is
 * defined in u8_textprep.c.
 */
  51 extern const int8_t u8_number_of_bytes[];
  52 
/*
 * The following is a vector of bit-masks used to extract the payload bits
 * from the first byte of a UTF-8 character. The index is the number of
 * bytes that follow the first byte of the character. This is defined in
 * uconv.c.
 */
  58 extern const uchar_t u8_masks_tbl[];
  59 
/*
 * The following two vectors provide the valid minimum and maximum values
 * for the 2nd byte of a multibyte UTF-8 character, for better illegal
 * sequence checking. The index must be the value of the first byte of the
 * UTF-8 character. These are defined in u8_textprep.c.
 */
  66 extern const uint8_t u8_valid_min_2nd_byte[];
  67 extern const uint8_t u8_valid_max_2nd_byte[];
  68 
  69 static kiconv_ja_euc16_t
  70 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
  71 {
  72         const kiconv_ja_euc16_t *p;
  73 
  74         if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
  75                 return (p[ucs2 & 0xff]);
  76 
  77         return (KICONV_JA_NODEST);
  78 }
  79 
  80 static size_t
  81 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
  82 {
  83         uint_t  l;              /* to be copied to *p on successful return */
  84         uchar_t ic;             /* current byte */
  85         uchar_t ic1;            /* 1st byte */
  86         uchar_t *ip = *pip;     /* next byte to read */
  87         size_t  ileft = *pileft; /* number of bytes available */
  88         size_t  rv = 0;         /* return value of this function */
  89         int     remaining_bytes;
  90         int     u8_size;
  91 
  92         KICONV_JA_NGET(ic1);    /* read 1st byte */
  93 
  94         if (ic1 < 0x80) {
  95                 /* successfully converted */
  96                 *p = (uint_t)ic1;
  97                 goto ret;
  98         }
  99 
 100         u8_size = u8_number_of_bytes[ic1];
 101         if (u8_size == U8_ILLEGAL_CHAR) {
 102                 KICONV_JA_RETERROR(EILSEQ)
 103         } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
 104                 KICONV_JA_RETERROR(ERANGE)
 105         }
 106 
 107         remaining_bytes = u8_size - 1;
 108         if (remaining_bytes != 0) {
 109                 l = ic1 & u8_masks_tbl[remaining_bytes];
 110 
 111                 for (; remaining_bytes > 0; remaining_bytes--) {
 112                         KICONV_JA_NGET(ic);
 113                         if (ic1 != 0U) {
 114                                 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
 115                                     (ic > u8_valid_max_2nd_byte[ic1])) {
 116                                         KICONV_JA_RETERROR(EILSEQ)
 117                                 }
 118                                 ic1 = 0U; /* 2nd byte check done */
 119                         } else {
 120                                 if ((ic < 0x80) || (ic > 0xbf)) {
 121                                         KICONV_JA_RETERROR(EILSEQ)
 122                                 }
 123                         }
 124                         l = (l << 6) | (ic & 0x3f);
 125                 }
 126 
 127                 /* successfully converted */
 128                 *p = l;
 129         } else {
 130                 KICONV_JA_RETERROR(EILSEQ)
 131         }
 132 
 133 ret:
 134         if (rv == 0) {
 135                 /*
 136                  * Update rv, *pip, and *pileft on successfule return.
 137                  */
 138                 rv = *pileft - ileft;
 139                 *pip = ip;
 140                 *pileft = ileft;
 141         }
 142 
 143         return (rv);
 144 }
 145 
 146 static size_t
 147 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
 148 {
 149         uint_t  l;              /* to be copied to *p on successful return */
 150         uchar_t ic;             /* current byte */
 151         uchar_t ic1;            /* 1st byte */
 152         uchar_t *ip = *pip;     /* next byte to read */
 153         size_t  ileft = *pileft; /* number of bytes available */
 154         size_t  rv = 0;         /* return value of this function */
 155         int     remaining_bytes;
 156         int     u8_size;
 157 
 158         KICONV_JA_NGET_REP_TO_MB(ic1);  /* read 1st byte */
 159 
 160         if (ic1 < 0x80) {
 161                 /* successfully converted */
 162                 l = (uint_t)ic1;
 163                 goto ret;
 164         }
 165 
 166         u8_size = u8_number_of_bytes[ic1];
 167         if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
 168                 l = KICONV_JA_DEF_SINGLE;
 169                 (*repnum)++;
 170                 goto ret;
 171         }
 172 
 173         remaining_bytes = u8_size - 1;
 174 
 175         if (remaining_bytes != 0) {
 176                 l = ic1 & u8_masks_tbl[remaining_bytes];
 177 
 178                 for (; remaining_bytes > 0; remaining_bytes--) {
 179                         KICONV_JA_NGET_REP_TO_MB(ic);
 180                         if (ic1 != 0U) {
 181                                 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
 182                                     (ic > u8_valid_max_2nd_byte[ic1])) {
 183                                         l = KICONV_JA_DEF_SINGLE;
 184                                         (*repnum)++;
 185                                         ileft -= (remaining_bytes - 1);
 186                                         ip += (remaining_bytes - 1);
 187                                         break;
 188                                 }
 189                                 ic1 = 0U; /* 2nd byte check done */
 190                         } else {
 191                                 if ((ic < 0x80) || (ic > 0xbf)) {
 192                                         l = KICONV_JA_DEF_SINGLE;
 193                                         (*repnum)++;
 194                                         ileft -= (remaining_bytes - 1);
 195                                         ip += (remaining_bytes - 1);
 196                                         break;
 197                                 }
 198                         }
 199                         l = (l << 6) | (ic & 0x3f);
 200                 }
 201         } else {
 202                 l = KICONV_JA_DEF_SINGLE;
 203                 (*repnum)++;
 204         }
 205 
 206 ret:
 207         /* successfully converted */
 208         *p = l;
 209         rv = *pileft - ileft;
 210 
 211         *pip = ip;
 212         *pileft = ileft;
 213 
 214         return (rv);
 215 }
 216 
 217 static size_t                           /* return #bytes read, or -1 */
 218 read_unicode(
 219         uint_t  *p,             /* point variable to store UTF-32 */
 220         uchar_t **pip,          /* point pointer to input buf */
 221         size_t  *pileft,        /* point #bytes left in input buf */
 222         int     *errno,         /* point variable to errno */
 223         int     flag,           /* kiconvstr flag */
 224         size_t  *rv)            /* point return valuse */
 225 {
 226         if (flag & KICONV_REPLACE_INVALID)
 227                 return (utf8_ucs_replace(p, pip, pileft, rv));
 228         else
 229                 return (utf8_ucs(p, pip, pileft, errno));
 230 }
 231 
 232 static size_t
 233 write_unicode(
 234         uint_t  u32,            /* UTF-32 to write */
 235         char    **pop,          /* point pointer to output buf */
 236         size_t  *poleft,        /* point #bytes left in output buf */
 237         int     *errno)         /* point variable to errno */
 238 {
 239         char    *op = *pop;
 240         size_t  oleft = *poleft;
 241         size_t  rv = 0;                 /* return value */
 242 
 243         if (u32 <= 0x7f) {
 244                 KICONV_JA_NPUT((uchar_t)(u32));
 245                 rv = 1;
 246         } else if (u32 <= 0x7ff) {
 247                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
 248                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 249                 rv = 2;
 250         } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
 251                 KICONV_JA_RETERROR(EILSEQ)
 252         } else if (u32 <= 0xffff) {
 253                 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
 254                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
 255                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 256                 rv = 3;
 257         } else if (u32 <= 0x10ffff) {
 258                 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
 259                 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
 260                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
 261                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 262                 rv = 4;
 263         } else {
 264                 KICONV_JA_RETERROR(EILSEQ)
 265         }
 266 
 267 ret:
 268         if (rv != (size_t)-1) {
 269                 /* update *pop and *poleft only on successful return */
 270                 *pop = op;
 271                 *poleft = oleft;
 272         }
 273 
 274         return (rv);
 275 }
 276 
 277 static void *
 278 _kiconv_ja_open_unicode(uint8_t id)
 279 {
 280         kiconv_state_t  kcd;
 281 
 282         kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
 283             KM_SLEEP);
 284         kcd->id = id;
 285         kcd->bom_processed = 0;
 286         return ((void *)kcd);
 287 }
 288 
 289 static void *
 290 open_eucjp(void)
 291 {
 292         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
 293 }
 294 
 295 static void *
 296 open_eucjpms(void)
 297 {
 298         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
 299 }
 300 
 301 static void *
 302 open_sjis(void)
 303 {
 304         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
 305 }
 306 
 307 static void *
 308 open_cp932(void)
 309 {
 310         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
 311 }
 312 
 313 int
 314 close_ja(void *kcd)
 315 {
 316         if (! kcd || kcd == (void *)-1)
 317                 return (EBADF);
 318 
 319         kmem_free(kcd, sizeof (kiconv_state_data_t));
 320 
 321         return (0);
 322 }
 323 
 324 static size_t
 325 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 326                 char **outbuf, size_t *outbytesleft, int *errno)
 327 {
 328         uint_t          u32;            /* UTF-32 */
 329         uint_t          index;          /* index for table lookup */
 330         uchar_t         ic1, ic2, ic3;  /* 1st, 2nd, and 3rd bytes of a char */
 331         size_t          rv = 0;         /* return value of this function */
 332 
 333         uchar_t *ip;
 334         size_t          ileft;
 335         char            *op;
 336         size_t          oleft;
 337         size_t          id = ((kiconv_state_t)kcd)->id;
 338 
 339         if ((inbuf == NULL) || (*inbuf == NULL)) {
 340                 return (0);
 341         }
 342 
 343         ip = (uchar_t *)*inbuf;
 344         ileft = *inbytesleft;
 345         op = *outbuf;
 346         oleft = *outbytesleft;
 347 
 348         while (ileft != 0) {
 349                 KICONV_JA_NGET(ic1);            /* get 1st byte */
 350 
 351                 if (KICONV_JA_ISASC(ic1)) {     /* ASCII; 1 byte */
 352                         u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
 353                         KICONV_JA_PUTU(u32);
 354                 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
 355                         KICONV_JA_NGET(ic2);
 356                         if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
 357                                 ic1 &= KICONV_JA_CMASK;
 358                                 ic2 &= KICONV_JA_CMASK;
 359                                 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
 360                                 if (u32 == KICONV_JA_NODEST) {
 361                                         index = (ic1 - 0x21) * 94 + ic2 - 0x21;
 362                                         u32 = kiconv_ja_jisx0208_to_ucs2[index];
 363                                 }
 364                                 if (u32 == KICONV_JA_REPLACE)
 365                                         rv++;
 366                                 KICONV_JA_PUTU(u32);
 367                         } else { /* 2nd byte check failed */
 368                                 KICONV_JA_RETERROR(EILSEQ)
 369                         }
 370                 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
 371                         KICONV_JA_NGET(ic2);
 372                         if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
 373                                 index = (ic2 - 0xa1);
 374                                 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
 375                                 KICONV_JA_PUTU(u32);
 376                         } else { /* 2nd byte check failed */
 377                                 KICONV_JA_RETERROR(EILSEQ)
 378                         }
 379                 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
 380                         KICONV_JA_NGET(ic2);
 381                         if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
 382                                 KICONV_JA_NGET(ic3);
 383                                 if (KICONV_JA_ISCS3(ic3)) {
 384                                         /* 3rd byte check passed */
 385                                         ic2 &= KICONV_JA_CMASK;
 386                                         ic3 &= KICONV_JA_CMASK;
 387                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
 388                                             ic2, ic3);
 389                                         if (u32 == KICONV_JA_NODEST) {
 390                                                 index = ((ic2 - 0x21) * 94 +
 391                                                     (ic3 - 0x21));
 392                                                 u32 = kiconv_ja_jisx0212_to_ucs2
 393                                                     [index];
 394                                         }
 395                                         if (u32 == KICONV_JA_REPLACE)
 396                                                 rv++;
 397                                         KICONV_JA_PUTU(u32);
 398                                 } else { /* 3rd byte check failed */
 399                                         KICONV_JA_RETERROR(EILSEQ)
 400                                 }
 401                         } else { /* 2nd byte check failed */
 402                                 KICONV_JA_RETERROR(EILSEQ)
 403                         }
 404                 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
 405                         /* C1 control; 1 byte */
 406                         u32 = ic1;
 407                         KICONV_JA_PUTU(u32);
 408                 } else { /* 1st byte check failed */
 409                         KICONV_JA_RETERROR(EILSEQ)
 410                 }
 411 
 412                 /*
 413                  * One character successfully converted so update
 414                  * values outside of this function's stack.
 415                  */
 416                 *inbuf = (char *)ip;
 417                 *inbytesleft = ileft;
 418                 *outbuf = op;
 419                 *outbytesleft = oleft;
 420         }
 421 
 422 ret:
 423         return (rv);
 424 }
 425 
 426 static size_t
 427 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 428                 char **outbuf, size_t *outbytesleft, int *errno)
 429 {
 430         uchar_t         ic;
 431         size_t          rv = 0;
 432         uint_t          ucs4;
 433         ushort_t        euc16;
 434 
 435         uchar_t *ip;
 436         size_t          ileft;
 437         char            *op;
 438         size_t          oleft;
 439         size_t          read_len;
 440 
 441         size_t          id = ((kiconv_state_t)kcd)->id;
 442 
 443         if ((inbuf == NULL) || (*inbuf == NULL)) {
 444                 return (0);
 445         }
 446 
 447         ip = (uchar_t *)*inbuf;
 448         ileft = *inbytesleft;
 449         op = *outbuf;
 450         oleft = *outbytesleft;
 451 
 452         KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
 453 
 454         while (ileft != 0) {
 455                 KICONV_JA_GETU(&ucs4, 0);
 456 
 457                 if (ucs4 > 0xffff) {
 458                         /* non-BMP */
 459                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 460                         rv++;
 461                         goto next;
 462                 }
 463 
 464                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
 465                 if (euc16 == KICONV_JA_NODEST) {
 466                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
 467                 }
 468                 if (euc16 == KICONV_JA_NODEST) {
 469                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 470                         rv++;
 471                         goto next;
 472                 }
 473 
 474                 switch (euc16 & 0x8080) {
 475                 case 0x0000:    /* CS0 */
 476                         ic = (uchar_t)euc16;
 477                         KICONV_JA_NPUT(ic);
 478                         break;
 479                 case 0x8080:    /* CS1 */
 480                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 481                         KICONV_JA_NPUT(ic);
 482                         ic = (uchar_t)(euc16 & 0xff);
 483                         KICONV_JA_NPUT(ic);
 484                         break;
 485                 case 0x0080:    /* CS2 */
 486                         KICONV_JA_NPUT(SS2);
 487                         ic = (uchar_t)euc16;
 488                         KICONV_JA_NPUT(ic);
 489                         break;
 490                 case 0x8000:    /* CS3 */
 491                         KICONV_JA_NPUT(SS3);
 492                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 493                         KICONV_JA_NPUT(ic);
 494                         ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
 495                         KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
 496                         break;
 497                 }
 498 next:
 499                 /*
 500                  * One character successfully converted so update
 501                  * values outside of this function's stack.
 502                  */
 503                 *inbuf = (char *)ip;
 504                 *inbytesleft = ileft;
 505                 *outbuf = op;
 506                 *outbytesleft = oleft;
 507         }
 508 
 509 ret:
 510         return (rv);
 511 }
 512 
 513 static size_t
 514 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 515         size_t *outbytesleft, int flag, int *errno, uint8_t id)
 516 {
 517         uint_t          u32;            /* UTF-32 */
 518         uint_t          index;          /* index for table lookup */
 519         uchar_t         ic1, ic2, ic3;  /* 1st, 2nd, and 3rd bytes of a char */
 520         size_t          rv = 0;         /* return value of this function */
 521 
 522         uchar_t *ip;
 523         size_t          ileft;
 524         char            *op;
 525         size_t          oleft;
 526 
 527         boolean_t do_not_ignore_null;
 528 
 529         if ((inbuf == NULL) || (*inbuf == NULL)) {
 530                 return (0);
 531         }
 532 
 533         ip = (uchar_t *)inbuf;
 534         ileft = *inbytesleft;
 535         op = outbuf;
 536         oleft = *outbytesleft;
 537 
 538         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 539 
 540         while (ileft != 0) {
 541                 KICONV_JA_NGET(ic1);            /* get 1st byte */
 542 
 543                 if (KICONV_JA_ISASC(ic1)) {     /* ASCII; 1 byte */
 544                         if (ic1 == '\0' && do_not_ignore_null) {
 545                                 return (0);
 546                         }
 547                         u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
 548                         KICONV_JA_PUTU(u32);
 549                 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
 550                         if (flag & KICONV_REPLACE_INVALID) {
 551                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 552                         } else {
 553                                 KICONV_JA_NGET(ic2);
 554                         }
 555                         if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
 556                                 ic1 &= KICONV_JA_CMASK;
 557                                 ic2 &= KICONV_JA_CMASK;
 558                                 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
 559                                 if (u32 == KICONV_JA_NODEST) {
 560                                         index = (ic1 - 0x21) * 94 + ic2 - 0x21;
 561                                         u32 = kiconv_ja_jisx0208_to_ucs2[index];
 562                                 }
 563                                 if (u32 == KICONV_JA_REPLACE)
 564                                         rv++;
 565                                 KICONV_JA_PUTU(u32);
 566                         } else { /* 2nd byte check failed */
 567                                 if (flag & KICONV_REPLACE_INVALID) {
 568                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 569                                         rv++;
 570                                 } else {
 571                                         KICONV_JA_RETERROR(EILSEQ)
 572                                 }
 573                         }
 574                 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
 575                         if (flag & KICONV_REPLACE_INVALID) {
 576                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 577                         } else {
 578                                 KICONV_JA_NGET(ic2);
 579                         }
 580                         if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
 581                                 index = (ic2 - 0xa1);
 582                                 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
 583                                 KICONV_JA_PUTU(u32);
 584                         } else { /* 2nd byte check failed */
 585                                 if (flag & KICONV_REPLACE_INVALID) {
 586                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 587                                         rv++;
 588                                 } else {
 589                                         KICONV_JA_RETERROR(EILSEQ)
 590                                 }
 591                         }
 592                 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
 593                         if (flag & KICONV_REPLACE_INVALID) {
 594                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 595                         } else {
 596                                 KICONV_JA_NGET(ic2);
 597                         }
 598                         if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
 599                                 if (flag & KICONV_REPLACE_INVALID) {
 600                                         KICONV_JA_NGET_REP_FR_MB(ic3);
 601                                 } else {
 602                                         KICONV_JA_NGET(ic3);
 603                                 }
 604                                 if (KICONV_JA_ISCS3(ic3)) {
 605                                         /* 3rd byte check passed */
 606                                         ic2 &= KICONV_JA_CMASK;
 607                                         ic3 &= KICONV_JA_CMASK;
 608                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
 609                                             ic2, ic3);
 610                                         if (u32 == KICONV_JA_NODEST) {
 611                                                 index = ((ic2 - 0x21) * 94 +
 612                                                     (ic3 - 0x21));
 613                                                 u32 = kiconv_ja_jisx0212_to_ucs2
 614                                                     [index];
 615                                         }
 616                                         if (u32 == KICONV_JA_REPLACE)
 617                                                 rv++;
 618                                         KICONV_JA_PUTU(u32);
 619                                 } else { /* 3rd byte check failed */
 620                                         if (flag & KICONV_REPLACE_INVALID) {
 621                                                 KICONV_JA_PUTU(
 622                                                     KICONV_JA_REPLACE);
 623                                                 rv++;
 624                                         } else {
 625                                                 KICONV_JA_RETERROR(EILSEQ)
 626                                         }
 627                                 }
 628                         } else { /* 2nd byte check failed */
 629                                 if (flag & KICONV_REPLACE_INVALID) {
 630                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 631                                         rv++;
 632                                 } else {
 633                                         KICONV_JA_RETERROR(EILSEQ)
 634                                 }
 635                         }
 636                 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
 637                         /* C1 control; 1 byte */
 638                         u32 = ic1;
 639                         KICONV_JA_PUTU(u32);
 640                 } else { /* 1st byte check failed */
 641                         if (flag & KICONV_REPLACE_INVALID) {
 642                                 KICONV_JA_PUTU(KICONV_JA_REPLACE);
 643                                 rv++;
 644                         } else {
 645                                 KICONV_JA_RETERROR(EILSEQ)
 646                         }
 647                 }
 648 
 649 next:
 650                 /*
 651                  * One character successfully converted so update
 652                  * values outside of this function's stack.
 653                  */
 654                 *inbytesleft = ileft;
 655                 *outbytesleft = oleft;
 656         }
 657 
 658 ret:
 659         return (rv);
 660 }
 661 
 662 static size_t
 663 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 664         size_t *outbytesleft, int flag, int *errno, uint8_t id)
 665 {
 666         uchar_t         ic;
 667         size_t          rv = 0;
 668         uint_t          ucs4;
 669         ushort_t        euc16;
 670 
 671         uchar_t *ip;
 672         size_t          ileft;
 673         char            *op;
 674         size_t          oleft;
 675         size_t          read_len;
 676 
 677         boolean_t do_not_ignore_null;
 678 
 679         if ((inbuf == NULL) || (*inbuf == NULL)) {
 680                 return (0);
 681         }
 682 
 683         ip = (uchar_t *)inbuf;
 684         ileft = *inbytesleft;
 685         op = outbuf;
 686         oleft = *outbytesleft;
 687 
 688         KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
 689 
 690         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 691 
 692         while (ileft != 0) {
 693                 KICONV_JA_GETU(&ucs4, flag);
 694 
 695                 if (ucs4 == 0x0 && do_not_ignore_null) {
 696                         return (0);
 697                 }
 698 
 699                 if (ucs4 > 0xffff) {
 700                         /* non-BMP */
 701                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 702                         rv++;
 703                         goto next;
 704                 }
 705 
 706                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
 707                 if (euc16 == KICONV_JA_NODEST) {
 708                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
 709                 }
 710                 if (euc16 == KICONV_JA_NODEST) {
 711                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 712                         rv++;
 713                         goto next;
 714                 }
 715 
 716                 switch (euc16 & 0x8080) {
 717                 case 0x0000:    /* CS0 */
 718                         ic = (uchar_t)euc16;
 719                         KICONV_JA_NPUT(ic);
 720                         break;
 721                 case 0x8080:    /* CS1 */
 722                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 723                         KICONV_JA_NPUT(ic);
 724                         ic = (uchar_t)(euc16 & 0xff);
 725                         KICONV_JA_NPUT(ic);
 726                         break;
 727                 case 0x0080:    /* CS2 */
 728                         KICONV_JA_NPUT(SS2);
 729                         ic = (uchar_t)euc16;
 730                         KICONV_JA_NPUT(ic);
 731                         break;
 732                 case 0x8000:    /* CS3 */
 733                         KICONV_JA_NPUT(SS3);
 734                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 735                         KICONV_JA_NPUT(ic);
 736                         ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
 737                         KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
 738                         break;
 739                 }
 740 next:
 741                 /*
 742                  * One character successfully converted so update
 743                  * values outside of this function's stack.
 744                  */
 745                 *inbytesleft = ileft;
 746                 *outbytesleft = oleft;
 747         }
 748 
 749 ret:
 750         return (rv);
 751 }
 752 
 753 static size_t
 754 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 755                 char **outbuf, size_t *outbytesleft, int *errno)
 756 {
 757         if (! kcd || kcd == (void *)-1) {
 758                 *errno = EBADF;
 759                 return ((size_t)-1);
 760         }
 761 
 762         return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
 763             outbuf, outbytesleft, errno));
 764 }
 765 
 766 static size_t
 767 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 768                 char **outbuf, size_t *outbytesleft, int *errno)
 769 {
 770         if (! kcd || kcd == (void *)-1) {
 771                 *errno = EBADF;
 772                 return ((size_t)-1);
 773         }
 774 
 775         return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
 776             outbuf, outbytesleft, errno));
 777 }
 778 
 779 static size_t
 780 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 781         size_t *outbytesleft, int flag, int *errno)
 782 {
 783         return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
 784             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
 785 }
 786 
 787 static size_t
 788 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 789         size_t *outbytesleft, int flag, int *errno)
 790 {
 791         return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
 792             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
 793 }
 794 
 795 static size_t
 796 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
 797         size_t *outbytesleft, int flag, int *errno)
 798 {
 799         return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
 800             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
 801 }
 802 
 803 static size_t
 804 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
 805         size_t *outbytesleft, int flag, int *errno)
 806 {
 807         return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
 808             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
 809 }
 810 
/*
 * _do_kiconv_fr_sjis()
 *
 * iconv-style worker that decodes Shift_JIS family input one character
 * at a time. Each character is classified by its first byte, mapped to a
 * Unicode value through the kiconv_ja_* tables, and handed to
 * KICONV_JA_PUTU() for output.
 *
 * Arguments:
 *   kcd           - cast to kiconv_state_t; only its id field is read and
 *                   passed to the KICONV_JA_CNV_*MS_TO_U2() macros.
 *   inbuf,
 *   inbytesleft   - input cursor and remaining byte count.
 *   outbuf,
 *   outbytesleft  - output cursor and remaining space.
 *   errno         - error code out-parameter.
 *
 * The four cursor/count out-parameters are written back only after a
 * whole character has been processed (bottom of the loop).
 *
 * Return value: rv, which is incremented each time a table lookup
 * yields KICONV_JA_REPLACE. If inbuf or *inbuf is NULL the function
 * returns 0 without touching anything else.
 *
 * NOTE(review): the KICONV_JA_* macros are defined outside this file
 * section. They appear to use the locals ip, ileft, op, oleft, rv and
 * errno by name, and KICONV_JA_RETERROR()/KICONV_JA_NGET()/
 * KICONV_JA_PUTU() presumably jump to the "ret" label on failure (no
 * explicit goto to it is visible here) -- confirm in kiconv_ja.h
 * before renaming anything or reordering statements.
 */
static size_t
_do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
                char **outbuf, size_t *outbytesleft, int *errno)
{
        uint_t  uni;                    /* UTF-32 */
        uint_t  index;                  /* index for table lookup */
        uchar_t ic1, ic2;               /* 1st and 2nd bytes of a char */
        size_t  rv = 0;                 /* return value of this function */

        uchar_t *ip;                    /* working copy of *inbuf */
        size_t          ileft;          /* working copy of *inbytesleft */
        char            *op;            /* working copy of *outbuf */
        size_t          oleft;          /* working copy of *outbytesleft */
        size_t          id = ((kiconv_state_t)kcd)->id;

        /* Nothing to convert when there is no input buffer. */
        if ((inbuf == NULL) || (*inbuf == NULL)) {
                return (0);
        }

        ip = (uchar_t *)*inbuf;
        ileft = *inbytesleft;
        op = *outbuf;
        oleft = *outbytesleft;

        while (ileft != 0) {
                KICONV_JA_NGET(ic1);                    /* get 1st byte */

                if (KICONV_JA_ISASC((int)ic1)) {        /* ASCII; 1 byte */
                        uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
                        KICONV_JA_PUTU(uni);
                } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1 byte */
                        /* the Kana table is indexed relative to 0xa1 */
                        uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
                        KICONV_JA_PUTU(uni);
                } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2 bytes */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                /*
                                 * Translate the SJIS byte pair into a
                                 * JIS pair; a 2nd byte of 0x9f or above
                                 * selects the following JIS row.
                                 */
                                ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
                                if (ic2 >= 0x9f) {
                                        ic1++;
                                }
                                ic2 = kiconv_ja_sjtojis2[ic2];
                                /*
                                 * Try the id-dependent mapping first and
                                 * fall back to the plain JIS X 0208
                                 * table (94 cells per row, origin 0x21).
                                 */
                                KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
                                if (uni == KICONV_JA_NODEST) {
                                        index = ((ic1 - 0x21) * 94)
                                            + (ic2 - 0x21);
                                        uni = kiconv_ja_jisx0208_to_ucs2[index];
                                }
                                if (uni == KICONV_JA_REPLACE)
                                        rv++;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                                /* NOTREACHED */
                        }
                } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                /* same byte translation, JIS X 0212 table */
                                ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
                                if (ic2 >= 0x9f) {
                                        ic1++;
                                }
                                index = ((ic1 - 0x21) * 94)
                                    + (kiconv_ja_sjtojis2[ic2] - 0x21);
                                uni = kiconv_ja_jisx0212_to_ucs2[index];
                                if (uni == KICONV_JA_REPLACE)
                                        rv++;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
                    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
                        /*
                         * These codes need special treatment. NEC/IBM
                         * codes are first remapped so that they can be
                         * processed the same way as the extended IBM
                         * characters.
                         */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                ushort_t dest, upper, lower;
                                dest = (ic1 << 8) + ic2;
                                if ((0xed40 <= dest) && (dest <= 0xeffc)) {
                                        KICONV_JA_REMAP_NEC(dest);
                                        /* 0xffff: no remapping available */
                                        if (dest == 0xffff) {
                                                KICONV_JA_RETERROR(EILSEQ)
                                        }
                                }
                                /*
                                 * XXX: 0xfa54 and 0xfa5b must be mapped
                                 *      to JIS0208 area. Therefore we
                                 *      have to do special treatment.
                                 */
                                if ((dest == 0xfa54) || (dest == 0xfa5b)) {
                                        if (dest == 0xfa54) {
                                                upper = 0x22;
                                                lower = 0x4c;
                                        } else {
                                                upper = 0x22;
                                                lower = 0x68;
                                        }
                                        KICONV_JA_CNV_JISMS_TO_U2(id, uni,
                                            upper, lower);
                                        if (uni == KICONV_JA_NODEST) {
                                                index = (uint_t)((upper - 0x21)
                                                    * 94 + (lower - 0x21));
                                                uni = kiconv_ja_jisx0208_to_ucs2
                                                    [index];
                                        }
                                        if (uni == KICONV_JA_REPLACE)
                                                rv++;
                                        KICONV_JA_PUTU(uni);
                                } else {
                                        /*
                                         * Turn the code into an index into
                                         * kiconv_ja_sjtoibmext[]: offset
                                         * from 0xfa40, less 0x40 for each
                                         * lead byte above 0xfa.
                                         */
                                        dest = dest - 0xfa40 -
                                            (((dest>>8) - 0xfa) * 0x40);
                                        dest = kiconv_ja_sjtoibmext[dest];
                                        if (dest == 0xffff) {
                                                KICONV_JA_RETERROR(EILSEQ)
                                        }
                                        upper = (dest >> 8) & KICONV_JA_CMASK;
                                        lower = dest & KICONV_JA_CMASK;
                                        KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
                                            upper, lower);
                                        if (uni == KICONV_JA_NODEST) {
                                                index = (uint_t)((upper - 0x21)
                                                    * 94 + (lower - 0x21));
                                                uni = kiconv_ja_jisx0212_to_ucs2
                                                    [index];
                                        }
                                        if (uni == KICONV_JA_REPLACE)
                                                rv++;
                                        KICONV_JA_PUTU(uni);
                                }
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
                /*
                 * Based on the draft convention of OSF-JVC CDEWG,
                 * characters in this area will be mapped to
                 * "CHIKAN-MOJI." (convertible character)
                 * We use U+FFFD in this case.
                 * Note that rv is not incremented on this path.
                 */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                uni = 0xfffd;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else { /* 1st byte check failed */
                        KICONV_JA_RETERROR(EILSEQ)
                }

                /*
                 * One character successfully converted so update
                 * values outside of this function's stack.
                 */
                *inbuf = (char *)ip;
                *inbytesleft = ileft;
                *outbuf = op;
                *outbytesleft = oleft;
        }

ret:
        return (rv);
}
 978 
 979 /*
 980  * _kiconv_ja_lookuptbl()
 981  * Return the index number if its index-ed number
 982  * is the same as dest value.
 983  */
 984 static ushort_t
 985 _kiconv_ja_lookuptbl(ushort_t dest)
 986 {
 987         ushort_t tmp;
 988         int i;
 989         int sz = (sizeof (kiconv_ja_sjtoibmext) /
 990             sizeof (kiconv_ja_sjtoibmext[0]));
 991 
 992         for (i = 0; i < sz; i++) {
 993                 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
 994                 if (tmp == dest)
 995                         return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
 996         }
 997         return (0x3f);
 998 }
 999 
/*
 * _do_kiconv_to_sjis()
 *
 * iconv-style worker that encodes Unicode input as Shift_JIS family
 * output, one character at a time. Each character is fetched with
 * KICONV_JA_GETU(), mapped to a 16-bit EUC value (id-dependent mapping
 * first, then kiconv_ja_ucs2_to_euc16()), and that value is re-encoded
 * according to its code set, selected by the 0x8080 bits.
 *
 * Arguments:
 *   kcd           - cast to kiconv_state_t; its id field is read here
 *                   and passed to KICONV_JA_CNV_U2_TO_EUCJPMS().
 *   inbuf,
 *   inbytesleft   - input cursor and remaining byte count.
 *   outbuf,
 *   outbytesleft  - output cursor and remaining space.
 *   errno         - error code out-parameter.
 *
 * The four cursor/count out-parameters are written back only after a
 * whole character has been processed (at the "next" label).
 *
 * Return value: rv, which is incremented whenever KICONV_JA_DEF_SINGLE
 * is substituted for a non-BMP character, for a character without a
 * mapping, or for a CS0 value that KICONV_JA_ISC1CTRL() flags. If inbuf
 * or *inbuf is NULL the function returns 0 immediately.
 *
 * NOTE(review): the KICONV_JA_* macros are defined outside this file
 * section. They appear to use the locals ip, ileft, op, oleft, rv,
 * errno (and presumably read_len, which has no other visible use) by
 * name, and presumably jump to "ret" on failure -- confirm in
 * kiconv_ja.h before renaming anything or reordering statements.
 */
static size_t
_do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
                char **outbuf, size_t *outbytesleft, int *errno)
{
        uchar_t ic;
        size_t          rv = 0;
        uint_t          ucs4;
        ushort_t        euc16;
        ushort_t        dest;

        uchar_t *ip;                    /* working copy of *inbuf */
        size_t          ileft;          /* working copy of *inbytesleft */
        char            *op;            /* working copy of *outbuf */
        size_t          oleft;          /* working copy of *outbytesleft */
        size_t          read_len;

        size_t          id = ((kiconv_state_t)kcd)->id;

        /* Nothing to convert when there is no input buffer. */
        if ((inbuf == NULL) || (*inbuf == NULL)) {
                return (0);
        }

        ip = (uchar_t *)*inbuf;
        ileft = *inbytesleft;
        op = *outbuf;
        oleft = *outbytesleft;

        KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

        while (ileft != 0) {
                KICONV_JA_GETU(&ucs4, 0);

                if (ucs4 > 0xffff) {
                        /* non-BMP */
                        KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
                        rv++;
                        goto next;
                }

                /* id-dependent mapping first, generic table second */
                KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
                if (euc16 == KICONV_JA_NODEST) {
                        euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
                }
                if (euc16 == KICONV_JA_NODEST) {
                        /* no mapping at all: substitute */
                        KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
                        rv++;
                        goto next;
                }

                switch (euc16 & 0x8080) {
                case 0x0000:    /* CS0 */
                        if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
                                KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
                                rv++;
                        } else {
                                ic = (uchar_t)euc16;
                                KICONV_JA_NPUT(ic);
                        }
                        break;
                case 0x8080:    /* CS1 */
                        /* the row (Ku) selects the SJIS lead byte */
                        ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
                        KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
                        /*
                         * for even number row (Ku), add 0x80 to
                         * look latter half of kiconv_ja_jistosj2[] array
                         */
                        ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
                            + (((ic % 2) == 0) ? 0x80 : 0x00));
                        KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
                        break;
                case 0x0080:    /* CS2 */
                        /* the low byte is emitted as a single byte */
                        ic = (uchar_t)euc16;
                        KICONV_JA_NPUT(ic);
                        break;
                case 0x8000:    /* CS3 */
                        ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
                        if (euc16 == 0xa271) {
                                /* NUMERO SIGN */
                                KICONV_JA_NPUT(0x87);
                                KICONV_JA_NPUT(0x82);
                        } else if (ic < 0x75) { /* check if IBM VDC */
                                dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
                                /*
                                 * NOTE(review): _kiconv_ja_lookuptbl()
                                 * returns 0x3f, not 0xffff, when nothing
                                 * matches, so a miss takes the one-byte
                                 * path below, and rv is not incremented
                                 * for it -- confirm this is intended.
                                 */
                                if (dest == 0xffff) {
                                        KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
                                } else {
                                        /* avoid putting NUL ('\0') */
                                        if (dest > 0xff) {
                                                KICONV_JA_NPUT(
                                                    (dest >> 8) & 0xff);
                                                KICONV_JA_NPUT(dest & 0xff);
                                        } else {
                                                KICONV_JA_NPUT(dest & 0xff);
                                        }
                                }
                        } else {
                                KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
                                /*
                                 * for even number row (Ku), add 0x80 to
                                 * look latter half of kiconv_ja_jistosj2[]
                                 */
                                ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
                                    + (((ic % 2) == 0) ? 0x80 : 0x00));
                                KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
                        }
                        break;
                }

next:
                /*
                 * One character successfully converted so update
                 * values outside of this function's stack.
                 */
                *inbuf = (char *)ip;
                *inbytesleft = ileft;
                *outbuf = op;
                *outbytesleft = oleft;
        }

ret:
        return (rv);
}
1121 
1122 static size_t
1123 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1124         size_t *outbytesleft, int flag, int *errno, uint8_t id)
1125 {
1126         uint_t          uni;            /* UTF-32 */
1127         uint_t          index;          /* index for table lookup */
1128         uchar_t         ic1, ic2;       /* 1st and 2nd bytes of a char */
1129         size_t          rv = 0;         /* return value of this function */
1130 
1131         uchar_t *ip;
1132         size_t          ileft;
1133         char            *op;
1134         size_t          oleft;
1135 
1136         boolean_t do_not_ignore_null;
1137 
1138         if ((inbuf == NULL) || (*inbuf == NULL)) {
1139                 return (0);
1140         }
1141 
1142         ip = (uchar_t *)inbuf;
1143         ileft = *inbytesleft;
1144         op = outbuf;
1145         oleft = *outbytesleft;
1146 
1147         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1148 
1149         while (ileft != 0) {
1150                 KICONV_JA_NGET(ic1);                    /* get 1st byte */
1151 
1152                 if (KICONV_JA_ISASC((int)ic1)) {        /* ASCII; 1 byte */
1153                         if (ic1 == '\0' && do_not_ignore_null) {
1154                                 return (0);
1155                         }
1156                         uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1157                         KICONV_JA_PUTU(uni);
1158                 } else if (KICONV_JA_ISSJKANA(ic1)) {
1159                         /* JIS X 0201 Kana; 1 byte */
1160                         uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1161                         KICONV_JA_PUTU(uni);
1162                 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1163                         /* JIS X 0208 or UDC; 2 bytes */
1164                         if (flag & KICONV_REPLACE_INVALID) {
1165                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1166                         } else {
1167                                 KICONV_JA_NGET(ic2);
1168                         }
1169                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1170                                 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1171                                 if (ic2 >= 0x9f) {
1172                                         ic1++;
1173                                 }
1174                                 ic2 = kiconv_ja_sjtojis2[ic2];
1175                                 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1176                                 if (uni == KICONV_JA_NODEST) {
1177                                         index = ((ic1 - 0x21) * 94)
1178                                             + (ic2 - 0x21);
1179                                         uni = kiconv_ja_jisx0208_to_ucs2[index];
1180                                 }
1181                                 if (uni == KICONV_JA_REPLACE)
1182                                         rv++;
1183                                 KICONV_JA_PUTU(uni);
1184                         } else { /* 2nd byte check failed */
1185                                 if (flag & KICONV_REPLACE_INVALID) {
1186                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1187                                         rv++;
1188                                 } else {
1189                                         KICONV_JA_RETERROR(EILSEQ)
1190                                 }
1191                                 /* NOTREACHED */
1192                         }
1193                 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1194                         if (flag & KICONV_REPLACE_INVALID) {
1195                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1196                         } else {
1197                                 KICONV_JA_NGET(ic2);
1198                         }
1199                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1200                                 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1201                                 if (ic2 >= 0x9f) {
1202                                         ic1++;
1203                                 }
1204                                 index = ((ic1 - 0x21) * 94)
1205                                     + (kiconv_ja_sjtojis2[ic2] - 0x21);
1206                                 uni = kiconv_ja_jisx0212_to_ucs2[index];
1207                                 if (uni == KICONV_JA_REPLACE)
1208                                         rv++;
1209                                 KICONV_JA_PUTU(uni);
1210                         } else { /* 2nd byte check failed */
1211                                 if (flag & KICONV_REPLACE_INVALID) {
1212                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1213                                         rv++;
1214                                 } else {
1215                                         KICONV_JA_RETERROR(EILSEQ)
1216                                 }
1217                         }
1218                 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1219                     KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1220                         /*
1221                          * We need a special treatment for each codes.
1222                          * By adding some offset number for them, we
1223                          * can process them as the same way of that of
1224                          * extended IBM chars.
1225                          */
1226                         if (flag & KICONV_REPLACE_INVALID) {
1227                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1228                         } else {
1229                                 KICONV_JA_NGET(ic2);
1230                         }
1231                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1232                                 ushort_t dest, upper, lower;
1233                                 dest = (ic1 << 8) + ic2;
1234                                 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1235                                         KICONV_JA_REMAP_NEC(dest);
1236                                         if (dest == 0xffff) {
1237                                                 if (flag &
1238                                                     KICONV_REPLACE_INVALID) {
1239                                                         KICONV_JA_PUTU(
1240                                                             KICONV_JA_REPLACE);
1241                                                         rv++;
1242                                                 } else {
1243                                                         KICONV_JA_RETERROR(
1244                                                             EILSEQ)
1245                                                 }
1246                                         }
1247                                 }
1248                                 /*
1249                                  * XXX: 0xfa54 and 0xfa5b must be mapped
1250                                  *      to JIS0208 area. Therefore we
1251                                  *      have to do special treatment.
1252                                  */
1253                                 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1254                                         if (dest == 0xfa54) {
1255                                                 upper = 0x22;
1256                                                 lower = 0x4c;
1257                                         } else {
1258                                                 upper = 0x22;
1259                                                 lower = 0x68;
1260                                         }
1261                                         KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1262                                             upper, lower);
1263                                         if (uni == KICONV_JA_NODEST) {
1264                                                 index = (uint_t)((upper - 0x21)
1265                                                     * 94 + (lower - 0x21));
1266                                                 uni = kiconv_ja_jisx0208_to_ucs2
1267                                                     [index];
1268                                         }
1269                                         if (uni == KICONV_JA_REPLACE)
1270                                                 rv++;
1271                                         KICONV_JA_PUTU(uni);
1272                                 } else {
1273                                         dest = dest - 0xfa40 -
1274                                             (((dest>>8) - 0xfa) * 0x40);
1275                                         dest = kiconv_ja_sjtoibmext[dest];
1276                                         if (dest == 0xffff) {
1277                                                 if (flag &
1278                                                     KICONV_REPLACE_INVALID) {
1279                                                         KICONV_JA_PUTU(
1280                                                             KICONV_JA_REPLACE);
1281                                                         rv++;
1282                                                 } else {
1283                                                         KICONV_JA_RETERROR(
1284                                                             EILSEQ)
1285                                                 }
1286                                         }
1287                                         upper = (dest >> 8) & KICONV_JA_CMASK;
1288                                         lower = dest & KICONV_JA_CMASK;
1289                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1290                                             upper, lower);
1291                                         if (uni == KICONV_JA_NODEST) {
1292                                                 index = (uint_t)((upper - 0x21)
1293                                                     * 94 + (lower - 0x21));
1294                                                 uni = kiconv_ja_jisx0212_to_ucs2
1295                                                     [index];
1296                                         }
1297                                         if (uni == KICONV_JA_REPLACE)
1298                                                 rv++;
1299                                         KICONV_JA_PUTU(uni);
1300                                 }
1301                         } else { /* 2nd byte check failed */
1302                                 if (flag & KICONV_REPLACE_INVALID) {
1303                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1304                                         rv++;
1305                                 } else {
1306                                         KICONV_JA_RETERROR(EILSEQ)
1307                                 }
1308                         }
1309                 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1310                 /*
1311                  * Based on the draft convention of OSF-JVC CDEWG,
1312                  * characters in this area will be mapped to
1313                  * "CHIKAN-MOJI." (convertible character)
1314                  * We use U+FFFD in this case.
1315                  */
1316                         if (flag & KICONV_REPLACE_INVALID) {
1317                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1318                         } else {
1319                                 KICONV_JA_NGET(ic2);
1320                         }
1321                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1322                                 uni = 0xfffd;
1323                                 KICONV_JA_PUTU(uni);
1324                         } else { /* 2nd byte check failed */
1325                                 if (flag & KICONV_REPLACE_INVALID) {
1326                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1327                                         rv++;
1328                                 } else {
1329                                         KICONV_JA_RETERROR(EILSEQ)
1330                                 }
1331                         }
1332                 } else { /* 1st byte check failed */
1333                         if (flag & KICONV_REPLACE_INVALID) {
1334                                 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1335                                 rv++;
1336                         } else {
1337                                 KICONV_JA_RETERROR(EILSEQ)
1338                         }
1339                 }
1340 
1341 next:
1342                 /*
1343                  * One character successfully converted so update
1344                  * values outside of this function's stack.
1345                  */
1346                 *inbytesleft = ileft;
1347                 *outbytesleft = oleft;
1348         }
1349 
1350 ret:
1351         return (rv);
1352 }
1353 
1354 static size_t
1355 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1356         size_t *outbytesleft, int flag, int *errno, uint8_t id)
1357 {
1358         uchar_t         ic;
1359         size_t          rv = 0;
1360         uint_t          ucs4;
1361         ushort_t        euc16;
1362         ushort_t        dest;
1363 
1364         uchar_t *ip;
1365         size_t          ileft;
1366         char            *op;
1367         size_t          oleft;
1368         size_t          read_len;
1369 
1370         boolean_t do_not_ignore_null;
1371 
1372         if ((inbuf == NULL) || (*inbuf == NULL)) {
1373                 return (0);
1374         }
1375 
1376         ip = (uchar_t *)inbuf;
1377         ileft = *inbytesleft;
1378         op = outbuf;
1379         oleft = *outbytesleft;
1380 
1381         KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1382 
1383         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1384 
1385         while (ileft != 0) {
1386                 KICONV_JA_GETU(&ucs4, flag);
1387 
1388                 if (ucs4 == 0x0 && do_not_ignore_null) {
1389                         return (0);
1390                 }
1391 
1392                 if (ucs4 > 0xffff) {
1393                         /* non-BMP */
1394                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1395                         rv++;
1396                         goto next;
1397                 }
1398 
1399                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1400                 if (euc16 == KICONV_JA_NODEST) {
1401                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1402                 }
1403                 if (euc16 == KICONV_JA_NODEST) {
1404                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1405                         rv++;
1406                         goto next;
1407                 }
1408 
1409                 switch (euc16 & 0x8080) {
1410                 case 0x0000:    /* CS0 */
1411                         if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1412                                 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1413                                 rv++;
1414                         } else {
1415                                 ic = (uchar_t)euc16;
1416                                 KICONV_JA_NPUT(ic);
1417                         }
1418                         break;
1419                 case 0x8080:    /* CS1 */
1420                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1421                         KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1422                         /*
1423                          * for even number row (Ku), add 0x80 to
1424                          * look latter half of kiconv_ja_jistosj2[] array
1425                          */
1426                         ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1427                             + (((ic % 2) == 0) ? 0x80 : 0x00));
1428                         KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1429                         break;
1430                 case 0x0080:    /* CS2 */
1431                         ic = (uchar_t)euc16;
1432                         KICONV_JA_NPUT(ic);
1433                         break;
1434                 case 0x8000:    /* CS3 */
1435                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1436                         if (euc16 == 0xa271) {
1437                                 /* NUMERO SIGN */
1438                                 KICONV_JA_NPUT(0x87);
1439                                 KICONV_JA_NPUT(0x82);
1440                         } else if (ic < 0x75) { /* check if IBM VDC */
1441                                 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1442                                 if (dest == 0xffff) {
1443                                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1444                                 } else {
1445                                         /* avoid putting NUL ('\0') */
1446                                         if (dest > 0xff) {
1447                                                 KICONV_JA_NPUT(
1448                                                     (dest >> 8) & 0xff);
1449                                                 KICONV_JA_NPUT(dest & 0xff);
1450                                         } else {
1451                                                 KICONV_JA_NPUT(dest & 0xff);
1452                                         }
1453                                 }
1454                         } else {
1455                                 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1456                                 /*
1457                                  * for even number row (Ku), add 0x80 to
1458                                  * look latter half of kiconv_ja_jistosj2[]
1459                                  */
1460                                 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1461                                     + (((ic % 2) == 0) ? 0x80 : 0x00));
1462                                 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1463                         }
1464                         break;
1465                 }
1466 
1467 next:
1468                 /*
1469                  * One character successfully converted so update
1470                  * values outside of this function's stack.
1471                  */
1472                 *inbytesleft = ileft;
1473                 *outbytesleft = oleft;
1474         }
1475 
1476 ret:
1477         return (rv);
1478 }
1479 
1480 static size_t
1481 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1482                 char **outbuf, size_t *outbytesleft, int *errno)
1483 {
1484         if (! kcd || kcd == (void *)-1) {
1485                 *errno = EBADF;
1486                 return ((size_t)-1);
1487         }
1488 
1489         return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1490             outbuf, outbytesleft, errno));
1491 }
1492 
1493 static size_t
1494 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1495                 char **outbuf, size_t *outbytesleft, int *errno)
1496 {
1497         if (! kcd || kcd == (void *)-1) {
1498                 *errno = EBADF;
1499                 return ((size_t)-1);
1500         }
1501 
1502         return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1503             outbuf, outbytesleft, errno));
1504 }
1505 
1506 static size_t
1507 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1508         size_t *outbytesleft, int flag, int *errno)
1509 {
1510         return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1511             outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512 }
1513 
1514 static size_t
1515 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1516         size_t *outbytesleft, int flag, int *errno)
1517 {
1518         return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1519             outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520 }
1521 
1522 static size_t
1523 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1524         size_t *outbytesleft, int flag, int *errno)
1525 {
1526         return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1527             outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528 }
1529 
1530 static size_t
1531 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1532         size_t *outbytesleft, int flag, int *errno)
1533 {
1534         return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1535             outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536 }
1537 
1538 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1539         {
1540                 "eucjp", "utf-8", open_eucjp,
1541                 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1542         },
1543         {
1544                 "utf-8", "eucjp", open_eucjp,
1545                 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1546         },
1547         {
1548                 "eucjpms", "utf-8", open_eucjpms,
1549                 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1550         },
1551         {
1552                 "utf-8", "eucjpms", open_eucjpms,
1553                 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1554         },
1555         {
1556                 "sjis", "utf-8", open_sjis,
1557                 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1558         },
1559         {
1560                 "utf-8", "sjis", open_sjis,
1561                 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1562         },
1563         {
1564                 "cp932", "utf-8", open_cp932,
1565                 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1566         },
1567         {
1568                 "utf-8", "cp932", open_cp932,
1569                 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1570         }
1571 };
1572 
/*
 * Alternative code set names and the names they stand for.  The two arrays
 * have the same number of entries and are presumably matched up by index
 * ("932" -> "cp932", "shiftjis" -> "sjis", "pck" -> "sjis"), so keep them
 * in step when adding an alias.
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};
1575 
/* Number of entries in kiconv_ja_ops_tbl[]. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
/* Number of entries in kiconv_ja_aliases[]. */
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1580 
1581 static kiconv_module_info_t kiconv_ja_info = {
1582         "kiconv_ja",            /* module name */
1583         KICONV_JA_MAX_JA_OPS,   /* number of conversion in kiconv_ja */
1584         kiconv_ja_ops_tbl,      /* kiconv_ja ops table */
1585         KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1586         kiconv_ja_aliases,      /* kiconv_ja aliases */
1587         kiconv_ja_canonicals,   /* kiconv_ja canonicals */
1588         0
1589 };
1590 
1591 static struct modlkiconv modlkiconv_ja = {
1592         &mod_kiconvops,
1593         "kiconv module for Japanese",
1594         &kiconv_ja_info
1595 };
1596 
1597 static struct modlinkage modlinkage = {
1598         MODREV_1,
1599         (void *)&modlkiconv_ja,
1600         NULL
1601 };
1602 
1603 int
1604 _init(void)
1605 {
1606         int err;
1607 
1608         err = mod_install(&modlinkage);
1609         if (err)
1610                 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1611 
1612         return (err);
1613 }
1614 
1615 int
1616 _info(struct modinfo *modinfop)
1617 {
1618         return (mod_info(&modlinkage, modinfop));
1619 }
1620 
1621 int
1622 _fini(void)
1623 {
1624         int err;
1625 
1626         /*
1627          * If this module is being used, then, we cannot remove the module.
1628          * The following checking will catch pretty much all usual cases.
1629          *
1630          * Any remaining will be catached by the kiconv_unregister_module()
1631          * during mod_remove() at below.
1632          */
1633         if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1634                 return (EBUSY);
1635 
1636         err = mod_remove(&modlinkage);
1637         if (err)
1638                 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1639 
1640         return (err);
1641 }