1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/sysmacros.h>
  30 #include <sys/systm.h>
  31 #include <sys/debug.h>
  32 #include <sys/kmem.h>
  33 #include <sys/sunddi.h>
  34 #include <sys/byteorder.h>
  35 #include <sys/errno.h>
  36 #include <sys/euc.h>
  37 #include <sys/modctl.h>
  38 #include <sys/kiconv.h>
  39 
  40 #include <sys/kiconv_ja.h>
  41 #include <sys/kiconv_ja_jis_to_unicode.h>
  42 #include <sys/kiconv_ja_unicode_to_jis.h>
  43 
/*
 * The following vector gives the total number of bytes in a UTF-8
 * character (or a negative marker for an illegal or out-of-range first
 * byte). The index is the first byte of the character. This is defined in
 * u8_textprep.c.
 */
  49 extern const int8_t u8_number_of_bytes[];
  50 
/*
 * The following is a vector of bit-masks used to extract the payload bits
 * from the first byte of a UTF-8 character. The index is the number of
 * bytes that follow the first byte. This is defined in uconv.c.
 */
  56 extern const uchar_t u8_masks_tbl[];
  57 
  58 /*
  59  * The following two vectors are to provide valid minimum and
  60  * maximum values for the 2'nd byte of a multibyte UTF-8 character for
  61  * better illegal sequence checking. The index value must be the value of
  62  * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
  63  */
  64 extern const uint8_t u8_valid_min_2nd_byte[];
  65 extern const uint8_t u8_valid_max_2nd_byte[];
  66 
  67 static kiconv_ja_euc16_t
  68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
  69 {
  70         const kiconv_ja_euc16_t *p;
  71 
  72         if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
  73                 return (p[ucs2 & 0xff]);
  74 
  75         return (KICONV_JA_NODEST);
  76 }
  77 
  78 static size_t
  79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
  80 {
  81         uint_t  l;              /* to be copied to *p on successful return */
  82         uchar_t ic;             /* current byte */
  83         uchar_t ic1;            /* 1st byte */
  84         uchar_t *ip = *pip;     /* next byte to read */
  85         size_t  ileft = *pileft; /* number of bytes available */
  86         size_t  rv = 0;         /* return value of this function */
  87         int     remaining_bytes;
  88         int     u8_size;
  89 
  90         KICONV_JA_NGET(ic1);    /* read 1st byte */
  91 
  92         if (ic1 < 0x80) {
  93                 /* successfully converted */
  94                 *p = (uint_t)ic1;
  95                 goto ret;
  96         }
  97 
  98         u8_size = u8_number_of_bytes[ic1];
  99         if (u8_size == U8_ILLEGAL_CHAR) {
 100                 KICONV_JA_RETERROR(EILSEQ)
 101         } else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
 102                 KICONV_JA_RETERROR(ERANGE)
 103         }
 104 
 105         remaining_bytes = u8_size - 1;
 106         if (remaining_bytes != 0) {
 107                 l = ic1 & u8_masks_tbl[remaining_bytes];
 108 
 109                 for (; remaining_bytes > 0; remaining_bytes--) {
 110                         KICONV_JA_NGET(ic);
 111                         if (ic1 != 0U) {
 112                                 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
 113                                     (ic > u8_valid_max_2nd_byte[ic1])) {
 114                                         KICONV_JA_RETERROR(EILSEQ)
 115                                 }
 116                                 ic1 = 0U; /* 2nd byte check done */
 117                         } else {
 118                                 if ((ic < 0x80) || (ic > 0xbf)) {
 119                                         KICONV_JA_RETERROR(EILSEQ)
 120                                 }
 121                         }
 122                         l = (l << 6) | (ic & 0x3f);
 123                 }
 124 
 125                 /* successfully converted */
 126                 *p = l;
 127         } else {
 128                 KICONV_JA_RETERROR(EILSEQ)
 129         }
 130 
 131 ret:
 132         if (rv == 0) {
 133                 /*
 134                  * Update rv, *pip, and *pileft on successfule return.
 135                  */
 136                 rv = *pileft - ileft;
 137                 *pip = ip;
 138                 *pileft = ileft;
 139         }
 140 
 141         return (rv);
 142 }
 143 
 144 static size_t
 145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
 146 {
 147         uint_t  l;              /* to be copied to *p on successful return */
 148         uchar_t ic;             /* current byte */
 149         uchar_t ic1;            /* 1st byte */
 150         uchar_t *ip = *pip;     /* next byte to read */
 151         size_t  ileft = *pileft; /* number of bytes available */
 152         size_t  rv = 0;         /* return value of this function */
 153         int     remaining_bytes;
 154         int     u8_size;
 155 
 156         KICONV_JA_NGET_REP_TO_MB(ic1);  /* read 1st byte */
 157 
 158         if (ic1 < 0x80) {
 159                 /* successfully converted */
 160                 l = (uint_t)ic1;
 161                 goto ret;
 162         }
 163 
 164         u8_size = u8_number_of_bytes[ic1];
 165         if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
 166                 l = KICONV_JA_DEF_SINGLE;
 167                 (*repnum)++;
 168                 goto ret;
 169         }
 170 
 171         remaining_bytes = u8_size - 1;
 172 
 173         if (remaining_bytes != 0) {
 174                 l = ic1 & u8_masks_tbl[remaining_bytes];
 175 
 176                 for (; remaining_bytes > 0; remaining_bytes--) {
 177                         KICONV_JA_NGET_REP_TO_MB(ic);
 178                         if (ic1 != 0U) {
 179                                 if ((ic < u8_valid_min_2nd_byte[ic1]) ||
 180                                     (ic > u8_valid_max_2nd_byte[ic1])) {
 181                                         l = KICONV_JA_DEF_SINGLE;
 182                                         (*repnum)++;
 183                                         ileft -= (remaining_bytes - 1);
 184                                         ip += (remaining_bytes - 1);
 185                                         break;
 186                                 }
 187                                 ic1 = 0U; /* 2nd byte check done */
 188                         } else {
 189                                 if ((ic < 0x80) || (ic > 0xbf)) {
 190                                         l = KICONV_JA_DEF_SINGLE;
 191                                         (*repnum)++;
 192                                         ileft -= (remaining_bytes - 1);
 193                                         ip += (remaining_bytes - 1);
 194                                         break;
 195                                 }
 196                         }
 197                         l = (l << 6) | (ic & 0x3f);
 198                 }
 199         } else {
 200                 l = KICONV_JA_DEF_SINGLE;
 201                 (*repnum)++;
 202         }
 203 
 204 ret:
 205         /* successfully converted */
 206         *p = l;
 207         rv = *pileft - ileft;
 208 
 209         *pip = ip;
 210         *pileft = ileft;
 211 
 212         return (rv);
 213 }
 214 
 215 static size_t                           /* return #bytes read, or -1 */
 216 read_unicode(
 217         uint_t  *p,             /* point variable to store UTF-32 */
 218         uchar_t **pip,          /* point pointer to input buf */
 219         size_t  *pileft,        /* point #bytes left in input buf */
 220         int     *errno,         /* point variable to errno */
 221         int     flag,           /* kiconvstr flag */
 222         size_t  *rv)            /* point return valuse */
 223 {
 224         if (flag & KICONV_REPLACE_INVALID)
 225                 return (utf8_ucs_replace(p, pip, pileft, rv));
 226         else
 227                 return (utf8_ucs(p, pip, pileft, errno));
 228 }
 229 
 230 static size_t
 231 write_unicode(
 232         uint_t  u32,            /* UTF-32 to write */
 233         char    **pop,          /* point pointer to output buf */
 234         size_t  *poleft,        /* point #bytes left in output buf */
 235         int     *errno)         /* point variable to errno */
 236 {
 237         char    *op = *pop;
 238         size_t  oleft = *poleft;
 239         size_t  rv = 0;                 /* return value */
 240 
 241         if (u32 <= 0x7f) {
 242                 KICONV_JA_NPUT((uchar_t)(u32));
 243                 rv = 1;
 244         } else if (u32 <= 0x7ff) {
 245                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
 246                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 247                 rv = 2;
 248         } else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
 249                 KICONV_JA_RETERROR(EILSEQ)
 250         } else if (u32 <= 0xffff) {
 251                 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
 252                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
 253                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 254                 rv = 3;
 255         } else if (u32 <= 0x10ffff) {
 256                 KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
 257                 KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
 258                 KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
 259                 KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
 260                 rv = 4;
 261         } else {
 262                 KICONV_JA_RETERROR(EILSEQ)
 263         }
 264 
 265 ret:
 266         if (rv != (size_t)-1) {
 267                 /* update *pop and *poleft only on successful return */
 268                 *pop = op;
 269                 *poleft = oleft;
 270         }
 271 
 272         return (rv);
 273 }
 274 
 275 static void *
 276 _kiconv_ja_open_unicode(uint8_t id)
 277 {
 278         kiconv_state_t  kcd;
 279 
 280         kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
 281             KM_SLEEP);
 282         kcd->id = id;
 283         kcd->bom_processed = 0;
 284         return ((void *)kcd);
 285 }
 286 
 287 static void *
 288 open_eucjp(void)
 289 {
 290         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
 291 }
 292 
 293 static void *
 294 open_eucjpms(void)
 295 {
 296         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
 297 }
 298 
 299 static void *
 300 open_sjis(void)
 301 {
 302         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
 303 }
 304 
 305 static void *
 306 open_cp932(void)
 307 {
 308         return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
 309 }
 310 
 311 int
 312 close_ja(void *kcd)
 313 {
 314         if (! kcd || kcd == (void *)-1)
 315                 return (EBADF);
 316 
 317         kmem_free(kcd, sizeof (kiconv_state_data_t));
 318 
 319         return (0);
 320 }
 321 
 322 static size_t
 323 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 324                 char **outbuf, size_t *outbytesleft, int *errno)
 325 {
 326         uint_t          u32;            /* UTF-32 */
 327         uint_t          index;          /* index for table lookup */
 328         uchar_t         ic1, ic2, ic3;  /* 1st, 2nd, and 3rd bytes of a char */
 329         size_t          rv = 0;         /* return value of this function */
 330 
 331         uchar_t *ip;
 332         size_t          ileft;
 333         char            *op;
 334         size_t          oleft;
 335         size_t          id = ((kiconv_state_t)kcd)->id;
 336 
 337         if ((inbuf == NULL) || (*inbuf == NULL)) {
 338                 return (0);
 339         }
 340 
 341         ip = (uchar_t *)*inbuf;
 342         ileft = *inbytesleft;
 343         op = *outbuf;
 344         oleft = *outbytesleft;
 345 
 346         while (ileft != 0) {
 347                 KICONV_JA_NGET(ic1);            /* get 1st byte */
 348 
 349                 if (KICONV_JA_ISASC(ic1)) {     /* ASCII; 1 byte */
 350                         u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
 351                         KICONV_JA_PUTU(u32);
 352                 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
 353                         KICONV_JA_NGET(ic2);
 354                         if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
 355                                 ic1 &= KICONV_JA_CMASK;
 356                                 ic2 &= KICONV_JA_CMASK;
 357                                 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
 358                                 if (u32 == KICONV_JA_NODEST) {
 359                                         index = (ic1 - 0x21) * 94 + ic2 - 0x21;
 360                                         u32 = kiconv_ja_jisx0208_to_ucs2[index];
 361                                 }
 362                                 if (u32 == KICONV_JA_REPLACE)
 363                                         rv++;
 364                                 KICONV_JA_PUTU(u32);
 365                         } else { /* 2nd byte check failed */
 366                                 KICONV_JA_RETERROR(EILSEQ)
 367                         }
 368                 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
 369                         KICONV_JA_NGET(ic2);
 370                         if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
 371                                 index = (ic2 - 0xa1);
 372                                 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
 373                                 KICONV_JA_PUTU(u32);
 374                         } else { /* 2nd byte check failed */
 375                                 KICONV_JA_RETERROR(EILSEQ)
 376                         }
 377                 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
 378                         KICONV_JA_NGET(ic2);
 379                         if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
 380                                 KICONV_JA_NGET(ic3);
 381                                 if (KICONV_JA_ISCS3(ic3)) {
 382                                         /* 3rd byte check passed */
 383                                         ic2 &= KICONV_JA_CMASK;
 384                                         ic3 &= KICONV_JA_CMASK;
 385                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
 386                                             ic2, ic3);
 387                                         if (u32 == KICONV_JA_NODEST) {
 388                                                 index = ((ic2 - 0x21) * 94 +
 389                                                     (ic3 - 0x21));
 390                                                 u32 = kiconv_ja_jisx0212_to_ucs2
 391                                                     [index];
 392                                         }
 393                                         if (u32 == KICONV_JA_REPLACE)
 394                                                 rv++;
 395                                         KICONV_JA_PUTU(u32);
 396                                 } else { /* 3rd byte check failed */
 397                                         KICONV_JA_RETERROR(EILSEQ)
 398                                 }
 399                         } else { /* 2nd byte check failed */
 400                                 KICONV_JA_RETERROR(EILSEQ)
 401                         }
 402                 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
 403                         /* C1 control; 1 byte */
 404                         u32 = ic1;
 405                         KICONV_JA_PUTU(u32);
 406                 } else { /* 1st byte check failed */
 407                         KICONV_JA_RETERROR(EILSEQ)
 408                 }
 409 
 410                 /*
 411                  * One character successfully converted so update
 412                  * values outside of this function's stack.
 413                  */
 414                 *inbuf = (char *)ip;
 415                 *inbytesleft = ileft;
 416                 *outbuf = op;
 417                 *outbytesleft = oleft;
 418         }
 419 
 420 ret:
 421         return (rv);
 422 }
 423 
 424 static size_t
 425 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 426                 char **outbuf, size_t *outbytesleft, int *errno)
 427 {
 428         uchar_t         ic;
 429         size_t          rv = 0;
 430         uint_t          ucs4;
 431         ushort_t        euc16;
 432 
 433         uchar_t *ip;
 434         size_t          ileft;
 435         char            *op;
 436         size_t          oleft;
 437         size_t          read_len;
 438 
 439         size_t          id = ((kiconv_state_t)kcd)->id;
 440 
 441         if ((inbuf == NULL) || (*inbuf == NULL)) {
 442                 return (0);
 443         }
 444 
 445         ip = (uchar_t *)*inbuf;
 446         ileft = *inbytesleft;
 447         op = *outbuf;
 448         oleft = *outbytesleft;
 449 
 450         KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
 451 
 452         while (ileft != 0) {
 453                 KICONV_JA_GETU(&ucs4, 0);
 454 
 455                 if (ucs4 > 0xffff) {
 456                         /* non-BMP */
 457                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 458                         rv++;
 459                         goto next;
 460                 }
 461 
 462                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
 463                 if (euc16 == KICONV_JA_NODEST) {
 464                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
 465                 }
 466                 if (euc16 == KICONV_JA_NODEST) {
 467                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 468                         rv++;
 469                         goto next;
 470                 }
 471 
 472                 switch (euc16 & 0x8080) {
 473                 case 0x0000:    /* CS0 */
 474                         ic = (uchar_t)euc16;
 475                         KICONV_JA_NPUT(ic);
 476                         break;
 477                 case 0x8080:    /* CS1 */
 478                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 479                         KICONV_JA_NPUT(ic);
 480                         ic = (uchar_t)(euc16 & 0xff);
 481                         KICONV_JA_NPUT(ic);
 482                         break;
 483                 case 0x0080:    /* CS2 */
 484                         KICONV_JA_NPUT(SS2);
 485                         ic = (uchar_t)euc16;
 486                         KICONV_JA_NPUT(ic);
 487                         break;
 488                 case 0x8000:    /* CS3 */
 489                         KICONV_JA_NPUT(SS3);
 490                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 491                         KICONV_JA_NPUT(ic);
 492                         ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
 493                         KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
 494                         break;
 495                 }
 496 next:
 497                 /*
 498                  * One character successfully converted so update
 499                  * values outside of this function's stack.
 500                  */
 501                 *inbuf = (char *)ip;
 502                 *inbytesleft = ileft;
 503                 *outbuf = op;
 504                 *outbytesleft = oleft;
 505         }
 506 
 507 ret:
 508         return (rv);
 509 }
 510 
 511 static size_t
 512 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 513         size_t *outbytesleft, int flag, int *errno, uint8_t id)
 514 {
 515         uint_t          u32;            /* UTF-32 */
 516         uint_t          index;          /* index for table lookup */
 517         uchar_t         ic1, ic2, ic3;  /* 1st, 2nd, and 3rd bytes of a char */
 518         size_t          rv = 0;         /* return value of this function */
 519 
 520         uchar_t *ip;
 521         size_t          ileft;
 522         char            *op;
 523         size_t          oleft;
 524 
 525         boolean_t do_not_ignore_null;
 526 
 527         if ((inbuf == NULL) || (*inbuf == NULL)) {
 528                 return (0);
 529         }
 530 
 531         ip = (uchar_t *)inbuf;
 532         ileft = *inbytesleft;
 533         op = outbuf;
 534         oleft = *outbytesleft;
 535 
 536         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 537 
 538         while (ileft != 0) {
 539                 KICONV_JA_NGET(ic1);            /* get 1st byte */
 540 
 541                 if (KICONV_JA_ISASC(ic1)) {     /* ASCII; 1 byte */
 542                         if (ic1 == '\0' && do_not_ignore_null) {
 543                                 return (0);
 544                         }
 545                         u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
 546                         KICONV_JA_PUTU(u32);
 547                 } else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
 548                         if (flag & KICONV_REPLACE_INVALID) {
 549                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 550                         } else {
 551                                 KICONV_JA_NGET(ic2);
 552                         }
 553                         if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
 554                                 ic1 &= KICONV_JA_CMASK;
 555                                 ic2 &= KICONV_JA_CMASK;
 556                                 KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
 557                                 if (u32 == KICONV_JA_NODEST) {
 558                                         index = (ic1 - 0x21) * 94 + ic2 - 0x21;
 559                                         u32 = kiconv_ja_jisx0208_to_ucs2[index];
 560                                 }
 561                                 if (u32 == KICONV_JA_REPLACE)
 562                                         rv++;
 563                                 KICONV_JA_PUTU(u32);
 564                         } else { /* 2nd byte check failed */
 565                                 if (flag & KICONV_REPLACE_INVALID) {
 566                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 567                                         rv++;
 568                                 } else {
 569                                         KICONV_JA_RETERROR(EILSEQ)
 570                                 }
 571                         }
 572                 } else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
 573                         if (flag & KICONV_REPLACE_INVALID) {
 574                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 575                         } else {
 576                                 KICONV_JA_NGET(ic2);
 577                         }
 578                         if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
 579                                 index = (ic2 - 0xa1);
 580                                 u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
 581                                 KICONV_JA_PUTU(u32);
 582                         } else { /* 2nd byte check failed */
 583                                 if (flag & KICONV_REPLACE_INVALID) {
 584                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 585                                         rv++;
 586                                 } else {
 587                                         KICONV_JA_RETERROR(EILSEQ)
 588                                 }
 589                         }
 590                 } else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
 591                         if (flag & KICONV_REPLACE_INVALID) {
 592                                 KICONV_JA_NGET_REP_FR_MB(ic2);
 593                         } else {
 594                                 KICONV_JA_NGET(ic2);
 595                         }
 596                         if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
 597                                 if (flag & KICONV_REPLACE_INVALID) {
 598                                         KICONV_JA_NGET_REP_FR_MB(ic3);
 599                                 } else {
 600                                         KICONV_JA_NGET(ic3);
 601                                 }
 602                                 if (KICONV_JA_ISCS3(ic3)) {
 603                                         /* 3rd byte check passed */
 604                                         ic2 &= KICONV_JA_CMASK;
 605                                         ic3 &= KICONV_JA_CMASK;
 606                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
 607                                             ic2, ic3);
 608                                         if (u32 == KICONV_JA_NODEST) {
 609                                                 index = ((ic2 - 0x21) * 94 +
 610                                                     (ic3 - 0x21));
 611                                                 u32 = kiconv_ja_jisx0212_to_ucs2
 612                                                     [index];
 613                                         }
 614                                         if (u32 == KICONV_JA_REPLACE)
 615                                                 rv++;
 616                                         KICONV_JA_PUTU(u32);
 617                                 } else { /* 3rd byte check failed */
 618                                         if (flag & KICONV_REPLACE_INVALID) {
 619                                                 KICONV_JA_PUTU(
 620                                                     KICONV_JA_REPLACE);
 621                                                 rv++;
 622                                         } else {
 623                                                 KICONV_JA_RETERROR(EILSEQ)
 624                                         }
 625                                 }
 626                         } else { /* 2nd byte check failed */
 627                                 if (flag & KICONV_REPLACE_INVALID) {
 628                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
 629                                         rv++;
 630                                 } else {
 631                                         KICONV_JA_RETERROR(EILSEQ)
 632                                 }
 633                         }
 634                 } else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
 635                         /* C1 control; 1 byte */
 636                         u32 = ic1;
 637                         KICONV_JA_PUTU(u32);
 638                 } else { /* 1st byte check failed */
 639                         if (flag & KICONV_REPLACE_INVALID) {
 640                                 KICONV_JA_PUTU(KICONV_JA_REPLACE);
 641                                 rv++;
 642                         } else {
 643                                 KICONV_JA_RETERROR(EILSEQ)
 644                         }
 645                 }
 646 
 647 next:
 648                 /*
 649                  * One character successfully converted so update
 650                  * values outside of this function's stack.
 651                  */
 652                 *inbytesleft = ileft;
 653                 *outbytesleft = oleft;
 654         }
 655 
 656 ret:
 657         return (rv);
 658 }
 659 
 660 static size_t
 661 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 662         size_t *outbytesleft, int flag, int *errno, uint8_t id)
 663 {
 664         uchar_t         ic;
 665         size_t          rv = 0;
 666         uint_t          ucs4;
 667         ushort_t        euc16;
 668 
 669         uchar_t *ip;
 670         size_t          ileft;
 671         char            *op;
 672         size_t          oleft;
 673         size_t          read_len;
 674 
 675         boolean_t do_not_ignore_null;
 676 
 677         if ((inbuf == NULL) || (*inbuf == NULL)) {
 678                 return (0);
 679         }
 680 
 681         ip = (uchar_t *)inbuf;
 682         ileft = *inbytesleft;
 683         op = outbuf;
 684         oleft = *outbytesleft;
 685 
 686         KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
 687 
 688         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
 689 
 690         while (ileft != 0) {
 691                 KICONV_JA_GETU(&ucs4, flag);
 692 
 693                 if (ucs4 == 0x0 && do_not_ignore_null) {
 694                         return (0);
 695                 }
 696 
 697                 if (ucs4 > 0xffff) {
 698                         /* non-BMP */
 699                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 700                         rv++;
 701                         goto next;
 702                 }
 703 
 704                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
 705                 if (euc16 == KICONV_JA_NODEST) {
 706                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
 707                 }
 708                 if (euc16 == KICONV_JA_NODEST) {
 709                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
 710                         rv++;
 711                         goto next;
 712                 }
 713 
 714                 switch (euc16 & 0x8080) {
 715                 case 0x0000:    /* CS0 */
 716                         ic = (uchar_t)euc16;
 717                         KICONV_JA_NPUT(ic);
 718                         break;
 719                 case 0x8080:    /* CS1 */
 720                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 721                         KICONV_JA_NPUT(ic);
 722                         ic = (uchar_t)(euc16 & 0xff);
 723                         KICONV_JA_NPUT(ic);
 724                         break;
 725                 case 0x0080:    /* CS2 */
 726                         KICONV_JA_NPUT(SS2);
 727                         ic = (uchar_t)euc16;
 728                         KICONV_JA_NPUT(ic);
 729                         break;
 730                 case 0x8000:    /* CS3 */
 731                         KICONV_JA_NPUT(SS3);
 732                         ic = (uchar_t)((euc16 >> 8) & 0xff);
 733                         KICONV_JA_NPUT(ic);
 734                         ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
 735                         KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
 736                         break;
 737                 }
 738 next:
 739                 /*
 740                  * One character successfully converted so update
 741                  * values outside of this function's stack.
 742                  */
 743                 *inbytesleft = ileft;
 744                 *outbytesleft = oleft;
 745         }
 746 
 747 ret:
 748         return (rv);
 749 }
 750 
 751 static size_t
 752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 753                 char **outbuf, size_t *outbytesleft, int *errno)
 754 {
 755         if (! kcd || kcd == (void *)-1) {
 756                 *errno = EBADF;
 757                 return ((size_t)-1);
 758         }
 759 
 760         return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
 761             outbuf, outbytesleft, errno));
 762 }
 763 
 764 static size_t
 765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
 766                 char **outbuf, size_t *outbytesleft, int *errno)
 767 {
 768         if (! kcd || kcd == (void *)-1) {
 769                 *errno = EBADF;
 770                 return ((size_t)-1);
 771         }
 772 
 773         return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
 774             outbuf, outbytesleft, errno));
 775 }
 776 
 777 static size_t
 778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 779         size_t *outbytesleft, int flag, int *errno)
 780 {
 781         return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
 782             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
 783 }
 784 
 785 static size_t
 786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
 787         size_t *outbytesleft, int flag, int *errno)
 788 {
 789         return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
 790             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
 791 }
 792 
 793 static size_t
 794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
 795         size_t *outbytesleft, int flag, int *errno)
 796 {
 797         return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
 798             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
 799 }
 800 
 801 static size_t
 802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
 803         size_t *outbytesleft, int flag, int *errno)
 804 {
 805         return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
 806             outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
 807 }
 808 
/*
 * _do_kiconv_fr_sjis()
 * Convert SJIS input to Unicode, one character per loop iteration.
 *
 * Each character is classified by its first byte (ASCII, JIS X 0201
 * kana, JIS X 0208/UDC, VDC, IBM/NEC extension, or the 0xeb-0xec area),
 * translated to a code point through the kiconv_ja_* tables, and written
 * with KICONV_JA_PUTU().  The table id used by the *_TO_U2 macros is
 * read from the conversion descriptor kcd.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are
 * updated only after a character has been converted completely.
 *
 * Returns rv, which is incremented each time a table lookup yields
 * KICONV_JA_REPLACE; returns 0 at once when inbuf or *inbuf is NULL.
 *
 * NOTE(review): the KICONV_JA_* macros are defined outside this file
 * section.  KICONV_JA_NGET, KICONV_JA_PUTU and KICONV_JA_RETERROR
 * presumably operate on the locals ip/ileft/op/oleft/rv/errno by name
 * and jump to the ret label on failure -- confirm against kiconv_ja.h
 * before renaming or reordering anything here.
 */
static size_t
_do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
                char **outbuf, size_t *outbytesleft, int *errno)
{
        uint_t  uni;                    /* UTF-32 */
        uint_t  index;                  /* index for table lookup */
        uchar_t ic1, ic2;               /* 1st and 2nd bytes of a char */
        size_t  rv = 0;                 /* return value of this function */

        uchar_t *ip;
        size_t          ileft;
        char            *op;
        size_t          oleft;
        size_t          id = ((kiconv_state_t)kcd)->id;

        if ((inbuf == NULL) || (*inbuf == NULL)) {
                return (0);
        }

        /* Work on local copies; the caller's values are updated per char. */
        ip = (uchar_t *)*inbuf;
        ileft = *inbytesleft;
        op = *outbuf;
        oleft = *outbytesleft;

        while (ileft != 0) {
                KICONV_JA_NGET(ic1);                    /* get 1st byte */

                if (KICONV_JA_ISASC((int)ic1)) {        /* ASCII; 1 byte */
                        uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
                        KICONV_JA_PUTU(uni);
                } else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
                        uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
                        KICONV_JA_PUTU(uni);
                } else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                /*
                                 * Turn the SJIS byte pair into a JIS
                                 * row/column pair: the table gives the row
                                 * for the first byte, and a second byte of
                                 * 0x9f or above selects the following row.
                                 */
                                ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
                                if (ic2 >= 0x9f) {
                                        ic1++;
                                }
                                ic2 = kiconv_ja_sjtojis2[ic2];
                                KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
                                /* No id-specific mapping: use the 94x94 table */
                                if (uni == KICONV_JA_NODEST) {
                                        index = ((ic1 - 0x21) * 94)
                                            + (ic2 - 0x21);
                                        uni = kiconv_ja_jisx0208_to_ucs2[index];
                                }
                                if (uni == KICONV_JA_REPLACE)
                                        rv++;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                                /* NOTREACHED */
                        }
                } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                /* Same row/column derivation as above */
                                ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
                                if (ic2 >= 0x9f) {
                                        ic1++;
                                }
                                index = ((ic1 - 0x21) * 94)
                                    + (kiconv_ja_sjtojis2[ic2] - 0x21);
                                uni = kiconv_ja_jisx0212_to_ucs2[index];
                                if (uni == KICONV_JA_REPLACE)
                                        rv++;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
                    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
                        /*
                         * We need a special treatment for each codes.
                         * By adding some offset number for them, we
                         * can process them as the same way of that of
                         * extended IBM chars.
                         */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                ushort_t dest, upper, lower;
                                dest = (ic1 << 8) + ic2;
                                /*
                                 * Codes in 0xed40-0xeffc are rewritten by
                                 * KICONV_JA_REMAP_NEC(); 0xffff afterwards
                                 * is treated as an illegal sequence.
                                 */
                                if ((0xed40 <= dest) && (dest <= 0xeffc)) {
                                        KICONV_JA_REMAP_NEC(dest);
                                        if (dest == 0xffff) {
                                                KICONV_JA_RETERROR(EILSEQ)
                                        }
                                }
                                /*
                                 * XXX: 0xfa54 and 0xfa5b must be mapped
                                 *      to JIS0208 area. Therefore we
                                 *      have to do special treatment.
                                 */
                                if ((dest == 0xfa54) || (dest == 0xfa5b)) {
                                        if (dest == 0xfa54) {
                                                upper = 0x22;
                                                lower = 0x4c;
                                        } else {
                                                upper = 0x22;
                                                lower = 0x68;
                                        }
                                        KICONV_JA_CNV_JISMS_TO_U2(id, uni,
                                            upper, lower);
                                        if (uni == KICONV_JA_NODEST) {
                                                index = (uint_t)((upper - 0x21)
                                                    * 94 + (lower - 0x21));
                                                uni = kiconv_ja_jisx0208_to_ucs2
                                                    [index];
                                        }
                                        if (uni == KICONV_JA_REPLACE)
                                                rv++;
                                        KICONV_JA_PUTU(uni);
                                } else {
                                        /*
                                         * Collapse dest into an index for
                                         * kiconv_ja_sjtoibmext[]: offset
                                         * from 0xfa40, minus 0x40 for each
                                         * lead byte above 0xfa.
                                         */
                                        dest = dest - 0xfa40 -
                                            (((dest>>8) - 0xfa) * 0x40);
                                        dest = kiconv_ja_sjtoibmext[dest];
                                        if (dest == 0xffff) {
                                                KICONV_JA_RETERROR(EILSEQ)
                                        }
                                        upper = (dest >> 8) & KICONV_JA_CMASK;
                                        lower = dest & KICONV_JA_CMASK;
                                        KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
                                            upper, lower);
                                        if (uni == KICONV_JA_NODEST) {
                                                index = (uint_t)((upper - 0x21)
                                                    * 94 + (lower - 0x21));
                                                uni = kiconv_ja_jisx0212_to_ucs2
                                                    [index];
                                        }
                                        if (uni == KICONV_JA_REPLACE)
                                                rv++;
                                        KICONV_JA_PUTU(uni);
                                }
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
                /*
                 * Based on the draft convention of OSF-JVC CDEWG,
                 * characters in this area will be mapped to
                 * "CHIKAN-MOJI." (convertible character)
                 * We use U+FFFD in this case.
                 *
                 * NOTE(review): rv is not incremented on this path even
                 * though U+FFFD is written -- confirm this is intended.
                 */
                        KICONV_JA_NGET(ic2);
                        if (KICONV_JA_ISSJKANJI2(ic2)) {
                                uni = 0xfffd;
                                KICONV_JA_PUTU(uni);
                        } else { /* 2nd byte check failed */
                                KICONV_JA_RETERROR(EILSEQ)
                        }
                } else { /* 1st byte check failed */
                        KICONV_JA_RETERROR(EILSEQ)
                }

                /*
                 * One character successfully converted so update
                 * values outside of this function's stack.
                 */
                *inbuf = (char *)ip;
                *inbytesleft = ileft;
                *outbuf = op;
                *outbytesleft = oleft;
        }

ret:
        return (rv);
}
 976 
 977 /*
 978  * _kiconv_ja_lookuptbl()
 979  * Return the index number if its index-ed number
 980  * is the same as dest value.
 981  */
 982 static ushort_t
 983 _kiconv_ja_lookuptbl(ushort_t dest)
 984 {
 985         ushort_t tmp;
 986         int i;
 987         int sz = (sizeof (kiconv_ja_sjtoibmext) /
 988             sizeof (kiconv_ja_sjtoibmext[0]));
 989 
 990         for (i = 0; i < sz; i++) {
 991                 tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
 992                 if (tmp == dest)
 993                         return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
 994         }
 995         return (0x3f);
 996 }
 997 
 998 static size_t
 999 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1000                 char **outbuf, size_t *outbytesleft, int *errno)
1001 {
1002         uchar_t ic;
1003         size_t          rv = 0;
1004         uint_t          ucs4;
1005         ushort_t        euc16;
1006         ushort_t        dest;
1007 
1008         uchar_t *ip;
1009         size_t          ileft;
1010         char            *op;
1011         size_t          oleft;
1012         size_t          read_len;
1013 
1014         size_t          id = ((kiconv_state_t)kcd)->id;
1015 
1016         if ((inbuf == NULL) || (*inbuf == NULL)) {
1017                 return (0);
1018         }
1019 
1020         ip = (uchar_t *)*inbuf;
1021         ileft = *inbytesleft;
1022         op = *outbuf;
1023         oleft = *outbytesleft;
1024 
1025         KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
1026 
1027         while (ileft != 0) {
1028                 KICONV_JA_GETU(&ucs4, 0);
1029 
1030                 if (ucs4 > 0xffff) {
1031                         /* non-BMP */
1032                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1033                         rv++;
1034                         goto next;
1035                 }
1036 
1037                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1038                 if (euc16 == KICONV_JA_NODEST) {
1039                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1040                 }
1041                 if (euc16 == KICONV_JA_NODEST) {
1042                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1043                         rv++;
1044                         goto next;
1045                 }
1046 
1047                 switch (euc16 & 0x8080) {
1048                 case 0x0000:    /* CS0 */
1049                         if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1050                                 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1051                                 rv++;
1052                         } else {
1053                                 ic = (uchar_t)euc16;
1054                                 KICONV_JA_NPUT(ic);
1055                         }
1056                         break;
1057                 case 0x8080:    /* CS1 */
1058                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1059                         KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1060                         /*
1061                          * for even number row (Ku), add 0x80 to
1062                          * look latter half of kiconv_ja_jistosj2[] array
1063                          */
1064                         ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1065                             + (((ic % 2) == 0) ? 0x80 : 0x00));
1066                         KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1067                         break;
1068                 case 0x0080:    /* CS2 */
1069                         ic = (uchar_t)euc16;
1070                         KICONV_JA_NPUT(ic);
1071                         break;
1072                 case 0x8000:    /* CS3 */
1073                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1074                         if (euc16 == 0xa271) {
1075                                 /* NUMERO SIGN */
1076                                 KICONV_JA_NPUT(0x87);
1077                                 KICONV_JA_NPUT(0x82);
1078                         } else if (ic < 0x75) { /* check if IBM VDC */
1079                                 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1080                                 if (dest == 0xffff) {
1081                                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1082                                 } else {
1083                                         /* avoid putting NUL ('\0') */
1084                                         if (dest > 0xff) {
1085                                                 KICONV_JA_NPUT(
1086                                                     (dest >> 8) & 0xff);
1087                                                 KICONV_JA_NPUT(dest & 0xff);
1088                                         } else {
1089                                                 KICONV_JA_NPUT(dest & 0xff);
1090                                         }
1091                                 }
1092                         } else {
1093                                 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1094                                 /*
1095                                  * for even number row (Ku), add 0x80 to
1096                                  * look latter half of kiconv_ja_jistosj2[]
1097                                  */
1098                                 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1099                                     + (((ic % 2) == 0) ? 0x80 : 0x00));
1100                                 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1101                         }
1102                         break;
1103                 }
1104 
1105 next:
1106                 /*
1107                  * One character successfully converted so update
1108                  * values outside of this function's stack.
1109                  */
1110                 *inbuf = (char *)ip;
1111                 *inbytesleft = ileft;
1112                 *outbuf = op;
1113                 *outbytesleft = oleft;
1114         }
1115 
1116 ret:
1117         return (rv);
1118 }
1119 
1120 static size_t
1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1122         size_t *outbytesleft, int flag, int *errno, uint8_t id)
1123 {
1124         uint_t          uni;            /* UTF-32 */
1125         uint_t          index;          /* index for table lookup */
1126         uchar_t         ic1, ic2;       /* 1st and 2nd bytes of a char */
1127         size_t          rv = 0;         /* return value of this function */
1128 
1129         uchar_t *ip;
1130         size_t          ileft;
1131         char            *op;
1132         size_t          oleft;
1133 
1134         boolean_t do_not_ignore_null;
1135 
1136         if ((inbuf == NULL) || (*inbuf == NULL)) {
1137                 return (0);
1138         }
1139 
1140         ip = (uchar_t *)inbuf;
1141         ileft = *inbytesleft;
1142         op = outbuf;
1143         oleft = *outbytesleft;
1144 
1145         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1146 
1147         while (ileft != 0) {
1148                 KICONV_JA_NGET(ic1);                    /* get 1st byte */
1149 
1150                 if (KICONV_JA_ISASC((int)ic1)) {        /* ASCII; 1 byte */
1151                         if (ic1 == '\0' && do_not_ignore_null) {
1152                                 return (0);
1153                         }
1154                         uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1155                         KICONV_JA_PUTU(uni);
1156                 } else if (KICONV_JA_ISSJKANA(ic1)) {
1157                         /* JIS X 0201 Kana; 1 byte */
1158                         uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1159                         KICONV_JA_PUTU(uni);
1160                 } else if (KICONV_JA_ISSJKANJI1(ic1)) {
1161                         /* JIS X 0208 or UDC; 2 bytes */
1162                         if (flag & KICONV_REPLACE_INVALID) {
1163                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1164                         } else {
1165                                 KICONV_JA_NGET(ic2);
1166                         }
1167                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1168                                 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1169                                 if (ic2 >= 0x9f) {
1170                                         ic1++;
1171                                 }
1172                                 ic2 = kiconv_ja_sjtojis2[ic2];
1173                                 KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1174                                 if (uni == KICONV_JA_NODEST) {
1175                                         index = ((ic1 - 0x21) * 94)
1176                                             + (ic2 - 0x21);
1177                                         uni = kiconv_ja_jisx0208_to_ucs2[index];
1178                                 }
1179                                 if (uni == KICONV_JA_REPLACE)
1180                                         rv++;
1181                                 KICONV_JA_PUTU(uni);
1182                         } else { /* 2nd byte check failed */
1183                                 if (flag & KICONV_REPLACE_INVALID) {
1184                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1185                                         rv++;
1186                                 } else {
1187                                         KICONV_JA_RETERROR(EILSEQ)
1188                                 }
1189                                 /* NOTREACHED */
1190                         }
1191                 } else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1192                         if (flag & KICONV_REPLACE_INVALID) {
1193                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1194                         } else {
1195                                 KICONV_JA_NGET(ic2);
1196                         }
1197                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1198                                 ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1199                                 if (ic2 >= 0x9f) {
1200                                         ic1++;
1201                                 }
1202                                 index = ((ic1 - 0x21) * 94)
1203                                     + (kiconv_ja_sjtojis2[ic2] - 0x21);
1204                                 uni = kiconv_ja_jisx0212_to_ucs2[index];
1205                                 if (uni == KICONV_JA_REPLACE)
1206                                         rv++;
1207                                 KICONV_JA_PUTU(uni);
1208                         } else { /* 2nd byte check failed */
1209                                 if (flag & KICONV_REPLACE_INVALID) {
1210                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1211                                         rv++;
1212                                 } else {
1213                                         KICONV_JA_RETERROR(EILSEQ)
1214                                 }
1215                         }
1216                 } else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1217                     KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1218                         /*
1219                          * We need a special treatment for each codes.
1220                          * By adding some offset number for them, we
1221                          * can process them as the same way of that of
1222                          * extended IBM chars.
1223                          */
1224                         if (flag & KICONV_REPLACE_INVALID) {
1225                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1226                         } else {
1227                                 KICONV_JA_NGET(ic2);
1228                         }
1229                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1230                                 ushort_t dest, upper, lower;
1231                                 dest = (ic1 << 8) + ic2;
1232                                 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1233                                         KICONV_JA_REMAP_NEC(dest);
1234                                         if (dest == 0xffff) {
1235                                                 if (flag &
1236                                                     KICONV_REPLACE_INVALID) {
1237                                                         KICONV_JA_PUTU(
1238                                                             KICONV_JA_REPLACE);
1239                                                         rv++;
1240                                                 } else {
1241                                                         KICONV_JA_RETERROR(
1242                                                             EILSEQ)
1243                                                 }
1244                                         }
1245                                 }
1246                                 /*
1247                                  * XXX: 0xfa54 and 0xfa5b must be mapped
1248                                  *      to JIS0208 area. Therefore we
1249                                  *      have to do special treatment.
1250                                  */
1251                                 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1252                                         if (dest == 0xfa54) {
1253                                                 upper = 0x22;
1254                                                 lower = 0x4c;
1255                                         } else {
1256                                                 upper = 0x22;
1257                                                 lower = 0x68;
1258                                         }
1259                                         KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1260                                             upper, lower);
1261                                         if (uni == KICONV_JA_NODEST) {
1262                                                 index = (uint_t)((upper - 0x21)
1263                                                     * 94 + (lower - 0x21));
1264                                                 uni = kiconv_ja_jisx0208_to_ucs2
1265                                                     [index];
1266                                         }
1267                                         if (uni == KICONV_JA_REPLACE)
1268                                                 rv++;
1269                                         KICONV_JA_PUTU(uni);
1270                                 } else {
1271                                         dest = dest - 0xfa40 -
1272                                             (((dest>>8) - 0xfa) * 0x40);
1273                                         dest = kiconv_ja_sjtoibmext[dest];
1274                                         if (dest == 0xffff) {
1275                                                 if (flag &
1276                                                     KICONV_REPLACE_INVALID) {
1277                                                         KICONV_JA_PUTU(
1278                                                             KICONV_JA_REPLACE);
1279                                                         rv++;
1280                                                 } else {
1281                                                         KICONV_JA_RETERROR(
1282                                                             EILSEQ)
1283                                                 }
1284                                         }
1285                                         upper = (dest >> 8) & KICONV_JA_CMASK;
1286                                         lower = dest & KICONV_JA_CMASK;
1287                                         KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1288                                             upper, lower);
1289                                         if (uni == KICONV_JA_NODEST) {
1290                                                 index = (uint_t)((upper - 0x21)
1291                                                     * 94 + (lower - 0x21));
1292                                                 uni = kiconv_ja_jisx0212_to_ucs2
1293                                                     [index];
1294                                         }
1295                                         if (uni == KICONV_JA_REPLACE)
1296                                                 rv++;
1297                                         KICONV_JA_PUTU(uni);
1298                                 }
1299                         } else { /* 2nd byte check failed */
1300                                 if (flag & KICONV_REPLACE_INVALID) {
1301                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1302                                         rv++;
1303                                 } else {
1304                                         KICONV_JA_RETERROR(EILSEQ)
1305                                 }
1306                         }
1307                 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1308                 /*
1309                  * Based on the draft convention of OSF-JVC CDEWG,
1310                  * characters in this area will be mapped to
1311                  * "CHIKAN-MOJI." (convertible character)
1312                  * We use U+FFFD in this case.
1313                  */
1314                         if (flag & KICONV_REPLACE_INVALID) {
1315                                 KICONV_JA_NGET_REP_FR_MB(ic2);
1316                         } else {
1317                                 KICONV_JA_NGET(ic2);
1318                         }
1319                         if (KICONV_JA_ISSJKANJI2(ic2)) {
1320                                 uni = 0xfffd;
1321                                 KICONV_JA_PUTU(uni);
1322                         } else { /* 2nd byte check failed */
1323                                 if (flag & KICONV_REPLACE_INVALID) {
1324                                         KICONV_JA_PUTU(KICONV_JA_REPLACE);
1325                                         rv++;
1326                                 } else {
1327                                         KICONV_JA_RETERROR(EILSEQ)
1328                                 }
1329                         }
1330                 } else { /* 1st byte check failed */
1331                         if (flag & KICONV_REPLACE_INVALID) {
1332                                 KICONV_JA_PUTU(KICONV_JA_REPLACE);
1333                                 rv++;
1334                         } else {
1335                                 KICONV_JA_RETERROR(EILSEQ)
1336                         }
1337                 }
1338 
1339 next:
1340                 /*
1341                  * One character successfully converted so update
1342                  * values outside of this function's stack.
1343                  */
1344                 *inbytesleft = ileft;
1345                 *outbytesleft = oleft;
1346         }
1347 
1348 ret:
1349         return (rv);
1350 }
1351 
1352 static size_t
1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1354         size_t *outbytesleft, int flag, int *errno, uint8_t id)
1355 {
1356         uchar_t         ic;
1357         size_t          rv = 0;
1358         uint_t          ucs4;
1359         ushort_t        euc16;
1360         ushort_t        dest;
1361 
1362         uchar_t *ip;
1363         size_t          ileft;
1364         char            *op;
1365         size_t          oleft;
1366         size_t          read_len;
1367 
1368         boolean_t do_not_ignore_null;
1369 
1370         if ((inbuf == NULL) || (*inbuf == NULL)) {
1371                 return (0);
1372         }
1373 
1374         ip = (uchar_t *)inbuf;
1375         ileft = *inbytesleft;
1376         op = outbuf;
1377         oleft = *outbytesleft;
1378 
1379         KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1380 
1381         do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1382 
1383         while (ileft != 0) {
1384                 KICONV_JA_GETU(&ucs4, flag);
1385 
1386                 if (ucs4 == 0x0 && do_not_ignore_null) {
1387                         return (0);
1388                 }
1389 
1390                 if (ucs4 > 0xffff) {
1391                         /* non-BMP */
1392                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1393                         rv++;
1394                         goto next;
1395                 }
1396 
1397                 KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1398                 if (euc16 == KICONV_JA_NODEST) {
1399                         euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1400                 }
1401                 if (euc16 == KICONV_JA_NODEST) {
1402                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1403                         rv++;
1404                         goto next;
1405                 }
1406 
1407                 switch (euc16 & 0x8080) {
1408                 case 0x0000:    /* CS0 */
1409                         if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1410                                 KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1411                                 rv++;
1412                         } else {
1413                                 ic = (uchar_t)euc16;
1414                                 KICONV_JA_NPUT(ic);
1415                         }
1416                         break;
1417                 case 0x8080:    /* CS1 */
1418                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1419                         KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1420                         /*
1421                          * for even number row (Ku), add 0x80 to
1422                          * look latter half of kiconv_ja_jistosj2[] array
1423                          */
1424                         ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1425                             + (((ic % 2) == 0) ? 0x80 : 0x00));
1426                         KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1427                         break;
1428                 case 0x0080:    /* CS2 */
1429                         ic = (uchar_t)euc16;
1430                         KICONV_JA_NPUT(ic);
1431                         break;
1432                 case 0x8000:    /* CS3 */
1433                         ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1434                         if (euc16 == 0xa271) {
1435                                 /* NUMERO SIGN */
1436                                 KICONV_JA_NPUT(0x87);
1437                                 KICONV_JA_NPUT(0x82);
1438                         } else if (ic < 0x75) { /* check if IBM VDC */
1439                                 dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1440                                 if (dest == 0xffff) {
1441                                         KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1442                                 } else {
1443                                         /* avoid putting NUL ('\0') */
1444                                         if (dest > 0xff) {
1445                                                 KICONV_JA_NPUT(
1446                                                     (dest >> 8) & 0xff);
1447                                                 KICONV_JA_NPUT(dest & 0xff);
1448                                         } else {
1449                                                 KICONV_JA_NPUT(dest & 0xff);
1450                                         }
1451                                 }
1452                         } else {
1453                                 KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1454                                 /*
1455                                  * for even number row (Ku), add 0x80 to
1456                                  * look latter half of kiconv_ja_jistosj2[]
1457                                  */
1458                                 ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1459                                     + (((ic % 2) == 0) ? 0x80 : 0x00));
1460                                 KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1461                         }
1462                         break;
1463                 }
1464 
1465 next:
1466                 /*
1467                  * One character successfully converted so update
1468                  * values outside of this function's stack.
1469                  */
1470                 *inbytesleft = ileft;
1471                 *outbytesleft = oleft;
1472         }
1473 
1474 ret:
1475         return (rv);
1476 }
1477 
1478 static size_t
1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1480                 char **outbuf, size_t *outbytesleft, int *errno)
1481 {
1482         if (! kcd || kcd == (void *)-1) {
1483                 *errno = EBADF;
1484                 return ((size_t)-1);
1485         }
1486 
1487         return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1488             outbuf, outbytesleft, errno));
1489 }
1490 
1491 static size_t
1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1493                 char **outbuf, size_t *outbytesleft, int *errno)
1494 {
1495         if (! kcd || kcd == (void *)-1) {
1496                 *errno = EBADF;
1497                 return ((size_t)-1);
1498         }
1499 
1500         return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1501             outbuf, outbytesleft, errno));
1502 }
1503 
1504 static size_t
1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1506         size_t *outbytesleft, int flag, int *errno)
1507 {
1508         return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1509             outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1510 }
1511 
1512 static size_t
1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1514         size_t *outbytesleft, int flag, int *errno)
1515 {
1516         return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1517             outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1518 }
1519 
1520 static size_t
1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1522         size_t *outbytesleft, int flag, int *errno)
1523 {
1524         return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1525             outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1526 }
1527 
1528 static size_t
1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1530         size_t *outbytesleft, int flag, int *errno)
1531 {
1532         return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1533             outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1534 }
1535 
1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1537         {
1538                 "eucjp", "utf-8", open_eucjp,
1539                 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1540         },
1541         {
1542                 "utf-8", "eucjp", open_eucjp,
1543                 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1544         },
1545         {
1546                 "eucjpms", "utf-8", open_eucjpms,
1547                 kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1548         },
1549         {
1550                 "utf-8", "eucjpms", open_eucjpms,
1551                 kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1552         },
1553         {
1554                 "sjis", "utf-8", open_sjis,
1555                 kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1556         },
1557         {
1558                 "utf-8", "sjis", open_sjis,
1559                 kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1560         },
1561         {
1562                 "cp932", "utf-8", open_cp932,
1563                 kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1564         },
1565         {
1566                 "utf-8", "cp932", open_cp932,
1567                 kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1568         }
1569 };
1570 
/*
 * Alias names and the canonical names they stand for.  Both arrays are
 * handed to kiconv_ja_info together with a single count, so they must have
 * the same number of elements; presumably kiconv_ja_aliases[i] maps to
 * kiconv_ja_canonicals[i].
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};

/* Number of entries in kiconv_ja_ops_tbl[]. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
/* Number of entries in kiconv_ja_aliases[]. */
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1578 
1579 static kiconv_module_info_t kiconv_ja_info = {
1580         "kiconv_ja",            /* module name */
1581         KICONV_JA_MAX_JA_OPS,   /* number of conversion in kiconv_ja */
1582         kiconv_ja_ops_tbl,      /* kiconv_ja ops table */
1583         KICONV_JA_MAX_JA_ALIAS, /* number of alias in kiconv_ja */
1584         kiconv_ja_aliases,      /* kiconv_ja aliases */
1585         kiconv_ja_canonicals,   /* kiconv_ja canonicals */
1586         0
1587 };
1588 
1589 static struct modlkiconv modlkiconv_ja = {
1590         &mod_kiconvops,
1591         "kiconv module for Japanese",
1592         &kiconv_ja_info
1593 };
1594 
1595 static struct modlinkage modlinkage = {
1596         MODREV_1,
1597         { (void *)&modlkiconv_ja, NULL }
1598 };
1599 
1600 int
1601 _init(void)
1602 {
1603         int err;
1604 
1605         err = mod_install(&modlinkage);
1606         if (err)
1607                 cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1608 
1609         return (err);
1610 }
1611 
1612 int
1613 _info(struct modinfo *modinfop)
1614 {
1615         return (mod_info(&modlinkage, modinfop));
1616 }
1617 
1618 int
1619 _fini(void)
1620 {
1621         int err;
1622 
1623         /*
1624          * If this module is being used, then, we cannot remove the module.
1625          * The following checking will catch pretty much all usual cases.
1626          *
1627          * Any remaining will be catached by the kiconv_unregister_module()
1628          * during mod_remove() at below.
1629          */
1630         if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1631                 return (EBUSY);
1632 
1633         err = mod_remove(&modlinkage);
1634         if (err)
1635                 cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1636 
1637         return (err);
1638 }