1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  *
  21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  22  * Use is subject to license terms.
  23  */
  24 
  25 #include <errno.h>
  26 #include <locale.h>
  27 #include <langinfo.h>
  28 #include <iconv.h>
  29 #include <ctype.h>
  30 #include <wctype.h>
  31 #include <strings.h>
  32 #include <string.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include "includes.h"
  36 #include "xmalloc.h"
  37 #include "xlist.h"
  38 #include "compat.h"
  39 #include "log.h"
  40 
/* Drop any previously defined MIN so the definition below is the one used. */
#ifdef MIN
#undef MIN
#endif /* MIN */

/* NOTE: each argument is evaluated more than once; avoid side effects. */
#define MIN(x, y)       ((x) < (y) ? (x) : (y))

/* locale(1) binary; g11n_getlocales() runs it with "-a" to list locales */
#define LOCALE_PATH     "/usr/bin/locale"

/* two-char language code, '-' and two-char country code (e.g. "cs-CZ") */
#define LANGTAG_MAX     5

/* forward declarations of file-local helpers defined further down */
static int locale_cmp(const void *d1, const void *d2);
static char *g11n_locale2langtag(char *locale);

static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);

/*
 * native_codeset records the codeset of the default system locale.
 * It is used to convert the contents of a file (e.g. /etc/issue) which is
 * supposed to be in the codeset of the default system locale.
 *
 * It is filled in by the first call to g11n_setlocale() and is read by
 * g11n_convert_to_utf8() when its 'native' argument is non-zero.
 */
static char *native_codeset;
  63 
  64 /*
  65  * Convert locale string name into a language tag. The caller is responsible for
  66  * freeing the memory allocated for the result.
  67  */
  68 static char *
  69 g11n_locale2langtag(char *locale)
  70 {
  71         char *langtag;
  72 
  73         /* base cases */
  74         if (!locale || !*locale)
  75                 return (NULL);
  76 
  77         if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
  78                 return (xstrdup("i-default"));
  79 
  80         /* punt for language codes which are not exactly 2 letters */
  81         if (strlen(locale) < 2 ||
  82             !isalpha(locale[0]) ||
  83             !isalpha(locale[1]) ||
  84             (locale[2] != '\0' &&
  85             locale[2] != '_' &&
  86             locale[2] != '.' &&
  87             locale[2] != '@'))
  88                 return (NULL);
  89 
  90 
  91         /* we have a primary language sub-tag */
  92         langtag = (char *)xmalloc(LANGTAG_MAX + 1);
  93 
  94         strncpy(langtag, locale, 2);
  95         langtag[2] = '\0';
  96 
  97         /* do we have country sub-tag? For example: cs_CZ */
  98         if (locale[2] == '_') {
  99                 if (strlen(locale) < 5 ||
 100                     !isalpha(locale[3]) ||
 101                     !isalpha(locale[4]) ||
 102                     (locale[5] != '\0' && (locale[5] != '.' &&
 103                     locale[5] != '@'))) {
 104                         return (langtag);
 105                 }
 106 
 107                 /* example: create cs-CZ from cs_CZ */
 108                 if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
 109                     locale + 3) == 5)
 110                         return (langtag);
 111         }
 112 
 113         /* in all other cases we just use the primary language sub-tag */
 114         return (langtag);
 115 }
 116 
 117 uint_t
 118 g11n_langtag_is_default(char *langtag)
 119 {
 120         return (strcmp(langtag, "i-default") == 0);
 121 }
 122 
 123 /*
 124  * This lang tag / locale matching function works only for two-character
 125  * language primary sub-tags and two-character country sub-tags.
 126  */
 127 uint_t
 128 g11n_langtag_matches_locale(char *langtag, char *locale)
 129 {
 130         /* match "i-default" to the process' current locale if possible */
 131         if (g11n_langtag_is_default(langtag)) {
 132                 if (strcasecmp(locale, "POSIX") == 0 ||
 133                     strcasecmp(locale, "C") == 0)
 134                         return (1);
 135                 else
 136                         return (0);
 137         }
 138 
 139         /*
 140          * locale must be at least 2 chars long and the lang part must be
 141          * exactly two characters
 142          */
 143         if (strlen(locale) < 2 ||
 144             (!isalpha(locale[0]) || !isalpha(locale[1]) ||
 145             (locale[2] != '\0' && locale[2] != '_' &&
 146             locale[2] != '.' && locale[2] != '@')))
 147                 return (0);
 148 
 149         /* same thing with the langtag */
 150         if (strlen(langtag) < 2 ||
 151             (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
 152             (langtag[2] != '\0' && langtag[2] != '-')))
 153                 return (0);
 154 
 155         /* primary language sub-tag and the locale's language part must match */
 156         if (strncasecmp(langtag, locale, 2) != 0)
 157                 return (0);
 158 
 159         /*
 160          * primary language sub-tag and the locale's language match, now
 161          * fuzzy check country part
 162          */
 163 
 164         /* neither langtag nor locale have more than one component */
 165         if (langtag[2] == '\0' &&
 166             (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
 167                 return (2);
 168 
 169         /* langtag has only one sub-tag... */
 170         if (langtag[2] == '\0')
 171                 return (1);
 172 
 173         /* locale has no country code... */
 174         if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
 175                 return (1);
 176 
 177         /* langtag has more than one subtag and the locale has a country code */
 178 
 179         /* ignore second subtag if not two chars */
 180         if (strlen(langtag) < 5)
 181                 return (1);
 182 
 183         if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
 184             (langtag[5] != '\0' && langtag[5] != '-'))
 185                 return (1);
 186 
 187         /* ignore rest of locale if there is no two-character country part */
 188         if (strlen(locale) < 5)
 189                 return (1);
 190 
 191         if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
 192             (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
 193                 return (1);
 194 
 195         /* if the country part matches, return 2 */
 196         if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
 197                 return (2);
 198 
 199         return (1);
 200 }
 201 
 202 char *
 203 g11n_getlocale()
 204 {
 205         /* we have one text domain - always set it */
 206         (void) textdomain(TEXT_DOMAIN);
 207 
 208         /* if the locale is not set, set it from the env vars */
 209         if (!setlocale(LC_MESSAGES, NULL))
 210                 (void) setlocale(LC_MESSAGES, "");
 211 
 212         return (setlocale(LC_MESSAGES, NULL));
 213 }
 214 
 215 void
 216 g11n_setlocale(int category, const char *locale)
 217 {
 218         char *curr;
 219 
 220         if (native_codeset == NULL) {
 221                 /* set default locale, and record current codeset */
 222                 (void) setlocale(LC_ALL, "");
 223                 curr = nl_langinfo(CODESET);
 224                 native_codeset = xstrdup(curr);
 225         }
 226 
 227         /* we have one text domain - always set it */
 228         (void) textdomain(TEXT_DOMAIN);
 229 
 230         if (!locale)
 231                 return;
 232 
 233         if (*locale && ((curr = setlocale(category, NULL))) &&
 234             strcmp(curr, locale) == 0)
 235                 return;
 236 
 237         /* if <category> is bogus, setlocale() will do nothing */
 238         (void) setlocale(category, locale);
 239 }
 240 
 241 char **
 242 g11n_getlocales()
 243 {
 244         FILE *locale_out;
 245         uint_t n_elems, list_size, long_line = 0;
 246         char **list;
 247         char locale[64];        /* 64 bytes is plenty for locale names */
 248 
 249         if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
 250                 return (NULL);
 251 
 252         /*
 253          * start with enough room for 65 locales - that's a lot fewer than
 254          * all the locales available for installation, but a lot more than
 255          * what most users will need and install
 256          */
 257         n_elems = 0;
 258         list_size = 192;
 259         list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
 260         memset(list, 0, sizeof (char *) * (list_size + 1));
 261 
 262         while (fgets(locale, sizeof (locale), locale_out)) {
 263                 /* skip long locale names (if any) */
 264                 if (!strchr(locale, '\n')) {
 265                         long_line = 1;
 266                         continue;
 267                 } else if (long_line) {
 268                         long_line = 0;
 269                         continue;
 270                 }
 271 
 272                 if (strncmp(locale, "iso_8859", 8) == 0)
 273                         /* ignore locale names like "iso_8859-1" */
 274                         continue;
 275 
 276                 if (n_elems == list_size) {
 277                         list_size *= 2;
 278                         list = (char **)xrealloc((void *) list,
 279                             (list_size + 1) * sizeof (char *));
 280                         memset(&list[n_elems + 1], 0,
 281                             sizeof (char *) * (list_size - n_elems + 1));
 282                 }
 283 
 284                 *(strchr(locale, '\n')) = '\0'; /* remove the trailing \n */
 285                 list[n_elems++] = xstrdup(locale);
 286         }
 287 
 288         (void) pclose(locale_out);
 289 
 290         if (n_elems == 0) {
 291                 xfree(list);
 292                 return (NULL);
 293         }
 294 
 295         list[n_elems] = NULL;
 296 
 297         qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
 298         return (list);
 299 }
 300 
 301 char *
 302 g11n_getlangs()
 303 {
 304         char *locale;
 305 
 306         if (getenv("SSH_LANGS"))
 307                 return (xstrdup(getenv("SSH_LANGS")));
 308 
 309         locale = g11n_getlocale();
 310 
 311         if (!locale || !*locale)
 312                 return (xstrdup("i-default"));
 313 
 314         return (g11n_locale2langtag(locale));
 315 }
 316 
 317 char *
 318 g11n_locales2langs(char **locale_set)
 319 {
 320         char **p, **r, **q;
 321         char *langtag, *langs;
 322         int locales, skip;
 323 
 324         for (locales = 0, p = locale_set; p && *p; p++)
 325                 locales++;
 326 
 327         r = (char **)xmalloc((locales + 1) * sizeof (char *));
 328         memset(r, 0, (locales + 1) * sizeof (char *));
 329 
 330         for (p = locale_set; p && *p && ((p - locale_set) <= locales); p++) {
 331                 skip = 0;
 332                 if ((langtag = g11n_locale2langtag(*p)) == NULL)
 333                         continue;
 334                 for (q = r; (q - r) < locales; q++) {
 335                         if (!*q)
 336                                 break;
 337                         if (*q && strcmp(*q, langtag) == 0)
 338                                 skip = 1;
 339                 }
 340                 if (!skip)
 341                         *(q++) = langtag;
 342                 else
 343                         xfree(langtag);
 344                 *q = NULL;
 345         }
 346 
 347         langs = xjoin(r, ',');
 348         g11n_freelist(r);
 349 
 350         return (langs);
 351 }
 352 
 353 static int
 354 sortcmp(const void *d1, const void *d2)
 355 {
 356         char *s1 = *(char **)d1;
 357         char *s2 = *(char **)d2;
 358 
 359         return (strcmp(s1, s2));
 360 }
 361 
 362 int
 363 g11n_langtag_match(char *langtag1, char *langtag2)
 364 {
 365         int len1, len2;
 366         char c1, c2;
 367 
 368         len1 = (strchr(langtag1, '-')) ?
 369             (strchr(langtag1, '-') - langtag1)
 370             : strlen(langtag1);
 371 
 372         len2 = (strchr(langtag2, '-')) ?
 373             (strchr(langtag2, '-') - langtag2)
 374             : strlen(langtag2);
 375 
 376         /* no match */
 377         if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
 378                 return (0);
 379 
 380         c1 = *(langtag1 + len1);
 381         c2 = *(langtag2 + len2);
 382 
 383         /* no country sub-tags - exact match */
 384         if (c1 == '\0' && c2 == '\0')
 385                 return (2);
 386 
 387         /* one langtag has a country sub-tag, the other doesn't */
 388         if (c1 == '\0' || c2 == '\0')
 389                 return (1);
 390 
 391         /* can't happen - both langtags have a country sub-tag */
 392         if (c1 != '-' || c2 != '-')
 393                 return (1);
 394 
 395         /* compare country subtags */
 396         langtag1 = langtag1 + len1 + 1;
 397         langtag2 = langtag2 + len2 + 1;
 398 
 399         len1 = (strchr(langtag1, '-')) ?
 400             (strchr(langtag1, '-') - langtag1) : strlen(langtag1);
 401 
 402         len2 = (strchr(langtag2, '-')) ?
 403             (strchr(langtag2, '-') - langtag2) : strlen(langtag2);
 404 
 405         if (len1 != len2 || strncmp(langtag1, langtag2, len1) != 0)
 406                 return (1);
 407 
 408         /* country tags matched - exact match */
 409         return (2);
 410 }
 411 
 412 char *
 413 g11n_langtag_set_intersect(char *set1, char *set2)
 414 {
 415         char **list1, **list2, **list3, **p, **q, **r;
 416         char *set3, *lang_subtag;
 417         uint_t n1, n2, n3;
 418         uint_t do_append;
 419 
 420         list1 = xsplit(set1, ',');
 421         list2 = xsplit(set2, ',');
 422 
 423         for (n1 = 0, p = list1; p && *p; p++, n1++)
 424                 ;
 425         for (n2 = 0, p = list2; p && *p; p++, n2++)
 426                 ;
 427 
 428         list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
 429         *list3 = NULL;
 430 
 431         /*
 432          * we must not sort the user langtags - sorting or not the server's
 433          * should not affect the outcome
 434          */
 435         qsort(list2, n2, sizeof (char *), sortcmp);
 436 
 437         for (n3 = 0, p = list1; p && *p; p++) {
 438                 do_append = 0;
 439                 for (q = list2; q && *q; q++) {
 440                         if (g11n_langtag_match(*p, *q) != 2) continue;
 441                         /* append element */
 442                         for (r = list3; (r - list3) <= (n1 + n2); r++) {
 443                                 do_append = 1;
 444                                 if (!*r)
 445                                         break;
 446                                 if (strcmp(*p, *r) == 0) {
 447                                         do_append = 0;
 448                                         break;
 449                                 }
 450                         }
 451                         if (do_append && n3 <= (n1 + n2)) {
 452                                 list3[n3++] = xstrdup(*p);
 453                                 list3[n3] = NULL;
 454                         }
 455                 }
 456         }
 457 
 458         for (p = list1; p && *p; p++) {
 459                 do_append = 0;
 460                 for (q = list2; q && *q; q++) {
 461                         if (g11n_langtag_match(*p, *q) != 1)
 462                                 continue;
 463 
 464                         /* append element */
 465                         lang_subtag = xstrdup(*p);
 466                         if (strchr(lang_subtag, '-'))
 467                                 *(strchr(lang_subtag, '-')) = '\0';
 468                         for (r = list3; (r - list3) <= (n1 + n2); r++) {
 469                                 do_append = 1;
 470                                 if (!*r)
 471                                         break;
 472                                 if (strcmp(lang_subtag, *r) == 0) {
 473                                         do_append = 0;
 474                                         break;
 475                                 }
 476                         }
 477                         if (do_append && n3 <= (n1 + n2)) {
 478                                 list3[n3++] = lang_subtag;
 479                                 list3[n3] = NULL;
 480                         } else
 481                                 xfree(lang_subtag);
 482                 }
 483         }
 484 
 485         set3 = xjoin(list3, ',');
 486         xfree_split_list(list1);
 487         xfree_split_list(list2);
 488         xfree_split_list(list3);
 489 
 490         return (set3);
 491 }
 492 
 493 char *
 494 g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
 495 {
 496         char *list, *result;
 497         char **xlist;
 498 
 499         /* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
 500         list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
 501 
 502         if (!list)
 503                 return (NULL);
 504 
 505         xlist = xsplit(list, ',');
 506 
 507         xfree(list);
 508 
 509         if (!xlist || !*xlist)
 510                 return (NULL);
 511 
 512         result = xstrdup(*xlist);
 513         xfree_split_list(xlist);
 514 
 515         return (result);
 516 }
 517 
 518 /*
 519  * Compare locales, preferring UTF-8 codesets to others, otherwise doing
 520  * a stright strcmp()
 521  */
 522 static int
 523 locale_cmp(const void *d1, const void *d2)
 524 {
 525         char *dot_ptr;
 526         char *s1 = *(char **)d1;
 527         char *s2 = *(char **)d2;
 528         int s1_is_utf8 = 0;
 529         int s2_is_utf8 = 0;
 530 
 531         /* check if s1 is a UTF-8 locale */
 532         if (((dot_ptr = strchr((char *)s1, '.')) != NULL) &&
 533             (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
 534             (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
 535                 s1_is_utf8++;
 536         }
 537 
 538         /* check if s2 is a UTF-8 locale */
 539         if (((dot_ptr = strchr((char *)s2, '.')) != NULL) &&
 540             (*dot_ptr != '\0') && (strncmp(dot_ptr + 1, "UTF-8", 5) == 0) &&
 541             (*(dot_ptr + 6) == '\0' || *(dot_ptr + 6) == '@')) {
 542                 s2_is_utf8++;
 543         }
 544 
 545         /* prefer UTF-8 locales */
 546         if (s1_is_utf8 && !s2_is_utf8)
 547                 return (-1);
 548 
 549         if (s2_is_utf8 && !s1_is_utf8)
 550                 return (1);
 551 
 552         /* prefer any locale over the default locales */
 553         if (strcmp(s1, "C") == 0 || strcmp(s1, "POSIX") == 0 ||
 554             strcmp(s1, "common") == 0) {
 555                 if (strcmp(s2, "C") != 0 && strcmp(s2, "POSIX") != 0 &&
 556                     strcmp(s2, "common") != 0)
 557                         return (1);
 558         }
 559 
 560         if (strcmp(s2, "C") == 0 || strcmp(s2, "POSIX") == 0 ||
 561             strcmp(s2, "common") == 0) {
 562                 if (strcmp(s1, "C") != 0 &&
 563                     strcmp(s1, "POSIX") != 0 &&
 564                     strcmp(s1, "common") != 0)
 565                         return (-1);
 566         }
 567 
 568         return (strcmp(s1, s2));
 569 }
 570 
 571 
 572 char **
 573 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
 574 {
 575         char **langtag_list, **result, **p, **q, **r;
 576         char *s;
 577         uint_t do_append, n_langtags, n_locales, n_results, max_results;
 578 
 579         if (locale_set == NULL)
 580                 return (NULL);
 581 
 582         /* count lang tags and locales */
 583         for (n_locales = 0, p = locale_set; p && *p; p++)
 584                 n_locales++;
 585 
 586         n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
 587         /* count the number of langtags */
 588         for (; s = strchr(s, ','); s++, n_langtags++)
 589                 ;
 590 
 591         qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
 592 
 593         langtag_list = xsplit(langtag_set, ',');
 594         for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
 595                 ;
 596 
 597         max_results = MIN(n_locales, n_langtags) * 2;
 598         result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
 599         *result = NULL;
 600         n_results = 0;
 601 
 602         /* more specific matches first */
 603         for (p = langtag_list; p && *p; p++) {
 604                 do_append = 0;
 605                 for (q = locale_set; q && *q; q++) {
 606                         if (g11n_langtag_matches_locale(*p, *q) == 2) {
 607                                 do_append = 1;
 608                                 for (r = result; (r - result) <=
 609                                     MIN(n_locales, n_langtags); r++) {
 610                                         if (!*r)
 611                                                 break;
 612                                         if (strcmp(*q, *r) == 0) {
 613                                                 do_append = 0;
 614                                                 break;
 615                                         }
 616                                 }
 617                                 if (do_append && n_results < max_results) {
 618                                         result[n_results++] = xstrdup(*q);
 619                                         result[n_results] = NULL;
 620                                 }
 621                                 break;
 622                         }
 623                 }
 624         }
 625 
 626         for (p = langtag_list; p && *p; p++) {
 627                 do_append = 0;
 628                 for (q = locale_set; q && *q; q++) {
 629                         if (g11n_langtag_matches_locale(*p, *q) == 1) {
 630                                 do_append = 1;
 631                                 for (r = result; (r - result) <=
 632                                     MIN(n_locales, n_langtags); r++) {
 633                                         if (!*r)
 634                                                 break;
 635                                         if (strcmp(*q, *r) == 0) {
 636                                                 do_append = 0;
 637                                                 break;
 638                                         }
 639                                 }
 640                                 if (do_append && n_results < max_results) {
 641                                         result[n_results++] = xstrdup(*q);
 642                                         result[n_results] = NULL;
 643                                 }
 644                                 break;
 645                         }
 646                 }
 647         }
 648 
 649         xfree_split_list(langtag_list);
 650 
 651         return (result);
 652 }
 653 
 654 char *
 655 g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
 656 {
 657         char **results, **locales, *result = NULL;
 658 
 659         if (srvr_locales == NULL)
 660                 locales = g11n_getlocales();
 661         else
 662                 locales = srvr_locales;
 663 
 664         if ((results = g11n_langtag_set_locale_set_intersect(clnt_langtags,
 665             locales)) == NULL)
 666                 goto err;
 667 
 668         if (*results != NULL)
 669                 result = xstrdup(*results);
 670 
 671         xfree_split_list(results);
 672 
 673 err:
 674         if (locales != NULL && locales != srvr_locales)
 675                 g11n_freelist(locales);
 676         return (result);
 677 }
 678 
/*
 * Functions for converting to UTF-8 from the local codeset and
 * converting from UTF-8 to the local codeset.
 *
 * The error_str parameter is a pointer to a char pointer variable in
 * which to store a string suitable for use with error() or fatal() or
 * friends.  It also serves as the error indicator when NULL is returned.
 *
 * If conversion isn't necessary, *error_str is set to NULL, and
 * NULL is returned.
 * If a conversion error occurred, *error_str points to an error message,
 * and NULL is returned.
 */
 692 char *
 693 g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
 694 {
 695         static char *last_codeset;
 696         static iconv_t cd = (iconv_t)-1;
 697         char    *codeset;
 698 
 699         *error_str = NULL;
 700 
 701         codeset = nl_langinfo(CODESET);
 702 
 703         if (strcmp(codeset, "UTF-8") == 0)
 704                 return (NULL);
 705 
 706         if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
 707                 if (last_codeset != NULL) {
 708                         xfree(last_codeset);
 709                         last_codeset = NULL;
 710                 }
 711                 if (cd != (iconv_t)-1)
 712                         (void) iconv_close(cd);
 713 
 714                 if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
 715                         *error_str = gettext("Cannot convert UTF-8 "
 716                             "strings to the local codeset");
 717                         return (NULL);
 718                 }
 719                 last_codeset = xstrdup(codeset);
 720         }
 721         return (do_iconv(cd, str, lenp, error_str));
 722 }
 723 
 724 char *
 725 g11n_convert_to_utf8(const char *str, uint_t *lenp,
 726     int native, char **error_str)
 727 {
 728         static char *last_codeset;
 729         static iconv_t cd = (iconv_t)-1;
 730         char    *codeset;
 731 
 732         *error_str = NULL;
 733 
 734         if (native)
 735                 codeset = native_codeset;
 736         else
 737                 codeset = nl_langinfo(CODESET);
 738 
 739         if (strcmp(codeset, "UTF-8") == 0)
 740                 return (NULL);
 741 
 742         if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
 743                 if (last_codeset != NULL) {
 744                         xfree(last_codeset);
 745                         last_codeset = NULL;
 746                 }
 747                 if (cd != (iconv_t)-1)
 748                         (void) iconv_close(cd);
 749 
 750                 if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
 751                         *error_str = gettext("Cannot convert the "
 752                             "local codeset strings to UTF-8");
 753                         return (NULL);
 754                 }
 755                 last_codeset = xstrdup(codeset);
 756         }
 757         return (do_iconv(cd, str, lenp, error_str));
 758 }
 759 
 760 /*
 761  * Wrapper around iconv()
 762  *
 763  * The caller is responsible for freeing the result. NULL is returned when
 764  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
 765  * The caller must ensure that the input string isn't NULL pointer.
 766  */
 767 static char *
 768 do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
 769 {
 770         int     ilen, olen;
 771         size_t  ileft, oleft;
 772         char    *ostr, *optr;
 773         const char *istr;
 774 
 775         ilen = *lenp;
 776         olen = ilen + 1;
 777 
 778         ostr = NULL;
 779         for (;;) {
 780                 olen *= 2;
 781                 oleft = olen;
 782                 ostr = optr = xrealloc(ostr, olen);
 783                 istr = (const char *)str;
 784                 if ((ileft = ilen) == 0)
 785                         break;
 786 
 787                 if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
 788                         /* success: generate reset sequence */
 789                         if (iconv(cd, NULL, NULL,
 790                             &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
 791                                 continue;
 792                         }
 793                         break;
 794                 }
 795                 /* failed */
 796                 if (errno != E2BIG) {
 797                         oleft = olen;
 798                         (void) iconv(cd, NULL, NULL, &ostr, &oleft);
 799                         xfree(ostr);
 800                         *err_str = gettext("Codeset conversion failed");
 801                         return (NULL);
 802                 }
 803         }
 804         olen = optr - ostr;
 805         optr = xmalloc(olen + 1);
 806         (void) memcpy(optr, ostr, olen);
 807         xfree(ostr);
 808 
 809         optr[olen] = '\0';
 810         *lenp = olen;
 811 
 812         return (optr);
 813 }
 814 
 815 /*
 816  * A filter for output string. Control and unprintable characters
 817  * are converted into visible form (eg "\ooo").
 818  */
 819 char *
 820 g11n_filter_string(char *s)
 821 {
 822         int     mb_cur_max = MB_CUR_MAX;
 823         int     mblen, len;
 824         char    *os = s;
 825         wchar_t wc;
 826         char    *obuf, *op;
 827 
 828         /* all character may be converted into the form of \ooo */
 829         obuf = op = xmalloc(strlen(s) * 4 + 1);
 830 
 831         while (*s != '\0') {
 832                 mblen = mbtowc(&wc, s, mb_cur_max);
 833                 if (mblen <= 0) {
 834                         mblen = 1;
 835                         wc = (unsigned char)*s;
 836                 }
 837                 if (!iswprint(wc) &&
 838                     wc != L'\n' && wc != L'\r' && wc != L'\t') {
 839                         /*
 840                          * control chars which need to be replaced
 841                          * with safe character sequence.
 842                          */
 843                         while (mblen != 0) {
 844                                 op += sprintf(op, "\\%03o",
 845                                     (unsigned char)*s++);
 846                                 mblen--;
 847                         }
 848                 } else {
 849                         while (mblen != 0) {
 850                                 *op++ = *s++;
 851                                 mblen--;
 852                         }
 853                 }
 854         }
 855         *op = '\0';
 856         len = op - obuf + 1;
 857         op = xrealloc(os, len);
 858         (void) memcpy(op, obuf, len);
 859         xfree(obuf);
 860         return (op);
 861 }
 862 
/*
 * Once we have negotiated a langtag, the server needs to map it to a
 * system locale. That is done based on the locales supported on the
 * server side. We know (with the locales supported on Solaris) what the
 * langtag is mapped to. However, from the client's point of view, there
 * is no way to know exactly what locale (encoding) will be used.
 *
 * With the bug fix for SSH_BUG_STRING_ENCODING, it is guaranteed that
 * UTF-8 characters always come over the wire, so this is no longer a
 * problem as long as both sides have the bug fix. However, if the server
 * side doesn't have the fix, the client can't safely perform the code
 * conversion since the incoming character encoding is unknown.
 *
 * To alleviate this situation, we take an empirical approach to find the
 * encoding from the langtag.
 *
 * If the langtag has a subtag, we can directly map the langtag to a UTF-8
 * locale (eg en-US can be mapped to en_US.UTF-8) with a few exceptions.
 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
 * of L10N support ..). Those are:
 *
 *      no_NO, no_NY, sr_SP, sr_YU
 *
 * They all use ISO8859-X encoding.
 *
 * As for the "xx" langtags, some of them can be mapped to "xx.UTF-8",
 * but others cannot, so we need to use "xx" as the locale name.
 * Those locales are:
 *
 * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
 *
 * Their encodings vary. They could be ISO8859-X or EUC or something else.
 * So we don't perform code conversion for these langtags.
 */
/* NULL-terminated list of the langtags named above */
static const char *non_utf8_langtag[] = {
        "no-NO", "no-NY", "sr-SP", "sr-YU",
        "ar", "ca", "cs", "da", "et", "fi", "he", "hu", "ja",
        "lt", "lv", "nl", "no", "pt", "sh", "th", "tr", NULL};
 901 
 902 void
 903 g11n_test_langtag(const char *lang, int server)
 904 {
 905         const char      **lp;
 906 
 907         if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
 908                 /*
 909                  * We negotiated with real locale name (not lang tag).
 910                  * We shouldn't expect UTF-8, thus shouldn't do code
 911                  * conversion.
 912                  */
 913                 datafellows |= SSH_BUG_STRING_ENCODING;
 914                 return;
 915         }
 916 
 917         if (datafellows & SSH_BUG_STRING_ENCODING) {
 918                 if (server) {
 919                         /*
 920                          * Whatever bug exists in the client side, server
 921                          * side has nothing to do, since server has no way
 922                          * to know what actual encoding is used on the client
 923                          * side. For example, even if we negotiated with
 924                          * en_US, client locale could be en_US.ISO8859-X or
 925                          * en_US.UTF-8.
 926                          */
 927                         return;
 928                 }
 929                 /*
 930                  * We are on the client side. We'll check with known
 931                  * locales to see if non-UTF8 characters could come in.
 932                  */
 933                 for (lp = non_utf8_langtag; *lp != NULL; lp++) {
 934                         if (strcmp(lang, *lp) == 0)
 935                                 break;
 936                 }
 937                 if (*lp == NULL) {
 938                         debug2("Server is expected to use UTF-8 locale");
 939                         datafellows &= ~SSH_BUG_STRING_ENCODING;
 940                 } else {
 941                         /*
 942                          * Server is expected to use non-UTF8 encoding.
 943                          */
 944                         debug2("Enforcing no code conversion: %s", lang);
 945                 }
 946         }
 947 }
 948 
 949 /*
 950  * Free all strings in the list and then free the list itself. We know that the
 951  * list ends with a NULL pointer.
 952  */
 953 void
 954 g11n_freelist(char **list)
 955 {
 956         int i = 0;
 957 
 958         while (list[i] != NULL) {
 959                 xfree(list[i]);
 960                 i++;
 961         }
 962 
 963         xfree(list);
 964 }