1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  14  */
  15 
  16 /*
  17  * This file implements the 2008 newlocale and friends handling.
  18  */
  19 
  20 #ifndef _LCONV_C99
  21 #define _LCONV_C99
  22 #endif
  23 
  24 #include "lint.h"
  25 #include <atomic.h>
  26 #include <locale.h>
  27 #include <sys/types.h>
  28 #include <sys/mman.h>
  29 #include <errno.h>
  30 #include <string.h>
  31 #include "libc.h"
  32 #include "mtlib.h"
  33 #include "tsd.h"
  34 #include "localeimpl.h"
  35 #include "lctype.h"
  36 
  37 /*
  38  * Big Theory of Locales:
  39  *
  40  * (It is recommended that readers familiarize themselves with the POSIX
  41  * 2008 (XPG Issue 7) specifications for locales, first.)
  42  *
  43  * Historically, we had a bunch of global variables that stored locale
  44  * data.  While this worked well, it limited applications to a single locale
  45  * at a time.  This doesn't work well in certain server applications.
  46  *
  47  * Issue 7, X/Open introduced the concept of a locale_t object, along with
  48  * versions of functions that can take this object as a parameter, along
  49  * with functions to clone and manipulate these locale objects.  The new
  50  * functions are named with a _l() suffix.
  51  *
 * Additionally, uselocale() is introduced, which can change the locale
 * of a single thread.  However, setlocale() can still be used to change
 * the global locale.
  55  *
  56  * In our implementation, we use libc's TSD to store the locale data that
  57  * was previously global.  We still have global data because some applications
  58  * have had those global objects compiled into them.  (Such applications will
  59  * be unable to benefit from uselocale(), btw.)  The legacy routines are
  60  * reimplemented as wrappers that use the appropriate locale object by
  61  * calling uselocale().  uselocale() when passed a NULL pointer returns the
  62  * thread-specific locale object if one is present, or the global locale
  63  * object otherwise.  Note that once the TSD data is set, the only way
  64  * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
  65  * to uselocale().
  66  *
  67  * We are careful to minimize performance impact of multiple calls to
  68  * uselocale() or setlocale() by using a cache of locale data whenever possible.
  69  * As a consequence of this, applications that iterate over all possible
  70  * locales will burn through a lot of virtual memory, but we find such
  71  * applications rare.  (locale -a might be an exception, but it is short lived.)
  72  *
  73  * Category data is never released (although enclosing locale objects might be),
  74  * in order to guarantee thread-safety.  Calling freelocale() on an object
  75  * while it is in use by another thread is a programmer error (use-after-free)
  76  * and we don't bother to note it further.
  77  *
  78  * Locale objects (global locales) established by setlocale() are also
  79  * never freed (for MT safety), but we will save previous locale objects
  80  * and reuse them when we can.
  81  */
  82 
  83 typedef struct locdata *(*loadfn_t)(const char *);
  84 
  85 static const loadfn_t loaders[LC_ALL] = {
  86         __lc_ctype_load,
  87         __lc_numeric_load,
  88         __lc_time_load,
  89         __lc_collate_load,
  90         __lc_monetary_load,
  91         __lc_messages_load,
  92 };
  93 
  94 extern struct lc_monetary lc_monetary_posix;
  95 extern struct lc_numeric lc_numeric_posix;
  96 extern struct lc_messages lc_messages_posix;
  97 extern struct lc_time lc_time_posix;
  98 extern struct lc_ctype lc_ctype_posix;
  99 extern struct lc_collate lc_collate_posix;
 100 
 101 static struct locale posix_locale = {
 102         /* locdata */
 103         .locdata = {
 104                 &__posix_ctype_locdata,
 105                 &__posix_numeric_locdata,
 106                 &__posix_time_locdata,
 107                 &__posix_collate_locdata,
 108                 &__posix_monetary_locdata,
 109                 &__posix_messages_locdata,
 110         },
 111         .locname = "C",
 112         .ctype = &lc_ctype_posix,
 113         .numeric = &lc_numeric_posix,
 114         .collate = &lc_collate_posix,
 115         .monetary = &lc_monetary_posix,
 116         .messages = &lc_messages_posix,
 117         .time = &lc_time_posix,
 118         .runelocale = &_DefaultRuneLocale,
 119 };
 120 
 121 locale_t ___global_locale = &posix_locale;
 122 
 123 locale_t
 124 __global_locale(void)
 125 {
 126         return (___global_locale);
 127 }
 128 
 129 /*
 130  * Category names for getenv()  Note that this was modified
 131  * for Solaris.  See <iso/locale_iso.h>.
 132  */
 133 #define NUM_CATS        7
 134 static char *categories[7] = {
 135         "LC_CTYPE",
 136         "LC_NUMERIC",
 137         "LC_TIME",
 138         "LC_COLLATE",
 139         "LC_MONETARY",
 140         "LC_MESSAGES",
 141         "LC_ALL",
 142 };
 143 
 144 /*
 145  * Prototypes.
 146  */
 147 static const char *get_locale_env(int);
 148 static struct locdata *locdata_get(int, const const char *);
 149 static struct locdata *locdata_get_cache(int, const char *);
 150 static locale_t mklocname(locale_t);
 151 
 152 /*
 153  * Some utility routines.
 154  */
 155 
 156 struct locdata *
 157 __locdata_alloc(const char *name, size_t memsz)
 158 {
 159         struct locdata *ldata;
 160 
 161         if ((ldata = lmalloc(sizeof (*ldata))) == NULL) {
 162                 return (NULL);
 163         }
 164         if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) {
 165                 lfree(ldata, sizeof (*ldata));
 166                 errno = ENOMEM;
 167                 return (NULL);
 168         }
 169         (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
 170 
 171         return (ldata);
 172 }
 173 
 174 /*
 175  * Normally we never free locale data truly, but if we failed to load it
 176  * for some reason, this routine is used to cleanup the partial mess.
 177  */
 178 void
 179 __locdata_free(struct locdata *ldata)
 180 {
 181         for (int i = 0; i < NLOCDATA; i++)
 182                 libc_free(ldata->l_data[i]);
 183         if (ldata->l_map != NULL && ldata->l_map_len)
 184                 (void) munmap(ldata->l_map, ldata->l_map_len);
 185         lfree(ldata, sizeof (*ldata));
 186 }
 187 
 188 /*
 189  * It turns out that for performance reasons we would really like to
 190  * cache the most recently referenced locale data to avoid wasteful
 191  * loading from files.
 192  */
 193 
 194 static struct locdata *cache_data[LC_ALL];
 195 static struct locdata *cat_data[LC_ALL];
 196 static mutex_t cache_lock = DEFAULTMUTEX;
 197 
 198 /*
 199  * Returns the cached data if the locale name is the same.  If not,
 200  * returns NULL (cache miss).  The locdata is returned with a hold on
 201  * it, taken on behalf of the caller.  The caller should drop the hold
 202  * when it is finished.
 203  */
 204 static struct locdata *
 205 locdata_get_cache(int category, const char *locname)
 206 {
 207         struct locdata *loc;
 208 
 209         if (category < 0 || category >= LC_ALL)
 210                 return (NULL);
 211 
 212         /* Try cache first. */
 213         lmutex_lock(&cache_lock);
 214         loc = cache_data[category];
 215 
 216         if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
 217                 lmutex_unlock(&cache_lock);
 218                 return (loc);
 219         }
 220 
 221         /*
 222          * Failing that try previously loaded locales (linear search) --
 223          * this could be optimized to a hash, but its unlikely that a single
 224          * application will ever need to work with more than a few locales.
 225          */
 226         for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
 227                 if (strcmp(locname, loc->l_lname) == 0) {
 228                         break;
 229                 }
 230         }
 231 
 232         /*
 233          * Finally, if we still don't have one, try loading the locale
 234          * data from the actual on-disk data.
 235          *
 236          * We drop the lock (libc wants to ensure no internal locks
 237          * are held when we call other routines required to read from
 238          * files, allocate memory, etc.)  There is a small race here,
 239          * but the consequences of the race are benign -- if multiple
 240          * threads hit this at precisely the same point, we could
 241          * wind up with duplicates of the locale data in the cache.
 242          *
 243          * This wastes the memory for an extra copy of the locale
 244          * data, but there is no further harm beyond that.  Its not
 245          * worth the effort to recode this to something "safe"
 246          * (which would require rescanning the list, etc.), given
 247          * that this race will probably never actually occur.
 248          */
 249         if (loc == NULL) {
 250                 lmutex_unlock(&cache_lock);
 251                 loc = (*loaders[category])(locname);
 252                 lmutex_lock(&cache_lock);
 253                 if (loc != NULL)
 254                         (void) strlcpy(loc->l_lname, locname,
 255                             sizeof (loc->l_lname));
 256         }
 257 
 258         /*
 259          * Assuming we got one, update the cache, and stick us on the list
 260          * of loaded locale data.  We insert into the head (more recent
 261          * use is likely to win.)
 262          */
 263         if (loc != NULL) {
 264                 cache_data[category] = loc;
 265                 if (!loc->l_cached) {
 266                         loc->l_cached = 1;
 267                         loc->l_next = cat_data[category];
 268                         cat_data[category] = loc;
 269                 }
 270         }
 271 
 272         lmutex_unlock(&cache_lock);
 273         return (loc);
 274 }
 275 
 276 /*
 277  * Routine to get the locdata for a given category and locale.
 278  * This includes retrieving it from cache, retrieving it from
 279  * a file, etc.
 280  */
 281 static struct locdata *
 282 locdata_get(int category, const char *locname)
 283 {
 284         char scratch[ENCODING_LEN + 1];
 285         char *slash;
 286         int cnt;
 287         int len;
 288 
 289         if (locname == NULL || *locname == 0) {
 290                 locname = get_locale_env(category);
 291         }
 292 
 293         /*
 294          * Extract the locale name for the category if it is a composite
 295          * locale.
 296          */
 297         if ((slash = strchr(locname, '/')) != NULL) {
 298                 for (cnt = category; cnt && slash != NULL; cnt--) {
 299                         locname = slash + 1;
 300                         slash = strchr(locname, '/');
 301                 }
 302                 if (slash) {
 303                         len = slash - locname + 1;
 304                         if (len >= sizeof (scratch)) {
 305                                 len = sizeof (scratch);
 306                         }
 307                 } else {
 308                         len = sizeof (scratch);
 309                 }
 310                 (void) strlcpy(scratch, locname, len);
 311                 locname = scratch;
 312         }
 313 
 314         if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
 315                 return (posix_locale.locdata[category]);
 316 
 317         return (locdata_get_cache(category, locname));
 318 }
 319 
 320 /* tsd destructor */
 321 static void
 322 freelocptr(void *arg)
 323 {
 324         locale_t *locptr = arg;
 325         if (*locptr != NULL)
 326                 freelocale(*locptr);
 327 }
 328 
 329 static const char *
 330 get_locale_env(int category)
 331 {
 332         const char *env;
 333 
 334         /* 1. check LC_ALL. */
 335         env = getenv(categories[LC_ALL]);
 336 
 337         /* 2. check LC_* */
 338         if (env == NULL || *env == '\0')
 339                 env = getenv(categories[category]);
 340 
 341         /* 3. check LANG */
 342         if (env == NULL || *env == '\0')
 343                 env = getenv("LANG");
 344 
 345         /* 4. if none is set, fall to "C" */
 346         if (env == NULL || *env == '\0')
 347                 env = "C";
 348 
 349         return (env);
 350 }
 351 
 352 
 353 /*
 354  * This routine is exposed via the MB_CUR_MAX macro.  Note that legacy
 355  * code will continue to use _ctype[520], but we prefer this function as
 356  * it is the only way to get thread-specific information.
 357  */
 358 unsigned char
 359 __mb_cur_max_l(locale_t loc)
 360 {
 361         return (loc->ctype->lc_max_mblen);
 362 }
 363 
 364 unsigned char
 365 __mb_cur_max(void)
 366 {
 367         return (__mb_cur_max_l(uselocale(NULL)));
 368 }
 369 
 370 /*
 371  * Public interfaces.
 372  */
 373 
 374 locale_t
 375 duplocale(locale_t src)
 376 {
 377         locale_t        loc;
 378         int             i;
 379 
 380         loc = lmalloc(sizeof (*loc));
 381         if (loc == NULL) {
 382                 return (NULL);
 383         }
 384         if (src == NULL) {
 385                 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
 386                 src = ___global_locale;
 387         }
 388         for (i = 0; i < LC_ALL; i++) {
 389                 loc->locdata[i] = src->locdata[i];
 390                 loc->loaded[i] = 0;
 391         }
 392         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 393         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 394         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 395         loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 396         loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 397         loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 398         loc->time = loc->locdata[LC_TIME]->l_data[0];
 399         return (loc);
 400 }
 401 
 402 void
 403 freelocale(locale_t loc)
 404 {
 405         /*
 406          * We take extra care never to free a saved locale created by
 407          * setlocale().  This shouldn't be strictly necessary, but a little
 408          * extra safety doesn't hurt here.
 409          */
 410         if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list))
 411                 lfree(loc, sizeof (*loc));
 412 }
 413 
 414 locale_t
 415 newlocale(int catmask, const char *locname, locale_t base)
 416 {
 417         locale_t loc;
 418         int i, e;
 419 
 420         if (catmask & ~(LC_ALL_MASK)) {
 421                 errno = EINVAL;
 422                 return (NULL);
 423         }
 424 
 425         /*
 426          * Technically passing LC_GLOBAL_LOCALE here is illegal,
 427          * but we allow it.
 428          */
 429         if (base == NULL || base == ___global_locale) {
 430                 loc = duplocale(___global_locale);
 431         } else {
 432                 loc = duplocale(base);
 433         }
 434         if (loc == NULL) {
 435                 return (NULL);
 436         }
 437 
 438         for (i = 0; i < LC_ALL; i++) {
 439                 struct locdata *ldata;
 440                 loc->loaded[i] = 0;
 441                 if (((1 << i) & catmask) == 0) {
 442                         /* Default to base locale if not overriding */
 443                         continue;
 444                 }
 445                 ldata = locdata_get(i, locname);
 446                 if (ldata == NULL) {
 447                         e = errno;
 448                         freelocale(loc);
 449                         errno = e;
 450                         return (NULL);
 451                 }
 452                 loc->locdata[i] = ldata;
 453         }
 454         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 455         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 456         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 457         loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 458         loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 459         loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 460         loc->time = loc->locdata[LC_TIME]->l_data[0];
 461         freelocale(base);
 462 
 463         return (mklocname(loc));
 464 }
 465 
 466 locale_t
 467 uselocale(locale_t loc)
 468 {
 469         locale_t lastloc = ___global_locale;
 470         locale_t *locptr;
 471 
 472         locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
 473         /* Should never occur */
 474         if (locptr == NULL) {
 475                 errno = EINVAL;
 476                 return (NULL);
 477         }
 478 
 479         if (*locptr != NULL)
 480                 lastloc = *locptr;
 481 
 482         /* Argument loc is NULL if we are just querying. */
 483         if (loc != NULL) {
 484                 /*
 485                  * Set it to LC_GLOBAL_LOCAL to return to using
 486                  * the global locale (setlocale).
 487                  */
 488                 if (loc == ___global_locale) {
 489                         *locptr = NULL;
 490                 } else {
 491                         /* No validation of the provided locale at present */
 492                         *locptr = loc;
 493                 }
 494         }
 495 
 496         /*
 497          * The caller is responsible for freeing, of course it would be
 498          * gross error to call freelocale() on a locale object that is still
 499          * in use.
 500          */
 501         return (lastloc);
 502 }
 503 
 504 static locale_t
 505 mklocname(locale_t loc)
 506 {
 507         int composite = 0;
 508 
 509         /* Look to see if any category is different */
 510         for (int i = 1; i < LC_ALL; ++i) {
 511                 if (strcmp(loc->locdata[0]->l_lname,
 512                     loc->locdata[i]->l_lname) != 0) {
 513                         composite = 1;
 514                         break;
 515                 }
 516         }
 517 
 518         if (composite) {
 519                 /*
 520                  * Note ordering of these follows the numeric order,
 521                  * if the order is changed, then setlocale() will need
 522                  * to be changed as well.
 523                  */
 524                 (void) snprintf(loc->locname, sizeof (loc->locname),
 525                     "%s/%s/%s/%s/%s/%s",
 526                     loc->locdata[LC_CTYPE]->l_lname,
 527                     loc->locdata[LC_NUMERIC]->l_lname,
 528                     loc->locdata[LC_TIME]->l_lname,
 529                     loc->locdata[LC_COLLATE]->l_lname,
 530                     loc->locdata[LC_MONETARY]->l_lname,
 531                     loc->locdata[LC_MESSAGES]->l_lname);
 532         } else {
 533                 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
 534                     sizeof (loc->locname));
 535         }
 536         return (loc);
 537 }