1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  14  */
  15 
  16 /*
  17  * This file implements the 2008 newlocale and friends handling.
  18  */
  19 
  20 #ifndef _LCONV_C99
  21 #define _LCONV_C99
  22 #endif
  23 
  24 #include "lint.h"
  25 #include <atomic.h>
  26 #include <locale.h>
  27 #include <sys/types.h>
  28 #include <sys/mman.h>
  29 #include <errno.h>
  30 #include <string.h>
  31 #include "libc.h"
  32 #include "mtlib.h"
  33 #include "tsd.h"
  34 #include "localeimpl.h"
  35 #include "lctype.h"
  36 
  37 /*
  38  * Big Theory of Locales:
  39  *
  40  * (It is recommended that readers familiarize themselves with the POSIX
  41  * 2008 (XPG Issue 7) specifications for locales, first.)
  42  *
  43  * Historically, we had a bunch of global variables that stored locale
  44  * data.  While this worked well, it limited applications to a single locale
  45  * at a time.  This doesn't work well in certain server applications.
  46  *
 * In Issue 7, X/Open introduced the concept of a locale_t object, along
 * with versions of functions that can take this object as a parameter,
 * and functions to clone and manipulate these locale objects.  The new
 * functions are named with an _l() suffix.
  51  *
 * Additionally, uselocale() is introduced, which can change the locale
 * of a single thread.  However, setlocale() can still be used to change
 * the global locale.
  55  *
  56  * In our implementation, we use libc's TSD to store the locale data that
  57  * was previously global.  We still have global data because some applications
  58  * have had those global objects compiled into them.  (Such applications will
  59  * be unable to benefit from uselocale(), btw.)  The legacy routines are
  60  * reimplemented as wrappers that use the appropriate locale object by
  61  * calling uselocale().  uselocale() when passed a NULL pointer returns the
  62  * thread-specific locale object if one is present, or the global locale
  63  * object otherwise.  Note that once the TSD data is set, the only way
  64  * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
  65  * to uselocale().
  66  *
  67  * We are careful to minimize performance impact of multiple calls to
  68  * uselocale() or setlocale() by using a cache of locale data whenever possible.
  69  * As a consequence of this, applications that iterate over all possible
  70  * locales will burn through a lot of virtual memory, but we find such
  71  * applications rare.  (locale -a might be an exception, but it is short lived.)
  72  */
  73 
  74 /*
  75  * NB: Each of the structures listed herein should have the refcnt
  76  * set to -1, to ensure that posix locale information is never ever freed,
  77  * even when changing the global locale.
  78  */
  79 
  80 typedef struct locdata *(*loadfn_t)(const char *);
  81 
  82 static const loadfn_t loaders[LC_ALL] = {
  83         __lc_ctype_load,
  84         __lc_numeric_load,
  85         __lc_time_load,
  86         __lc_collate_load,
  87         __lc_monetary_load,
  88         __lc_messages_load,
  89 };
  90 
  91 extern struct lc_monetary lc_monetary_posix;
  92 extern struct lc_numeric lc_numeric_posix;
  93 extern struct lc_messages lc_messages_posix;
  94 extern struct lc_time lc_time_posix;
  95 extern struct lc_ctype lc_ctype_posix;
  96 extern struct lc_collate lc_collate_posix;
  97 
  98 static struct locale posix_locale = {
  99         /* locdata */
 100         .locdata = {
 101                 &__posix_ctype_locdata,
 102                 &__posix_numeric_locdata,
 103                 &__posix_time_locdata,
 104                 &__posix_collate_locdata,
 105                 &__posix_monetary_locdata,
 106                 &__posix_messages_locdata,
 107         },
 108         .ctype = &lc_ctype_posix,
 109         .numeric = &lc_numeric_posix,
 110         .collate = &lc_collate_posix,
 111         .monetary = &lc_monetary_posix,
 112         .messages = &lc_messages_posix,
 113         .time = &lc_time_posix,
 114         .runelocale = &_DefaultRuneLocale,
 115 };
 116 
 117 locale_t ___global_locale = &posix_locale;
 118 
 119 locale_t
 120 __global_locale(void)
 121 {
 122         return (___global_locale);
 123 }
 124 
 125 /*
 126  * Category names for getenv()  Note that this was modified
 127  * for Solaris.  See <iso/locale_iso.h>.
 128  */
 129 #define NUM_CATS        7
 130 static char *categories[7] = {
 131         "LC_CTYPE",
 132         "LC_NUMERIC",
 133         "LC_TIME",
 134         "LC_COLLATE",
 135         "LC_MONETARY",
 136         "LC_MESSAGES",
 137         "LC_ALL",
 138 };
 139 
 140 /*
 141  * Prototypes.
 142  */
 143 static const char *get_locale_env(int);
 144 static struct locdata *locdata_get(int, const const char *);
 145 static struct locdata *locdata_get_cache(int, const char *);
 146 static void locdata_set_cache(int, struct locdata *);
 147 
 148 /*
 149  * Some utility routines.
 150  */
 151 struct locdata *
 152 __locdata_hold(struct locdata *ld)
 153 {
 154         if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
 155                 atomic_inc_32(&ld->l_refcnt);
 156         return (ld);
 157 }
 158 
 159 void
 160 __locdata_release(struct locdata *ld)
 161 {
 162         if (ld->l_refcnt == (uint32_t)-1)
 163                 return;
 164 
 165         if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
 166                 for (int i = 0; i < NLOCDATA; i++)
 167                         free(ld->l_data[i]);
 168                 if (ld->l_map && ld->l_map_len) {
 169                         (void) munmap(ld->l_map, ld->l_map_len);
 170                 }
 171                 free(ld);
 172         }
 173 }
 174 
 175 struct locdata *
 176 __locdata_alloc(const char *name, size_t memsz)
 177 {
 178         struct locdata *ldata;
 179 
 180         if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
 181                 return (NULL);
 182         }
 183         if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
 184                 free(ldata);
 185                 errno = ENOMEM;
 186                 return (NULL);
 187         }
 188         (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
 189         ldata->l_refcnt = 1;
 190 
 191         return (ldata);
 192 }
 193 
 194 /*
 195  * It turns out that for performance reasons we would really like to
 196  * cache the most recently referenced locale data to avoid wasteful
 197  * loading from files.
 198  */
 199 
 200 static struct locdata *cache_data[LC_ALL];
 201 static mutex_t cache_lock = DEFAULTMUTEX;
 202 
 203 /*
 204  * Returns the cached data if the locale name is the same.  If not,
 205  * returns NULL (cache miss).  The locdata is returned with a hold on
 206  * it, taken on behalf of the caller.  The caller should drop the hold
 207  * when it is finished.
 208  */
 209 static struct locdata *
 210 locdata_get_cache(int category, const char *locname)
 211 {
 212         struct locdata *loc;
 213 
 214         if (category < 0 || category >= LC_ALL)
 215                 return (NULL);
 216 
 217         lmutex_lock(&cache_lock);
 218         if ((loc = cache_data[category]) != NULL) {
 219                 if (strcmp(locname, loc->l_lname) == 0) {
 220                         loc = __locdata_hold(loc);
 221                 } else {
 222                         loc = NULL;
 223                 }
 224         }
 225         lmutex_unlock(&cache_lock);
 226         return (loc);
 227 }
 228 
 229 /*
 230  * Set the cache for the category to specific content.  An additional hold
 231  * is taken for the data while it is in the cache, so the caller may drop
 232  * its own hold once this is complete.  Also, releases the hold on any
 233  * previously cached data.
 234  */
 235 static void
 236 locdata_set_cache(int category, struct locdata *loc)
 237 {
 238         struct locdata *old;
 239 
 240         if (category < 0 || category >= LC_ALL)
 241                 return;
 242 
 243         lmutex_lock(&cache_lock);
 244         old = cache_data[category];
 245         cache_data[category] = __locdata_hold(loc);
 246         lmutex_unlock(&cache_lock);
 247 
 248         /* drop our reference on the old data */
 249         if (old)
 250                 __locdata_release(old);
 251 }
 252 
 253 /*
 254  * Routine to get the locdata for a given category and locale.
 255  * This includes retrieving it from cache, retrieving it from
 256  * a file, etc.
 257  */
 258 static struct locdata *
 259 locdata_get(int category, const char *locname)
 260 {
 261         struct locdata *ldata;
 262         char scratch[ENCODING_LEN + 1];
 263         char *slash;
 264         int cnt;
 265         int len;
 266 
 267         if (locname == NULL || *locname == 0) {
 268                 locname = get_locale_env(category);
 269         }
 270 
 271         /*
 272          * Extract the locale name for the category if it is a composite
 273          * locale.
 274          */
 275         if ((slash = strchr(locname, '/')) != NULL) {
 276                 for (cnt = category; cnt && slash != NULL; cnt--) {
 277                         locname = slash + 1;
 278                         slash = strchr(locname, '/');
 279                 }
 280                 if (slash) {
 281                         len = slash - locname;
 282                         if (len >= sizeof (scratch)) {
 283                                 len = sizeof (scratch);
 284                         }
 285                 } else {
 286                         len = sizeof (scratch);
 287                 }
 288                 (void) strlcpy(scratch, locname, len);
 289                 locname = scratch;
 290         }
 291 
 292         if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
 293                 return (__locdata_hold(posix_locale.locdata[category]));
 294 
 295         ldata = locdata_get_cache(category, locname);
 296         if (ldata != NULL)
 297                 return (ldata);
 298 
 299         /* Otherwise load it */
 300         ldata = (*loaders[category])(locname);
 301         if (ldata != NULL) {
 302                 locdata_set_cache(category, ldata);
 303         }
 304         return (ldata);
 305 }
 306 
 307 /* tsd destructor */
 308 static void
 309 freelocptr(void *arg)
 310 {
 311         locale_t *locptr = arg;
 312         if (*locptr != NULL)
 313                 freelocale(*locptr);
 314 }
 315 
 316 static const char *
 317 get_locale_env(int category)
 318 {
 319         const char *env;
 320 
 321         /* 1. check LC_ALL. */
 322         env = getenv(categories[LC_ALL]);
 323 
 324         /* 2. check LC_* */
 325         if (env == NULL || *env == '\0')
 326                 env = getenv(categories[category]);
 327 
 328         /* 3. check LANG */
 329         if (env == NULL || *env == '\0')
 330                 env = getenv("LANG");
 331 
 332         /* 4. if none is set, fall to "C" */
 333         if (env == NULL || *env == '\0')
 334                 env = "C";
 335 
 336         return (env);
 337 }
 338 
 339 
 340 /*
 341  * This routine is exposed via the MB_CUR_MAX macro.  Note that legacy
 342  * code will continue to use _ctype[520], but we prefer this function as
 343  * it is the only way to get thread-specific information.
 344  */
 345 unsigned char
 346 __mb_cur_max_l(locale_t loc)
 347 {
 348         return (loc->ctype->lc_max_mblen);
 349 }
 350 
 351 unsigned char
 352 __mb_cur_max(void)
 353 {
 354         return (__mb_cur_max_l(uselocale(NULL)));
 355 }
 356 
 357 /*
 358  * Public interfaces.
 359  */
 360 
 361 locale_t
 362 duplocale(locale_t src)
 363 {
 364         locale_t        loc;
 365         int             i;
 366 
 367         loc = calloc(1, sizeof (*loc));
 368         if (loc == NULL) {
 369                 return (NULL);
 370         }
 371         for (i = 0; i < LC_ALL; i++) {
 372                 loc->locdata[i] = __locdata_hold(src->locdata[i]);
 373                 loc->loaded[i] = 0;
 374         }
 375         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 376         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 377         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 378         loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 379         loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 380         loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 381         loc->time = loc->locdata[LC_TIME]->l_data[0];
 382         return (loc);
 383 }
 384 
 385 void
 386 freelocale(locale_t loc)
 387 {
 388         int i;
 389         for (i = 0; i < LC_ALL; i++)
 390                 __locdata_release(loc->locdata[i]);
 391         if (loc != &posix_locale)
 392                 free(loc);
 393 }
 394 
 395 locale_t
 396 newlocale(int catmask, const char *locname, locale_t base)
 397 {
 398         locale_t loc;
 399         int i, e;
 400 
 401         if (catmask & ~(LC_ALL_MASK)) {
 402                 errno = EINVAL;
 403                 return (NULL);
 404         }
 405         loc = duplocale(base != NULL ? base : ___global_locale);
 406         if (loc == NULL) {
 407                 return (NULL);
 408         }
 409 
 410         for (i = 0; i < LC_ALL; i++) {
 411                 struct locdata *ldata;
 412                 loc->loaded[i] = 0;
 413                 if (((1 << i) & catmask) == 0) {
 414                         /* Default to base locale if not overriding */
 415                         continue;
 416                 }
 417                 ldata = locdata_get(i, locname);
 418                 if (ldata == NULL) {
 419                         e = errno;
 420                         freelocale(loc);
 421                         errno = e;
 422                         return (NULL);
 423                 }
 424                 __locdata_release(loc->locdata[i]);
 425                 loc->locdata[i] = ldata;
 426         }
 427         if (base && base != ___global_locale) {
 428                 freelocale(base);
 429         }
 430         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
 431         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
 432         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
 433         loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
 434         loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
 435         loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
 436         loc->time = loc->locdata[LC_TIME]->l_data[0];
 437         return (loc);
 438 }
 439 
 440 locale_t
 441 uselocale(locale_t loc)
 442 {
 443         locale_t lastloc = ___global_locale;
 444         locale_t *locptr;
 445 
 446         locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
 447         /* Should never occur */
 448         if (locptr == NULL) {
 449                 errno = EINVAL;
 450                 return (NULL);
 451         }
 452 
 453         if (*locptr != NULL)
 454                 lastloc = *locptr;
 455 
 456         /* Argument loc is NULL if we are just querying. */
 457         if (loc != NULL) {
 458                 /*
 459                  * Set it to LC_GLOBAL_LOCAL to return to using
 460                  * the global locale (setlocale).
 461                  */
 462                 if (loc == ___global_locale) {
 463                         *locptr = NULL;
 464                 } else {
 465                         /* No validation of the provided locale at present */
 466                         *locptr = loc;
 467                 }
 468         }
 469 
 470         /*
 471          * The caller is responsible for freeing, of course it would be
 472          * gross error to call freelocale() on a locale object that is still
 473          * in use.
 474          */
 475         return (lastloc);
 476 }