/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 */

/*
 * This file implements the 2008 newlocale and friends handling.
 */

#ifndef	_LCONV_C99
#define	_LCONV_C99
#endif

#include "lint.h"
#include <atomic.h>
#include <locale.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <errno.h>
#include <string.h>
#include "libc.h"
#include "mtlib.h"
#include "tsd.h"
#include "localeimpl.h"
#include "lctype.h"

/*
 * Big Theory of Locales:
 *
 * (It is recommended that readers familiarize themselves with the POSIX
 * 2008 (XPG Issue 7) specifications for locales, first.)
 *
 * Historically, we had a bunch of global variables that stored locale
 * data. While this worked well, it limited applications to a single locale
 * at a time. This doesn't work well in certain server applications.
 *
 * Issue 7, X/Open introduced the concept of a locale_t object, along with
 * versions of functions that can take this object as a parameter, along
 * with functions to clone and manipulate these locale objects. The new
 * functions are named with a _l() suffix.
 *
 * Additionally uselocale() is introduced which can change the locale
 * of a single thread. However, setlocale() can still be used to change
 * the global locale.
 *
 * In our implementation, we use libc's TSD to store the locale data that
 * was previously global. We still have global data because some applications
 * have had those global objects compiled into them. (Such applications will
 * be unable to benefit from uselocale(), btw.) The legacy routines are
 * reimplemented as wrappers that use the appropriate locale object by
 * calling uselocale(). uselocale() when passed a NULL pointer returns the
 * thread-specific locale object if one is present, or the global locale
 * object otherwise. Note that once the TSD data is set, the only way
 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
 * to uselocale().
 *
 * We are careful to minimize performance impact of multiple calls to
 * uselocale() or setlocale() by using a cache of locale data whenever possible.
 * As a consequence of this, applications that iterate over all possible
 * locales will burn through a lot of virtual memory, but we find such
 * applications rare. (locale -a might be an exception, but it is short lived.)
 */

/*
 * NB: Each of the structures listed herein should have the refcnt
 * set to -1, to ensure that posix locale information is never ever freed,
 * even when changing the global locale.
78 */ 79 80 typedef struct locdata *(*loadfn_t)(const char *); 81 82 static const loadfn_t loaders[LC_ALL] = { 83 __lc_ctype_load, 84 __lc_numeric_load, 85 __lc_time_load, 86 __lc_collate_load, 87 __lc_monetary_load, 88 __lc_messages_load, 89 }; 90 91 extern struct lc_monetary lc_monetary_posix; 92 extern struct lc_numeric lc_numeric_posix; 93 extern struct lc_messages lc_messages_posix; 94 extern struct lc_time lc_time_posix; 95 extern struct lc_ctype lc_ctype_posix; 96 extern struct lc_collate lc_collate_posix; 97 98 static struct locale posix_locale = { 99 /* locdata */ 100 .locdata = { 101 &__posix_ctype_locdata, 102 &__posix_numeric_locdata, 103 &__posix_time_locdata, 104 &__posix_collate_locdata, 105 &__posix_monetary_locdata, 106 &__posix_messages_locdata, 107 }, 108 .ctype = &lc_ctype_posix, 109 .numeric = &lc_numeric_posix, 110 .collate = &lc_collate_posix, 111 .monetary = &lc_monetary_posix, 112 .messages = &lc_messages_posix, 113 .time = &lc_time_posix, 114 .runelocale = &_DefaultRuneLocale, 115 }; 116 117 locale_t ___global_locale = &posix_locale; 118 119 locale_t 120 __global_locale(void) 121 { 122 return (___global_locale); 123 } 124 125 /* 126 * Category names for getenv() Note that this was modified 127 * for Solaris. See <iso/locale_iso.h>. 128 */ 129 #define NUM_CATS 7 130 static char *categories[7] = { 131 "LC_CTYPE", 132 "LC_NUMERIC", 133 "LC_TIME", 134 "LC_COLLATE", 135 "LC_MONETARY", 136 "LC_MESSAGES", 137 "LC_ALL", 138 }; 139 140 /* 141 * Prototypes. 142 */ 143 static const char *get_locale_env(int); 144 static struct locdata *locdata_get(int, const const char *); 145 static struct locdata *locdata_get_cache(int, const char *); 146 static void locdata_set_cache(int, struct locdata *); 147 148 /* 149 * Some utility routines. 
150 */ 151 struct locdata * 152 __locdata_hold(struct locdata *ld) 153 { 154 if (ld != NULL && ld->l_refcnt != (uint32_t)-1) 155 atomic_inc_32(&ld->l_refcnt); 156 return (ld); 157 } 158 159 void 160 __locdata_release(struct locdata *ld) 161 { 162 if (ld->l_refcnt == (uint32_t)-1) 163 return; 164 165 if (atomic_dec_32_nv(&ld->l_refcnt) == 0) { 166 for (int i = 0; i < NLOCDATA; i++) 167 free(ld->l_data[i]); 168 if (ld->l_map && ld->l_map_len) { 169 (void) munmap(ld->l_map, ld->l_map_len); 170 } 171 free(ld); 172 } 173 } 174 175 struct locdata * 176 __locdata_alloc(const char *name, size_t memsz) 177 { 178 struct locdata *ldata; 179 180 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) { 181 return (NULL); 182 } 183 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) { 184 free(ldata); 185 errno = ENOMEM; 186 return (NULL); 187 } 188 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); 189 ldata->l_refcnt = 1; 190 191 return (ldata); 192 } 193 194 /* 195 * It turns out that for performance reasons we would really like to 196 * cache the most recently referenced locale data to avoid wasteful 197 * loading from files. 198 */ 199 200 static struct locdata *cache_data[LC_ALL]; 201 static mutex_t cache_lock = DEFAULTMUTEX; 202 203 /* 204 * Returns the cached data if the locale name is the same. If not, 205 * returns NULL (cache miss). The locdata is returned with a hold on 206 * it, taken on behalf of the caller. The caller should drop the hold 207 * when it is finished. 
208 */ 209 static struct locdata * 210 locdata_get_cache(int category, const char *locname) 211 { 212 struct locdata *loc; 213 214 if (category < 0 || category >= LC_ALL) 215 return (NULL); 216 217 lmutex_lock(&cache_lock); 218 if ((loc = cache_data[category]) != NULL) { 219 if (strcmp(locname, loc->l_lname) == 0) { 220 loc = __locdata_hold(loc); 221 } else { 222 loc = NULL; 223 } 224 } 225 lmutex_unlock(&cache_lock); 226 return (loc); 227 } 228 229 /* 230 * Set the cache for the category to specific content. An additional hold 231 * is taken for the data while it is in the cache, so the caller may drop 232 * its own hold once this is complete. Also, releases the hold on any 233 * previously cached data. 234 */ 235 static void 236 locdata_set_cache(int category, struct locdata *loc) 237 { 238 struct locdata *old; 239 240 if (category < 0 || category >= LC_ALL) 241 return; 242 243 lmutex_lock(&cache_lock); 244 old = cache_data[category]; 245 cache_data[category] = __locdata_hold(loc); 246 lmutex_unlock(&cache_lock); 247 248 /* drop our reference on the old data */ 249 if (old) 250 __locdata_release(old); 251 } 252 253 /* 254 * Routine to get the locdata for a given category and locale. 255 * This includes retrieving it from cache, retrieving it from 256 * a file, etc. 257 */ 258 static struct locdata * 259 locdata_get(int category, const char *locname) 260 { 261 struct locdata *ldata; 262 char scratch[ENCODING_LEN + 1]; 263 char *slash; 264 int cnt; 265 int len; 266 267 if (locname == NULL || *locname == 0) { 268 locname = get_locale_env(category); 269 } 270 271 /* 272 * Extract the locale name for the category if it is a composite 273 * locale. 
274 */ 275 if ((slash = strchr(locname, '/')) != NULL) { 276 for (cnt = category; cnt && slash != NULL; cnt--) { 277 locname = slash + 1; 278 slash = strchr(locname, '/'); 279 } 280 if (slash) { 281 len = slash - locname; 282 if (len >= sizeof (scratch)) { 283 len = sizeof (scratch); 284 } 285 } else { 286 len = sizeof (scratch); 287 } 288 (void) strlcpy(scratch, locname, len); 289 locname = scratch; 290 } 291 292 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) 293 return (__locdata_hold(posix_locale.locdata[category])); 294 295 ldata = locdata_get_cache(category, locname); 296 if (ldata != NULL) 297 return (ldata); 298 299 /* Otherwise load it */ 300 ldata = (*loaders[category])(locname); 301 if (ldata != NULL) { 302 locdata_set_cache(category, ldata); 303 } 304 return (ldata); 305 } 306 307 /* tsd destructor */ 308 static void 309 freelocptr(void *arg) 310 { 311 locale_t *locptr = arg; 312 if (*locptr != NULL) 313 freelocale(*locptr); 314 } 315 316 static const char * 317 get_locale_env(int category) 318 { 319 const char *env; 320 321 /* 1. check LC_ALL. */ 322 env = getenv(categories[LC_ALL]); 323 324 /* 2. check LC_* */ 325 if (env == NULL || *env == '\0') 326 env = getenv(categories[category]); 327 328 /* 3. check LANG */ 329 if (env == NULL || *env == '\0') 330 env = getenv("LANG"); 331 332 /* 4. if none is set, fall to "C" */ 333 if (env == NULL || *env == '\0') 334 env = "C"; 335 336 return (env); 337 } 338 339 340 /* 341 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy 342 * code will continue to use _ctype[520], but we prefer this function as 343 * it is the only way to get thread-specific information. 344 */ 345 unsigned char 346 __mb_cur_max_l(locale_t loc) 347 { 348 return (loc->ctype->lc_max_mblen); 349 } 350 351 unsigned char 352 __mb_cur_max(void) 353 { 354 return (__mb_cur_max_l(uselocale(NULL))); 355 } 356 357 /* 358 * Public interfaces. 
359 */ 360 361 locale_t 362 duplocale(locale_t src) 363 { 364 locale_t loc; 365 int i; 366 367 loc = calloc(1, sizeof (*loc)); 368 if (loc == NULL) { 369 return (NULL); 370 } 371 for (i = 0; i < LC_ALL; i++) { 372 loc->locdata[i] = __locdata_hold(src->locdata[i]); 373 loc->loaded[i] = 0; 374 } 375 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 376 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 377 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 378 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 379 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 380 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 381 loc->time = loc->locdata[LC_TIME]->l_data[0]; 382 return (loc); 383 } 384 385 void 386 freelocale(locale_t loc) 387 { 388 int i; 389 for (i = 0; i < LC_ALL; i++) 390 __locdata_release(loc->locdata[i]); 391 if (loc != &posix_locale) 392 free(loc); 393 } 394 395 locale_t 396 newlocale(int catmask, const char *locname, locale_t base) 397 { 398 locale_t loc; 399 int i, e; 400 401 if (catmask & ~(LC_ALL_MASK)) { 402 errno = EINVAL; 403 return (NULL); 404 } 405 loc = duplocale(base != NULL ? 
base : ___global_locale); 406 if (loc == NULL) { 407 return (NULL); 408 } 409 410 for (i = 0; i < LC_ALL; i++) { 411 struct locdata *ldata; 412 loc->loaded[i] = 0; 413 if (((1 << i) & catmask) == 0) { 414 /* Default to base locale if not overriding */ 415 continue; 416 } 417 ldata = locdata_get(i, locname); 418 if (ldata == NULL) { 419 e = errno; 420 freelocale(loc); 421 errno = e; 422 return (NULL); 423 } 424 __locdata_release(loc->locdata[i]); 425 loc->locdata[i] = ldata; 426 } 427 if (base && base != ___global_locale) { 428 freelocale(base); 429 } 430 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 431 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 432 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 433 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 434 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 435 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 436 loc->time = loc->locdata[LC_TIME]->l_data[0]; 437 return (loc); 438 } 439 440 locale_t 441 uselocale(locale_t loc) 442 { 443 locale_t lastloc = ___global_locale; 444 locale_t *locptr; 445 446 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); 447 /* Should never occur */ 448 if (locptr == NULL) { 449 errno = EINVAL; 450 return (NULL); 451 } 452 453 if (*locptr != NULL) 454 lastloc = *locptr; 455 456 /* Argument loc is NULL if we are just querying. */ 457 if (loc != NULL) { 458 /* 459 * Set it to LC_GLOBAL_LOCAL to return to using 460 * the global locale (setlocale). 461 */ 462 if (loc == ___global_locale) { 463 *locptr = NULL; 464 } else { 465 /* No validation of the provided locale at present */ 466 *locptr = loc; 467 } 468 } 469 470 /* 471 * The caller is responsible for freeing, of course it would be 472 * gross error to call freelocale() on a locale object that is still 473 * in use. 474 */ 475 return (lastloc); 476 }