1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 14 */ 15 16 /* 17 * This file implements the 2008 newlocale and friends handling. 18 */ 19 20 #ifndef _LCONV_C99 21 #define _LCONV_C99 22 #endif 23 24 #include "lint.h" 25 #include <atomic.h> 26 #include <locale.h> 27 #include <sys/types.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 #include <string.h> 31 #include "libc.h" 32 #include "mtlib.h" 33 #include "tsd.h" 34 #include "localeimpl.h" 35 #include "lctype.h" 36 37 /* 38 * Big Theory of Locales: 39 * 40 * (It is recommended that readers familiarize themselves with the POSIX 41 * 2008 (XPG Issue 7) specifications for locales, first.) 42 * 43 * Historically, we had a bunch of global variables that stored locale 44 * data. While this worked well, it limited applications to a single locale 45 * at a time. This doesn't work well in certain server applications. 46 * 47 * Issue 7, X/Open introduced the concept of a locale_t object, along with 48 * versions of functions that can take this object as a parameter, along 49 * with functions to clone and manipulate these locale objects. The new 50 * functions are named with a _l() suffix. 51 * 52 * Additionally uselocale() is introduced which can change the locale of 53 * of a single thread. However, setlocale() can still be used to change 54 * the global locale. 55 * 56 * In our implementation, we use libc's TSD to store the locale data that 57 * was previously global. We still have global data because some applications 58 * have had those global objects compiled into them. (Such applications will 59 * be unable to benefit from uselocale(), btw.) The legacy routines are 60 * reimplemented as wrappers that use the appropriate locale object by 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the 62 * thread-specific locale object if one is present, or the global locale 63 * object otherwise. Note that once the TSD data is set, the only way 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE 65 * to uselocale(). 66 * 67 * We are careful to minimize performance impact of multiple calls to 68 * uselocale() or setlocale() by using a cache of locale data whenever possible. 69 * As a consequence of this, applications that iterate over all possible 70 * locales will burn through a lot of virtual memory, but we find such 71 * applications rare. (locale -a might be an exception, but it is short lived.) 72 * 73 * Category data is never released (although enclosing locale objects might be), 74 * in order to guarantee thread-safety. Calling freelocale() on an object 75 * while it is in use by another thread is a programmer error (use-after-free) 76 * and we don't bother to note it further. 77 * 78 * Locale objects (global locales) established by setlocale() are also 79 * never freed (for MT safety), but we will save previous locale objects 80 * and reuse them when we can. 81 */ 82 83 typedef struct locdata *(*loadfn_t)(const char *); 84 85 static const loadfn_t loaders[LC_ALL] = { 86 __lc_ctype_load, 87 __lc_numeric_load, 88 __lc_time_load, 89 __lc_collate_load, 90 __lc_monetary_load, 91 __lc_messages_load, 92 }; 93 94 extern struct lc_monetary lc_monetary_posix; 95 extern struct lc_numeric lc_numeric_posix; 96 extern struct lc_messages lc_messages_posix; 97 extern struct lc_time lc_time_posix; 98 extern struct lc_ctype lc_ctype_posix; 99 extern struct lc_collate lc_collate_posix; 100 101 static struct locale posix_locale = { 102 /* locdata */ 103 .locdata = { 104 &__posix_ctype_locdata, 105 &__posix_numeric_locdata, 106 &__posix_time_locdata, 107 &__posix_collate_locdata, 108 &__posix_monetary_locdata, 109 &__posix_messages_locdata, 110 }, 111 .locname = "C", 112 .ctype = &lc_ctype_posix, 113 .numeric = &lc_numeric_posix, 114 .collate = &lc_collate_posix, 115 .monetary = &lc_monetary_posix, 116 .messages = &lc_messages_posix, 117 .time = &lc_time_posix, 118 .runelocale = &_DefaultRuneLocale, 119 }; 120 121 locale_t ___global_locale = &posix_locale; 122 123 locale_t 124 __global_locale(void) 125 { 126 return (___global_locale); 127 } 128 129 /* 130 * Category names for getenv() Note that this was modified 131 * for Solaris. See <iso/locale_iso.h>. 132 */ 133 #define NUM_CATS 7 134 static char *categories[7] = { 135 "LC_CTYPE", 136 "LC_NUMERIC", 137 "LC_TIME", 138 "LC_COLLATE", 139 "LC_MONETARY", 140 "LC_MESSAGES", 141 "LC_ALL", 142 }; 143 144 /* 145 * Prototypes. 146 */ 147 static const char *get_locale_env(int); 148 static struct locdata *locdata_get(int, const const char *); 149 static struct locdata *locdata_get_cache(int, const char *); 150 static locale_t mklocname(locale_t); 151 152 /* 153 * Some utility routines. 154 */ 155 156 struct locdata * 157 __locdata_alloc(const char *name, size_t memsz) 158 { 159 struct locdata *ldata; 160 161 if ((ldata = lmalloc(sizeof (*ldata))) == NULL) { 162 return (NULL); 163 } 164 if ((ldata->l_data[0] = libc_malloc(memsz)) == NULL) { 165 lfree(ldata, sizeof (*ldata)); 166 errno = ENOMEM; 167 return (NULL); 168 } 169 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); 170 171 return (ldata); 172 } 173 174 /* 175 * Normally we never free locale data truly, but if we failed to load it 176 * for some reason, this routine is used to cleanup the partial mess. 177 */ 178 void 179 __locdata_free(struct locdata *ldata) 180 { 181 for (int i = 0; i < NLOCDATA; i++) 182 libc_free(ldata->l_data[i]); 183 if (ldata->l_map != NULL && ldata->l_map_len) 184 (void) munmap(ldata->l_map, ldata->l_map_len); 185 lfree(ldata, sizeof (*ldata)); 186 } 187 188 /* 189 * It turns out that for performance reasons we would really like to 190 * cache the most recently referenced locale data to avoid wasteful 191 * loading from files. 192 */ 193 194 static struct locdata *cache_data[LC_ALL]; 195 static struct locdata *cat_data[LC_ALL]; 196 static mutex_t cache_lock = DEFAULTMUTEX; 197 198 /* 199 * Returns the cached data if the locale name is the same. If not, 200 * returns NULL (cache miss). The locdata is returned with a hold on 201 * it, taken on behalf of the caller. The caller should drop the hold 202 * when it is finished. 203 */ 204 static struct locdata * 205 locdata_get_cache(int category, const char *locname) 206 { 207 struct locdata *loc; 208 209 if (category < 0 || category >= LC_ALL) 210 return (NULL); 211 212 /* Try cache first. */ 213 lmutex_lock(&cache_lock); 214 loc = cache_data[category]; 215 216 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) { 217 lmutex_unlock(&cache_lock); 218 return (loc); 219 } 220 221 /* 222 * Failing that try previously loaded locales (linear search) -- 223 * this could be optimized to a hash, but its unlikely that a single 224 * application will ever need to work with more than a few locales. 225 */ 226 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) { 227 if (strcmp(locname, loc->l_lname) == 0) { 228 break; 229 } 230 } 231 232 /* 233 * Finally, if we still don't have one, try loading the locale 234 * data from the actual on-disk data. 235 * 236 * We drop the lock (libc wants to ensure no internal locks 237 * are held when we call other routines required to read from 238 * files, allocate memory, etc.) There is a small race here, 239 * but the consequences of the race are benign -- if multiple 240 * threads hit this at precisely the same point, we could 241 * wind up with duplicates of the locale data in the cache. 242 * 243 * This wastes the memory for an extra copy of the locale 244 * data, but there is no further harm beyond that. Its not 245 * worth the effort to recode this to something "safe" 246 * (which would require rescanning the list, etc.), given 247 * that this race will probably never actually occur. 248 */ 249 if (loc == NULL) { 250 lmutex_unlock(&cache_lock); 251 loc = (*loaders[category])(locname); 252 lmutex_lock(&cache_lock); 253 if (loc != NULL) 254 (void) strlcpy(loc->l_lname, locname, 255 sizeof (loc->l_lname)); 256 } 257 258 /* 259 * Assuming we got one, update the cache, and stick us on the list 260 * of loaded locale data. We insert into the head (more recent 261 * use is likely to win.) 262 */ 263 if (loc != NULL) { 264 cache_data[category] = loc; 265 if (!loc->l_cached) { 266 loc->l_cached = 1; 267 loc->l_next = cat_data[category]; 268 cat_data[category] = loc; 269 } 270 } 271 272 lmutex_unlock(&cache_lock); 273 return (loc); 274 } 275 276 /* 277 * Routine to get the locdata for a given category and locale. 278 * This includes retrieving it from cache, retrieving it from 279 * a file, etc. 280 */ 281 static struct locdata * 282 locdata_get(int category, const char *locname) 283 { 284 char scratch[ENCODING_LEN + 1]; 285 char *slash; 286 int cnt; 287 int len; 288 289 if (locname == NULL || *locname == 0) { 290 locname = get_locale_env(category); 291 } 292 293 /* 294 * Extract the locale name for the category if it is a composite 295 * locale. 296 */ 297 if ((slash = strchr(locname, '/')) != NULL) { 298 for (cnt = category; cnt && slash != NULL; cnt--) { 299 locname = slash + 1; 300 slash = strchr(locname, '/'); 301 } 302 if (slash) { 303 len = slash - locname + 1; 304 if (len >= sizeof (scratch)) { 305 len = sizeof (scratch); 306 } 307 } else { 308 len = sizeof (scratch); 309 } 310 (void) strlcpy(scratch, locname, len); 311 locname = scratch; 312 } 313 314 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) 315 return (posix_locale.locdata[category]); 316 317 return (locdata_get_cache(category, locname)); 318 } 319 320 /* tsd destructor */ 321 static void 322 freelocptr(void *arg) 323 { 324 locale_t *locptr = arg; 325 if (*locptr != NULL) 326 freelocale(*locptr); 327 } 328 329 static const char * 330 get_locale_env(int category) 331 { 332 const char *env; 333 334 /* 1. check LC_ALL. */ 335 env = getenv(categories[LC_ALL]); 336 337 /* 2. check LC_* */ 338 if (env == NULL || *env == '\0') 339 env = getenv(categories[category]); 340 341 /* 3. check LANG */ 342 if (env == NULL || *env == '\0') 343 env = getenv("LANG"); 344 345 /* 4. if none is set, fall to "C" */ 346 if (env == NULL || *env == '\0') 347 env = "C"; 348 349 return (env); 350 } 351 352 353 /* 354 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy 355 * code will continue to use _ctype[520], but we prefer this function as 356 * it is the only way to get thread-specific information. 357 */ 358 unsigned char 359 __mb_cur_max_l(locale_t loc) 360 { 361 return (loc->ctype->lc_max_mblen); 362 } 363 364 unsigned char 365 __mb_cur_max(void) 366 { 367 return (__mb_cur_max_l(uselocale(NULL))); 368 } 369 370 /* 371 * Public interfaces. 372 */ 373 374 locale_t 375 duplocale(locale_t src) 376 { 377 locale_t loc; 378 int i; 379 380 loc = lmalloc(sizeof (*loc)); 381 if (loc == NULL) { 382 return (NULL); 383 } 384 if (src == NULL) { 385 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */ 386 src = ___global_locale; 387 } 388 for (i = 0; i < LC_ALL; i++) { 389 loc->locdata[i] = src->locdata[i]; 390 loc->loaded[i] = 0; 391 } 392 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 393 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 394 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 395 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 396 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 397 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 398 loc->time = loc->locdata[LC_TIME]->l_data[0]; 399 return (loc); 400 } 401 402 void 403 freelocale(locale_t loc) 404 { 405 /* 406 * We take extra care never to free a saved locale created by 407 * setlocale(). This shouldn't be strictly necessary, but a little 408 * extra safety doesn't hurt here. 409 */ 410 if ((loc != NULL) && (loc != &posix_locale) && (!loc->on_list)) 411 lfree(loc, sizeof (*loc)); 412 } 413 414 locale_t 415 newlocale(int catmask, const char *locname, locale_t base) 416 { 417 locale_t loc; 418 int i, e; 419 420 if (catmask & ~(LC_ALL_MASK)) { 421 errno = EINVAL; 422 return (NULL); 423 } 424 425 /* 426 * Technically passing LC_GLOBAL_LOCALE here is illegal, 427 * but we allow it. 428 */ 429 if (base == NULL || base == ___global_locale) { 430 loc = duplocale(___global_locale); 431 } else { 432 loc = duplocale(base); 433 } 434 if (loc == NULL) { 435 return (NULL); 436 } 437 438 for (i = 0; i < LC_ALL; i++) { 439 struct locdata *ldata; 440 loc->loaded[i] = 0; 441 if (((1 << i) & catmask) == 0) { 442 /* Default to base locale if not overriding */ 443 continue; 444 } 445 ldata = locdata_get(i, locname); 446 if (ldata == NULL) { 447 e = errno; 448 freelocale(loc); 449 errno = e; 450 return (NULL); 451 } 452 loc->locdata[i] = ldata; 453 } 454 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 455 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 456 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 457 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 458 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 459 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 460 loc->time = loc->locdata[LC_TIME]->l_data[0]; 461 freelocale(base); 462 463 return (mklocname(loc)); 464 } 465 466 locale_t 467 uselocale(locale_t loc) 468 { 469 locale_t lastloc = ___global_locale; 470 locale_t *locptr; 471 472 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); 473 /* Should never occur */ 474 if (locptr == NULL) { 475 errno = EINVAL; 476 return (NULL); 477 } 478 479 if (*locptr != NULL) 480 lastloc = *locptr; 481 482 /* Argument loc is NULL if we are just querying. */ 483 if (loc != NULL) { 484 /* 485 * Set it to LC_GLOBAL_LOCAL to return to using 486 * the global locale (setlocale). 487 */ 488 if (loc == ___global_locale) { 489 *locptr = NULL; 490 } else { 491 /* No validation of the provided locale at present */ 492 *locptr = loc; 493 } 494 } 495 496 /* 497 * The caller is responsible for freeing, of course it would be 498 * gross error to call freelocale() on a locale object that is still 499 * in use. 500 */ 501 return (lastloc); 502 } 503 504 static locale_t 505 mklocname(locale_t loc) 506 { 507 int composite = 0; 508 509 /* Look to see if any category is different */ 510 for (int i = 1; i < LC_ALL; ++i) { 511 if (strcmp(loc->locdata[0]->l_lname, 512 loc->locdata[i]->l_lname) != 0) { 513 composite = 1; 514 break; 515 } 516 } 517 518 if (composite) { 519 /* 520 * Note ordering of these follows the numeric order, 521 * if the order is changed, then setlocale() will need 522 * to be changed as well. 523 */ 524 (void) snprintf(loc->locname, sizeof (loc->locname), 525 "%s/%s/%s/%s/%s/%s", 526 loc->locdata[LC_CTYPE]->l_lname, 527 loc->locdata[LC_NUMERIC]->l_lname, 528 loc->locdata[LC_TIME]->l_lname, 529 loc->locdata[LC_COLLATE]->l_lname, 530 loc->locdata[LC_MONETARY]->l_lname, 531 loc->locdata[LC_MESSAGES]->l_lname); 532 } else { 533 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname, 534 sizeof (loc->locname)); 535 } 536 return (loc); 537 }