1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 14 */ 15 16 /* 17 * This file implements the 2008 newlocale and friends handling. 18 */ 19 20 #ifndef _LCONV_C99 21 #define _LCONV_C99 22 #endif 23 24 #include "lint.h" 25 #include <atomic.h> 26 #include <locale.h> 27 #include <sys/types.h> 28 #include <sys/mman.h> 29 #include <errno.h> 30 #include <string.h> 31 #include "libc.h" 32 #include "mtlib.h" 33 #include "tsd.h" 34 #include "localeimpl.h" 35 #include "lctype.h" 36 37 /* 38 * Big Theory of Locales: 39 * 40 * (It is recommended that readers familiarize themselves with the POSIX 41 * 2008 (XPG Issue 7) specifications for locales, first.) 42 * 43 * Historically, we had a bunch of global variables that stored locale 44 * data. While this worked well, it limited applications to a single locale 45 * at a time. This doesn't work well in certain server applications. 46 * 47 * Issue 7, X/Open introduced the concept of a locale_t object, along with 48 * versions of functions that can take this object as a parameter, along 49 * with functions to clone and manipulate these locale objects. The new 50 * functions are named with a _l() suffix. 51 * 52 * Additionally uselocale() is introduced which can change the locale of 53 * of a single thread. However, setlocale() can still be used to change 54 * the global locale. 55 * 56 * In our implementation, we use libc's TSD to store the locale data that 57 * was previously global. We still have global data because some applications 58 * have had those global objects compiled into them. (Such applications will 59 * be unable to benefit from uselocale(), btw.) The legacy routines are 60 * reimplemented as wrappers that use the appropriate locale object by 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the 62 * thread-specific locale object if one is present, or the global locale 63 * object otherwise. Note that once the TSD data is set, the only way 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE 65 * to uselocale(). 66 * 67 * We are careful to minimize performance impact of multiple calls to 68 * uselocale() or setlocale() by using a cache of locale data whenever possible. 69 * As a consequence of this, applications that iterate over all possible 70 * locales will burn through a lot of virtual memory, but we find such 71 * applications rare. (locale -a might be an exception, but it is short lived.) 72 * 73 * Category data is never released (although enclosing locale objects might be), 74 * in order to guarantee thread-safety. Calling freelocale() on an object 75 * while it is in use by another thread is a programmer error (use-after-free) 76 * and we don't bother to note it further. 77 * 78 * Locale objects (global locales) established by setlocale() are also 79 * never freed (for MT safety), but we will save previous locale objects 80 * and reuse them when we can. 81 */ 82 83 typedef struct locdata *(*loadfn_t)(const char *); 84 85 static const loadfn_t loaders[LC_ALL] = { 86 __lc_ctype_load, 87 __lc_numeric_load, 88 __lc_time_load, 89 __lc_collate_load, 90 __lc_monetary_load, 91 __lc_messages_load, 92 }; 93 94 extern struct lc_monetary lc_monetary_posix; 95 extern struct lc_numeric lc_numeric_posix; 96 extern struct lc_messages lc_messages_posix; 97 extern struct lc_time lc_time_posix; 98 extern struct lc_ctype lc_ctype_posix; 99 extern struct lc_collate lc_collate_posix; 100 101 static struct locale posix_locale = { 102 /* locdata */ 103 .locdata = { 104 &__posix_ctype_locdata, 105 &__posix_numeric_locdata, 106 &__posix_time_locdata, 107 &__posix_collate_locdata, 108 &__posix_monetary_locdata, 109 &__posix_messages_locdata, 110 }, 111 .ctype = &lc_ctype_posix, 112 .numeric = &lc_numeric_posix, 113 .collate = &lc_collate_posix, 114 .monetary = &lc_monetary_posix, 115 .messages = &lc_messages_posix, 116 .time = &lc_time_posix, 117 .runelocale = &_DefaultRuneLocale, 118 }; 119 120 locale_t ___global_locale = &posix_locale; 121 122 locale_t 123 __global_locale(void) 124 { 125 return (___global_locale); 126 } 127 128 /* 129 * Category names for getenv() Note that this was modified 130 * for Solaris. See <iso/locale_iso.h>. 131 */ 132 #define NUM_CATS 7 133 static char *categories[7] = { 134 "LC_CTYPE", 135 "LC_NUMERIC", 136 "LC_TIME", 137 "LC_COLLATE", 138 "LC_MONETARY", 139 "LC_MESSAGES", 140 "LC_ALL", 141 }; 142 143 /* 144 * Prototypes. 145 */ 146 static const char *get_locale_env(int); 147 static struct locdata *locdata_get(int, const const char *); 148 static struct locdata *locdata_get_cache(int, const char *); 149 150 /* 151 * Some utility routines. 152 */ 153 154 struct locdata * 155 __locdata_alloc(const char *name, size_t memsz) 156 { 157 struct locdata *ldata; 158 159 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) { 160 return (NULL); 161 } 162 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) { 163 free(ldata); 164 errno = ENOMEM; 165 return (NULL); 166 } 167 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname)); 168 169 return (ldata); 170 } 171 172 /* 173 * Normally we never free locale data truly, but if we failed to load it 174 * for some reason, this routine is used to cleanup the partial mess. 175 */ 176 void 177 __locdata_free(struct locdata *ldata) 178 { 179 for (int i = 0; i < NLOCDATA; i++) 180 free(ldata->l_data[i]); 181 if (ldata->l_map != NULL && ldata->l_map_len) 182 (void) munmap(ldata->l_map, ldata->l_map_len); 183 free(ldata); 184 } 185 186 /* 187 * It turns out that for performance reasons we would really like to 188 * cache the most recently referenced locale data to avoid wasteful 189 * loading from files. 190 */ 191 192 static struct locdata *cache_data[LC_ALL]; 193 static struct locdata *cat_data[LC_ALL]; 194 static mutex_t cache_lock = DEFAULTMUTEX; 195 196 /* 197 * Returns the cached data if the locale name is the same. If not, 198 * returns NULL (cache miss). The locdata is returned with a hold on 199 * it, taken on behalf of the caller. The caller should drop the hold 200 * when it is finished. 201 */ 202 static struct locdata * 203 locdata_get_cache(int category, const char *locname) 204 { 205 struct locdata *loc; 206 207 if (category < 0 || category >= LC_ALL) 208 return (NULL); 209 210 /* Try cache first. */ 211 lmutex_lock(&cache_lock); 212 loc = cache_data[category]; 213 214 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) { 215 lmutex_unlock(&cache_lock); 216 return (loc); 217 } 218 219 /* 220 * Failing that try previously loaded locales (linear search) -- 221 * this could be optimized to a hash, but its unlikely that a single 222 * application will ever need to work with more than a few locales. 223 */ 224 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) { 225 if (strcmp(locname, loc->l_lname) == 0) { 226 break; 227 } 228 } 229 230 /* 231 * Finally, if we still don't have one, try loading the locale 232 * data from the actual on-disk data. 233 * 234 * We drop the lock (libc wants to ensure no internal locks 235 * are held when we call other routines required to read from 236 * files, allocate memory, etc.) There is a small race here, 237 * but the consequences of the race are benign -- if multiple 238 * threads hit this at precisely the same point, we could 239 * wind up with duplicates of the locale data in the cache. 240 * 241 * This wastes the memory for an extra copy of the locale 242 * data, but there is no further harm beyond that. Its not 243 * worth the effort to recode this to something "safe" 244 * (which would require rescanning the list, etc.), given 245 * that this race will probably never actually occur. 246 */ 247 if (loc == NULL) { 248 lmutex_unlock(&cache_lock); 249 loc = (*loaders[category])(locname); 250 lmutex_lock(&cache_lock); 251 (void) strlcpy(loc->l_lname, locname, 252 sizeof (loc->l_lname)); 253 } 254 255 /* 256 * Assuming we got one, update the cache, and stick us on the list 257 * of loaded locale data. We insert into the head (more recent 258 * use is likely to win.) 259 */ 260 if (loc != NULL) { 261 cache_data[category] = loc; 262 if (loc->l_next == NULL) { 263 loc->l_next = cat_data[category]; 264 cat_data[category] = loc; 265 } 266 } 267 268 lmutex_unlock(&cache_lock); 269 return (loc); 270 } 271 272 /* 273 * Routine to get the locdata for a given category and locale. 274 * This includes retrieving it from cache, retrieving it from 275 * a file, etc. 276 */ 277 static struct locdata * 278 locdata_get(int category, const char *locname) 279 { 280 char scratch[ENCODING_LEN + 1]; 281 char *slash; 282 int cnt; 283 int len; 284 285 if (locname == NULL || *locname == 0) { 286 locname = get_locale_env(category); 287 } 288 289 /* 290 * Extract the locale name for the category if it is a composite 291 * locale. 292 */ 293 if ((slash = strchr(locname, '/')) != NULL) { 294 for (cnt = category; cnt && slash != NULL; cnt--) { 295 locname = slash + 1; 296 slash = strchr(locname, '/'); 297 } 298 if (slash) { 299 len = slash - locname; 300 if (len >= sizeof (scratch)) { 301 len = sizeof (scratch); 302 } 303 } else { 304 len = sizeof (scratch); 305 } 306 (void) strlcpy(scratch, locname, len); 307 locname = scratch; 308 } 309 310 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0)) 311 return (posix_locale.locdata[category]); 312 313 return (locdata_get_cache(category, locname)); 314 } 315 316 /* tsd destructor */ 317 static void 318 freelocptr(void *arg) 319 { 320 locale_t *locptr = arg; 321 if (*locptr != NULL) 322 freelocale(*locptr); 323 } 324 325 static const char * 326 get_locale_env(int category) 327 { 328 const char *env; 329 330 /* 1. check LC_ALL. */ 331 env = getenv(categories[LC_ALL]); 332 333 /* 2. check LC_* */ 334 if (env == NULL || *env == '\0') 335 env = getenv(categories[category]); 336 337 /* 3. check LANG */ 338 if (env == NULL || *env == '\0') 339 env = getenv("LANG"); 340 341 /* 4. if none is set, fall to "C" */ 342 if (env == NULL || *env == '\0') 343 env = "C"; 344 345 return (env); 346 } 347 348 349 /* 350 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy 351 * code will continue to use _ctype[520], but we prefer this function as 352 * it is the only way to get thread-specific information. 353 */ 354 unsigned char 355 __mb_cur_max_l(locale_t loc) 356 { 357 return (loc->ctype->lc_max_mblen); 358 } 359 360 unsigned char 361 __mb_cur_max(void) 362 { 363 return (__mb_cur_max_l(uselocale(NULL))); 364 } 365 366 /* 367 * Public interfaces. 368 */ 369 370 locale_t 371 duplocale(locale_t src) 372 { 373 locale_t loc; 374 int i; 375 376 loc = calloc(1, sizeof (*loc)); 377 if (loc == NULL) { 378 return (NULL); 379 } 380 if (src == NULL) { 381 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */ 382 src = ___global_locale; 383 } 384 for (i = 0; i < LC_ALL; i++) { 385 loc->locdata[i] = src->locdata[i]; 386 loc->loaded[i] = 0; 387 } 388 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 389 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 390 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 391 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 392 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 393 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 394 loc->time = loc->locdata[LC_TIME]->l_data[0]; 395 return (loc); 396 } 397 398 void 399 freelocale(locale_t loc) 400 { 401 /* 402 * We take extra care never to free a saved locale created by 403 * setlocale(). This shouldn't be strictly necessary, but a little 404 * extra safety doesn't hurt here. 405 */ 406 if ((loc != &posix_locale) && (loc->next == NULL)) 407 free(loc); 408 } 409 410 locale_t 411 newlocale(int catmask, const char *locname, locale_t base) 412 { 413 locale_t loc; 414 int i, e; 415 416 if (catmask & ~(LC_ALL_MASK)) { 417 errno = EINVAL; 418 return (NULL); 419 } 420 /* 421 * Technically passing LC_GLOBAL_LOCALE here is illegal, 422 * but we allow it. 423 */ 424 if (base == NULL || base == ___global_locale) { 425 loc = duplocale(___global_locale); 426 } else { 427 loc = base; 428 } 429 if (loc == NULL) { 430 return (NULL); 431 } 432 433 for (i = 0; i < LC_ALL; i++) { 434 struct locdata *ldata; 435 loc->loaded[i] = 0; 436 if (((1 << i) & catmask) == 0) { 437 /* Default to base locale if not overriding */ 438 continue; 439 } 440 ldata = locdata_get(i, locname); 441 if (ldata == NULL) { 442 e = errno; 443 freelocale(loc); 444 errno = e; 445 return (NULL); 446 } 447 loc->locdata[i] = ldata; 448 } 449 loc->collate = loc->locdata[LC_COLLATE]->l_data[0]; 450 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0]; 451 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1]; 452 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0]; 453 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0]; 454 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0]; 455 loc->time = loc->locdata[LC_TIME]->l_data[0]; 456 return (loc); 457 } 458 459 locale_t 460 uselocale(locale_t loc) 461 { 462 locale_t lastloc = ___global_locale; 463 locale_t *locptr; 464 465 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr); 466 /* Should never occur */ 467 if (locptr == NULL) { 468 errno = EINVAL; 469 return (NULL); 470 } 471 472 if (*locptr != NULL) 473 lastloc = *locptr; 474 475 /* Argument loc is NULL if we are just querying. */ 476 if (loc != NULL) { 477 /* 478 * Set it to LC_GLOBAL_LOCAL to return to using 479 * the global locale (setlocale). 480 */ 481 if (loc == ___global_locale) { 482 *locptr = NULL; 483 } else { 484 /* No validation of the provided locale at present */ 485 *locptr = loc; 486 } 487 } 488 489 /* 490 * The caller is responsible for freeing, of course it would be 491 * gross error to call freelocale() on a locale object that is still 492 * in use. 493 */ 494 return (lastloc); 495 }