Print this page
Thread safety fixes.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/localeimpl.c
+++ new/usr/src/lib/libc/port/locale/localeimpl.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
14 14 */
15 15
16 16 /*
17 17 * This file implements the 2008 newlocale and friends handling.
18 18 */
19 19
20 20 #ifndef _LCONV_C99
21 21 #define _LCONV_C99
22 22 #endif
23 23
24 24 #include "lint.h"
25 25 #include <atomic.h>
26 26 #include <locale.h>
27 27 #include <sys/types.h>
28 28 #include <sys/mman.h>
29 29 #include <errno.h>
30 30 #include <string.h>
31 31 #include "libc.h"
32 32 #include "mtlib.h"
33 33 #include "tsd.h"
34 34 #include "localeimpl.h"
35 35 #include "lctype.h"
36 36
37 37 /*
38 38 * Big Theory of Locales:
39 39 *
40 40 * (It is recommended that readers familiarize themselves with the POSIX
41 41 * 2008 (XPG Issue 7) specifications for locales, first.)
42 42 *
43 43 * Historically, we had a bunch of global variables that stored locale
44 44 * data. While this worked well, it limited applications to a single locale
45 45 * at a time. This doesn't work well in certain server applications.
46 46 *
47 47 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
48 48 * versions of functions that can take this object as a parameter, along
49 49 * with functions to clone and manipulate these locale objects. The new
50 50 * functions are named with a _l() suffix.
51 51 *
52 52 * Additionally, uselocale() is introduced, which can change the locale of
53 53 * a single thread. However, setlocale() can still be used to change
54 54 * the global locale.
55 55 *
56 56 * In our implementation, we use libc's TSD to store the locale data that
57 57 * was previously global. We still have global data because some applications
58 58 * have had those global objects compiled into them. (Such applications will
59 59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 60 * reimplemented as wrappers that use the appropriate locale object by
61 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
62 62 * thread-specific locale object if one is present, or the global locale
63 63 * object otherwise. Note that once the TSD data is set, the only way
64 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 65 * to uselocale().
66 66 *
67 67 * We are careful to minimize performance impact of multiple calls to
68 68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 69 * As a consequence of this, applications that iterate over all possible
70 70 * locales will burn through a lot of virtual memory, but we find such
71 71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 + *
73 + * Category data is never released (although enclosing locale objects might be),
74 + * in order to guarantee thread-safety. Calling freelocale() on an object
75 + * while it is in use by another thread is a programmer error (use-after-free)
76 + * and we don't bother to note it further.
77 + *
78 + * Locale objects (global locales) established by setlocale() are also
79 + * never freed (for MT safety), but we will save previous locale objects
80 + * and reuse them when we can.
72 81 */
73 82
74 -/*
75 - * NB: Each of the structures listed herein should have the refcnt
76 - * set to -1, to ensure that posix locale information is never ever freed,
77 - * even when changing the global locale.
78 - */
79 -
80 83 typedef struct locdata *(*loadfn_t)(const char *);
81 84
82 85 static const loadfn_t loaders[LC_ALL] = {
83 86 __lc_ctype_load,
84 87 __lc_numeric_load,
85 88 __lc_time_load,
86 89 __lc_collate_load,
87 90 __lc_monetary_load,
88 91 __lc_messages_load,
89 92 };
90 93
91 94 extern struct lc_monetary lc_monetary_posix;
92 95 extern struct lc_numeric lc_numeric_posix;
93 96 extern struct lc_messages lc_messages_posix;
94 97 extern struct lc_time lc_time_posix;
95 98 extern struct lc_ctype lc_ctype_posix;
96 99 extern struct lc_collate lc_collate_posix;
97 100
98 101 static struct locale posix_locale = {
99 102 /* locdata */
100 103 .locdata = {
101 104 &__posix_ctype_locdata,
102 105 &__posix_numeric_locdata,
103 106 &__posix_time_locdata,
104 107 &__posix_collate_locdata,
105 108 &__posix_monetary_locdata,
106 109 &__posix_messages_locdata,
107 110 },
108 111 .ctype = &lc_ctype_posix,
109 112 .numeric = &lc_numeric_posix,
110 113 .collate = &lc_collate_posix,
111 114 .monetary = &lc_monetary_posix,
112 115 .messages = &lc_messages_posix,
113 116 .time = &lc_time_posix,
114 117 .runelocale = &_DefaultRuneLocale,
115 118 };
116 119
117 120 locale_t ___global_locale = &posix_locale;
118 121
119 122 locale_t
120 123 __global_locale(void)
121 124 {
122 125 return (___global_locale);
123 126 }
124 127
125 128 /*
126 129 * Category names for getenv(). Note that this was modified
127 130 * for Solaris. See <iso/locale_iso.h>.
128 131 */
129 132 #define NUM_CATS 7
130 133 static char *categories[7] = {
131 134 "LC_CTYPE",
132 135 "LC_NUMERIC",
133 136 "LC_TIME",
134 137 "LC_COLLATE",
135 138 "LC_MONETARY",
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
136 139 "LC_MESSAGES",
137 140 "LC_ALL",
138 141 };
139 142
140 143 /*
141 144 * Prototypes.
142 145 */
143 146 static const char *get_locale_env(int);
144 147 static struct locdata *locdata_get(int, const const char *);
145 148 static struct locdata *locdata_get_cache(int, const char *);
146 -static void locdata_set_cache(int, struct locdata *);
147 149
148 150 /*
149 151 * Some utility routines.
150 152 */
151 -struct locdata *
152 -__locdata_hold(struct locdata *ld)
153 -{
154 - if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
155 - atomic_inc_32(&ld->l_refcnt);
156 - return (ld);
157 -}
158 153
159 -void
160 -__locdata_release(struct locdata *ld)
161 -{
162 - if (ld->l_refcnt == (uint32_t)-1)
163 - return;
164 -
165 - if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
166 - for (int i = 0; i < NLOCDATA; i++)
167 - free(ld->l_data[i]);
168 - if (ld->l_map && ld->l_map_len) {
169 - (void) munmap(ld->l_map, ld->l_map_len);
170 - }
171 - free(ld);
172 - }
173 -}
174 -
175 154 struct locdata *
176 155 __locdata_alloc(const char *name, size_t memsz)
177 156 {
178 157 struct locdata *ldata;
179 158
180 159 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
181 160 return (NULL);
182 161 }
183 162 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
184 163 free(ldata);
185 164 errno = ENOMEM;
186 165 return (NULL);
187 166 }
188 167 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
189 - ldata->l_refcnt = 1;
190 168
191 169 return (ldata);
192 170 }
193 171
194 172 /*
173 + * Normally we never truly free locale data, but if we failed to load it
174 + * for some reason, this routine is used to clean up the partial mess.
175 + */
176 +void
177 +__locdata_free(struct locdata *ldata)
178 +{
179 + for (int i = 0; i < NLOCDATA; i++)
180 + free(ldata->l_data[i]);
181 + if (ldata->l_map != NULL && ldata->l_map_len)
182 + (void) munmap(ldata->l_map, ldata->l_map_len);
183 + free(ldata);
184 +}
185 +
186 +/*
195 187 * It turns out that for performance reasons we would really like to
196 188 * cache the most recently referenced locale data to avoid wasteful
197 189 * loading from files.
198 190 */
199 191
200 192 static struct locdata *cache_data[LC_ALL];
193 +static struct locdata *cat_data[LC_ALL];
201 194 static mutex_t cache_lock = DEFAULTMUTEX;
202 195
203 196 /*
204 197 * Returns the cached data if the locale name is the same. If not,
205 198 * returns NULL (cache miss). The locdata is returned with a hold on
206 199 * it, taken on behalf of the caller. The caller should drop the hold
207 200 * when it is finished.
208 201 */
209 202 static struct locdata *
210 203 locdata_get_cache(int category, const char *locname)
211 204 {
212 205 struct locdata *loc;
213 206
214 207 if (category < 0 || category >= LC_ALL)
215 208 return (NULL);
216 209
210 + /* Try cache first. */
217 211 lmutex_lock(&cache_lock);
218 - if ((loc = cache_data[category]) != NULL) {
219 - if (strcmp(locname, loc->l_lname) == 0) {
220 - loc = __locdata_hold(loc);
221 - } else {
222 - loc = NULL;
223 - }
212 + loc = cache_data[category];
213 +
214 + if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
215 + lmutex_unlock(&cache_lock);
216 + return (loc);
224 217 }
225 - lmutex_unlock(&cache_lock);
226 - return (loc);
227 -}
228 218
229 -/*
230 - * Set the cache for the category to specific content. An additional hold
231 - * is taken for the data while it is in the cache, so the caller may drop
232 - * its own hold once this is complete. Also, releases the hold on any
233 - * previously cached data.
234 - */
235 -static void
236 -locdata_set_cache(int category, struct locdata *loc)
237 -{
238 - struct locdata *old;
219 + /*
220 + * Failing that try previously loaded locales (linear search) --
221 + * this could be optimized to a hash, but it's unlikely that a single
222 + * application will ever need to work with more than a few locales.
223 + */
224 + for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
225 + if (strcmp(locname, loc->l_lname) == 0) {
226 + break;
227 + }
228 + }
239 229
240 - if (category < 0 || category >= LC_ALL)
241 - return;
230 + /*
231 + * Finally, if we still don't have one, try loading the locale
232 + * data from the actual on-disk data.
233 + *
234 + * We drop the lock (libc wants to ensure no internal locks
235 + * are held when we call other routines required to read from
236 + * files, allocate memory, etc.) There is a small race here,
237 + * but the consequences of the race are benign -- if multiple
238 + * threads hit this at precisely the same point, we could
239 + * wind up with duplicates of the locale data in the cache.
240 + *
241 + * This wastes the memory for an extra copy of the locale
242 + * data, but there is no further harm beyond that. It's not
243 + * worth the effort to recode this to something "safe"
244 + * (which would require rescanning the list, etc.), given
245 + * that this race will probably never actually occur.
246 + */
247 + if (loc == NULL) {
248 + lmutex_unlock(&cache_lock);
249 + loc = (*loaders[category])(locname);
250 + lmutex_lock(&cache_lock);
251 + (void) strlcpy(loc->l_lname, locname,
252 + sizeof (loc->l_lname));
253 + }
242 254
243 - lmutex_lock(&cache_lock);
244 - old = cache_data[category];
245 - cache_data[category] = __locdata_hold(loc);
246 - lmutex_unlock(&cache_lock);
255 + /*
256 + * Assuming we got one, update the cache, and stick us on the list
257 + * of loaded locale data. We insert into the head (more recent
258 + * use is likely to win.)
259 + */
260 + if (loc != NULL) {
261 + cache_data[category] = loc;
262 + if (loc->l_next == NULL) {
263 + loc->l_next = cat_data[category];
264 + cat_data[category] = loc;
265 + }
266 + }
247 267
248 - /* drop our reference on the old data */
249 - if (old)
250 - __locdata_release(old);
268 + lmutex_unlock(&cache_lock);
269 + return (loc);
251 270 }
252 271
253 272 /*
254 273 * Routine to get the locdata for a given category and locale.
255 274 * This includes retrieving it from cache, retrieving it from
256 275 * a file, etc.
257 276 */
258 277 static struct locdata *
259 278 locdata_get(int category, const char *locname)
260 279 {
261 - struct locdata *ldata;
262 280 char scratch[ENCODING_LEN + 1];
263 281 char *slash;
264 282 int cnt;
265 283 int len;
266 284
267 285 if (locname == NULL || *locname == 0) {
268 286 locname = get_locale_env(category);
269 287 }
270 288
271 289 /*
272 290 * Extract the locale name for the category if it is a composite
273 291 * locale.
274 292 */
275 293 if ((slash = strchr(locname, '/')) != NULL) {
276 294 for (cnt = category; cnt && slash != NULL; cnt--) {
277 295 locname = slash + 1;
278 296 slash = strchr(locname, '/');
279 297 }
280 298 if (slash) {
281 299 len = slash - locname;
282 300 if (len >= sizeof (scratch)) {
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
283 301 len = sizeof (scratch);
284 302 }
285 303 } else {
286 304 len = sizeof (scratch);
287 305 }
288 306 (void) strlcpy(scratch, locname, len);
289 307 locname = scratch;
290 308 }
291 309
292 310 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
293 - return (__locdata_hold(posix_locale.locdata[category]));
311 + return (posix_locale.locdata[category]);
294 312
295 - ldata = locdata_get_cache(category, locname);
296 - if (ldata != NULL)
297 - return (ldata);
298 -
299 - /* Otherwise load it */
300 - ldata = (*loaders[category])(locname);
301 - if (ldata != NULL) {
302 - locdata_set_cache(category, ldata);
303 - }
304 - return (ldata);
313 + return (locdata_get_cache(category, locname));
305 314 }
306 315
307 316 /* tsd destructor */
308 317 static void
309 318 freelocptr(void *arg)
310 319 {
311 320 locale_t *locptr = arg;
312 321 if (*locptr != NULL)
313 322 freelocale(*locptr);
314 323 }
315 324
316 325 static const char *
317 326 get_locale_env(int category)
318 327 {
319 328 const char *env;
320 329
321 330 /* 1. check LC_ALL. */
322 331 env = getenv(categories[LC_ALL]);
323 332
324 333 /* 2. check LC_* */
325 334 if (env == NULL || *env == '\0')
326 335 env = getenv(categories[category]);
327 336
328 337 /* 3. check LANG */
329 338 if (env == NULL || *env == '\0')
330 339 env = getenv("LANG");
331 340
332 341 /* 4. if none is set, fall to "C" */
333 342 if (env == NULL || *env == '\0')
334 343 env = "C";
335 344
336 345 return (env);
337 346 }
338 347
339 348
340 349 /*
341 350 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
342 351 * code will continue to use _ctype[520], but we prefer this function as
343 352 * it is the only way to get thread-specific information.
344 353 */
345 354 unsigned char
346 355 __mb_cur_max_l(locale_t loc)
347 356 {
348 357 return (loc->ctype->lc_max_mblen);
349 358 }
350 359
351 360 unsigned char
352 361 __mb_cur_max(void)
353 362 {
354 363 return (__mb_cur_max_l(uselocale(NULL)));
355 364 }
356 365
357 366 /*
358 367 * Public interfaces.
359 368 */
360 369
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
361 370 locale_t
362 371 duplocale(locale_t src)
363 372 {
364 373 locale_t loc;
365 374 int i;
366 375
367 376 loc = calloc(1, sizeof (*loc));
368 377 if (loc == NULL) {
369 378 return (NULL);
370 379 }
380 + if (src == NULL) {
381 + /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
382 + src = ___global_locale;
383 + }
371 384 for (i = 0; i < LC_ALL; i++) {
372 - loc->locdata[i] = __locdata_hold(src->locdata[i]);
385 + loc->locdata[i] = src->locdata[i];
373 386 loc->loaded[i] = 0;
374 387 }
375 388 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
376 389 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
377 390 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
378 391 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
379 392 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
380 393 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
381 394 loc->time = loc->locdata[LC_TIME]->l_data[0];
382 395 return (loc);
383 396 }
384 397
385 398 void
386 399 freelocale(locale_t loc)
387 400 {
388 - int i;
389 - for (i = 0; i < LC_ALL; i++)
390 - __locdata_release(loc->locdata[i]);
391 - if (loc != &posix_locale)
401 + /*
402 + * We take extra care never to free a saved locale created by
403 + * setlocale(). This shouldn't be strictly necessary, but a little
404 + * extra safety doesn't hurt here.
405 + */
406 + if ((loc != &posix_locale) && (loc->next == NULL))
392 407 free(loc);
393 408 }
394 409
395 410 locale_t
396 411 newlocale(int catmask, const char *locname, locale_t base)
397 412 {
398 413 locale_t loc;
399 414 int i, e;
400 415
401 416 if (catmask & ~(LC_ALL_MASK)) {
402 417 errno = EINVAL;
403 418 return (NULL);
404 419 }
405 - loc = duplocale(base != NULL ? base : ___global_locale);
420 + /*
421 + * Technically passing LC_GLOBAL_LOCALE here is illegal,
422 + * but we allow it.
423 + */
424 + if (base == NULL || base == ___global_locale) {
425 + loc = duplocale(___global_locale);
426 + } else {
427 + loc = base;
428 + }
406 429 if (loc == NULL) {
407 430 return (NULL);
408 431 }
409 432
410 433 for (i = 0; i < LC_ALL; i++) {
411 434 struct locdata *ldata;
412 435 loc->loaded[i] = 0;
413 436 if (((1 << i) & catmask) == 0) {
414 437 /* Default to base locale if not overriding */
415 438 continue;
416 439 }
417 440 ldata = locdata_get(i, locname);
418 441 if (ldata == NULL) {
419 442 e = errno;
420 443 freelocale(loc);
421 444 errno = e;
422 445 return (NULL);
423 446 }
424 - __locdata_release(loc->locdata[i]);
425 447 loc->locdata[i] = ldata;
426 448 }
427 - if (base && base != ___global_locale) {
428 - freelocale(base);
429 - }
430 449 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
431 450 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
432 451 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
433 452 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
434 453 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
435 454 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
436 455 loc->time = loc->locdata[LC_TIME]->l_data[0];
437 456 return (loc);
438 457 }
439 458
440 459 locale_t
441 460 uselocale(locale_t loc)
442 461 {
443 462 locale_t lastloc = ___global_locale;
444 463 locale_t *locptr;
445 464
446 465 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
447 466 /* Should never occur */
448 467 if (locptr == NULL) {
449 468 errno = EINVAL;
450 469 return (NULL);
451 470 }
452 471
453 472 if (*locptr != NULL)
454 473 lastloc = *locptr;
455 474
456 475 /* Argument loc is NULL if we are just querying. */
457 476 if (loc != NULL) {
458 477 /*
459 478 * Set it to LC_GLOBAL_LOCALE to return to using
460 479 * the global locale (setlocale).
461 480 */
462 481 if (loc == ___global_locale) {
463 482 *locptr = NULL;
464 483 } else {
465 484 /* No validation of the provided locale at present */
466 485 *locptr = loc;
467 486 }
468 487 }
469 488
470 489 /*
471 490 * The caller is responsible for freeing; of course it would be a
472 491 * gross error to call freelocale() on a locale object that is still
473 492 * in use.
474 493 */
475 494 return (lastloc);
476 495 }
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX