52 * Additionally uselocale() is introduced which can change the locale
53 * of a single thread. However, setlocale() can still be used to change
54 * the global locale.
55 *
56 * In our implementation, we use libc's TSD to store the locale data that
57 * was previously global. We still have global data because some applications
58 * have had those global objects compiled into them. (Such applications will
59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 * reimplemented as wrappers that use the appropriate locale object by
61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 * thread-specific locale object if one is present, or the global locale
63 * object otherwise. Note that once the TSD data is set, the only way
64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 * to uselocale().
66 *
67 * We are careful to minimize performance impact of multiple calls to
68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 * As a consequence of this, applications that iterate over all possible
70 * locales will burn through a lot of virtual memory, but we find such
71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 */
73
74 /*
75 * NB: Each of the structures listed herein should have the refcnt
76 * set to -1, to ensure that posix locale information is never ever freed,
77 * even when changing the global locale.
78 */
79
80 typedef struct locdata *(*loadfn_t)(const char *);
81
82 static const loadfn_t loaders[LC_ALL] = {
83 __lc_ctype_load,
84 __lc_numeric_load,
85 __lc_time_load,
86 __lc_collate_load,
87 __lc_monetary_load,
88 __lc_messages_load,
89 };
90
/*
 * Per-category POSIX locale data objects, defined elsewhere.  Listed in
 * category order.
 */
extern struct lc_ctype lc_ctype_posix;
extern struct lc_numeric lc_numeric_posix;
extern struct lc_time lc_time_posix;
extern struct lc_collate lc_collate_posix;
extern struct lc_monetary lc_monetary_posix;
extern struct lc_messages lc_messages_posix;
97
98 static struct locale posix_locale = {
99 /* locdata */
126 * Category names for getenv() Note that this was modified
127 * for Solaris. See <iso/locale_iso.h>.
128 */
/* Number of entries in categories[]: six categories plus LC_ALL. */
#define	NUM_CATS	7

/*
 * Environment variable name for each category, indexed by category number;
 * the final slot is the LC_ALL override.  The array is sized by NUM_CATS so
 * the two cannot drift apart.
 */
static char *categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
139
140 /*
141 * Prototypes.
142 */
/* Forward declarations for the file-local helpers defined below. */
static const char *get_locale_env(int category);
/* The locale name was declared "const const char *"; one qualifier suffices. */
static struct locdata *locdata_get(int category, const char *locname);
static struct locdata *locdata_get_cache(int category, const char *locname);
static void locdata_set_cache(int category, struct locdata *loc);
147
148 /*
149 * Some utility routines.
150 */
151 struct locdata *
152 __locdata_hold(struct locdata *ld)
153 {
154 if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
155 atomic_inc_32(&ld->l_refcnt);
156 return (ld);
157 }
158
159 void
160 __locdata_release(struct locdata *ld)
161 {
162 if (ld->l_refcnt == (uint32_t)-1)
163 return;
164
165 if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
166 for (int i = 0; i < NLOCDATA; i++)
167 free(ld->l_data[i]);
168 if (ld->l_map && ld->l_map_len) {
169 (void) munmap(ld->l_map, ld->l_map_len);
170 }
171 free(ld);
172 }
173 }
174
175 struct locdata *
176 __locdata_alloc(const char *name, size_t memsz)
177 {
178 struct locdata *ldata;
179
180 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
181 return (NULL);
182 }
183 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
184 free(ldata);
185 errno = ENOMEM;
186 return (NULL);
187 }
188 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
189 ldata->l_refcnt = 1;
190
191 return (ldata);
192 }
193
194 /*
195 * It turns out that for performance reasons we would really like to
196 * cache the most recently referenced locale data to avoid wasteful
197 * loading from files.
198 */
199
200 static struct locdata *cache_data[LC_ALL];
201 static mutex_t cache_lock = DEFAULTMUTEX;
202
203 /*
204 * Returns the cached data if the locale name is the same. If not,
205 * returns NULL (cache miss). The locdata is returned with a hold on
206 * it, taken on behalf of the caller. The caller should drop the hold
207 * when it is finished.
208 */
209 static struct locdata *
210 locdata_get_cache(int category, const char *locname)
211 {
212 struct locdata *loc;
213
214 if (category < 0 || category >= LC_ALL)
215 return (NULL);
216
217 lmutex_lock(&cache_lock);
218 if ((loc = cache_data[category]) != NULL) {
219 if (strcmp(locname, loc->l_lname) == 0) {
220 loc = __locdata_hold(loc);
221 } else {
222 loc = NULL;
223 }
224 }
225 lmutex_unlock(&cache_lock);
226 return (loc);
227 }
228
229 /*
230 * Set the cache for the category to specific content. An additional hold
231 * is taken for the data while it is in the cache, so the caller may drop
232 * its own hold once this is complete. Also, releases the hold on any
233 * previously cached data.
234 */
235 static void
236 locdata_set_cache(int category, struct locdata *loc)
237 {
238 struct locdata *old;
239
240 if (category < 0 || category >= LC_ALL)
241 return;
242
243 lmutex_lock(&cache_lock);
244 old = cache_data[category];
245 cache_data[category] = __locdata_hold(loc);
246 lmutex_unlock(&cache_lock);
247
248 /* drop our reference on the old data */
249 if (old)
250 __locdata_release(old);
251 }
252
253 /*
254 * Routine to get the locdata for a given category and locale.
255 * This includes retrieving it from cache, retrieving it from
256 * a file, etc.
257 */
258 static struct locdata *
259 locdata_get(int category, const char *locname)
260 {
261 struct locdata *ldata;
262 char scratch[ENCODING_LEN + 1];
263 char *slash;
264 int cnt;
265 int len;
266
267 if (locname == NULL || *locname == 0) {
268 locname = get_locale_env(category);
269 }
270
271 /*
272 * Extract the locale name for the category if it is a composite
273 * locale.
274 */
275 if ((slash = strchr(locname, '/')) != NULL) {
276 for (cnt = category; cnt && slash != NULL; cnt--) {
277 locname = slash + 1;
278 slash = strchr(locname, '/');
279 }
280 if (slash) {
281 len = slash - locname;
282 if (len >= sizeof (scratch)) {
283 len = sizeof (scratch);
284 }
285 } else {
286 len = sizeof (scratch);
287 }
288 (void) strlcpy(scratch, locname, len);
289 locname = scratch;
290 }
291
292 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
293 return (__locdata_hold(posix_locale.locdata[category]));
294
295 ldata = locdata_get_cache(category, locname);
296 if (ldata != NULL)
297 return (ldata);
298
299 /* Otherwise load it */
300 ldata = (*loaders[category])(locname);
301 if (ldata != NULL) {
302 locdata_set_cache(category, ldata);
303 }
304 return (ldata);
305 }
306
307 /* tsd destructor */
308 static void
309 freelocptr(void *arg)
310 {
311 locale_t *locptr = arg;
312 if (*locptr != NULL)
313 freelocale(*locptr);
314 }
315
316 static const char *
317 get_locale_env(int category)
318 {
319 const char *env;
320
321 /* 1. check LC_ALL. */
322 env = getenv(categories[LC_ALL]);
323
324 /* 2. check LC_* */
351 unsigned char
352 __mb_cur_max(void)
353 {
354 return (__mb_cur_max_l(uselocale(NULL)));
355 }
356
357 /*
358 * Public interfaces.
359 */
360
361 locale_t
362 duplocale(locale_t src)
363 {
364 locale_t loc;
365 int i;
366
367 loc = calloc(1, sizeof (*loc));
368 if (loc == NULL) {
369 return (NULL);
370 }
371 for (i = 0; i < LC_ALL; i++) {
372 loc->locdata[i] = __locdata_hold(src->locdata[i]);
373 loc->loaded[i] = 0;
374 }
375 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
376 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
377 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
378 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
379 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
380 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
381 loc->time = loc->locdata[LC_TIME]->l_data[0];
382 return (loc);
383 }
384
385 void
386 freelocale(locale_t loc)
387 {
388 int i;
389 for (i = 0; i < LC_ALL; i++)
390 __locdata_release(loc->locdata[i]);
391 if (loc != &posix_locale)
392 free(loc);
393 }
394
395 locale_t
396 newlocale(int catmask, const char *locname, locale_t base)
397 {
398 locale_t loc;
399 int i, e;
400
401 if (catmask & ~(LC_ALL_MASK)) {
402 errno = EINVAL;
403 return (NULL);
404 }
405 loc = duplocale(base != NULL ? base : ___global_locale);
406 if (loc == NULL) {
407 return (NULL);
408 }
409
410 for (i = 0; i < LC_ALL; i++) {
411 struct locdata *ldata;
412 loc->loaded[i] = 0;
413 if (((1 << i) & catmask) == 0) {
414 /* Default to base locale if not overriding */
415 continue;
416 }
417 ldata = locdata_get(i, locname);
418 if (ldata == NULL) {
419 e = errno;
420 freelocale(loc);
421 errno = e;
422 return (NULL);
423 }
424 __locdata_release(loc->locdata[i]);
425 loc->locdata[i] = ldata;
426 }
427 if (base && base != ___global_locale) {
428 freelocale(base);
429 }
430 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
431 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
432 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
433 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
434 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
435 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
436 loc->time = loc->locdata[LC_TIME]->l_data[0];
437 return (loc);
438 }
439
440 locale_t
441 uselocale(locale_t loc)
442 {
443 locale_t lastloc = ___global_locale;
444 locale_t *locptr;
445
446 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
447 /* Should never occur */
448 if (locptr == NULL) {
449 errno = EINVAL;
|
52 * Additionally uselocale() is introduced which can change the locale
53 * of a single thread. However, setlocale() can still be used to change
54 * the global locale.
55 *
56 * In our implementation, we use libc's TSD to store the locale data that
57 * was previously global. We still have global data because some applications
58 * have had those global objects compiled into them. (Such applications will
59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 * reimplemented as wrappers that use the appropriate locale object by
61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 * thread-specific locale object if one is present, or the global locale
63 * object otherwise. Note that once the TSD data is set, the only way
64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 * to uselocale().
66 *
67 * We are careful to minimize performance impact of multiple calls to
68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 * As a consequence of this, applications that iterate over all possible
70 * locales will burn through a lot of virtual memory, but we find such
71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 *
73 * Category data is never released (although enclosing locale objects might be),
74 * in order to guarantee thread-safety. Calling freelocale() on an object
75 * while it is in use by another thread is a programmer error (use-after-free)
76 * and we don't bother to note it further.
77 *
78 * Locale objects (global locales) established by setlocale() are also
79 * never freed (for MT safety), but we will save previous locale objects
80 * and reuse them when we can.
81 */
82
83 typedef struct locdata *(*loadfn_t)(const char *);
84
85 static const loadfn_t loaders[LC_ALL] = {
86 __lc_ctype_load,
87 __lc_numeric_load,
88 __lc_time_load,
89 __lc_collate_load,
90 __lc_monetary_load,
91 __lc_messages_load,
92 };
93
/*
 * Per-category POSIX locale data objects, defined elsewhere.  Listed in
 * category order.
 */
extern struct lc_ctype lc_ctype_posix;
extern struct lc_numeric lc_numeric_posix;
extern struct lc_time lc_time_posix;
extern struct lc_collate lc_collate_posix;
extern struct lc_monetary lc_monetary_posix;
extern struct lc_messages lc_messages_posix;
100
101 static struct locale posix_locale = {
102 /* locdata */
129 * Category names for getenv() Note that this was modified
130 * for Solaris. See <iso/locale_iso.h>.
131 */
/* Number of entries in categories[]: six categories plus LC_ALL. */
#define	NUM_CATS	7

/*
 * Environment variable name for each category, indexed by category number,
 * with LC_ALL in the last slot.  Sized by NUM_CATS rather than a repeated
 * literal so the count is stated once.
 */
static char *categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
142
143 /*
144 * Prototypes.
145 */
/* Forward declarations for the file-local helpers defined below. */
static const char *get_locale_env(int category);
/* The locale name was declared "const const char *"; one qualifier suffices. */
static struct locdata *locdata_get(int category, const char *locname);
static struct locdata *locdata_get_cache(int category, const char *locname);
149
150 /*
151 * Some utility routines.
152 */
153
154 struct locdata *
155 __locdata_alloc(const char *name, size_t memsz)
156 {
157 struct locdata *ldata;
158
159 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
160 return (NULL);
161 }
162 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
163 free(ldata);
164 errno = ENOMEM;
165 return (NULL);
166 }
167 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
168
169 return (ldata);
170 }
171
172 /*
173 * Normally we never free locale data truly, but if we failed to load it
174 * for some reason, this routine is used to cleanup the partial mess.
175 */
176 void
177 __locdata_free(struct locdata *ldata)
178 {
179 for (int i = 0; i < NLOCDATA; i++)
180 free(ldata->l_data[i]);
181 if (ldata->l_map != NULL && ldata->l_map_len)
182 (void) munmap(ldata->l_map, ldata->l_map_len);
183 free(ldata);
184 }
185
186 /*
187 * It turns out that for performance reasons we would really like to
188 * cache the most recently referenced locale data to avoid wasteful
189 * loading from files.
190 */
191
192 static struct locdata *cache_data[LC_ALL];
193 static struct locdata *cat_data[LC_ALL];
194 static mutex_t cache_lock = DEFAULTMUTEX;
195
196 /*
197 * Returns the cached data if the locale name is the same. If not,
198 * returns NULL (cache miss). The locdata is returned with a hold on
199 * it, taken on behalf of the caller. The caller should drop the hold
200 * when it is finished.
201 */
202 static struct locdata *
203 locdata_get_cache(int category, const char *locname)
204 {
205 struct locdata *loc;
206
207 if (category < 0 || category >= LC_ALL)
208 return (NULL);
209
210 /* Try cache first. */
211 lmutex_lock(&cache_lock);
212 loc = cache_data[category];
213
214 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
215 lmutex_unlock(&cache_lock);
216 return (loc);
217 }
218
219 /*
220 * Failing that try previously loaded locales (linear search) --
221 * this could be optimized to a hash, but its unlikely that a single
222 * application will ever need to work with more than a few locales.
223 */
224 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
225 if (strcmp(locname, loc->l_lname) == 0) {
226 break;
227 }
228 }
229
230 /*
231 * Finally, if we still don't have one, try loading the locale
232 * data from the actual on-disk data.
233 *
234 * We drop the lock (libc wants to ensure no internal locks
235 * are held when we call other routines required to read from
236 * files, allocate memory, etc.) There is a small race here,
237 * but the consequences of the race are benign -- if multiple
238 * threads hit this at precisely the same point, we could
239 * wind up with duplicates of the locale data in the cache.
240 *
241 * This wastes the memory for an extra copy of the locale
242 * data, but there is no further harm beyond that. Its not
243 * worth the effort to recode this to something "safe"
244 * (which would require rescanning the list, etc.), given
245 * that this race will probably never actually occur.
246 */
247 if (loc == NULL) {
248 lmutex_unlock(&cache_lock);
249 loc = (*loaders[category])(locname);
250 lmutex_lock(&cache_lock);
251 (void) strlcpy(loc->l_lname, locname,
252 sizeof (loc->l_lname));
253 }
254
255 /*
256 * Assuming we got one, update the cache, and stick us on the list
257 * of loaded locale data. We insert into the head (more recent
258 * use is likely to win.)
259 */
260 if (loc != NULL) {
261 cache_data[category] = loc;
262 if (loc->l_next == NULL) {
263 loc->l_next = cat_data[category];
264 cat_data[category] = loc;
265 }
266 }
267
268 lmutex_unlock(&cache_lock);
269 return (loc);
270 }
271
272 /*
273 * Routine to get the locdata for a given category and locale.
274 * This includes retrieving it from cache, retrieving it from
275 * a file, etc.
276 */
277 static struct locdata *
278 locdata_get(int category, const char *locname)
279 {
280 char scratch[ENCODING_LEN + 1];
281 char *slash;
282 int cnt;
283 int len;
284
285 if (locname == NULL || *locname == 0) {
286 locname = get_locale_env(category);
287 }
288
289 /*
290 * Extract the locale name for the category if it is a composite
291 * locale.
292 */
293 if ((slash = strchr(locname, '/')) != NULL) {
294 for (cnt = category; cnt && slash != NULL; cnt--) {
295 locname = slash + 1;
296 slash = strchr(locname, '/');
297 }
298 if (slash) {
299 len = slash - locname;
300 if (len >= sizeof (scratch)) {
301 len = sizeof (scratch);
302 }
303 } else {
304 len = sizeof (scratch);
305 }
306 (void) strlcpy(scratch, locname, len);
307 locname = scratch;
308 }
309
310 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
311 return (posix_locale.locdata[category]);
312
313 return (locdata_get_cache(category, locname));
314 }
315
316 /* tsd destructor */
317 static void
318 freelocptr(void *arg)
319 {
320 locale_t *locptr = arg;
321 if (*locptr != NULL)
322 freelocale(*locptr);
323 }
324
325 static const char *
326 get_locale_env(int category)
327 {
328 const char *env;
329
330 /* 1. check LC_ALL. */
331 env = getenv(categories[LC_ALL]);
332
333 /* 2. check LC_* */
360 unsigned char
361 __mb_cur_max(void)
362 {
363 return (__mb_cur_max_l(uselocale(NULL)));
364 }
365
366 /*
367 * Public interfaces.
368 */
369
370 locale_t
371 duplocale(locale_t src)
372 {
373 locale_t loc;
374 int i;
375
376 loc = calloc(1, sizeof (*loc));
377 if (loc == NULL) {
378 return (NULL);
379 }
380 if (src == NULL) {
381 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
382 src = ___global_locale;
383 }
384 for (i = 0; i < LC_ALL; i++) {
385 loc->locdata[i] = src->locdata[i];
386 loc->loaded[i] = 0;
387 }
388 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
389 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
390 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
391 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
392 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
393 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
394 loc->time = loc->locdata[LC_TIME]->l_data[0];
395 return (loc);
396 }
397
398 void
399 freelocale(locale_t loc)
400 {
401 /*
402 * We take extra care never to free a saved locale created by
403 * setlocale(). This shouldn't be strictly necessary, but a little
404 * extra safety doesn't hurt here.
405 */
406 if ((loc != &posix_locale) && (loc->next == NULL))
407 free(loc);
408 }
409
410 locale_t
411 newlocale(int catmask, const char *locname, locale_t base)
412 {
413 locale_t loc;
414 int i, e;
415
416 if (catmask & ~(LC_ALL_MASK)) {
417 errno = EINVAL;
418 return (NULL);
419 }
420 /*
421 * Technically passing LC_GLOBAL_LOCALE here is illegal,
422 * but we allow it.
423 */
424 if (base == NULL || base == ___global_locale) {
425 loc = duplocale(___global_locale);
426 } else {
427 loc = base;
428 }
429 if (loc == NULL) {
430 return (NULL);
431 }
432
433 for (i = 0; i < LC_ALL; i++) {
434 struct locdata *ldata;
435 loc->loaded[i] = 0;
436 if (((1 << i) & catmask) == 0) {
437 /* Default to base locale if not overriding */
438 continue;
439 }
440 ldata = locdata_get(i, locname);
441 if (ldata == NULL) {
442 e = errno;
443 freelocale(loc);
444 errno = e;
445 return (NULL);
446 }
447 loc->locdata[i] = ldata;
448 }
449 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
450 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
451 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
452 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
453 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
454 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
455 loc->time = loc->locdata[LC_TIME]->l_data[0];
456 return (loc);
457 }
458
459 locale_t
460 uselocale(locale_t loc)
461 {
462 locale_t lastloc = ___global_locale;
463 locale_t *locptr;
464
465 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
466 /* Should never occur */
467 if (locptr == NULL) {
468 errno = EINVAL;
|