1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
14 */
15
16 /*
17 * This file implements the 2008 newlocale and friends handling.
18 */
19
20 #ifndef _LCONV_C99
21 #define _LCONV_C99
22 #endif
23
24 #include "lint.h"
25 #include <atomic.h>
26 #include <locale.h>
27 #include <sys/types.h>
28 #include <sys/mman.h>
29 #include <errno.h>
30 #include <string.h>
31 #include "libc.h"
32 #include "mtlib.h"
33 #include "tsd.h"
34 #include "localeimpl.h"
35 #include "lctype.h"
36
37 /*
38 * Big Theory of Locales:
39 *
40 * (It is recommended that readers familiarize themselves with the POSIX
41 * 2008 (XPG Issue 7) specifications for locales, first.)
42 *
43 * Historically, we had a bunch of global variables that stored locale
44 * data. While this worked well, it limited applications to a single locale
45 * at a time. This doesn't work well in certain server applications.
46 *
 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
48 * versions of functions that can take this object as a parameter, along
49 * with functions to clone and manipulate these locale objects. The new
50 * functions are named with a _l() suffix.
51 *
 * Additionally uselocale() is introduced which can change the locale
 * of a single thread.  However, setlocale() can still be used to change
54 * the global locale.
55 *
56 * In our implementation, we use libc's TSD to store the locale data that
57 * was previously global. We still have global data because some applications
58 * have had those global objects compiled into them. (Such applications will
59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 * reimplemented as wrappers that use the appropriate locale object by
61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 * thread-specific locale object if one is present, or the global locale
63 * object otherwise. Note that once the TSD data is set, the only way
64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 * to uselocale().
66 *
67 * We are careful to minimize performance impact of multiple calls to
68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 * As a consequence of this, applications that iterate over all possible
70 * locales will burn through a lot of virtual memory, but we find such
71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 *
73 * Category data is never released (although enclosing locale objects might be),
74 * in order to guarantee thread-safety. Calling freelocale() on an object
75 * while it is in use by another thread is a programmer error (use-after-free)
76 * and we don't bother to note it further.
77 *
78 * Locale objects (global locales) established by setlocale() are also
79 * never freed (for MT safety), but we will save previous locale objects
80 * and reuse them when we can.
81 */
82
83 typedef struct locdata *(*loadfn_t)(const char *);
84
85 static const loadfn_t loaders[LC_ALL] = {
86 __lc_ctype_load,
87 __lc_numeric_load,
88 __lc_time_load,
89 __lc_collate_load,
90 __lc_monetary_load,
91 __lc_messages_load,
92 };
93
/*
 * Per-category data for the built-in POSIX locale.  These objects are
 * defined outside this file; posix_locale below points at them.
 */
extern struct lc_ctype lc_ctype_posix;
extern struct lc_numeric lc_numeric_posix;
extern struct lc_time lc_time_posix;
extern struct lc_collate lc_collate_posix;
extern struct lc_monetary lc_monetary_posix;
extern struct lc_messages lc_messages_posix;
100
101 static struct locale posix_locale = {
102 /* locdata */
103 .locdata = {
104 &__posix_ctype_locdata,
105 &__posix_numeric_locdata,
106 &__posix_time_locdata,
107 &__posix_collate_locdata,
108 &__posix_monetary_locdata,
109 &__posix_messages_locdata,
110 },
111 .locname = "C",
112 .ctype = &lc_ctype_posix,
113 .numeric = &lc_numeric_posix,
114 .collate = &lc_collate_posix,
115 .monetary = &lc_monetary_posix,
116 .messages = &lc_messages_posix,
117 .time = &lc_time_posix,
118 .runelocale = &_DefaultRuneLocale,
119 };
120
121 locale_t ___global_locale = &posix_locale;
122
123 locale_t
124 __global_locale(void)
125 {
126 return (___global_locale);
127 }
128
/*
 * Environment variable names consulted via getenv(), indexed by category
 * number, with LC_ALL in the final slot.  Note that the ordering was
 * modified for Solaris.  See <iso/locale_iso.h>.
 */
#define	NUM_CATS	7
static char *categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
143
144 /*
145 * Prototypes.
146 */
147 static const char *get_locale_env(int);
148 static struct locdata *locdata_get(int, const const char *);
149 static struct locdata *locdata_get_cache(int, const char *);
150 static locale_t mklocname(locale_t);
151
152 /*
153 * Some utility routines.
154 */
155
156 struct locdata *
157 __locdata_alloc(const char *name, size_t memsz)
158 {
159 struct locdata *ldata;
160
161 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
162 return (NULL);
163 }
164 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
165 free(ldata);
166 errno = ENOMEM;
167 return (NULL);
168 }
169 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
170
171 return (ldata);
172 }
173
174 /*
175 * Normally we never free locale data truly, but if we failed to load it
176 * for some reason, this routine is used to cleanup the partial mess.
177 */
178 void
179 __locdata_free(struct locdata *ldata)
180 {
181 for (int i = 0; i < NLOCDATA; i++)
182 free(ldata->l_data[i]);
183 if (ldata->l_map != NULL && ldata->l_map_len)
184 (void) munmap(ldata->l_map, ldata->l_map_len);
185 free(ldata);
186 }
187
188 /*
189 * It turns out that for performance reasons we would really like to
190 * cache the most recently referenced locale data to avoid wasteful
191 * loading from files.
192 */
193
194 static struct locdata *cache_data[LC_ALL];
195 static struct locdata *cat_data[LC_ALL];
196 static mutex_t cache_lock = DEFAULTMUTEX;
197
198 /*
199 * Returns the cached data if the locale name is the same. If not,
200 * returns NULL (cache miss). The locdata is returned with a hold on
201 * it, taken on behalf of the caller. The caller should drop the hold
202 * when it is finished.
203 */
204 static struct locdata *
205 locdata_get_cache(int category, const char *locname)
206 {
207 struct locdata *loc;
208
209 if (category < 0 || category >= LC_ALL)
210 return (NULL);
211
212 /* Try cache first. */
213 lmutex_lock(&cache_lock);
214 loc = cache_data[category];
215
216 if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
217 lmutex_unlock(&cache_lock);
218 return (loc);
219 }
220
221 /*
222 * Failing that try previously loaded locales (linear search) --
223 * this could be optimized to a hash, but its unlikely that a single
224 * application will ever need to work with more than a few locales.
225 */
226 for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
227 if (strcmp(locname, loc->l_lname) == 0) {
228 break;
229 }
230 }
231
232 /*
233 * Finally, if we still don't have one, try loading the locale
234 * data from the actual on-disk data.
235 *
236 * We drop the lock (libc wants to ensure no internal locks
237 * are held when we call other routines required to read from
238 * files, allocate memory, etc.) There is a small race here,
239 * but the consequences of the race are benign -- if multiple
240 * threads hit this at precisely the same point, we could
241 * wind up with duplicates of the locale data in the cache.
242 *
243 * This wastes the memory for an extra copy of the locale
244 * data, but there is no further harm beyond that. Its not
245 * worth the effort to recode this to something "safe"
246 * (which would require rescanning the list, etc.), given
247 * that this race will probably never actually occur.
248 */
249 if (loc == NULL) {
250 lmutex_unlock(&cache_lock);
251 loc = (*loaders[category])(locname);
252 lmutex_lock(&cache_lock);
253 (void) strlcpy(loc->l_lname, locname, sizeof (loc->l_lname));
254 }
255
256 /*
257 * Assuming we got one, update the cache, and stick us on the list
258 * of loaded locale data. We insert into the head (more recent
259 * use is likely to win.)
260 */
261 if (loc != NULL) {
262 cache_data[category] = loc;
263 if (loc->l_next == NULL) {
264 loc->l_next = cat_data[category];
265 cat_data[category] = loc;
266 }
267 }
268
269 lmutex_unlock(&cache_lock);
270 return (loc);
271 }
272
273 /*
274 * Routine to get the locdata for a given category and locale.
275 * This includes retrieving it from cache, retrieving it from
276 * a file, etc.
277 */
278 static struct locdata *
279 locdata_get(int category, const char *locname)
280 {
281 char scratch[ENCODING_LEN + 1];
282 char *slash;
283 int cnt;
284 int len;
285
286 if (locname == NULL || *locname == 0) {
287 locname = get_locale_env(category);
288 }
289
290 /*
291 * Extract the locale name for the category if it is a composite
292 * locale.
293 */
294 if ((slash = strchr(locname, '/')) != NULL) {
295 for (cnt = category; cnt && slash != NULL; cnt--) {
296 locname = slash + 1;
297 slash = strchr(locname, '/');
298 }
299 if (slash) {
300 len = slash - locname;
301 if (len >= sizeof (scratch)) {
302 len = sizeof (scratch);
303 }
304 } else {
305 len = sizeof (scratch);
306 }
307 (void) strlcpy(scratch, locname, len);
308 locname = scratch;
309 }
310
311 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
312 return (posix_locale.locdata[category]);
313
314 return (locdata_get_cache(category, locname));
315 }
316
317 /* tsd destructor */
318 static void
319 freelocptr(void *arg)
320 {
321 locale_t *locptr = arg;
322 if (*locptr != NULL)
323 freelocale(*locptr);
324 }
325
326 static const char *
327 get_locale_env(int category)
328 {
329 const char *env;
330
331 /* 1. check LC_ALL. */
332 env = getenv(categories[LC_ALL]);
333
334 /* 2. check LC_* */
335 if (env == NULL || *env == '\0')
336 env = getenv(categories[category]);
337
338 /* 3. check LANG */
339 if (env == NULL || *env == '\0')
340 env = getenv("LANG");
341
342 /* 4. if none is set, fall to "C" */
343 if (env == NULL || *env == '\0')
344 env = "C";
345
346 return (env);
347 }
348
349
350 /*
351 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
352 * code will continue to use _ctype[520], but we prefer this function as
353 * it is the only way to get thread-specific information.
354 */
355 unsigned char
356 __mb_cur_max_l(locale_t loc)
357 {
358 return (loc->ctype->lc_max_mblen);
359 }
360
361 unsigned char
362 __mb_cur_max(void)
363 {
364 return (__mb_cur_max_l(uselocale(NULL)));
365 }
366
367 /*
368 * Public interfaces.
369 */
370
371 locale_t
372 duplocale(locale_t src)
373 {
374 locale_t loc;
375 int i;
376
377 loc = calloc(1, sizeof (*loc));
378 if (loc == NULL) {
379 return (NULL);
380 }
381 if (src == NULL) {
382 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
383 src = ___global_locale;
384 }
385 for (i = 0; i < LC_ALL; i++) {
386 loc->locdata[i] = src->locdata[i];
387 loc->loaded[i] = 0;
388 }
389 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
390 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
391 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
392 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
393 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
394 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
395 loc->time = loc->locdata[LC_TIME]->l_data[0];
396 return (loc);
397 }
398
399 void
400 freelocale(locale_t loc)
401 {
402 /*
403 * We take extra care never to free a saved locale created by
404 * setlocale(). This shouldn't be strictly necessary, but a little
405 * extra safety doesn't hurt here.
406 */
407 if ((loc != &posix_locale) && (!loc->on_list))
408 free(loc);
409 }
410
411 locale_t
412 newlocale(int catmask, const char *locname, locale_t base)
413 {
414 locale_t loc;
415 int i, e;
416
417 if (catmask & ~(LC_ALL_MASK)) {
418 errno = EINVAL;
419 return (NULL);
420 }
421
422 /*
423 * Technically passing LC_GLOBAL_LOCALE here is illegal,
424 * but we allow it.
425 */
426 if (base == NULL || base == ___global_locale) {
427 loc = duplocale(___global_locale);
428 } else {
429 loc = base;
430 }
431 if (loc == NULL) {
432 return (NULL);
433 }
434
435 for (i = 0; i < LC_ALL; i++) {
436 struct locdata *ldata;
437 loc->loaded[i] = 0;
438 if (((1 << i) & catmask) == 0) {
439 /* Default to base locale if not overriding */
440 continue;
441 }
442 ldata = locdata_get(i, locname);
443 if (ldata == NULL) {
444 e = errno;
445 freelocale(loc);
446 errno = e;
447 return (NULL);
448 }
449 loc->locdata[i] = ldata;
450 }
451 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
452 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
453 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
454 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
455 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
456 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
457 loc->time = loc->locdata[LC_TIME]->l_data[0];
458 return (mklocname(loc));
459 }
460
461 locale_t
462 uselocale(locale_t loc)
463 {
464 locale_t lastloc = ___global_locale;
465 locale_t *locptr;
466
467 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
468 /* Should never occur */
469 if (locptr == NULL) {
470 errno = EINVAL;
471 return (NULL);
472 }
473
474 if (*locptr != NULL)
475 lastloc = *locptr;
476
477 /* Argument loc is NULL if we are just querying. */
478 if (loc != NULL) {
479 /*
480 * Set it to LC_GLOBAL_LOCAL to return to using
481 * the global locale (setlocale).
482 */
483 if (loc == ___global_locale) {
484 *locptr = NULL;
485 } else {
486 /* No validation of the provided locale at present */
487 *locptr = loc;
488 }
489 }
490
491 /*
492 * The caller is responsible for freeing, of course it would be
493 * gross error to call freelocale() on a locale object that is still
494 * in use.
495 */
496 return (lastloc);
497 }
498
499 static locale_t
500 mklocname(locale_t loc)
501 {
502 int composite = 0;
503
504 /* Look to see if any category is different */
505 for (int i = 1; i < LC_ALL; ++i) {
506 if (strcmp(loc->locdata[0]->l_lname,
507 loc->locdata[i]->l_lname) != 0) {
508 composite = 1;
509 break;
510 }
511 }
512
513 if (composite) {
514 /*
515 * Note ordering of these follows the numeric order,
516 * if the order is changed, then setlocale() will need
517 * to be changed as well.
518 */
519 (void) snprintf(loc->locname, sizeof (loc->locname),
520 "%s/%s/%s/%s/%s/%s",
521 loc->locdata[LC_CTYPE]->l_lname,
522 loc->locdata[LC_NUMERIC]->l_lname,
523 loc->locdata[LC_TIME]->l_lname,
524 loc->locdata[LC_COLLATE]->l_lname,
525 loc->locdata[LC_MONETARY]->l_lname,
526 loc->locdata[LC_MESSAGES]->l_lname);
527 } else {
528 (void) strlcpy(loc->locname, loc->locdata[LC_CTYPE]->l_lname,
529 sizeof (loc->locname));
530 }
531 return (loc);
532 }