1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
14 */
15
16 /*
17 * This file implements the 2008 newlocale and friends handling.
18 */
19
20 #ifndef _LCONV_C99
21 #define _LCONV_C99
22 #endif
23
24 #include "lint.h"
25 #include <atomic.h>
26 #include <locale.h>
27 #include <sys/types.h>
28 #include <sys/mman.h>
29 #include <errno.h>
30 #include <string.h>
31 #include "libc.h"
32 #include "mtlib.h"
33 #include "tsd.h"
34 #include "localeimpl.h"
35 #include "lctype.h"
36
37 /*
38 * Big Theory of Locales:
39 *
40 * (It is recommended that readers familiarize themselves with the POSIX
41 * 2008 (XPG Issue 7) specifications for locales, first.)
42 *
43 * Historically, we had a bunch of global variables that stored locale
44 * data. While this worked well, it limited applications to a single locale
45 * at a time. This doesn't work well in certain server applications.
46 *
 * In Issue 7, X/Open introduced the concept of a locale_t object, along
 * with versions of functions that can take this object as a parameter,
 * and functions to clone and manipulate these locale objects.  The new
 * functions are named with a _l() suffix.
51 *
 * Additionally, uselocale() is introduced, which can change the locale
 * of a single thread.  However, setlocale() can still be used to change
 * the global locale.
55 *
56 * In our implementation, we use libc's TSD to store the locale data that
57 * was previously global. We still have global data because some applications
58 * have had those global objects compiled into them. (Such applications will
59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 * reimplemented as wrappers that use the appropriate locale object by
61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 * thread-specific locale object if one is present, or the global locale
63 * object otherwise. Note that once the TSD data is set, the only way
64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 * to uselocale().
66 *
67 * We are careful to minimize performance impact of multiple calls to
68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 * As a consequence of this, applications that iterate over all possible
70 * locales will burn through a lot of virtual memory, but we find such
71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 */
73
74 /*
75 * NB: Each of the structures listed herein should have the refcnt
76 * set to -1, to ensure that posix locale information is never ever freed,
77 * even when changing the global locale.
78 */
79
80 typedef struct locdata *(*loadfn_t)(const char *);
81
82 static const loadfn_t loaders[LC_ALL] = {
83 __lc_ctype_load,
84 __lc_numeric_load,
85 __lc_time_load,
86 __lc_collate_load,
87 __lc_monetary_load,
88 __lc_messages_load,
89 };
90
/*
 * Per-category data for the built-in POSIX ("C") locale.  These objects
 * are only declared here; posix_locale below points its per-category
 * convenience fields at them.
 */
extern struct lc_monetary lc_monetary_posix;
extern struct lc_numeric lc_numeric_posix;
extern struct lc_messages lc_messages_posix;
extern struct lc_time lc_time_posix;
extern struct lc_ctype lc_ctype_posix;
extern struct lc_collate lc_collate_posix;
97
98 static struct locale posix_locale = {
99 /* locdata */
100 .locdata = {
101 &__posix_ctype_locdata,
102 &__posix_numeric_locdata,
103 &__posix_time_locdata,
104 &__posix_collate_locdata,
105 &__posix_monetary_locdata,
106 &__posix_messages_locdata,
107 },
108 .ctype = &lc_ctype_posix,
109 .numeric = &lc_numeric_posix,
110 .collate = &lc_collate_posix,
111 .monetary = &lc_monetary_posix,
112 .messages = &lc_messages_posix,
113 .time = &lc_time_posix,
114 .runelocale = &_DefaultRuneLocale,
115 };
116
/*
 * The process-wide locale.  It starts out as the built-in POSIX locale,
 * and is what uselocale() reports for a thread that has no
 * thread-specific locale set.
 */
locale_t __global_locale = &posix_locale;
118
/*
 * Category names for getenv().  Note that the order was modified for
 * Solaris; see <iso/locale_iso.h>.  Entries are indexed by category
 * number, and the final entry is the one get_locale_env() looks up
 * with LC_ALL.
 */
#define	NUM_CATS	7
static char *categories[NUM_CATS] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MONETARY",
	"LC_MESSAGES",
	"LC_ALL",
};
133
/*
 * Prototypes for the file-local helpers defined further down.
 */
static const char *get_locale_env(int);
static struct locdata *locdata_get(int, const char *);
static struct locdata *locdata_get_cache(int, const char *);
static void locdata_set_cache(int, struct locdata *);
141
142 /*
143 * Some utility routines.
144 */
145 struct locdata *
146 __locdata_hold(struct locdata *ld)
147 {
148 if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
149 atomic_inc_32(&ld->l_refcnt);
150 return (ld);
151 }
152
153 void
154 __locdata_release(struct locdata *ld)
155 {
156 if (ld->l_refcnt == (uint32_t)-1)
157 return;
158
159 if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
160 for (int i = 0; i < NLOCDATA; i++)
161 free(ld->l_data[i]);
162 if (ld->l_map && ld->l_map_len) {
163 (void) munmap(ld->l_map, ld->l_map_len);
164 }
165 free(ld);
166 }
167 }
168
169 struct locdata *
170 __locdata_alloc(const char *name, size_t memsz)
171 {
172 struct locdata *ldata;
173
174 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
175 return (NULL);
176 }
177 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
178 free(ldata);
179 errno = ENOMEM;
180 return (NULL);
181 }
182 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
183 ldata->l_refcnt = 1;
184
185 return (ldata);
186 }
187
/*
 * It turns out that for performance reasons we would really like to
 * cache the most recently referenced locale data to avoid wasteful
 * loading from files.
 *
 * cache_data has one slot per category, indexed by category number.
 * locdata_get_cache() and locdata_set_cache() take cache_lock around
 * every access to a slot.
 */

static struct locdata *cache_data[LC_ALL];
static mutex_t cache_lock = DEFAULTMUTEX;
196
197 /*
198 * Returns the cached data if the locale name is the same. If not,
199 * returns NULL (cache miss). The locdata is returned with a hold on
200 * it, taken on behalf of the caller. The caller should drop the hold
201 * when it is finished.
202 */
203 static struct locdata *
204 locdata_get_cache(int category, const char *locname)
205 {
206 struct locdata *loc;
207
208 if (category < 0 || category >= LC_ALL)
209 return (NULL);
210
211 lmutex_lock(&cache_lock);
212 if ((loc = cache_data[category]) != NULL) {
213 if (strcmp(locname, loc->l_lname) == 0) {
214 loc = __locdata_hold(loc);
215 } else {
216 loc = NULL;
217 }
218 }
219 lmutex_unlock(&cache_lock);
220 return (loc);
221 }
222
223 /*
224 * Set the cache for the category to specific content. An additional hold
225 * is taken for the data while it is in the cache, so the caller may drop
226 * its own hold once this is complete. Also, releases the hold on any
227 * previously cached data.
228 */
229 static void
230 locdata_set_cache(int category, struct locdata *loc)
231 {
232 struct locdata *old;
233
234 if (category < 0 || category >= LC_ALL)
235 return;
236
237 lmutex_lock(&cache_lock);
238 old = cache_data[category];
239 cache_data[category] = __locdata_hold(loc);
240 lmutex_unlock(&cache_lock);
241
242 /* drop our reference on the old data */
243 if (old)
244 __locdata_release(old);
245 }
246
247 /*
248 * Routine to get the locdata for a given category and locale.
249 * This includes retrieving it from cache, retrieving it from
250 * a file, etc.
251 */
252 static struct locdata *
253 locdata_get(int category, const char *locname)
254 {
255 struct locdata *ldata;
256 char scratch[ENCODING_LEN + 1];
257 char *slash;
258 int cnt;
259 int len;
260
261 if (locname == NULL || *locname == 0) {
262 locname = get_locale_env(category);
263 }
264
265 /*
266 * Extract the locale name for the category if it is a composite
267 * locale.
268 */
269 if ((slash = strchr(locname, '/')) != NULL) {
270 for (cnt = category; cnt && slash != NULL; cnt--) {
271 locname = slash + 1;
272 slash = strchr(locname, '/');
273 }
274 if (slash) {
275 len = slash - locname;
276 if (len >= sizeof (scratch)) {
277 len = sizeof (scratch);
278 }
279 } else {
280 len = sizeof (scratch);
281 }
282 (void) strlcpy(scratch, locname, len);
283 locname = scratch;
284 }
285
286 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
287 return (__locdata_hold(posix_locale.locdata[category]));
288
289 ldata = locdata_get_cache(category, locname);
290 if (ldata != NULL)
291 return (ldata);
292
293 /* Otherwise load it */
294 ldata = (*loaders[category])(locname);
295 if (ldata != NULL) {
296 locdata_set_cache(category, ldata);
297 }
298 return (ldata);
299 }
300
301 /* tsd destructor */
302 static void
303 freelocptr(void *arg)
304 {
305 locale_t *locptr = arg;
306 if (*locptr != NULL)
307 freelocale(*locptr);
308 }
309
310 static const char *
311 get_locale_env(int category)
312 {
313 const char *env;
314
315 /* 1. check LC_ALL. */
316 env = getenv(categories[LC_ALL]);
317
318 /* 2. check LC_* */
319 if (env == NULL || *env == '\0')
320 env = getenv(categories[category]);
321
322 /* 3. check LANG */
323 if (env == NULL || *env == '\0')
324 env = getenv("LANG");
325
326 /* 4. if none is set, fall to "C" */
327 if (env == NULL || *env == '\0')
328 env = "C";
329
330 return (env);
331 }
332
333
334 /*
335 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
336 * code will continue to use _ctype[520], but we prefer this function as
337 * it is the only way to get thread-specific information.
338 */
339 unsigned char
340 __mb_cur_max_l(locale_t loc)
341 {
342 return (loc->ctype->lc_max_mblen);
343 }
344
345 unsigned char
346 __mb_cur_max(void)
347 {
348 return (__mb_cur_max_l(uselocale(NULL)));
349 }
350
351 /*
352 * Public interfaces.
353 */
354
355 locale_t
356 duplocale(locale_t src)
357 {
358 locale_t loc;
359 int i;
360
361 loc = calloc(1, sizeof (*loc));
362 if (loc == NULL) {
363 return (NULL);
364 }
365 for (i = 0; i < LC_ALL; i++) {
366 loc->locdata[i] = __locdata_hold(src->locdata[i]);
367 loc->loaded[i] = 0;
368 }
369 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
370 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
371 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
372 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
373 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
374 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
375 loc->time = loc->locdata[LC_TIME]->l_data[0];
376 return (loc);
377 }
378
379 void
380 freelocale(locale_t loc)
381 {
382 int i;
383 for (i = 0; i < LC_ALL; i++)
384 __locdata_release(loc->locdata[i]);
385 if (loc != &posix_locale)
386 free(loc);
387 }
388
389 locale_t
390 newlocale(int catmask, const char *locname, locale_t base)
391 {
392 locale_t loc;
393 int i, e;
394
395 if (catmask & ~(LC_ALL_MASK)) {
396 errno = EINVAL;
397 return (NULL);
398 }
399 loc = duplocale(base != NULL ? base : __global_locale);
400 if (loc == NULL) {
401 return (NULL);
402 }
403
404 for (i = 0; i < LC_ALL; i++) {
405 struct locdata *ldata;
406 loc->loaded[i] = 0;
407 if (((1 << i) & catmask) == 0) {
408 /* Default to base locale if not overriding */
409 continue;
410 }
411 ldata = locdata_get(i, locname);
412 if (ldata == NULL) {
413 e = errno;
414 freelocale(loc);
415 errno = e;
416 return (NULL);
417 }
418 __locdata_release(loc->locdata[i]);
419 loc->locdata[i] = ldata;
420 }
421 if (base && base != __global_locale) {
422 freelocale(base);
423 }
424 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
425 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
426 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
427 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
428 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
429 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
430 loc->time = loc->locdata[LC_TIME]->l_data[0];
431 return (loc);
432 }
433
434 locale_t
435 uselocale(locale_t loc)
436 {
437 locale_t lastloc = __global_locale;
438 locale_t *locptr;
439
440 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
441 /* Should never occur */
442 if (locptr == NULL) {
443 errno = EINVAL;
444 return (NULL);
445 }
446
447 if (*locptr != NULL)
448 lastloc = *locptr;
449
450 /* Argument loc is NULL if we are just querying. */
451 if (loc != NULL) {
452 /*
453 * Set it to LC_GLOBAL_LOCAL to return to using
454 * the global locale (setlocale).
455 */
456 if (loc == __global_locale) {
457 *locptr = NULL;
458 } else {
459 /* No validation of the provided locale at present */
460 *locptr = loc;
461 }
462 }
463
464 /*
465 * The caller is responsible for freeing, of course it would be
466 * gross error to call freelocale() on a locale object that is still
467 * in use.
468 */
469 return (lastloc);
470 }