Print this page
Fix compile errors, code review feedback, and add basic libc test suite.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/localeimpl.c
+++ new/usr/src/lib/libc/port/locale/localeimpl.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
14 14 */
15 15
16 16 /*
17 17 * This file implements the 2008 newlocale and friends handling.
18 18 */
19 19
20 20 #ifndef _LCONV_C99
21 21 #define _LCONV_C99
22 22 #endif
23 23
24 24 #include "lint.h"
25 25 #include <atomic.h>
26 26 #include <locale.h>
27 27 #include <sys/types.h>
28 28 #include <sys/mman.h>
29 29 #include <errno.h>
30 30 #include <string.h>
31 31 #include "libc.h"
32 32 #include "mtlib.h"
33 33 #include "tsd.h"
34 34 #include "localeimpl.h"
35 35 #include "lctype.h"
36 36
37 37 /*
38 38 * Big Theory of Locales:
39 39 *
40 40 * (It is recommended that readers familiarize themselves with the POSIX
41 41 * 2008 (XPG Issue 7) specifications for locales, first.)
42 42 *
43 43 * Historically, we had a bunch of global variables that stored locale
44 44 * data. While this worked well, it limited applications to a single locale
45 45 * at a time. This doesn't work well in certain server applications.
46 46 *
47 47 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
48 48 * versions of functions that can take this object as a parameter, along
49 49 * with functions to clone and manipulate these locale objects. The new
50 50 * functions are named with a _l() suffix.
51 51 *
52 52 * Additionally uselocale() is introduced which can change the locale of
53 53 * a single thread. However, setlocale() can still be used to change
54 54 * the global locale.
55 55 *
56 56 * In our implementation, we use libc's TSD to store the locale data that
57 57 * was previously global. We still have global data because some applications
58 58 * have had those global objects compiled into them. (Such applications will
59 59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 60 * reimplemented as wrappers that use the appropriate locale object by
61 61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 62 * thread-specific locale object if one is present, or the global locale
63 63 * object otherwise. Note that once the TSD data is set, the only way
64 64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
65 65 * to uselocale().
66 66 *
67 67 * We are careful to minimize performance impact of multiple calls to
68 68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 69 * As a consequence of this, applications that iterate over all possible
70 70 * locales will burn through a lot of virtual memory, but we find such
71 71 * applications rare. (locale -a might be an exception, but it is short lived.)
72 72 */
73 73
74 74 /*
75 75 * NB: Each of the structures listed herein should have the refcnt
76 76 * set to -1, to ensure that posix locale information is never ever freed,
77 77 * even when changing the global locale.
78 78 */
79 79
80 80 typedef struct locdata *(*loadfn_t)(const char *);
81 81
82 82 static const loadfn_t loaders[LC_ALL] = {
83 83 __lc_ctype_load,
84 84 __lc_numeric_load,
85 85 __lc_time_load,
86 86 __lc_collate_load,
87 87 __lc_monetary_load,
88 88 __lc_messages_load,
89 89 };
90 90
91 91 extern struct lc_monetary lc_monetary_posix;
92 92 extern struct lc_numeric lc_numeric_posix;
93 93 extern struct lc_messages lc_messages_posix;
94 94 extern struct lc_time lc_time_posix;
95 95 extern struct lc_ctype lc_ctype_posix;
96 96 extern struct lc_collate lc_collate_posix;
97 97
98 98 static struct locale posix_locale = {
99 99 /* locdata */
100 100 .locdata = {
101 101 &__posix_ctype_locdata,
102 102 &__posix_numeric_locdata,
103 103 &__posix_time_locdata,
104 104 &__posix_collate_locdata,
105 105 &__posix_monetary_locdata,
106 106 &__posix_messages_locdata,
↓ open down ↓ |
106 lines elided |
↑ open up ↑ |
107 107 },
108 108 .ctype = &lc_ctype_posix,
109 109 .numeric = &lc_numeric_posix,
110 110 .collate = &lc_collate_posix,
111 111 .monetary = &lc_monetary_posix,
112 112 .messages = &lc_messages_posix,
113 113 .time = &lc_time_posix,
114 114 .runelocale = &_DefaultRuneLocale,
115 115 };
116 116
117 -locale_t __global_locale = &posix_locale;
117 +locale_t ___global_locale = &posix_locale;
118 118
119 +locale_t
120 +__global_locale(void)
121 +{
122 + return (___global_locale);
123 +}
124 +
/*
 * Environment variable name for each locale category, indexed by the
 * LC_* category number and used for getenv() lookups.  Note that this
 * list was modified for Solaris.  See <iso/locale_iso.h>.
 */
#define	NUM_CATS	7
static char *categories[NUM_CATS] = {
	[LC_CTYPE] = "LC_CTYPE",
	[LC_NUMERIC] = "LC_NUMERIC",
	[LC_TIME] = "LC_TIME",
	[LC_COLLATE] = "LC_COLLATE",
	[LC_MONETARY] = "LC_MONETARY",
	[LC_MESSAGES] = "LC_MESSAGES",
	[LC_ALL] = "LC_ALL",
};
133 139
/*
 * Prototypes for the file-local helpers defined further down.
 *
 * locdata_get() previously declared its second parameter as
 * "const const char *"; the duplicated qualifier is redundant and has
 * been removed so the prototype matches the definition exactly.
 */
static const char *get_locale_env(int);
static struct locdata *locdata_get(int, const char *);
static struct locdata *locdata_get_cache(int, const char *);
static void locdata_set_cache(int, struct locdata *);
141 147
142 148 /*
143 149 * Some utility routines.
144 150 */
145 151 struct locdata *
146 152 __locdata_hold(struct locdata *ld)
147 153 {
148 154 if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
149 155 atomic_inc_32(&ld->l_refcnt);
150 156 return (ld);
151 157 }
152 158
153 159 void
154 160 __locdata_release(struct locdata *ld)
155 161 {
156 162 if (ld->l_refcnt == (uint32_t)-1)
157 163 return;
158 164
159 165 if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
160 166 for (int i = 0; i < NLOCDATA; i++)
161 167 free(ld->l_data[i]);
162 168 if (ld->l_map && ld->l_map_len) {
163 169 (void) munmap(ld->l_map, ld->l_map_len);
164 170 }
165 171 free(ld);
166 172 }
167 173 }
168 174
169 175 struct locdata *
170 176 __locdata_alloc(const char *name, size_t memsz)
171 177 {
172 178 struct locdata *ldata;
173 179
174 180 if ((ldata = calloc(1, sizeof (*ldata))) == NULL) {
175 181 return (NULL);
176 182 }
177 183 if ((ldata->l_data[0] = calloc(1, memsz)) == NULL) {
178 184 free(ldata);
179 185 errno = ENOMEM;
180 186 return (NULL);
181 187 }
182 188 (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
183 189 ldata->l_refcnt = 1;
184 190
185 191 return (ldata);
186 192 }
187 193
188 194 /*
189 195 * It turns out that for performance reasons we would really like to
190 196 * cache the most recently referenced locale data to avoid wasteful
191 197 * loading from files.
192 198 */
193 199
194 200 static struct locdata *cache_data[LC_ALL];
195 201 static mutex_t cache_lock = DEFAULTMUTEX;
196 202
197 203 /*
198 204 * Returns the cached data if the locale name is the same. If not,
199 205 * returns NULL (cache miss). The locdata is returned with a hold on
200 206 * it, taken on behalf of the caller. The caller should drop the hold
201 207 * when it is finished.
202 208 */
203 209 static struct locdata *
204 210 locdata_get_cache(int category, const char *locname)
205 211 {
206 212 struct locdata *loc;
207 213
208 214 if (category < 0 || category >= LC_ALL)
209 215 return (NULL);
210 216
211 217 lmutex_lock(&cache_lock);
212 218 if ((loc = cache_data[category]) != NULL) {
213 219 if (strcmp(locname, loc->l_lname) == 0) {
214 220 loc = __locdata_hold(loc);
215 221 } else {
216 222 loc = NULL;
217 223 }
218 224 }
219 225 lmutex_unlock(&cache_lock);
220 226 return (loc);
221 227 }
222 228
223 229 /*
224 230 * Set the cache for the category to specific content. An additional hold
225 231 * is taken for the data while it is in the cache, so the caller may drop
226 232 * its own hold once this is complete. Also, releases the hold on any
227 233 * previously cached data.
228 234 */
229 235 static void
230 236 locdata_set_cache(int category, struct locdata *loc)
231 237 {
232 238 struct locdata *old;
233 239
234 240 if (category < 0 || category >= LC_ALL)
235 241 return;
236 242
237 243 lmutex_lock(&cache_lock);
238 244 old = cache_data[category];
239 245 cache_data[category] = __locdata_hold(loc);
240 246 lmutex_unlock(&cache_lock);
241 247
242 248 /* drop our reference on the old data */
243 249 if (old)
244 250 __locdata_release(old);
245 251 }
246 252
247 253 /*
248 254 * Routine to get the locdata for a given category and locale.
249 255 * This includes retrieving it from cache, retrieving it from
250 256 * a file, etc.
251 257 */
252 258 static struct locdata *
253 259 locdata_get(int category, const char *locname)
254 260 {
255 261 struct locdata *ldata;
256 262 char scratch[ENCODING_LEN + 1];
257 263 char *slash;
258 264 int cnt;
259 265 int len;
260 266
261 267 if (locname == NULL || *locname == 0) {
262 268 locname = get_locale_env(category);
263 269 }
264 270
265 271 /*
266 272 * Extract the locale name for the category if it is a composite
267 273 * locale.
268 274 */
269 275 if ((slash = strchr(locname, '/')) != NULL) {
270 276 for (cnt = category; cnt && slash != NULL; cnt--) {
271 277 locname = slash + 1;
272 278 slash = strchr(locname, '/');
273 279 }
274 280 if (slash) {
275 281 len = slash - locname;
276 282 if (len >= sizeof (scratch)) {
277 283 len = sizeof (scratch);
278 284 }
279 285 } else {
280 286 len = sizeof (scratch);
281 287 }
282 288 (void) strlcpy(scratch, locname, len);
283 289 locname = scratch;
284 290 }
285 291
286 292 if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
287 293 return (__locdata_hold(posix_locale.locdata[category]));
288 294
289 295 ldata = locdata_get_cache(category, locname);
290 296 if (ldata != NULL)
291 297 return (ldata);
292 298
293 299 /* Otherwise load it */
294 300 ldata = (*loaders[category])(locname);
295 301 if (ldata != NULL) {
296 302 locdata_set_cache(category, ldata);
297 303 }
298 304 return (ldata);
299 305 }
300 306
301 307 /* tsd destructor */
302 308 static void
303 309 freelocptr(void *arg)
304 310 {
305 311 locale_t *locptr = arg;
306 312 if (*locptr != NULL)
307 313 freelocale(*locptr);
308 314 }
309 315
310 316 static const char *
311 317 get_locale_env(int category)
312 318 {
313 319 const char *env;
314 320
315 321 /* 1. check LC_ALL. */
316 322 env = getenv(categories[LC_ALL]);
317 323
318 324 /* 2. check LC_* */
319 325 if (env == NULL || *env == '\0')
320 326 env = getenv(categories[category]);
321 327
322 328 /* 3. check LANG */
323 329 if (env == NULL || *env == '\0')
324 330 env = getenv("LANG");
325 331
326 332 /* 4. if none is set, fall to "C" */
327 333 if (env == NULL || *env == '\0')
328 334 env = "C";
329 335
330 336 return (env);
331 337 }
332 338
333 339
334 340 /*
335 341 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
336 342 * code will continue to use _ctype[520], but we prefer this function as
337 343 * it is the only way to get thread-specific information.
338 344 */
339 345 unsigned char
340 346 __mb_cur_max_l(locale_t loc)
341 347 {
342 348 return (loc->ctype->lc_max_mblen);
343 349 }
344 350
345 351 unsigned char
346 352 __mb_cur_max(void)
347 353 {
348 354 return (__mb_cur_max_l(uselocale(NULL)));
349 355 }
350 356
351 357 /*
352 358 * Public interfaces.
353 359 */
354 360
355 361 locale_t
356 362 duplocale(locale_t src)
357 363 {
358 364 locale_t loc;
359 365 int i;
360 366
361 367 loc = calloc(1, sizeof (*loc));
362 368 if (loc == NULL) {
363 369 return (NULL);
364 370 }
365 371 for (i = 0; i < LC_ALL; i++) {
366 372 loc->locdata[i] = __locdata_hold(src->locdata[i]);
367 373 loc->loaded[i] = 0;
368 374 }
369 375 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
370 376 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
371 377 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
372 378 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
373 379 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
374 380 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
375 381 loc->time = loc->locdata[LC_TIME]->l_data[0];
376 382 return (loc);
377 383 }
378 384
379 385 void
380 386 freelocale(locale_t loc)
381 387 {
382 388 int i;
383 389 for (i = 0; i < LC_ALL; i++)
384 390 __locdata_release(loc->locdata[i]);
385 391 if (loc != &posix_locale)
386 392 free(loc);
387 393 }
388 394
↓ open down ↓ |
260 lines elided |
↑ open up ↑ |
389 395 locale_t
390 396 newlocale(int catmask, const char *locname, locale_t base)
391 397 {
392 398 locale_t loc;
393 399 int i, e;
394 400
395 401 if (catmask & ~(LC_ALL_MASK)) {
396 402 errno = EINVAL;
397 403 return (NULL);
398 404 }
399 - loc = duplocale(base != NULL ? base : __global_locale);
405 + loc = duplocale(base != NULL ? base : ___global_locale);
400 406 if (loc == NULL) {
401 407 return (NULL);
402 408 }
403 409
404 410 for (i = 0; i < LC_ALL; i++) {
405 411 struct locdata *ldata;
406 412 loc->loaded[i] = 0;
407 413 if (((1 << i) & catmask) == 0) {
408 414 /* Default to base locale if not overriding */
409 415 continue;
410 416 }
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
411 417 ldata = locdata_get(i, locname);
412 418 if (ldata == NULL) {
413 419 e = errno;
414 420 freelocale(loc);
415 421 errno = e;
416 422 return (NULL);
417 423 }
418 424 __locdata_release(loc->locdata[i]);
419 425 loc->locdata[i] = ldata;
420 426 }
421 - if (base && base != __global_locale) {
427 + if (base && base != ___global_locale) {
422 428 freelocale(base);
423 429 }
424 430 loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
425 431 loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
426 432 loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
427 433 loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
428 434 loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];
429 435 loc->numeric = loc->locdata[LC_NUMERIC]->l_data[0];
430 436 loc->time = loc->locdata[LC_TIME]->l_data[0];
431 437 return (loc);
432 438 }
433 439
434 440 locale_t
435 441 uselocale(locale_t loc)
436 442 {
437 - locale_t lastloc = __global_locale;
443 + locale_t lastloc = ___global_locale;
438 444 locale_t *locptr;
439 445
440 446 locptr = tsdalloc(_T_SETLOCALE, sizeof (locale_t), freelocptr);
441 447 /* Should never occur */
442 448 if (locptr == NULL) {
443 449 errno = EINVAL;
444 450 return (NULL);
445 451 }
446 452
447 453 if (*locptr != NULL)
448 454 lastloc = *locptr;
449 455
450 456 /* Argument loc is NULL if we are just querying. */
451 457 if (loc != NULL) {
452 458 /*
453 459 * Set it to LC_GLOBAL_LOCAL to return to using
454 460 * the global locale (setlocale).
455 461 */
456 - if (loc == __global_locale) {
462 + if (loc == ___global_locale) {
457 463 *locptr = NULL;
458 464 } else {
459 465 /* No validation of the provided locale at present */
460 466 *locptr = loc;
461 467 }
462 468 }
463 469
464 470 /*
465 471 * The caller is responsible for freeing, of course it would be
466 472 * gross error to call freelocale() on a locale object that is still
467 473 * in use.
468 474 */
469 475 return (lastloc);
470 476 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX