Print this page
Thread safety fixes.

@@ -67,18 +67,21 @@
  * We are careful to minimize performance impact of multiple calls to
  * uselocale() or setlocale() by using a cache of locale data whenever possible.
  * As a consequence of this, applications that iterate over all possible
  * locales will burn through a lot of virtual memory, but we find such
  * applications rare.  (locale -a might be an exception, but it is short lived.)
+ *
+ * Category data is never released (although enclosing locale objects might be),
+ * in order to guarantee thread-safety.  Calling freelocale() on an object
+ * while it is in use by another thread is a programmer error (use-after-free)
+ * and we don't bother to note it further.
+ *
+ * Locale objects (global locales) established by setlocale() are also
+ * never freed (for MT safety), but we will save previous locale objects
+ * and reuse them when we can.
  */
 
-/*
- * NB: Each of the structures listed herein should have the refcnt
- * set to -1, to ensure that posix locale information is never ever freed,
- * even when changing the global locale.
- */
-
 typedef struct locdata *(*loadfn_t)(const char *);
 
 static const loadfn_t loaders[LC_ALL] = {
         __lc_ctype_load,
         __lc_numeric_load,

@@ -141,39 +144,15 @@
  * Prototypes.
  */
 static const char *get_locale_env(int);
 static struct locdata *locdata_get(int, const const char *);
 static struct locdata *locdata_get_cache(int, const char *);
-static void locdata_set_cache(int, struct locdata *);
 
 /*
  * Some utility routines.
  */
-struct locdata *
-__locdata_hold(struct locdata *ld)
-{
-        if (ld != NULL && ld->l_refcnt != (uint32_t)-1)
-                atomic_inc_32(&ld->l_refcnt);
-        return (ld);
-}
 
-void
-__locdata_release(struct locdata *ld)
-{
-        if (ld->l_refcnt == (uint32_t)-1)
-                return;
-
-        if (atomic_dec_32_nv(&ld->l_refcnt) == 0) {
-                for (int i = 0; i < NLOCDATA; i++)
-                        free(ld->l_data[i]);
-                if (ld->l_map && ld->l_map_len) {
-                        (void) munmap(ld->l_map, ld->l_map_len);
-                }
-                free(ld);
-        }
-}
-
 struct locdata *
 __locdata_alloc(const char *name, size_t memsz)
 {
         struct locdata *ldata;
 

@@ -184,22 +163,36 @@
                 free(ldata);
                 errno = ENOMEM;
                 return (NULL);
         }
         (void) strlcpy(ldata->l_lname, name, sizeof (ldata->l_lname));
-        ldata->l_refcnt = 1;
 
         return (ldata);
 }
 
 /*
+ * Normally we never truly free locale data, but if we failed to load it
+ * for some reason, this routine is used to clean up the partial mess.
+ */
+void
+__locdata_free(struct locdata *ldata)
+{
+        for (int i = 0; i < NLOCDATA; i++)
+                free(ldata->l_data[i]);
+        if (ldata->l_map != NULL && ldata->l_map_len)
+                (void) munmap(ldata->l_map, ldata->l_map_len);
+        free(ldata);
+}
+
+/*
  * It turns out that for performance reasons we would really like to
  * cache the most recently referenced locale data to avoid wasteful
  * loading from files.
  */
 
 static struct locdata *cache_data[LC_ALL];
+static struct locdata *cat_data[LC_ALL];
 static mutex_t cache_lock = DEFAULTMUTEX;
 
 /*
  * Returns the cached data if the locale name is the same.  If not,
  * returns NULL (cache miss).  The locdata is returned with a hold on

@@ -212,44 +205,70 @@
         struct locdata *loc;
 
         if (category < 0 || category >= LC_ALL)
                 return (NULL);
 
+        /* Try cache first. */
         lmutex_lock(&cache_lock);
-        if ((loc = cache_data[category]) != NULL) {
+        loc = cache_data[category];
+
+        if ((loc != NULL) && (strcmp(loc->l_lname, locname) == 0)) {
+                lmutex_unlock(&cache_lock);
+                return (loc);
+        }
+
+        /*
+         * Failing that try previously loaded locales (linear search) --
+         * this could be optimized to a hash, but it's unlikely that a single
+         * application will ever need to work with more than a few locales.
+         */
+                for (loc = cat_data[category]; loc != NULL; loc = loc->l_next) {
                 if (strcmp(locname, loc->l_lname) == 0) {
-                        loc = __locdata_hold(loc);
-                } else {
-                        loc = NULL;
+                                break;
                 }
         }
+
+                /*
+                 * Finally, if we still don't have one, try loading the locale
+                 * data from the actual on-disk data.
+                 *
+                 * We drop the lock (libc wants to ensure no internal locks
+                 * are held when we call other routines required to read from
+                 * files, allocate memory, etc.)  There is a small race here,
+                 * but the consequences of the race are benign -- if multiple
+                 * threads hit this at precisely the same point, we could
+                 * wind up with duplicates of the locale data in the cache.
+                 *
+                 * This wastes the memory for an extra copy of the locale
+                 * data, but there is no further harm beyond that.  It's not
+                 * worth the effort to recode this to something "safe"
+                 * (which would require rescanning the list, etc.), given
+                 * that this race will probably never actually occur.
+                 */
+                if (loc == NULL) {
         lmutex_unlock(&cache_lock);
-        return (loc);
-}
+                        loc = (*loaders[category])(locname);
+                        lmutex_lock(&cache_lock);
+                        (void) strlcpy(loc->l_lname, locname,
+                            sizeof (loc->l_lname));
+                }
 
-/*
- * Set the cache for the category to specific content.  An additional hold
- * is taken for the data while it is in the cache, so the caller may drop
- * its own hold once this is complete.  Also, releases the hold on any
- * previously cached data.
+        /*
+         * Assuming we got one, update the cache, and put it on the list
+         * of loaded locale data.  We insert at the head (more recent
+         * use is likely to win).
  */
-static void
-locdata_set_cache(int category, struct locdata *loc)
-{
-        struct locdata *old;
+        if (loc != NULL) {
+                cache_data[category] = loc;
+                if (loc->l_next == NULL) {
+                        loc->l_next = cat_data[category];
+                        cat_data[category] = loc;
+                }
+        }
 
-        if (category < 0 || category >= LC_ALL)
-                return;
-
-        lmutex_lock(&cache_lock);
-        old = cache_data[category];
-        cache_data[category] = __locdata_hold(loc);
         lmutex_unlock(&cache_lock);
-
-        /* drop our reference on the old data */
-        if (old)
-                __locdata_release(old);
+        return (loc);
 }
 
 /*
  * Routine to get the locdata for a given category and locale.
  * This includes retrieving it from cache, retrieving it from

@@ -256,11 +275,10 @@
  * a file, etc.
  */
 static struct locdata *
 locdata_get(int category, const char *locname)
 {
-        struct locdata *ldata;
         char scratch[ENCODING_LEN + 1];
         char *slash;
         int cnt;
         int len;
 

@@ -288,22 +306,13 @@
                 (void) strlcpy(scratch, locname, len);
                 locname = scratch;
         }
 
         if ((strcmp(locname, "C") == 0) || (strcmp(locname, "POSIX") == 0))
-                return (__locdata_hold(posix_locale.locdata[category]));
+                return (posix_locale.locdata[category]);
 
-        ldata = locdata_get_cache(category, locname);
-        if (ldata != NULL)
-                return (ldata);
-
-        /* Otherwise load it */
-        ldata = (*loaders[category])(locname);
-        if (ldata != NULL) {
-                locdata_set_cache(category, ldata);
-        }
-        return (ldata);
+        return (locdata_get_cache(category, locname));
 }
 
 /* tsd destructor */
 static void
 freelocptr(void *arg)

@@ -366,12 +375,16 @@
 
         loc = calloc(1, sizeof (*loc));
         if (loc == NULL) {
                 return (NULL);
         }
+        if (src == NULL) {
+                /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
+                src = ___global_locale;
+        }
         for (i = 0; i < LC_ALL; i++) {
-                loc->locdata[i] = __locdata_hold(src->locdata[i]);
+                loc->locdata[i] = src->locdata[i];
                 loc->loaded[i] = 0;
         }
         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];

@@ -383,14 +396,16 @@
 }
 
 void
 freelocale(locale_t loc)
 {
-        int i;
-        for (i = 0; i < LC_ALL; i++)
-                __locdata_release(loc->locdata[i]);
-        if (loc != &posix_locale)
+        /*
+         * We take extra care never to free a saved locale created by
+         * setlocale().  This shouldn't be strictly necessary, but a little
+         * extra safety doesn't hurt here.
+         */
+        if ((loc != &posix_locale) && (loc->next == NULL))
                 free(loc);
 }
 
 locale_t
 newlocale(int catmask, const char *locname, locale_t base)

@@ -400,11 +415,19 @@
 
         if (catmask & ~(LC_ALL_MASK)) {
                 errno = EINVAL;
                 return (NULL);
         }
-        loc = duplocale(base != NULL ? base : ___global_locale);
+        /*
+         * Technically passing LC_GLOBAL_LOCALE here is illegal,
+         * but we allow it.
+         */
+        if (base == NULL || base == ___global_locale) {
+                loc = duplocale(___global_locale);
+        } else {
+                loc = base;
+        }
         if (loc == NULL) {
                 return (NULL);
         }
 
         for (i = 0; i < LC_ALL; i++) {

@@ -419,16 +442,12 @@
                         e = errno;
                         freelocale(loc);
                         errno = e;
                         return (NULL);
                 }
-                __locdata_release(loc->locdata[i]);
                 loc->locdata[i] = ldata;
         }
-        if (base && base != ___global_locale) {
-                freelocale(base);
-        }
         loc->collate = loc->locdata[LC_COLLATE]->l_data[0];
         loc->ctype = loc->locdata[LC_CTYPE]->l_data[0];
         loc->runelocale = loc->locdata[LC_CTYPE]->l_data[1];
         loc->messages = loc->locdata[LC_MESSAGES]->l_data[0];
         loc->monetary = loc->locdata[LC_MONETARY]->l_data[0];