Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD

@@ -1,6 +1,7 @@
 /*
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 1993
  *      The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by

@@ -43,210 +44,106 @@
 #include "runetype.h"
 #include "ldpart.h"
 #include "mblocal.h"
 #include "setlocale.h"
 #include "_ctype.h"
-#include "../i18n/_locale.h"
+#include "lctype.h"
+#include "localeimpl.h"
 
-extern _RuneLocale      *_Read_RuneMagi(FILE *);
-extern unsigned char    __ctype_C[];
+extern _RuneLocale      *_Read_RuneMagi(const char *);
 
-static int              __setrunelocale(const char *);
+struct lc_ctype lc_ctype_posix = {      /* static LC_CTYPE data for "C" */
+        .lc_mbrtowc = __mbrtowc_ascii,  /* conversion hooks: *_ascii set */
+        .lc_mbsinit = __mbsinit_ascii,
+        .lc_mbsnrtowcs = __mbsnrtowcs_ascii,
+        .lc_wcrtomb = __wcrtomb_ascii,
+        .lc_wcsnrtombs = __wcsnrtombs_ascii,
+        .lc_is_ascii = 1,
+        .lc_max_mblen = 1,      /* presumably max bytes per char -- confirm */
+        .lc_trans_upper = _DefaultRuneLocale.__mapupper, /* shared tables */
+        .lc_trans_lower = _DefaultRuneLocale.__maplower,
+        .lc_ctype_mask = _DefaultRuneLocale.__runetype,
+};
 
-static int
-__setrunelocale(const char *encoding)
-{
-        FILE *fp;
-        char name[PATH_MAX];
-        _RuneLocale *rl;
-        int saverr, ret;
-        size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
-            const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
-        size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
-            mbstate_t *_RESTRICT_KYWD);
-        int (*old__mbsinit)(const mbstate_t *);
-        size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
-            const char **_RESTRICT_KYWD, size_t, size_t,
-            mbstate_t *_RESTRICT_KYWD);
-        size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
-            const wchar_t **_RESTRICT_KYWD, size_t, size_t,
-            mbstate_t *_RESTRICT_KYWD);
-        static char ctype_encoding[ENCODING_LEN + 1];
-        static _RuneLocale *CachedRuneLocale;
-        static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
-            const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
-        static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
-            mbstate_t *_RESTRICT_KYWD);
-        static int (*Cached__mbsinit)(const mbstate_t *);
-        static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
-            const char **_RESTRICT_KYWD, size_t, size_t,
-            mbstate_t *_RESTRICT_KYWD);
-        static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
-            const wchar_t **_RESTRICT_KYWD, size_t, size_t,
-            mbstate_t *_RESTRICT_KYWD);
+struct locdata __posix_ctype_locdata = { /* static locdata named "C" */
+        .l_lname = "C",
+        .l_refcnt = (uint32_t)-1, /* all-ones; presumably "never freed" -- confirm */
+        .l_data = { &lc_ctype_posix, &_DefaultRuneLocale } /* [0] lc_ctype, [1] runes */
+};
 
-        /*
-         * The "C" and "POSIX" locale are always here.
+
+/*
+ * Table of initializers for encodings.  When you add a new encoding type,
+ * this table should be updated.
          */
-        if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
-                int i;
+static const struct {   /* read-only lookup table, hence const */
+        const char *e_name;     /* name compared with rl->__encoding */
+        void (*e_init)(struct lc_ctype *);      /* called with the new lc_ctype */
+} encodings[] = {
+        { "NONE", _none_init },
+        { "UTF-8",      _UTF8_init },
+        { "EUC-CN",     _EUC_CN_init },
+        { "EUC-JP",     _EUC_JP_init },
+        { "EUC-KR",     _EUC_KR_init },
+        { "EUC-TW",     _EUC_TW_init },
+        { "GB18030",    _GB18030_init },
+        { "GB2312",     _GB2312_init },
+        { "GBK",        _GBK_init },
+        { "BIG5",       _BIG5_init },
+        { "MSKanji",    _MSKanji_init },
+        { NULL,         NULL }  /* sentinel: terminates the lookup loop */
+};
 
-                (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
 
-                for (i = 0; i < _CACHED_RUNES; i++) {
-                        __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
-                        __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
-                        __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
-                }
+struct locdata *
+__lc_ctype_load(const char *name)       /* load LC_CTYPE data for "name" */
+{       /* NULL on failure; EINVAL for unreadable data or unknown encoding */
+        struct locdata *ldata;  /* result; released on the failure paths below */
+        struct lc_ctype *lct;
+        _RuneLocale *rl;
+        int i;
+        char path[PATH_MAX];
 
-                (void) _none_init(&_DefaultRuneLocale);
-                return (0);
-        }
-
+        if ((ldata = __locdata_alloc(name, sizeof (*lct))) == NULL)
+                return (NULL);
+        lct = ldata->l_data[0]; /* presumably the sizeof (*lct) area -- confirm */
         /*
-         * If the locale name is the same as our cache, use the cache.
-         */
-        if (CachedRuneLocale != NULL &&
-            strcmp(encoding, ctype_encoding) == 0) {
-                _CurrentRuneLocale = CachedRuneLocale;
-                __mbrtowc = Cached__mbrtowc;
-                __mbsinit = Cached__mbsinit;
-                __mbsnrtowcs = Cached__mbsnrtowcs;
-                __wcrtomb = Cached__wcrtomb;
-                __wcsnrtombs = Cached__wcsnrtombs;
-                return (0);
-        }
-
-        /*
          * Slurp the locale file into the cache.
          */
 
-        (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
-            _PathLocale, encoding);
+        (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE/LCL_DATA",
+            _PathLocale, name); /* NOTE(review): truncation is not checked */
 
-        if ((fp = fopen(name, "r")) == NULL)
-                return (errno == 0 ? ENOENT : errno);
-
-        if ((rl = _Read_RuneMagi(fp)) == NULL) {
-                saverr = (errno == 0 ? EINVAL : errno);
-                (void) fclose(fp);
-                return (saverr);
+        if ((rl = _Read_RuneMagi(path)) == NULL) {
+                __locdata_release(ldata);
+                errno = EINVAL; /* any read failure is reported as EINVAL */
+                return (NULL);
         }
-        (void) fclose(fp);
+        ldata->l_data[1] = rl;  /* rune tables are kept in slot 1 */
 
-        old__mbrtowc = __mbrtowc;
-        old__mbsinit = __mbsinit;
-        old__mbsnrtowcs = __mbsnrtowcs;
-        old__wcrtomb = __wcrtomb;
-        old__wcsnrtombs = __wcsnrtombs;
+        lct->lc_mbrtowc = NULL; /* cleared; e_init() below is expected to set them */
+        lct->lc_mbsinit = NULL;
+        lct->lc_mbsnrtowcs = NULL;
+        lct->lc_wcrtomb = NULL;
+        lct->lc_wcsnrtombs = NULL;
+        lct->lc_ctype_mask = rl->__runetype;
+        lct->lc_trans_upper = rl->__mapupper;
+        lct->lc_trans_lower = rl->__maplower;
 
-        __mbrtowc = NULL;
-        __mbsinit = NULL;
-        __mbsnrtowcs = __mbsnrtowcs_std;
-        __wcrtomb = NULL;
-        __wcsnrtombs = __wcsnrtombs_std;
-
-        if (strcmp(rl->__encoding, "NONE") == 0)
-                ret = _none_init(rl);
-        else if (strcmp(rl->__encoding, "UTF-8") == 0)
-                ret = _UTF8_init(rl);
-        else if (strcmp(rl->__encoding, "EUC-CN") == 0)
-                ret = _EUC_CN_init(rl);
-        else if (strcmp(rl->__encoding, "EUC-JP") == 0)
-                ret = _EUC_JP_init(rl);
-        else if (strcmp(rl->__encoding, "EUC-KR") == 0)
-                ret = _EUC_KR_init(rl);
-        else if (strcmp(rl->__encoding, "EUC-TW") == 0)
-                ret = _EUC_TW_init(rl);
-        else if (strcmp(rl->__encoding, "GB18030") == 0)
-                ret = _GB18030_init(rl);
-        else if (strcmp(rl->__encoding, "GB2312") == 0)
-                ret = _GB2312_init(rl);
-        else if (strcmp(rl->__encoding, "GBK") == 0)
-                ret = _GBK_init(rl);
-        else if (strcmp(rl->__encoding, "BIG5") == 0)
-                ret = _BIG5_init(rl);
-        else if (strcmp(rl->__encoding, "MSKanji") == 0)
-                ret = _MSKanji_init(rl);
-        else
-                ret = EINVAL;
-
-        if (ret == 0) {
-                if (CachedRuneLocale != NULL) {
-                        free(CachedRuneLocale);
+        /* set up the function pointers */
+        for (i = 0; encodings[i].e_name != NULL; i++) {
+                size_t l = strlen(encodings[i].e_name); /* size_t: no narrowing */
+                if ((strncmp(rl->__encoding, encodings[i].e_name, l) == 0) &&
+                    (rl->__encoding[l] == '\0' || rl->__encoding[l] == '@')) { /* exact or "@" suffix */
+                        encodings[i].e_init(lct);
+                        break;
                 }
-                CachedRuneLocale = _CurrentRuneLocale;
-                Cached__mbrtowc = __mbrtowc;
-                Cached__mbsinit = __mbsinit;
-                Cached__mbsnrtowcs = __mbsnrtowcs;
-                Cached__wcrtomb = __wcrtomb;
-                Cached__wcsnrtombs = __wcsnrtombs;
-                (void) strcpy(ctype_encoding, encoding);
-
-                /*
-                 * We need to overwrite the _ctype array.  This requires
-                 * some finagling.  This is because references to it may
-                 * have been baked into applications.
-                 *
-                 * Note that it is interesting that toupper/tolower only
-                 * produce defined results when the input is representable
-                 * as a byte.
-                 */
-
-                /*
-                 * The top half is the type mask array.  Because we
-                 * want to support both legacy Solaris code (which have
-                 * mask valeus baked in to them), and we want to be able
-                 * to import locale files from other sources (FreeBSD)
-                 * which probably uses different masks, we have to perform
-                 * a conversion here.  Ugh.  Note that the _CTYPE definitions
-                 * we use from FreeBSD are richer than the Solaris legacy.
-                 *
-                 * We have to cope with these limitations though, because the
-                 * inadequate Solaris definitions were baked into binaries.
-                 */
-                for (int i = 0; i < _CACHED_RUNES; i++) {
-                        /* ctype can only encode the lower 8 bits. */
-                        __ctype[i+1] = rl->__runetype[i] & 0xff;
-                        __ctype_mask[i] = rl->__runetype[i];
                 }
-
-                /* The bottom half is the toupper/lower array */
-                for (int i = 0; i < _CACHED_RUNES; i++) {
-                        __ctype[258 + i] = i;
-                        if (rl->__mapupper[i] && rl->__mapupper[i] != i)
-                                __ctype[258+i] = rl->__mapupper[i];
-                        if (rl->__maplower[i] && rl->__maplower[i] != i)
-                                __ctype[258+i] = rl->__maplower[i];
-
-                        /* Don't forget these annoyances either! */
-                        __trans_upper[i] = rl->__mapupper[i];
-                        __trans_lower[i] = rl->__maplower[i];
+        if (encodings[i].e_name == NULL) {      /* hit the sentinel: unknown encoding */
+                __locdata_release(ldata);
+                errno = EINVAL;
+                return (NULL);
                 }
 
-                /*
-                 * Note that we expect the init code will have populated
-                 * the CSWIDTH array (__ctype[514-520]) properly.
-                 */
-        } else {
-                __mbrtowc = old__mbrtowc;
-                __mbsinit = old__mbsinit;
-                __mbsnrtowcs = old__mbsnrtowcs;
-                __wcrtomb = old__wcrtomb;
-                __wcsnrtombs = old__wcsnrtombs;
-                free(rl);
-        }
 
-        return (ret);
-}
-
-int
-__wrap_setrunelocale(const char *locale)
-{
-        int ret = __setrunelocale(locale);
-
-        if (ret != 0) {
-                errno = ret;
-                return (_LDP_ERROR);
-        }
-        return (_LDP_LOADED);
+        return (ldata);
 }