Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD

*** 1,6 **** --- 1,7 ---- /* + * Copyright 2013 Garrett D'Amore <garrett@damore.org> * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by
*** 43,252 **** #include "runetype.h" #include "ldpart.h" #include "mblocal.h" #include "setlocale.h" #include "_ctype.h" ! #include "../i18n/_locale.h" ! extern _RuneLocale *_Read_RuneMagi(FILE *); ! extern unsigned char __ctype_C[]; ! static int __setrunelocale(const char *); ! static int ! __setrunelocale(const char *encoding) ! { ! FILE *fp; ! char name[PATH_MAX]; ! _RuneLocale *rl; ! int saverr, ret; ! size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD, ! const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); ! size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, ! mbstate_t *_RESTRICT_KYWD); ! int (*old__mbsinit)(const mbstate_t *); ! size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, ! const char **_RESTRICT_KYWD, size_t, size_t, ! mbstate_t *_RESTRICT_KYWD); ! size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD, ! const wchar_t **_RESTRICT_KYWD, size_t, size_t, ! mbstate_t *_RESTRICT_KYWD); ! static char ctype_encoding[ENCODING_LEN + 1]; ! static _RuneLocale *CachedRuneLocale; ! static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD, ! const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); ! static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, ! mbstate_t *_RESTRICT_KYWD); ! static int (*Cached__mbsinit)(const mbstate_t *); ! static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, ! const char **_RESTRICT_KYWD, size_t, size_t, ! mbstate_t *_RESTRICT_KYWD); ! static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD, ! const wchar_t **_RESTRICT_KYWD, size_t, size_t, ! mbstate_t *_RESTRICT_KYWD); ! /* ! * The "C" and "POSIX" locale are always here. */ ! if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { ! int i; - (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); ! for (i = 0; i < _CACHED_RUNES; i++) { ! __ctype_mask[i] = _DefaultRuneLocale.__runetype[i]; ! __trans_upper[i] = _DefaultRuneLocale.__mapupper[i]; ! __trans_lower[i] = _DefaultRuneLocale.__maplower[i]; ! } ! (void) _none_init(&_DefaultRuneLocale); ! return (0); ! } ! /* - * If the locale name is the same as our cache, use the cache. - */ - if (CachedRuneLocale != NULL && - strcmp(encoding, ctype_encoding) == 0) { - _CurrentRuneLocale = CachedRuneLocale; - __mbrtowc = Cached__mbrtowc; - __mbsinit = Cached__mbsinit; - __mbsnrtowcs = Cached__mbsnrtowcs; - __wcrtomb = Cached__wcrtomb; - __wcsnrtombs = Cached__wcsnrtombs; - return (0); - } - - /* * Slurp the locale file into the cache. */ ! (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA", ! _PathLocale, encoding); ! if ((fp = fopen(name, "r")) == NULL) ! return (errno == 0 ? ENOENT : errno); ! ! if ((rl = _Read_RuneMagi(fp)) == NULL) { ! saverr = (errno == 0 ? EINVAL : errno); ! (void) fclose(fp); ! return (saverr); } ! (void) fclose(fp); ! old__mbrtowc = __mbrtowc; ! old__mbsinit = __mbsinit; ! old__mbsnrtowcs = __mbsnrtowcs; ! old__wcrtomb = __wcrtomb; ! old__wcsnrtombs = __wcsnrtombs; ! __mbrtowc = NULL; ! __mbsinit = NULL; ! __mbsnrtowcs = __mbsnrtowcs_std; ! __wcrtomb = NULL; ! __wcsnrtombs = __wcsnrtombs_std; ! ! if (strcmp(rl->__encoding, "NONE") == 0) ! ret = _none_init(rl); ! else if (strcmp(rl->__encoding, "UTF-8") == 0) ! ret = _UTF8_init(rl); ! else if (strcmp(rl->__encoding, "EUC-CN") == 0) ! ret = _EUC_CN_init(rl); ! else if (strcmp(rl->__encoding, "EUC-JP") == 0) ! ret = _EUC_JP_init(rl); ! else if (strcmp(rl->__encoding, "EUC-KR") == 0) ! ret = _EUC_KR_init(rl); ! else if (strcmp(rl->__encoding, "EUC-TW") == 0) ! ret = _EUC_TW_init(rl); ! else if (strcmp(rl->__encoding, "GB18030") == 0) ! ret = _GB18030_init(rl); ! else if (strcmp(rl->__encoding, "GB2312") == 0) ! ret = _GB2312_init(rl); ! else if (strcmp(rl->__encoding, "GBK") == 0) ! ret = _GBK_init(rl); ! else if (strcmp(rl->__encoding, "BIG5") == 0) ! ret = _BIG5_init(rl); ! else if (strcmp(rl->__encoding, "MSKanji") == 0) ! ret = _MSKanji_init(rl); ! else ! ret = EINVAL; ! ! if (ret == 0) { ! if (CachedRuneLocale != NULL) { ! free(CachedRuneLocale); } - CachedRuneLocale = _CurrentRuneLocale; - Cached__mbrtowc = __mbrtowc; - Cached__mbsinit = __mbsinit; - Cached__mbsnrtowcs = __mbsnrtowcs; - Cached__wcrtomb = __wcrtomb; - Cached__wcsnrtombs = __wcsnrtombs; - (void) strcpy(ctype_encoding, encoding); - - /* - * We need to overwrite the _ctype array. This requires - * some finagling. This is because references to it may - * have been baked into applications. - * - * Note that it is interesting that toupper/tolower only - * produce defined results when the input is representable - * as a byte. - */ - - /* - * The top half is the type mask array. Because we - * want to support both legacy Solaris code (which have - * mask valeus baked in to them), and we want to be able - * to import locale files from other sources (FreeBSD) - * which probably uses different masks, we have to perform - * a conversion here. Ugh. Note that the _CTYPE definitions - * we use from FreeBSD are richer than the Solaris legacy. - * - * We have to cope with these limitations though, because the - * inadequate Solaris definitions were baked into binaries. - */ - for (int i = 0; i < _CACHED_RUNES; i++) { - /* ctype can only encode the lower 8 bits. */ - __ctype[i+1] = rl->__runetype[i] & 0xff; - __ctype_mask[i] = rl->__runetype[i]; } ! ! /* The bottom half is the toupper/lower array */ ! for (int i = 0; i < _CACHED_RUNES; i++) { ! __ctype[258 + i] = i; ! if (rl->__mapupper[i] && rl->__mapupper[i] != i) ! __ctype[258+i] = rl->__mapupper[i]; ! if (rl->__maplower[i] && rl->__maplower[i] != i) ! __ctype[258+i] = rl->__maplower[i]; ! ! /* Don't forget these annoyances either! */ ! __trans_upper[i] = rl->__mapupper[i]; ! __trans_lower[i] = rl->__maplower[i]; } - /* - * Note that we expect the init code will have populated - * the CSWIDTH array (__ctype[514-520]) properly. - */ - } else { - __mbrtowc = old__mbrtowc; - __mbsinit = old__mbsinit; - __mbsnrtowcs = old__mbsnrtowcs; - __wcrtomb = old__wcrtomb; - __wcsnrtombs = old__wcsnrtombs; - free(rl); - } ! return (ret); ! } ! ! int ! __wrap_setrunelocale(const char *locale) ! { ! int ret = __setrunelocale(locale); ! ! if (ret != 0) { ! errno = ret; ! return (_LDP_ERROR); ! } ! return (_LDP_LOADED); } --- 44,149 ---- #include "runetype.h" #include "ldpart.h" #include "mblocal.h" #include "setlocale.h" #include "_ctype.h" ! #include "lctype.h" ! #include "localeimpl.h" ! extern _RuneLocale *_Read_RuneMagi(const char *); ! struct lc_ctype lc_ctype_posix = { ! .lc_mbrtowc = __mbrtowc_ascii, ! .lc_mbsinit = __mbsinit_ascii, ! .lc_mbsnrtowcs = __mbsnrtowcs_ascii, ! .lc_wcrtomb = __wcrtomb_ascii, ! .lc_wcsnrtombs = __wcsnrtombs_ascii, ! .lc_is_ascii = 1, ! .lc_max_mblen = 1, ! .lc_trans_upper = _DefaultRuneLocale.__mapupper, ! .lc_trans_lower = _DefaultRuneLocale.__maplower, ! .lc_ctype_mask = _DefaultRuneLocale.__runetype, ! }; ! struct locdata __posix_ctype_locdata = { ! .l_lname = "C", ! .l_refcnt = (uint32_t)-1, ! .l_data = { &lc_ctype_posix, &_DefaultRuneLocale } ! }; ! ! /* ! * Table of initializers for encodings. When you add a new encoding type, ! * this table should be updated. */ ! static struct { ! const char *e_name; ! void (*e_init)(struct lc_ctype *); ! } encodings[] = { ! { "NONE", _none_init }, ! { "UTF-8", _UTF8_init }, ! { "EUC-CN", _EUC_CN_init }, ! { "EUC-JP", _EUC_JP_init }, ! { "EUC-KR", _EUC_KR_init }, ! { "EUC-TW", _EUC_TW_init }, ! { "GB18030", _GB18030_init }, ! { "GB2312", _GB2312_init }, ! { "GBK", _GBK_init }, ! { "BIG5", _BIG5_init }, ! { "MSKanji", _MSKanji_init }, ! { NULL, NULL } ! }; ! struct locdata * ! __lc_ctype_load(const char *name) ! { ! struct locdata *ldata; ! struct lc_ctype *lct; ! _RuneLocale *rl; ! int i; ! char path[PATH_MAX]; ! if ((ldata = __locdata_alloc(name, sizeof (*lct))) == NULL) ! return (NULL); ! lct = ldata->l_data[0]; /* * Slurp the locale file into the cache. */ ! (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE/LCL_DATA", ! _PathLocale, name); ! if ((rl = _Read_RuneMagi(path)) == NULL) { ! __locdata_release(ldata); ! errno = EINVAL; ! return (NULL); } ! ldata->l_data[1] = rl; ! lct->lc_mbrtowc = NULL; ! lct->lc_mbsinit = NULL; ! lct->lc_mbsnrtowcs = NULL; ! lct->lc_wcrtomb = NULL; ! lct->lc_wcsnrtombs = NULL; ! lct->lc_ctype_mask = rl->__runetype; ! lct->lc_trans_upper = rl->__mapupper; ! lct->lc_trans_lower = rl->__maplower; ! /* set up the function pointers */ ! for (i = 0; encodings[i].e_name != NULL; i++) { ! int l = strlen(encodings[i].e_name); ! if ((strncmp(rl->__encoding, encodings[i].e_name, l) == 0) && ! (rl->__encoding[l] == '\0' || rl->__encoding[l] == '@')) { ! encodings[i].e_init(lct); ! break; } } ! if (encodings[i].e_name == NULL) { ! __locdata_release(ldata); ! errno = EINVAL; ! return (NULL); } ! return (ldata); }