Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/setrunelocale.c
+++ new/usr/src/lib/libc/port/locale/setrunelocale.c
1 1 /*
2 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
2 3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
3 4 * Copyright (c) 1993
4 5 * The Regents of the University of California. All rights reserved.
5 6 *
6 7 * This code is derived from software contributed to Berkeley by
7 8 * Paul Borman at Krystal Technologies.
8 9 *
9 10 * Redistribution and use in source and binary forms, with or without
10 11 * modification, are permitted provided that the following conditions
11 12 * are met:
12 13 * 1. Redistributions of source code must retain the above copyright
13 14 * notice, this list of conditions and the following disclaimer.
14 15 * 2. Redistributions in binary form must reproduce the above copyright
15 16 * notice, this list of conditions and the following disclaimer in the
16 17 * documentation and/or other materials provided with the distribution.
17 18 * 4. Neither the name of the University nor the names of its contributors
18 19 * may be used to endorse or promote products derived from this software
19 20 * without specific prior written permission.
20 21 *
21 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 32 * SUCH DAMAGE.
32 33 */
33 34
34 35 #include "lint.h"
35 36 #include "file64.h"
36 37 #include <errno.h>
37 38 #include <limits.h>
↓ open down ↓ |
26 lines elided |
↑ open up ↑ |
38 39 #include <string.h>
39 40 #include <stdio.h>
40 41 #include <stdlib.h>
41 42 #include <unistd.h>
42 43 #include <wchar.h>
43 44 #include "runetype.h"
44 45 #include "ldpart.h"
45 46 #include "mblocal.h"
46 47 #include "setlocale.h"
47 48 #include "_ctype.h"
48 -#include "../i18n/_locale.h"
49 +#include "lctype.h"
50 +#include "localeimpl.h"
49 51
50 -extern _RuneLocale *_Read_RuneMagi(FILE *);
51 -extern unsigned char __ctype_C[];
52 +extern _RuneLocale *_Read_RuneMagi(const char *);
52 53
53 -static int __setrunelocale(const char *);
54 +struct lc_ctype lc_ctype_posix = {
55 + .lc_mbrtowc = __mbrtowc_ascii,
56 + .lc_mbsinit = __mbsinit_ascii,
57 + .lc_mbsnrtowcs = __mbsnrtowcs_ascii,
58 + .lc_wcrtomb = __wcrtomb_ascii,
59 + .lc_wcsnrtombs = __wcsnrtombs_ascii,
60 + .lc_is_ascii = 1,
61 + .lc_max_mblen = 1,
62 + .lc_trans_upper = _DefaultRuneLocale.__mapupper,
63 + .lc_trans_lower = _DefaultRuneLocale.__maplower,
64 + .lc_ctype_mask = _DefaultRuneLocale.__runetype,
65 +};
54 66
55 -static int
56 -__setrunelocale(const char *encoding)
57 -{
58 - FILE *fp;
59 - char name[PATH_MAX];
60 - _RuneLocale *rl;
61 - int saverr, ret;
62 - size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
63 - const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
64 - size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
65 - mbstate_t *_RESTRICT_KYWD);
66 - int (*old__mbsinit)(const mbstate_t *);
67 - size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
68 - const char **_RESTRICT_KYWD, size_t, size_t,
69 - mbstate_t *_RESTRICT_KYWD);
70 - size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
71 - const wchar_t **_RESTRICT_KYWD, size_t, size_t,
72 - mbstate_t *_RESTRICT_KYWD);
73 - static char ctype_encoding[ENCODING_LEN + 1];
74 - static _RuneLocale *CachedRuneLocale;
75 - static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
76 - const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
77 - static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
78 - mbstate_t *_RESTRICT_KYWD);
79 - static int (*Cached__mbsinit)(const mbstate_t *);
80 - static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
81 - const char **_RESTRICT_KYWD, size_t, size_t,
82 - mbstate_t *_RESTRICT_KYWD);
83 - static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
84 - const wchar_t **_RESTRICT_KYWD, size_t, size_t,
85 - mbstate_t *_RESTRICT_KYWD);
67 +struct locdata __posix_ctype_locdata = {
68 + .l_lname = "C",
69 + .l_refcnt = (uint32_t)-1,
70 + .l_data = { &lc_ctype_posix, &_DefaultRuneLocale }
71 +};
86 72
87 - /*
88 - * The "C" and "POSIX" locale are always here.
89 - */
90 - if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
91 - int i;
92 73
93 - (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
74 +/*
75 + * Table of initializers for encodings. When you add a new encoding type,
76 + * this table should be updated.
77 + */
78 +static struct {
79 + const char *e_name;
80 + void (*e_init)(struct lc_ctype *);
81 +} encodings[] = {
82 + { "NONE", _none_init },
83 + { "UTF-8", _UTF8_init },
84 + { "EUC-CN", _EUC_CN_init },
85 + { "EUC-JP", _EUC_JP_init },
86 + { "EUC-KR", _EUC_KR_init },
87 + { "EUC-TW", _EUC_TW_init },
88 + { "GB18030", _GB18030_init },
89 + { "GB2312", _GB2312_init },
90 + { "GBK", _GBK_init },
91 + { "BIG5", _BIG5_init },
92 + { "MSKanji", _MSKanji_init },
93 + { NULL, NULL }
94 +};
94 95
95 - for (i = 0; i < _CACHED_RUNES; i++) {
96 - __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
97 - __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
98 - __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
99 - }
100 96
101 - (void) _none_init(&_DefaultRuneLocale);
102 - return (0);
103 - }
97 +struct locdata *
98 +__lc_ctype_load(const char *name)
99 +{
100 + struct locdata *ldata;
101 + struct lc_ctype *lct;
102 + _RuneLocale *rl;
103 + int i;
104 + char path[PATH_MAX];
104 105
106 + if ((ldata = __locdata_alloc(name, sizeof (*lct))) == NULL)
107 + return (NULL);
108 + lct = ldata->l_data[0];
105 109 /*
106 - * If the locale name is the same as our cache, use the cache.
107 - */
108 - if (CachedRuneLocale != NULL &&
109 - strcmp(encoding, ctype_encoding) == 0) {
110 - _CurrentRuneLocale = CachedRuneLocale;
111 - __mbrtowc = Cached__mbrtowc;
112 - __mbsinit = Cached__mbsinit;
113 - __mbsnrtowcs = Cached__mbsnrtowcs;
114 - __wcrtomb = Cached__wcrtomb;
115 - __wcsnrtombs = Cached__wcsnrtombs;
116 - return (0);
117 - }
118 -
119 - /*
120 110 * Slurp the locale file into the cache.
121 111 */
122 112
123 - (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
124 - _PathLocale, encoding);
113 + (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE/LCL_DATA",
114 + _PathLocale, name);
125 115
126 - if ((fp = fopen(name, "r")) == NULL)
127 - return (errno == 0 ? ENOENT : errno);
128 -
129 - if ((rl = _Read_RuneMagi(fp)) == NULL) {
130 - saverr = (errno == 0 ? EINVAL : errno);
131 - (void) fclose(fp);
132 - return (saverr);
116 + if ((rl = _Read_RuneMagi(path)) == NULL) {
117 + __locdata_release(ldata);
118 + errno = EINVAL;
119 + return (NULL);
133 120 }
134 - (void) fclose(fp);
121 + ldata->l_data[1] = rl;
135 122
136 - old__mbrtowc = __mbrtowc;
137 - old__mbsinit = __mbsinit;
138 - old__mbsnrtowcs = __mbsnrtowcs;
139 - old__wcrtomb = __wcrtomb;
140 - old__wcsnrtombs = __wcsnrtombs;
123 + lct->lc_mbrtowc = NULL;
124 + lct->lc_mbsinit = NULL;
125 + lct->lc_mbsnrtowcs = NULL;
126 + lct->lc_wcrtomb = NULL;
127 + lct->lc_wcsnrtombs = NULL;
128 + lct->lc_ctype_mask = rl->__runetype;
129 + lct->lc_trans_upper = rl->__mapupper;
130 + lct->lc_trans_lower = rl->__maplower;
141 131
142 - __mbrtowc = NULL;
143 - __mbsinit = NULL;
144 - __mbsnrtowcs = __mbsnrtowcs_std;
145 - __wcrtomb = NULL;
146 - __wcsnrtombs = __wcsnrtombs_std;
147 -
148 - if (strcmp(rl->__encoding, "NONE") == 0)
149 - ret = _none_init(rl);
150 - else if (strcmp(rl->__encoding, "UTF-8") == 0)
151 - ret = _UTF8_init(rl);
152 - else if (strcmp(rl->__encoding, "EUC-CN") == 0)
153 - ret = _EUC_CN_init(rl);
154 - else if (strcmp(rl->__encoding, "EUC-JP") == 0)
155 - ret = _EUC_JP_init(rl);
156 - else if (strcmp(rl->__encoding, "EUC-KR") == 0)
157 - ret = _EUC_KR_init(rl);
158 - else if (strcmp(rl->__encoding, "EUC-TW") == 0)
159 - ret = _EUC_TW_init(rl);
160 - else if (strcmp(rl->__encoding, "GB18030") == 0)
161 - ret = _GB18030_init(rl);
162 - else if (strcmp(rl->__encoding, "GB2312") == 0)
163 - ret = _GB2312_init(rl);
164 - else if (strcmp(rl->__encoding, "GBK") == 0)
165 - ret = _GBK_init(rl);
166 - else if (strcmp(rl->__encoding, "BIG5") == 0)
167 - ret = _BIG5_init(rl);
168 - else if (strcmp(rl->__encoding, "MSKanji") == 0)
169 - ret = _MSKanji_init(rl);
170 - else
171 - ret = EINVAL;
172 -
173 - if (ret == 0) {
174 - if (CachedRuneLocale != NULL) {
175 - free(CachedRuneLocale);
132 + /* set up the function pointers */
133 + for (i = 0; encodings[i].e_name != NULL; i++) {
134 + int l = strlen(encodings[i].e_name);
135 + if ((strncmp(rl->__encoding, encodings[i].e_name, l) == 0) &&
136 + (rl->__encoding[l] == '\0' || rl->__encoding[l] == '@')) {
137 + encodings[i].e_init(lct);
138 + break;
176 139 }
177 - CachedRuneLocale = _CurrentRuneLocale;
178 - Cached__mbrtowc = __mbrtowc;
179 - Cached__mbsinit = __mbsinit;
180 - Cached__mbsnrtowcs = __mbsnrtowcs;
181 - Cached__wcrtomb = __wcrtomb;
182 - Cached__wcsnrtombs = __wcsnrtombs;
183 - (void) strcpy(ctype_encoding, encoding);
184 -
185 - /*
186 - * We need to overwrite the _ctype array. This requires
187 - * some finagling. This is because references to it may
188 - * have been baked into applications.
189 - *
190 - * Note that it is interesting that toupper/tolower only
191 - * produce defined results when the input is representable
192 - * as a byte.
193 - */
194 -
195 - /*
196 - * The top half is the type mask array. Because we
197 - * want to support both legacy Solaris code (which have
198 - * mask valeus baked in to them), and we want to be able
199 - * to import locale files from other sources (FreeBSD)
200 - * which probably uses different masks, we have to perform
201 - * a conversion here. Ugh. Note that the _CTYPE definitions
202 - * we use from FreeBSD are richer than the Solaris legacy.
203 - *
204 - * We have to cope with these limitations though, because the
205 - * inadequate Solaris definitions were baked into binaries.
206 - */
207 - for (int i = 0; i < _CACHED_RUNES; i++) {
208 - /* ctype can only encode the lower 8 bits. */
209 - __ctype[i+1] = rl->__runetype[i] & 0xff;
210 - __ctype_mask[i] = rl->__runetype[i];
211 - }
212 -
213 - /* The bottom half is the toupper/lower array */
214 - for (int i = 0; i < _CACHED_RUNES; i++) {
215 - __ctype[258 + i] = i;
216 - if (rl->__mapupper[i] && rl->__mapupper[i] != i)
217 - __ctype[258+i] = rl->__mapupper[i];
218 - if (rl->__maplower[i] && rl->__maplower[i] != i)
219 - __ctype[258+i] = rl->__maplower[i];
220 -
221 - /* Don't forget these annoyances either! */
222 - __trans_upper[i] = rl->__mapupper[i];
223 - __trans_lower[i] = rl->__maplower[i];
224 - }
225 -
226 - /*
227 - * Note that we expect the init code will have populated
228 - * the CSWIDTH array (__ctype[514-520]) properly.
229 - */
230 - } else {
231 - __mbrtowc = old__mbrtowc;
232 - __mbsinit = old__mbsinit;
233 - __mbsnrtowcs = old__mbsnrtowcs;
234 - __wcrtomb = old__wcrtomb;
235 - __wcsnrtombs = old__wcsnrtombs;
236 - free(rl);
237 140 }
141 + if (encodings[i].e_name == NULL) {
142 + __locdata_release(ldata);
143 + errno = EINVAL;
144 + return (NULL);
145 + }
238 146
239 - return (ret);
240 -}
241 147
242 -int
243 -__wrap_setrunelocale(const char *locale)
244 -{
245 - int ret = __setrunelocale(locale);
246 -
247 - if (ret != 0) {
248 - errno = ret;
249 - return (_LDP_ERROR);
250 - }
251 - return (_LDP_LOADED);
148 + return (ldata);
252 149 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX