1 /*
2 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
3 * Copyright (c) 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Paul Borman at Krystal Technologies.
8 *
9 * Copyright (c) 2011 The FreeBSD Foundation
10 * All rights reserved.
11 * Portions of this software were developed by David Chisnall
12 * under sponsorship from the FreeBSD Foundation.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include "lint.h"
40 #include "file64.h"
41 #include <errno.h>
42 #include <limits.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <unistd.h>
47 #include <wchar.h>
48 #include "runetype.h"
49 #include "ldpart.h"
50 #include "mblocal.h"
51 #include "setlocale.h"
52 #include "_ctype.h"
53 #include "../i18n/_locale.h"
54
/*
 * A cached version of the runes for this thread. Used by ctype.h.
 * Assigned by __set_thread_rune_locale() in this file.
 */
__thread const _RuneLocale *_ThreadRuneLocale;

/*
 * External symbols used by __setrunelocale():
 *  - _Read_RuneMagi() builds a _RuneLocale from an open stdio stream; a NULL
 *    return is treated as failure and the result is released with free().
 *  - __ctype_C is the table copied over __ctype for the "C"/"POSIX" locale.
 */
extern _RuneLocale *_Read_RuneMagi(FILE *);
extern unsigned char __ctype_C[];

/* Forward declaration; the definition follows immediately. */
static int __setrunelocale(const char *);
64
65 static int
66 __setrunelocale(const char *encoding)
67 {
68 FILE *fp;
69 char name[PATH_MAX];
70 _RuneLocale *rl;
71 int saverr, ret;
72 size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
73 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
74 size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
75 mbstate_t *_RESTRICT_KYWD);
76 int (*old__mbsinit)(const mbstate_t *);
77 size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
78 const char **_RESTRICT_KYWD, size_t, size_t,
79 mbstate_t *_RESTRICT_KYWD);
80 size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
81 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
82 mbstate_t *_RESTRICT_KYWD);
83 static char ctype_encoding[ENCODING_LEN + 1];
84 static _RuneLocale *CachedRuneLocale;
85 static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
86 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
87 static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
88 mbstate_t *_RESTRICT_KYWD);
89 static int (*Cached__mbsinit)(const mbstate_t *);
90 static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
91 const char **_RESTRICT_KYWD, size_t, size_t,
92 mbstate_t *_RESTRICT_KYWD);
93 static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
94 const wchar_t **_RESTRICT_KYWD, size_t, size_t,
95 mbstate_t *_RESTRICT_KYWD);
96
97 /*
98 * The "C" and "POSIX" locale are always here.
99 */
100 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
101 int i;
102
103 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
104
105 for (i = 0; i < _CACHED_RUNES; i++) {
106 __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
107 __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
108 __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
109 }
110
111 (void) _none_init(&_DefaultRuneLocale);
112 return (0);
113 }
114
115 /*
116 * If the locale name is the same as our cache, use the cache.
117 */
118 if (CachedRuneLocale != NULL &&
119 strcmp(encoding, ctype_encoding) == 0) {
120 _CurrentRuneLocale = CachedRuneLocale;
121 __mbrtowc = Cached__mbrtowc;
122 __mbsinit = Cached__mbsinit;
123 __mbsnrtowcs = Cached__mbsnrtowcs;
124 __wcrtomb = Cached__wcrtomb;
125 __wcsnrtombs = Cached__wcsnrtombs;
126 return (0);
127 }
128
129 /*
130 * Slurp the locale file into the cache.
131 */
132
133 (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
134 _PathLocale, encoding);
135
136 if ((fp = fopen(name, "r")) == NULL)
137 return (errno == 0 ? ENOENT : errno);
138
139 if ((rl = _Read_RuneMagi(fp)) == NULL) {
140 saverr = (errno == 0 ? EINVAL : errno);
141 (void) fclose(fp);
142 return (saverr);
143 }
144 (void) fclose(fp);
145
146 old__mbrtowc = __mbrtowc;
147 old__mbsinit = __mbsinit;
148 old__mbsnrtowcs = __mbsnrtowcs;
149 old__wcrtomb = __wcrtomb;
150 old__wcsnrtombs = __wcsnrtombs;
151
152 __mbrtowc = NULL;
153 __mbsinit = NULL;
154 __mbsnrtowcs = __mbsnrtowcs_std;
155 __wcrtomb = NULL;
156 __wcsnrtombs = __wcsnrtombs_std;
157
158 if (strcmp(rl->__encoding, "NONE") == 0)
159 ret = _none_init(rl);
160 else if (strcmp(rl->__encoding, "UTF-8") == 0)
161 ret = _UTF8_init(rl);
162 else if (strcmp(rl->__encoding, "EUC-CN") == 0)
163 ret = _EUC_CN_init(rl);
164 else if (strcmp(rl->__encoding, "EUC-JP") == 0)
165 ret = _EUC_JP_init(rl);
166 else if (strcmp(rl->__encoding, "EUC-KR") == 0)
167 ret = _EUC_KR_init(rl);
168 else if (strcmp(rl->__encoding, "EUC-TW") == 0)
169 ret = _EUC_TW_init(rl);
170 else if (strcmp(rl->__encoding, "GB18030") == 0)
171 ret = _GB18030_init(rl);
172 else if (strcmp(rl->__encoding, "GB2312") == 0)
173 ret = _GB2312_init(rl);
174 else if (strcmp(rl->__encoding, "GBK") == 0)
175 ret = _GBK_init(rl);
176 else if (strcmp(rl->__encoding, "BIG5") == 0)
177 ret = _BIG5_init(rl);
178 else if (strcmp(rl->__encoding, "MSKanji") == 0)
179 ret = _MSKanji_init(rl);
180 else
181 ret = EINVAL;
182
183 if (ret == 0) {
184 if (CachedRuneLocale != NULL) {
185 free(CachedRuneLocale);
186 }
187 CachedRuneLocale = _CurrentRuneLocale;
188 Cached__mbrtowc = __mbrtowc;
189 Cached__mbsinit = __mbsinit;
190 Cached__mbsnrtowcs = __mbsnrtowcs;
191 Cached__wcrtomb = __wcrtomb;
192 Cached__wcsnrtombs = __wcsnrtombs;
193 (void) strcpy(ctype_encoding, encoding);
194
195 /*
196 * We need to overwrite the _ctype array. This requires
197 * some finagling. This is because references to it may
198 * have been baked into applications.
199 *
200 * Note that it is interesting that toupper/tolower only
201 * produce defined results when the input is representable
202 * as a byte.
203 */
204
205 /*
206 * The top half is the type mask array. Because we
207 * want to support both legacy Solaris code (which have
208 * mask valeus baked in to them), and we want to be able
209 * to import locale files from other sources (FreeBSD)
210 * which probably uses different masks, we have to perform
211 * a conversion here. Ugh. Note that the _CTYPE definitions
212 * we use from FreeBSD are richer than the Solaris legacy.
213 *
214 * We have to cope with these limitations though, because the
215 * inadequate Solaris definitions were baked into binaries.
216 */
217 for (int i = 0; i < _CACHED_RUNES; i++) {
218 /* ctype can only encode the lower 8 bits. */
219 __ctype[i+1] = rl->__runetype[i] & 0xff;
220 __ctype_mask[i] = rl->__runetype[i];
221 }
222
223 /* The bottom half is the toupper/lower array */
224 for (int i = 0; i < _CACHED_RUNES; i++) {
225 __ctype[258 + i] = i;
226 if (rl->__mapupper[i] && rl->__mapupper[i] != i)
227 __ctype[258+i] = rl->__mapupper[i];
228 if (rl->__maplower[i] && rl->__maplower[i] != i)
229 __ctype[258+i] = rl->__maplower[i];
230
231 /* Don't forget these annoyances either! */
232 __trans_upper[i] = rl->__mapupper[i];
233 __trans_lower[i] = rl->__maplower[i];
234 }
235
236 /*
237 * Note that we expect the init code will have populated
238 * the CSWIDTH array (__ctype[514-520]) properly.
239 */
240 } else {
241 __mbrtowc = old__mbrtowc;
242 __mbsinit = old__mbsinit;
243 __mbsnrtowcs = old__mbsnrtowcs;
244 __wcrtomb = old__wcrtomb;
245 __wcsnrtombs = old__wcsnrtombs;
246 free(rl);
247 }
248
249 return (ret);
250 }
251
252 int
253 __wrap_setrunelocale(const char *locale)
254 {
255 int ret = __setrunelocale(locale);
256
257 if (ret != 0) {
258 errno = ret;
259 return (_LDP_ERROR);
260 }
261 return (_LDP_LOADED);
262 }
263
264 void
265 __set_thread_rune_locale(locale_t loc)
266 {
267
268 if (loc == NULL) {
269 _ThreadRuneLocale = &_DefaultRuneLocale;
270 } else {
271 _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes;
272 }
273 }
274
275 void *
276 __ctype_load(const char *locale, locale_t unused)
277 {
278 struct xlocale_ctype *l;
279
280 l = calloc(sizeof(struct xlocale_ctype), 1);
281 /* XXX */
282
283 return (l);
284 }