1 /* 2 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Copyright (c) 2011 The FreeBSD Foundation 10 * All rights reserved. 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include "lint.h" 40 #include "file64.h" 41 #include <errno.h> 42 #include <limits.h> 43 #include <string.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <wchar.h> 48 #include "runetype.h" 49 #include "ldpart.h" 50 #include "mblocal.h" 51 #include "setlocale.h" 52 #include "_ctype.h" 53 #include "../i18n/_locale.h" 54 55 /* 56 * A cached version of the runes for this thread. Used by ctype.h 57 */ 58 __thread const _RuneLocale *_ThreadRuneLocale; 59 60 extern _RuneLocale *_Read_RuneMagi(FILE *); 61 extern unsigned char __ctype_C[]; 62 63 static int __setrunelocale(const char *); 64 65 static int 66 __setrunelocale(const char *encoding) 67 { 68 FILE *fp; 69 char name[PATH_MAX]; 70 _RuneLocale *rl; 71 int saverr, ret; 72 size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD, 73 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 74 size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 75 mbstate_t *_RESTRICT_KYWD); 76 int (*old__mbsinit)(const mbstate_t *); 77 size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 78 const char **_RESTRICT_KYWD, size_t, size_t, 79 mbstate_t *_RESTRICT_KYWD); 80 size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD, 81 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 82 mbstate_t *_RESTRICT_KYWD); 83 static char ctype_encoding[ENCODING_LEN + 1]; 84 static _RuneLocale *CachedRuneLocale; 85 static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD, 86 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 87 static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 88 mbstate_t *_RESTRICT_KYWD); 89 static int (*Cached__mbsinit)(const mbstate_t *); 90 static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 91 const char **_RESTRICT_KYWD, size_t, size_t, 92 mbstate_t *_RESTRICT_KYWD); 93 static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD, 94 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 95 mbstate_t *_RESTRICT_KYWD); 96 97 /* 98 * The "C" and "POSIX" locale are always here. 99 */ 100 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 101 int i; 102 103 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); 104 105 for (i = 0; i < _CACHED_RUNES; i++) { 106 __ctype_mask[i] = _DefaultRuneLocale.__runetype[i]; 107 __trans_upper[i] = _DefaultRuneLocale.__mapupper[i]; 108 __trans_lower[i] = _DefaultRuneLocale.__maplower[i]; 109 } 110 111 (void) _none_init(&_DefaultRuneLocale); 112 return (0); 113 } 114 115 /* 116 * If the locale name is the same as our cache, use the cache. 117 */ 118 if (CachedRuneLocale != NULL && 119 strcmp(encoding, ctype_encoding) == 0) { 120 _CurrentRuneLocale = CachedRuneLocale; 121 __mbrtowc = Cached__mbrtowc; 122 __mbsinit = Cached__mbsinit; 123 __mbsnrtowcs = Cached__mbsnrtowcs; 124 __wcrtomb = Cached__wcrtomb; 125 __wcsnrtombs = Cached__wcsnrtombs; 126 return (0); 127 } 128 129 /* 130 * Slurp the locale file into the cache. 131 */ 132 133 (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA", 134 _PathLocale, encoding); 135 136 if ((fp = fopen(name, "r")) == NULL) 137 return (errno == 0 ? ENOENT : errno); 138 139 if ((rl = _Read_RuneMagi(fp)) == NULL) { 140 saverr = (errno == 0 ? EINVAL : errno); 141 (void) fclose(fp); 142 return (saverr); 143 } 144 (void) fclose(fp); 145 146 old__mbrtowc = __mbrtowc; 147 old__mbsinit = __mbsinit; 148 old__mbsnrtowcs = __mbsnrtowcs; 149 old__wcrtomb = __wcrtomb; 150 old__wcsnrtombs = __wcsnrtombs; 151 152 __mbrtowc = NULL; 153 __mbsinit = NULL; 154 __mbsnrtowcs = __mbsnrtowcs_std; 155 __wcrtomb = NULL; 156 __wcsnrtombs = __wcsnrtombs_std; 157 158 if (strcmp(rl->__encoding, "NONE") == 0) 159 ret = _none_init(rl); 160 else if (strcmp(rl->__encoding, "UTF-8") == 0) 161 ret = _UTF8_init(rl); 162 else if (strcmp(rl->__encoding, "EUC-CN") == 0) 163 ret = _EUC_CN_init(rl); 164 else if (strcmp(rl->__encoding, "EUC-JP") == 0) 165 ret = _EUC_JP_init(rl); 166 else if (strcmp(rl->__encoding, "EUC-KR") == 0) 167 ret = _EUC_KR_init(rl); 168 else if (strcmp(rl->__encoding, "EUC-TW") == 0) 169 ret = _EUC_TW_init(rl); 170 else if (strcmp(rl->__encoding, "GB18030") == 0) 171 ret = _GB18030_init(rl); 172 else if (strcmp(rl->__encoding, "GB2312") == 0) 173 ret = _GB2312_init(rl); 174 else if (strcmp(rl->__encoding, "GBK") == 0) 175 ret = _GBK_init(rl); 176 else if (strcmp(rl->__encoding, "BIG5") == 0) 177 ret = _BIG5_init(rl); 178 else if (strcmp(rl->__encoding, "MSKanji") == 0) 179 ret = _MSKanji_init(rl); 180 else 181 ret = EINVAL; 182 183 if (ret == 0) { 184 if (CachedRuneLocale != NULL) { 185 free(CachedRuneLocale); 186 } 187 CachedRuneLocale = _CurrentRuneLocale; 188 Cached__mbrtowc = __mbrtowc; 189 Cached__mbsinit = __mbsinit; 190 Cached__mbsnrtowcs = __mbsnrtowcs; 191 Cached__wcrtomb = __wcrtomb; 192 Cached__wcsnrtombs = __wcsnrtombs; 193 (void) strcpy(ctype_encoding, encoding); 194 195 /* 196 * We need to overwrite the _ctype array. This requires 197 * some finagling. This is because references to it may 198 * have been baked into applications. 199 * 200 * Note that it is interesting that toupper/tolower only 201 * produce defined results when the input is representable 202 * as a byte. 203 */ 204 205 /* 206 * The top half is the type mask array. Because we 207 * want to support both legacy Solaris code (which have 208 * mask valeus baked in to them), and we want to be able 209 * to import locale files from other sources (FreeBSD) 210 * which probably uses different masks, we have to perform 211 * a conversion here. Ugh. Note that the _CTYPE definitions 212 * we use from FreeBSD are richer than the Solaris legacy. 213 * 214 * We have to cope with these limitations though, because the 215 * inadequate Solaris definitions were baked into binaries. 216 */ 217 for (int i = 0; i < _CACHED_RUNES; i++) { 218 /* ctype can only encode the lower 8 bits. */ 219 __ctype[i+1] = rl->__runetype[i] & 0xff; 220 __ctype_mask[i] = rl->__runetype[i]; 221 } 222 223 /* The bottom half is the toupper/lower array */ 224 for (int i = 0; i < _CACHED_RUNES; i++) { 225 __ctype[258 + i] = i; 226 if (rl->__mapupper[i] && rl->__mapupper[i] != i) 227 __ctype[258+i] = rl->__mapupper[i]; 228 if (rl->__maplower[i] && rl->__maplower[i] != i) 229 __ctype[258+i] = rl->__maplower[i]; 230 231 /* Don't forget these annoyances either! */ 232 __trans_upper[i] = rl->__mapupper[i]; 233 __trans_lower[i] = rl->__maplower[i]; 234 } 235 236 /* 237 * Note that we expect the init code will have populated 238 * the CSWIDTH array (__ctype[514-520]) properly. 239 */ 240 } else { 241 __mbrtowc = old__mbrtowc; 242 __mbsinit = old__mbsinit; 243 __mbsnrtowcs = old__mbsnrtowcs; 244 __wcrtomb = old__wcrtomb; 245 __wcsnrtombs = old__wcsnrtombs; 246 free(rl); 247 } 248 249 return (ret); 250 } 251 252 int 253 __wrap_setrunelocale(const char *locale) 254 { 255 int ret = __setrunelocale(locale); 256 257 if (ret != 0) { 258 errno = ret; 259 return (_LDP_ERROR); 260 } 261 return (_LDP_LOADED); 262 } 263 264 void 265 __set_thread_rune_locale(locale_t loc) 266 { 267 268 if (loc == NULL) { 269 _ThreadRuneLocale = &_DefaultRuneLocale; 270 } else { 271 _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes; 272 } 273 } 274 275 void * 276 __ctype_load(const char *locale, locale_t unused) 277 { 278 struct xlocale_ctype *l; 279 280 l = calloc(sizeof(struct xlocale_ctype), 1); 281 /* XXX */ 282 283 return (l); 284 }