1 /* 2 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 3 * Copyright (c) 1993 4 * The Regents of the University of California. All rights reserved. 5 * 6 * This code is derived from software contributed to Berkeley by 7 * Paul Borman at Krystal Technologies. 8 * 9 * Copyright (c) 2011 The FreeBSD Foundation 10 * All rights reserved. 11 * Portions of this software were developed by David Chisnall 12 * under sponsorship from the FreeBSD Foundation. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include "lint.h" 40 #include "file64.h" 41 #include <errno.h> 42 #include <limits.h> 43 #include <string.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <unistd.h> 47 #include <wchar.h> 48 #include "runetype.h" 49 #include "ldpart.h" 50 #include "mblocal.h" 51 #include "setlocale.h" 52 #include "_ctype.h" 53 #include "../i18n/_locale.h" 54 55 /* 56 * A cached version of the runes for this thread. Used by ctype.h 57 */ 58 __thread const _RuneLocale *_ThreadRuneLocale; 59 60 extern _RuneLocale *_Read_RuneMagi(FILE *); 61 extern unsigned char __ctype_C[]; 62 63 static int __setrunelocale(struct xlocale_ctype *, const char *); 64 65 static int 66 __setrunelocale(struct xlocale_ctype *l, const char *encoding) 67 { 68 FILE *fp; 69 char name[PATH_MAX]; 70 _RuneLocale *rl; 71 int saverr, ret; 72 struct xlocale_ctype saved = *l; /* XXX DOUBLE NOT USED */ 73 size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD, 74 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 75 size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 76 mbstate_t *_RESTRICT_KYWD); 77 int (*old__mbsinit)(const mbstate_t *); 78 size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 79 const char **_RESTRICT_KYWD, size_t, size_t, 80 mbstate_t *_RESTRICT_KYWD); 81 size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD, 82 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 83 mbstate_t *_RESTRICT_KYWD); 84 static char ctype_encoding[ENCODING_LEN + 1]; 85 static _RuneLocale *CachedRuneLocale; 86 static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD, 87 const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD); 88 static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t, 89 mbstate_t *_RESTRICT_KYWD); 90 static int (*Cached__mbsinit)(const mbstate_t *); 91 static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD, 92 const char **_RESTRICT_KYWD, size_t, size_t, 93 mbstate_t *_RESTRICT_KYWD); 94 static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD, 95 const wchar_t **_RESTRICT_KYWD, size_t, size_t, 96 mbstate_t *_RESTRICT_KYWD); 97 98 /* 99 * The "C" and "POSIX" locale are always here. 100 */ 101 if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) { 102 int i; 103 104 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL); 105 106 for (i = 0; i < _CACHED_RUNES; i++) { 107 __ctype_mask[i] = _DefaultRuneLocale.__runetype[i]; 108 __trans_upper[i] = _DefaultRuneLocale.__mapupper[i]; 109 __trans_lower[i] = _DefaultRuneLocale.__maplower[i]; 110 } 111 112 (void) _none_init(l, &_DefaultRuneLocale); 113 return (0); 114 } 115 116 /* 117 * If the locale name is the same as our cache, use the cache. 118 */ 119 if (CachedRuneLocale != NULL && 120 strcmp(encoding, ctype_encoding) == 0) { 121 l->runes = CachedRuneLocale; 122 l->__mbrtowc = Cached__mbrtowc; 123 l->__mbsinit = Cached__mbsinit; 124 l->__mbsnrtowcs = Cached__mbsnrtowcs; 125 l->__wcrtomb = Cached__wcrtomb; 126 l->__wcsnrtombs = Cached__wcsnrtombs; 127 return (0); 128 } 129 130 /* 131 * Slurp the locale file into the cache. 132 */ 133 134 (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA", 135 _PathLocale, encoding); 136 137 if ((fp = fopen(name, "r")) == NULL) 138 return (errno == 0 ? ENOENT : errno); 139 140 if ((rl = _Read_RuneMagi(fp)) == NULL) { 141 saverr = (errno == 0 ? EINVAL : errno); 142 (void) fclose(fp); 143 return (saverr); 144 } 145 (void) fclose(fp); 146 147 old__mbrtowc = __mbrtowc; 148 old__mbsinit = __mbsinit; 149 old__mbsnrtowcs = __mbsnrtowcs; 150 old__wcrtomb = __wcrtomb; 151 old__wcsnrtombs = __wcsnrtombs; 152 153 l->__mbrtowc = NULL; 154 l->__mbsinit = NULL; 155 l->__mbsnrtowcs = __mbsnrtowcs_std; 156 l->__wcrtomb = NULL; 157 l->__wcsnrtombs = __wcsnrtombs_std; 158 159 if (strcmp(rl->__encoding, "NONE") == 0) 160 ret = _none_init(l, rl); 161 else if (strcmp(rl->__encoding, "UTF-8") == 0) 162 ret = _UTF8_init(rl); 163 else if (strcmp(rl->__encoding, "EUC-CN") == 0) 164 ret = _EUC_CN_init(rl); 165 else if (strcmp(rl->__encoding, "EUC-JP") == 0) 166 ret = _EUC_JP_init(rl); 167 else if (strcmp(rl->__encoding, "EUC-KR") == 0) 168 ret = _EUC_KR_init(rl); 169 else if (strcmp(rl->__encoding, "EUC-TW") == 0) 170 ret = _EUC_TW_init(rl); 171 else if (strcmp(rl->__encoding, "GB18030") == 0) 172 ret = _GB18030_init(rl); 173 else if (strcmp(rl->__encoding, "GB2312") == 0) 174 ret = _GB2312_init(rl); 175 else if (strcmp(rl->__encoding, "GBK") == 0) 176 ret = _GBK_init(rl); 177 else if (strcmp(rl->__encoding, "BIG5") == 0) 178 ret = _BIG5_init(rl); 179 else if (strcmp(rl->__encoding, "MSKanji") == 0) 180 ret = _MSKanji_init(rl); 181 else 182 ret = EINVAL; 183 184 if (ret == 0) { 185 if (CachedRuneLocale != NULL) { 186 free(CachedRuneLocale); 187 } 188 CachedRuneLocale = l->runes; 189 Cached__mbrtowc = l->__mbrtowc; 190 Cached__mbsinit = l->__mbsinit; 191 Cached__mbsnrtowcs = l->__mbsnrtowcs; 192 Cached__wcrtomb = l->__wcrtomb; 193 Cached__wcsnrtombs = l->__wcsnrtombs; 194 (void) strcpy(ctype_encoding, encoding); 195 196 /* 197 * We need to overwrite the _ctype array. This requires 198 * some finagling. This is because references to it may 199 * have been baked into applications. 200 * 201 * Note that it is interesting that toupper/tolower only 202 * produce defined results when the input is representable 203 * as a byte. 204 */ 205 206 /* 207 * The top half is the type mask array. Because we 208 * want to support both legacy Solaris code (which have 209 * mask valeus baked in to them), and we want to be able 210 * to import locale files from other sources (FreeBSD) 211 * which probably uses different masks, we have to perform 212 * a conversion here. Ugh. Note that the _CTYPE definitions 213 * we use from FreeBSD are richer than the Solaris legacy. 214 * 215 * We have to cope with these limitations though, because the 216 * inadequate Solaris definitions were baked into binaries. 217 */ 218 for (int i = 0; i < _CACHED_RUNES; i++) { 219 /* ctype can only encode the lower 8 bits. */ 220 __ctype[i+1] = rl->__runetype[i] & 0xff; 221 __ctype_mask[i] = rl->__runetype[i]; 222 } 223 224 /* The bottom half is the toupper/lower array */ 225 for (int i = 0; i < _CACHED_RUNES; i++) { 226 __ctype[258 + i] = i; 227 if (rl->__mapupper[i] && rl->__mapupper[i] != i) 228 __ctype[258+i] = rl->__mapupper[i]; 229 if (rl->__maplower[i] && rl->__maplower[i] != i) 230 __ctype[258+i] = rl->__maplower[i]; 231 232 /* Don't forget these annoyances either! */ 233 __trans_upper[i] = rl->__mapupper[i]; 234 __trans_lower[i] = rl->__maplower[i]; 235 } 236 237 /* 238 * Note that we expect the init code will have populated 239 * the CSWIDTH array (__ctype[514-520]) properly. 240 */ 241 } else { 242 l->__mbrtowc = old__mbrtowc; 243 l->__mbsinit = old__mbsinit; 244 l->__mbsnrtowcs = old__mbsnrtowcs; 245 l->__wcrtomb = old__wcrtomb; 246 l->__wcsnrtombs = old__wcsnrtombs; 247 free(rl); 248 } 249 250 return (ret); 251 } 252 253 int 254 __wrap_setrunelocale(const char *locale) 255 { 256 int ret = __setrunelocale(&__xlocale_global_ctype, locale); 257 258 if (ret != 0) { 259 errno = ret; 260 return (_LDP_ERROR); 261 } 262 /* XXX */ 263 // __mb_cur_max = __xlocale_global_ctype.__mb_cur_max; 264 // __mb_sb_limit = __xlocale_global_ctype.__mb_sb_limit; 265 _CurrentRuneLocale = __xlocale_global_ctype.runes; 266 return (_LDP_LOADED); 267 } 268 269 void 270 __set_thread_rune_locale(locale_t loc) 271 { 272 273 if (loc == NULL) { 274 _ThreadRuneLocale = &_DefaultRuneLocale; 275 } else { 276 _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes; 277 } 278 } 279 280 void * 281 __ctype_load(const char *locale, locale_t unused) 282 { 283 struct xlocale_ctype *l; 284 285 l = calloc(sizeof(struct xlocale_ctype), 1); 286 /* XXX */ 287 288 return (l); 289 }