1 /*
   2  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Paul Borman at Krystal Technologies.
   8  *
   9  * Copyright (c) 2011 The FreeBSD Foundation
  10  * All rights reserved.
  11  * Portions of this software were developed by David Chisnall
  12  * under sponsorship from the FreeBSD Foundation.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions
  16  * are met:
  17  * 1. Redistributions of source code must retain the above copyright
  18  *    notice, this list of conditions and the following disclaimer.
  19  * 2. Redistributions in binary form must reproduce the above copyright
  20  *    notice, this list of conditions and the following disclaimer in the
  21  *    documentation and/or other materials provided with the distribution.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38 
  39 #include "lint.h"
  40 #include "file64.h"
  41 #include <errno.h>
  42 #include <limits.h>
  43 #include <string.h>
  44 #include <stdio.h>
  45 #include <stdlib.h>
  46 #include <unistd.h>
  47 #include <wchar.h>
  48 #include "runetype.h"
  49 #include "ldpart.h"
  50 #include "mblocal.h"
  51 #include "setlocale.h"
  52 #include "_ctype.h"
  53 #include "../i18n/_locale.h"
  54 
/*
 * A cached version of the runes for this thread.  Used by ctype.h
 *
 * This pointer is assigned by __set_thread_rune_locale() later in this
 * file: it is pointed at _DefaultRuneLocale when that function is given a
 * NULL locale, and at the locale's ctype runes otherwise.
 */
__thread const _RuneLocale *_ThreadRuneLocale;

/*
 * Defined outside this file.  __setrunelocale() calls it on the opened
 * LCL_DATA stream and treats a NULL return as failure.
 */
extern _RuneLocale      *_Read_RuneMagi(FILE *);
/*
 * Defined outside this file.  Its first SZ_TOTAL bytes are copied over
 * __ctype when the "C" or "POSIX" locale is selected.
 */
extern unsigned char    __ctype_C[];

/* Forward declaration; the definition follows immediately below. */
static int              __setrunelocale(const char *);
  64 
  65 static int
  66 __setrunelocale(const char *encoding)
  67 {
  68         FILE *fp;
  69         char name[PATH_MAX];
  70         _RuneLocale *rl;
  71         int saverr, ret;
  72         size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  73             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  74         size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  75             mbstate_t *_RESTRICT_KYWD);
  76         int (*old__mbsinit)(const mbstate_t *);
  77         size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  78             const char **_RESTRICT_KYWD, size_t, size_t,
  79             mbstate_t *_RESTRICT_KYWD);
  80         size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
  81             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  82             mbstate_t *_RESTRICT_KYWD);
  83         static char ctype_encoding[ENCODING_LEN + 1];
  84         static _RuneLocale *CachedRuneLocale;
  85         static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  86             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  87         static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  88             mbstate_t *_RESTRICT_KYWD);
  89         static int (*Cached__mbsinit)(const mbstate_t *);
  90         static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  91             const char **_RESTRICT_KYWD, size_t, size_t,
  92             mbstate_t *_RESTRICT_KYWD);
  93         static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
  94             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  95             mbstate_t *_RESTRICT_KYWD);
  96 
  97         /*
  98          * The "C" and "POSIX" locale are always here.
  99          */
 100         if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
 101                 int i;
 102 
 103                 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
 104 
 105                 for (i = 0; i < _CACHED_RUNES; i++) {
 106                         __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
 107                         __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
 108                         __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
 109                 }
 110 
 111                 (void) _none_init(&_DefaultRuneLocale);
 112                 return (0);
 113         }
 114 
 115         /*
 116          * If the locale name is the same as our cache, use the cache.
 117          */
 118         if (CachedRuneLocale != NULL &&
 119             strcmp(encoding, ctype_encoding) == 0) {
 120                 _CurrentRuneLocale = CachedRuneLocale;
 121                 __mbrtowc = Cached__mbrtowc;
 122                 __mbsinit = Cached__mbsinit;
 123                 __mbsnrtowcs = Cached__mbsnrtowcs;
 124                 __wcrtomb = Cached__wcrtomb;
 125                 __wcsnrtombs = Cached__wcsnrtombs;
 126                 return (0);
 127         }
 128 
 129         /*
 130          * Slurp the locale file into the cache.
 131          */
 132 
 133         (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
 134             _PathLocale, encoding);
 135 
 136         if ((fp = fopen(name, "r")) == NULL)
 137                 return (errno == 0 ? ENOENT : errno);
 138 
 139         if ((rl = _Read_RuneMagi(fp)) == NULL) {
 140                 saverr = (errno == 0 ? EINVAL : errno);
 141                 (void) fclose(fp);
 142                 return (saverr);
 143         }
 144         (void) fclose(fp);
 145 
 146         old__mbrtowc = __mbrtowc;
 147         old__mbsinit = __mbsinit;
 148         old__mbsnrtowcs = __mbsnrtowcs;
 149         old__wcrtomb = __wcrtomb;
 150         old__wcsnrtombs = __wcsnrtombs;
 151 
 152         __mbrtowc = NULL;
 153         __mbsinit = NULL;
 154         __mbsnrtowcs = __mbsnrtowcs_std;
 155         __wcrtomb = NULL;
 156         __wcsnrtombs = __wcsnrtombs_std;
 157 
 158         if (strcmp(rl->__encoding, "NONE") == 0)
 159                 ret = _none_init(rl);
 160         else if (strcmp(rl->__encoding, "UTF-8") == 0)
 161                 ret = _UTF8_init(rl);
 162         else if (strcmp(rl->__encoding, "EUC-CN") == 0)
 163                 ret = _EUC_CN_init(rl);
 164         else if (strcmp(rl->__encoding, "EUC-JP") == 0)
 165                 ret = _EUC_JP_init(rl);
 166         else if (strcmp(rl->__encoding, "EUC-KR") == 0)
 167                 ret = _EUC_KR_init(rl);
 168         else if (strcmp(rl->__encoding, "EUC-TW") == 0)
 169                 ret = _EUC_TW_init(rl);
 170         else if (strcmp(rl->__encoding, "GB18030") == 0)
 171                 ret = _GB18030_init(rl);
 172         else if (strcmp(rl->__encoding, "GB2312") == 0)
 173                 ret = _GB2312_init(rl);
 174         else if (strcmp(rl->__encoding, "GBK") == 0)
 175                 ret = _GBK_init(rl);
 176         else if (strcmp(rl->__encoding, "BIG5") == 0)
 177                 ret = _BIG5_init(rl);
 178         else if (strcmp(rl->__encoding, "MSKanji") == 0)
 179                 ret = _MSKanji_init(rl);
 180         else
 181                 ret = EINVAL;
 182 
 183         if (ret == 0) {
 184                 if (CachedRuneLocale != NULL) {
 185                         free(CachedRuneLocale);
 186                 }
 187                 CachedRuneLocale = _CurrentRuneLocale;
 188                 Cached__mbrtowc = __mbrtowc;
 189                 Cached__mbsinit = __mbsinit;
 190                 Cached__mbsnrtowcs = __mbsnrtowcs;
 191                 Cached__wcrtomb = __wcrtomb;
 192                 Cached__wcsnrtombs = __wcsnrtombs;
 193                 (void) strcpy(ctype_encoding, encoding);
 194 
 195                 /*
 196                  * We need to overwrite the _ctype array.  This requires
 197                  * some finagling.  This is because references to it may
 198                  * have been baked into applications.
 199                  *
 200                  * Note that it is interesting that toupper/tolower only
 201                  * produce defined results when the input is representable
 202                  * as a byte.
 203                  */
 204 
 205                 /*
 206                  * The top half is the type mask array.  Because we
 207                  * want to support both legacy Solaris code (which have
 208                  * mask valeus baked in to them), and we want to be able
 209                  * to import locale files from other sources (FreeBSD)
 210                  * which probably uses different masks, we have to perform
 211                  * a conversion here.  Ugh.  Note that the _CTYPE definitions
 212                  * we use from FreeBSD are richer than the Solaris legacy.
 213                  *
 214                  * We have to cope with these limitations though, because the
 215                  * inadequate Solaris definitions were baked into binaries.
 216                  */
 217                 for (int i = 0; i < _CACHED_RUNES; i++) {
 218                         /* ctype can only encode the lower 8 bits. */
 219                         __ctype[i+1] = rl->__runetype[i] & 0xff;
 220                         __ctype_mask[i] = rl->__runetype[i];
 221                 }
 222 
 223                 /* The bottom half is the toupper/lower array */
 224                 for (int i = 0; i < _CACHED_RUNES; i++) {
 225                         __ctype[258 + i] = i;
 226                         if (rl->__mapupper[i] && rl->__mapupper[i] != i)
 227                                 __ctype[258+i] = rl->__mapupper[i];
 228                         if (rl->__maplower[i] && rl->__maplower[i] != i)
 229                                 __ctype[258+i] = rl->__maplower[i];
 230 
 231                         /* Don't forget these annoyances either! */
 232                         __trans_upper[i] = rl->__mapupper[i];
 233                         __trans_lower[i] = rl->__maplower[i];
 234                 }
 235 
 236                 /*
 237                  * Note that we expect the init code will have populated
 238                  * the CSWIDTH array (__ctype[514-520]) properly.
 239                  */
 240         } else {
 241                 __mbrtowc = old__mbrtowc;
 242                 __mbsinit = old__mbsinit;
 243                 __mbsnrtowcs = old__mbsnrtowcs;
 244                 __wcrtomb = old__wcrtomb;
 245                 __wcsnrtombs = old__wcsnrtombs;
 246                 free(rl);
 247         }
 248 
 249         return (ret);
 250 }
 251 
 252 int
 253 __wrap_setrunelocale(const char *locale)
 254 {
 255         int ret = __setrunelocale(locale);
 256 
 257         if (ret != 0) {
 258                 errno = ret;
 259                 return (_LDP_ERROR);
 260         }
 261         return (_LDP_LOADED);
 262 }
 263 
 264 void
 265 __set_thread_rune_locale(locale_t loc)
 266 {
 267 
 268         if (loc == NULL) {
 269                 _ThreadRuneLocale = &_DefaultRuneLocale;
 270         } else {
 271                 _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes;
 272         }
 273 }
 274 
 275 void *
 276 __ctype_load(const char *locale, locale_t unused)
 277 {
 278         struct xlocale_ctype *l;
 279 
 280         l = calloc(sizeof(struct xlocale_ctype), 1);
 281         /* XXX */
 282 
 283         return (l);
 284 }