1 /*
   2  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Paul Borman at Krystal Technologies.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33 
  34 #include "lint.h"
  35 #include "file64.h"
  36 #include <errno.h>
  37 #include <limits.h>
  38 #include <string.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <unistd.h>
  42 #include <wchar.h>
  43 #include "runetype.h"
  44 #include "ldpart.h"
  45 #include "mblocal.h"
  46 #include "setlocale.h"
  47 #include "_ctype.h"
  48 #include "../i18n/_locale.h"
  49 
  50 extern _RuneLocale      *_Read_RuneMagi(FILE *);
  51 extern unsigned char    __ctype_C[];
  52 
  53 static int              __setrunelocale(const char *);
  54 
  55 static int
  56 __setrunelocale(const char *encoding)
  57 {
  58         FILE *fp;
  59         char name[PATH_MAX];
  60         _RuneLocale *rl;
  61         int saverr, ret;
  62         size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  63             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  64         size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  65             mbstate_t *_RESTRICT_KYWD);
  66         int (*old__mbsinit)(const mbstate_t *);
  67         size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  68             const char **_RESTRICT_KYWD, size_t, size_t,
  69             mbstate_t *_RESTRICT_KYWD);
  70         size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
  71             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  72             mbstate_t *_RESTRICT_KYWD);
  73         static char ctype_encoding[ENCODING_LEN + 1];
  74         static _RuneLocale *CachedRuneLocale;
  75         static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  76             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  77         static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  78             mbstate_t *_RESTRICT_KYWD);
  79         static int (*Cached__mbsinit)(const mbstate_t *);
  80         static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  81             const char **_RESTRICT_KYWD, size_t, size_t,
  82             mbstate_t *_RESTRICT_KYWD);
  83         static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
  84             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  85             mbstate_t *_RESTRICT_KYWD);
  86 
  87         /*
  88          * The "C" and "POSIX" locale are always here.
  89          */
  90         if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
  91                 int i;
  92 
  93                 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
  94 
  95                 for (i = 0; i < _CACHED_RUNES; i++) {
  96                         __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
  97                         __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
  98                         __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
  99                 }
 100 
 101                 (void) _none_init(&_DefaultRuneLocale);
 102                 return (0);
 103         }
 104 
 105         /*
 106          * If the locale name is the same as our cache, use the cache.
 107          */
 108         if (CachedRuneLocale != NULL &&
 109             strcmp(encoding, ctype_encoding) == 0) {
 110                 _CurrentRuneLocale = CachedRuneLocale;
 111                 __mbrtowc = Cached__mbrtowc;
 112                 __mbsinit = Cached__mbsinit;
 113                 __mbsnrtowcs = Cached__mbsnrtowcs;
 114                 __wcrtomb = Cached__wcrtomb;
 115                 __wcsnrtombs = Cached__wcsnrtombs;
 116                 return (0);
 117         }
 118 
 119         /*
 120          * Slurp the locale file into the cache.
 121          */
 122 
 123         (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
 124             _PathLocale, encoding);
 125 
 126         if ((fp = fopen(name, "r")) == NULL)
 127                 return (errno == 0 ? ENOENT : errno);
 128 
 129         if ((rl = _Read_RuneMagi(fp)) == NULL) {
 130                 saverr = (errno == 0 ? EINVAL : errno);
 131                 (void) fclose(fp);
 132                 return (saverr);
 133         }
 134         (void) fclose(fp);
 135 
 136         old__mbrtowc = __mbrtowc;
 137         old__mbsinit = __mbsinit;
 138         old__mbsnrtowcs = __mbsnrtowcs;
 139         old__wcrtomb = __wcrtomb;
 140         old__wcsnrtombs = __wcsnrtombs;
 141 
 142         __mbrtowc = NULL;
 143         __mbsinit = NULL;
 144         __mbsnrtowcs = __mbsnrtowcs_std;
 145         __wcrtomb = NULL;
 146         __wcsnrtombs = __wcsnrtombs_std;
 147 
 148         if (strcmp(rl->__encoding, "NONE") == 0)
 149                 ret = _none_init(rl);
 150         else if (strcmp(rl->__encoding, "UTF-8") == 0)
 151                 ret = _UTF8_init(rl);
 152         else if (strcmp(rl->__encoding, "EUC-CN") == 0)
 153                 ret = _EUC_CN_init(rl);
 154         else if (strcmp(rl->__encoding, "EUC-JP") == 0)
 155                 ret = _EUC_JP_init(rl);
 156         else if (strcmp(rl->__encoding, "EUC-KR") == 0)
 157                 ret = _EUC_KR_init(rl);
 158         else if (strcmp(rl->__encoding, "EUC-TW") == 0)
 159                 ret = _EUC_TW_init(rl);
 160         else if (strcmp(rl->__encoding, "GB18030") == 0)
 161                 ret = _GB18030_init(rl);
 162         else if (strcmp(rl->__encoding, "GB2312") == 0)
 163                 ret = _GB2312_init(rl);
 164         else if (strcmp(rl->__encoding, "GBK") == 0)
 165                 ret = _GBK_init(rl);
 166         else if (strcmp(rl->__encoding, "BIG5") == 0)
 167                 ret = _BIG5_init(rl);
 168         else if (strcmp(rl->__encoding, "MSKanji") == 0)
 169                 ret = _MSKanji_init(rl);
 170         else
 171                 ret = EINVAL;
 172 
 173         if (ret == 0) {
 174                 if (CachedRuneLocale != NULL) {
 175                         free(CachedRuneLocale);
 176                 }
 177                 CachedRuneLocale = _CurrentRuneLocale;
 178                 Cached__mbrtowc = __mbrtowc;
 179                 Cached__mbsinit = __mbsinit;
 180                 Cached__mbsnrtowcs = __mbsnrtowcs;
 181                 Cached__wcrtomb = __wcrtomb;
 182                 Cached__wcsnrtombs = __wcsnrtombs;
 183                 (void) strcpy(ctype_encoding, encoding);
 184 
 185                 /*
 186                  * We need to overwrite the _ctype array.  This requires
 187                  * some finagling.  This is because references to it may
 188                  * have been baked into applications.
 189                  *
 190                  * Note that it is interesting that toupper/tolower only
 191                  * produce defined results when the input is representable
 192                  * as a byte.
 193                  */
 194 
 195                 /*
 196                  * The top half is the type mask array.  Because we
 197                  * want to support both legacy Solaris code (which have
 198                  * mask valeus baked in to them), and we want to be able
 199                  * to import locale files from other sources (FreeBSD)
 200                  * which probably uses different masks, we have to perform
 201                  * a conversion here.  Ugh.  Note that the _CTYPE definitions
 202                  * we use from FreeBSD are richer than the Solaris legacy.
 203                  *
 204                  * We have to cope with these limitations though, because the
 205                  * inadequate Solaris definitions were baked into binaries.
 206                  */
 207                 for (int i = 0; i < _CACHED_RUNES; i++) {
 208                         /* ctype can only encode the lower 8 bits. */
 209                         __ctype[i+1] = rl->__runetype[i] & 0xff;
 210                         __ctype_mask[i] = rl->__runetype[i];
 211                 }
 212 
 213                 /* The bottom half is the toupper/lower array */
 214                 for (int i = 0; i < _CACHED_RUNES; i++) {
 215                         __ctype[258 + i] = i;
 216                         if (rl->__mapupper[i] && rl->__mapupper[i] != i)
 217                                 __ctype[258+i] = rl->__mapupper[i];
 218                         if (rl->__maplower[i] && rl->__maplower[i] != i)
 219                                 __ctype[258+i] = rl->__maplower[i];
 220 
 221                         /* Don't forget these annoyances either! */
 222                         __trans_upper[i] = rl->__mapupper[i];
 223                         __trans_lower[i] = rl->__maplower[i];
 224                 }
 225 
 226                 /*
 227                  * Note that we expect the init code will have populated
 228                  * the CSWIDTH array (__ctype[514-520]) properly.
 229                  */
 230         } else {
 231                 __mbrtowc = old__mbrtowc;
 232                 __mbsinit = old__mbsinit;
 233                 __mbsnrtowcs = old__mbsnrtowcs;
 234                 __wcrtomb = old__wcrtomb;
 235                 __wcsnrtombs = old__wcsnrtombs;
 236                 free(rl);
 237         }
 238 
 239         return (ret);
 240 }
 241 
 242 int
 243 __wrap_setrunelocale(const char *locale)
 244 {
 245         int ret = __setrunelocale(locale);
 246 
 247         if (ret != 0) {
 248                 errno = ret;
 249                 return (_LDP_ERROR);
 250         }
 251         return (_LDP_LOADED);
 252 }