1 /*
   2  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Paul Borman at Krystal Technologies.
   8  *
   9  * Copyright (c) 2011 The FreeBSD Foundation
  10  * All rights reserved.
  11  * Portions of this software were developed by David Chisnall
  12  * under sponsorship from the FreeBSD Foundation.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions
  16  * are met:
  17  * 1. Redistributions of source code must retain the above copyright
  18  *    notice, this list of conditions and the following disclaimer.
  19  * 2. Redistributions in binary form must reproduce the above copyright
  20  *    notice, this list of conditions and the following disclaimer in the
  21  *    documentation and/or other materials provided with the distribution.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38 
  39 #include "lint.h"
  40 #include "file64.h"
  41 #include <errno.h>
  42 #include <limits.h>
  43 #include <string.h>
  44 #include <stdio.h>
  45 #include <stdlib.h>
  46 #include <unistd.h>
  47 #include <wchar.h>
  48 #include "runetype.h"
  49 #include "ldpart.h"
  50 #include "mblocal.h"
  51 #include "setlocale.h"
  52 #include "_ctype.h"
  53 #include "../i18n/_locale.h"
  54 
  55 /*
  56  * A cached version of the runes for this thread.  Used by ctype.h
  57  */
  58 __thread const _RuneLocale *_ThreadRuneLocale;
  59 
  60 extern _RuneLocale      *_Read_RuneMagi(FILE *);
  61 extern unsigned char    __ctype_C[];
  62 
  63 static int              __setrunelocale(struct xlocale_ctype *, const char *);
  64 
  65 static int
  66 __setrunelocale(struct xlocale_ctype *l, const char *encoding)
  67 {
  68         FILE *fp;
  69         char name[PATH_MAX];
  70         _RuneLocale *rl;
  71         int saverr, ret;
  72         struct xlocale_ctype saved = *l; /* XXX DOUBLE NOT USED */
  73         size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  74             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  75         size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  76             mbstate_t *_RESTRICT_KYWD);
  77         int (*old__mbsinit)(const mbstate_t *);
  78         size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  79             const char **_RESTRICT_KYWD, size_t, size_t,
  80             mbstate_t *_RESTRICT_KYWD);
  81         size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
  82             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  83             mbstate_t *_RESTRICT_KYWD);
  84         static char ctype_encoding[ENCODING_LEN + 1];
  85         static _RuneLocale *CachedRuneLocale;
  86         static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  87             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  88         static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  89             mbstate_t *_RESTRICT_KYWD);
  90         static int (*Cached__mbsinit)(const mbstate_t *);
  91         static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  92             const char **_RESTRICT_KYWD, size_t, size_t,
  93             mbstate_t *_RESTRICT_KYWD);
  94         static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
  95             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  96             mbstate_t *_RESTRICT_KYWD);
  97 
  98         /*
  99          * The "C" and "POSIX" locale are always here.
 100          */
 101         if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
 102                 int i;
 103 
 104                 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
 105 
 106                 for (i = 0; i < _CACHED_RUNES; i++) {
 107                         __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
 108                         __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
 109                         __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
 110                 }
 111 
 112                 (void) _none_init(l, &_DefaultRuneLocale);
 113                 return (0);
 114         }
 115 
 116         /*
 117          * If the locale name is the same as our cache, use the cache.
 118          */
 119         if (CachedRuneLocale != NULL &&
 120             strcmp(encoding, ctype_encoding) == 0) {
 121                 l->runes = CachedRuneLocale;
 122                 l->__mbrtowc = Cached__mbrtowc;
 123                 l->__mbsinit = Cached__mbsinit;
 124                 l->__mbsnrtowcs = Cached__mbsnrtowcs;
 125                 l->__wcrtomb = Cached__wcrtomb;
 126                 l->__wcsnrtombs = Cached__wcsnrtombs;
 127                 return (0);
 128         }
 129 
 130         /*
 131          * Slurp the locale file into the cache.
 132          */
 133 
 134         (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
 135             _PathLocale, encoding);
 136 
 137         if ((fp = fopen(name, "r")) == NULL)
 138                 return (errno == 0 ? ENOENT : errno);
 139 
 140         if ((rl = _Read_RuneMagi(fp)) == NULL) {
 141                 saverr = (errno == 0 ? EINVAL : errno);
 142                 (void) fclose(fp);
 143                 return (saverr);
 144         }
 145         (void) fclose(fp);
 146 
 147         old__mbrtowc = __mbrtowc;
 148         old__mbsinit = __mbsinit;
 149         old__mbsnrtowcs = __mbsnrtowcs;
 150         old__wcrtomb = __wcrtomb;
 151         old__wcsnrtombs = __wcsnrtombs;
 152 
 153         l->__mbrtowc = NULL;
 154         l->__mbsinit = NULL;
 155         l->__mbsnrtowcs = __mbsnrtowcs_std;
 156         l->__wcrtomb = NULL;
 157         l->__wcsnrtombs = __wcsnrtombs_std;
 158 
 159         if (strcmp(rl->__encoding, "NONE") == 0)
 160                 ret = _none_init(l, rl);
 161         else if (strcmp(rl->__encoding, "UTF-8") == 0)
 162                 ret = _UTF8_init(rl);
 163         else if (strcmp(rl->__encoding, "EUC-CN") == 0)
 164                 ret = _EUC_CN_init(rl);
 165         else if (strcmp(rl->__encoding, "EUC-JP") == 0)
 166                 ret = _EUC_JP_init(rl);
 167         else if (strcmp(rl->__encoding, "EUC-KR") == 0)
 168                 ret = _EUC_KR_init(rl);
 169         else if (strcmp(rl->__encoding, "EUC-TW") == 0)
 170                 ret = _EUC_TW_init(rl);
 171         else if (strcmp(rl->__encoding, "GB18030") == 0)
 172                 ret = _GB18030_init(rl);
 173         else if (strcmp(rl->__encoding, "GB2312") == 0)
 174                 ret = _GB2312_init(rl);
 175         else if (strcmp(rl->__encoding, "GBK") == 0)
 176                 ret = _GBK_init(rl);
 177         else if (strcmp(rl->__encoding, "BIG5") == 0)
 178                 ret = _BIG5_init(rl);
 179         else if (strcmp(rl->__encoding, "MSKanji") == 0)
 180                 ret = _MSKanji_init(rl);
 181         else
 182                 ret = EINVAL;
 183 
 184         if (ret == 0) {
 185                 if (CachedRuneLocale != NULL) {
 186                         free(CachedRuneLocale);
 187                 }
 188                 CachedRuneLocale = l->runes;
 189                 Cached__mbrtowc = l->__mbrtowc;
 190                 Cached__mbsinit = l->__mbsinit;
 191                 Cached__mbsnrtowcs = l->__mbsnrtowcs;
 192                 Cached__wcrtomb = l->__wcrtomb;
 193                 Cached__wcsnrtombs = l->__wcsnrtombs;
 194                 (void) strcpy(ctype_encoding, encoding);
 195 
 196                 /*
 197                  * We need to overwrite the _ctype array.  This requires
 198                  * some finagling.  This is because references to it may
 199                  * have been baked into applications.
 200                  *
 201                  * Note that it is interesting that toupper/tolower only
 202                  * produce defined results when the input is representable
 203                  * as a byte.
 204                  */
 205 
 206                 /*
 207                  * The top half is the type mask array.  Because we
 208                  * want to support both legacy Solaris code (which have
 209                  * mask valeus baked in to them), and we want to be able
 210                  * to import locale files from other sources (FreeBSD)
 211                  * which probably uses different masks, we have to perform
 212                  * a conversion here.  Ugh.  Note that the _CTYPE definitions
 213                  * we use from FreeBSD are richer than the Solaris legacy.
 214                  *
 215                  * We have to cope with these limitations though, because the
 216                  * inadequate Solaris definitions were baked into binaries.
 217                  */
 218                 for (int i = 0; i < _CACHED_RUNES; i++) {
 219                         /* ctype can only encode the lower 8 bits. */
 220                         __ctype[i+1] = rl->__runetype[i] & 0xff;
 221                         __ctype_mask[i] = rl->__runetype[i];
 222                 }
 223 
 224                 /* The bottom half is the toupper/lower array */
 225                 for (int i = 0; i < _CACHED_RUNES; i++) {
 226                         __ctype[258 + i] = i;
 227                         if (rl->__mapupper[i] && rl->__mapupper[i] != i)
 228                                 __ctype[258+i] = rl->__mapupper[i];
 229                         if (rl->__maplower[i] && rl->__maplower[i] != i)
 230                                 __ctype[258+i] = rl->__maplower[i];
 231 
 232                         /* Don't forget these annoyances either! */
 233                         __trans_upper[i] = rl->__mapupper[i];
 234                         __trans_lower[i] = rl->__maplower[i];
 235                 }
 236 
 237                 /*
 238                  * Note that we expect the init code will have populated
 239                  * the CSWIDTH array (__ctype[514-520]) properly.
 240                  */
 241         } else {
 242                 l->__mbrtowc = old__mbrtowc;
 243                 l->__mbsinit = old__mbsinit;
 244                 l->__mbsnrtowcs = old__mbsnrtowcs;
 245                 l->__wcrtomb = old__wcrtomb;
 246                 l->__wcsnrtombs = old__wcsnrtombs;
 247                 free(rl);
 248         }
 249 
 250         return (ret);
 251 }
 252 
 253 int
 254 __wrap_setrunelocale(const char *locale)
 255 {
 256         int ret = __setrunelocale(&__xlocale_global_ctype, locale);
 257 
 258         if (ret != 0) {
 259                 errno = ret;
 260                 return (_LDP_ERROR);
 261         }
 262         /* XXX */
 263 //      __mb_cur_max = __xlocale_global_ctype.__mb_cur_max;
 264 //      __mb_sb_limit = __xlocale_global_ctype.__mb_sb_limit;
 265         _CurrentRuneLocale = __xlocale_global_ctype.runes;
 266         return (_LDP_LOADED);
 267 }
 268 
 269 void
 270 __set_thread_rune_locale(locale_t loc)
 271 {
 272 
 273         if (loc == NULL) {
 274                 _ThreadRuneLocale = &_DefaultRuneLocale;
 275         } else {
 276                 _ThreadRuneLocale = XLOCALE_CTYPE(loc)->runes;
 277         }
 278 }
 279 
 280 void *
 281 __ctype_load(const char *locale, locale_t unused)
 282 {
 283         struct xlocale_ctype *l;
 284 
 285         l = calloc(sizeof(struct xlocale_ctype), 1);
 286         /* XXX */
 287 
 288         return (l);
 289 }