Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD
   1 /*

   2  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Paul Borman at Krystal Technologies.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND


  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33 
  34 #include "lint.h"
  35 #include "file64.h"
  36 #include <errno.h>
  37 #include <limits.h>
  38 #include <string.h>
  39 #include <stdio.h>
  40 #include <stdlib.h>
  41 #include <unistd.h>
  42 #include <wchar.h>
  43 #include "runetype.h"
  44 #include "ldpart.h"
  45 #include "mblocal.h"
  46 #include "setlocale.h"
  47 #include "_ctype.h"
  48 #include "../i18n/_locale.h"

  49 
/* Reads a rune locale from an already-open stream; NULL means it failed. */
extern _RuneLocale      *_Read_RuneMagi(FILE *);
/* Source table copied over __ctype when the "C"/"POSIX" locale is selected. */
extern unsigned char    __ctype_C[];

/* Worker behind __wrap_setrunelocale(); returns 0 or an errno value. */
static int              __setrunelocale(const char *);











  54 
  55 static int
  56 __setrunelocale(const char *encoding)
  57 {
  58         FILE *fp;
  59         char name[PATH_MAX];
  60         _RuneLocale *rl;
  61         int saverr, ret;
  62         size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  63             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  64         size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  65             mbstate_t *_RESTRICT_KYWD);
  66         int (*old__mbsinit)(const mbstate_t *);
  67         size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  68             const char **_RESTRICT_KYWD, size_t, size_t,
  69             mbstate_t *_RESTRICT_KYWD);
  70         size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
  71             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  72             mbstate_t *_RESTRICT_KYWD);
  73         static char ctype_encoding[ENCODING_LEN + 1];
  74         static _RuneLocale *CachedRuneLocale;
  75         static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  76             const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  77         static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  78             mbstate_t *_RESTRICT_KYWD);
  79         static int (*Cached__mbsinit)(const mbstate_t *);
  80         static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  81             const char **_RESTRICT_KYWD, size_t, size_t,
  82             mbstate_t *_RESTRICT_KYWD);
  83         static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
  84             const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  85             mbstate_t *_RESTRICT_KYWD);
  86 
  87         /*
  88          * The "C" and "POSIX" locale are always here.


  89          */
  90         if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
  91                 int i;















  92 
  93                 (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
  94 
  95                 for (i = 0; i < _CACHED_RUNES; i++) {
  96                         __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
  97                         __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
  98                         __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
  99                 }



 100 
 101                 (void) _none_init(&_DefaultRuneLocale);
 102                 return (0);
 103         }
 104 
 105         /*
 106          * If the locale name is the same as our cache, use the cache.
 107          */
 108         if (CachedRuneLocale != NULL &&
 109             strcmp(encoding, ctype_encoding) == 0) {
 110                 _CurrentRuneLocale = CachedRuneLocale;
 111                 __mbrtowc = Cached__mbrtowc;
 112                 __mbsinit = Cached__mbsinit;
 113                 __mbsnrtowcs = Cached__mbsnrtowcs;
 114                 __wcrtomb = Cached__wcrtomb;
 115                 __wcsnrtombs = Cached__wcsnrtombs;
 116                 return (0);
 117         }
 118 
 119         /*
 120          * Slurp the locale file into the cache.
 121          */
 122 
 123         (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
 124             _PathLocale, encoding);
 125 
 126         if ((fp = fopen(name, "r")) == NULL)
 127                 return (errno == 0 ? ENOENT : errno);
 128 
 129         if ((rl = _Read_RuneMagi(fp)) == NULL) {
 130                 saverr = (errno == 0 ? EINVAL : errno);
 131                 (void) fclose(fp);
 132                 return (saverr);
 133         }
 134         (void) fclose(fp);
 135 
 136         old__mbrtowc = __mbrtowc;
 137         old__mbsinit = __mbsinit;
 138         old__mbsnrtowcs = __mbsnrtowcs;
 139         old__wcrtomb = __wcrtomb;
 140         old__wcsnrtombs = __wcsnrtombs;



 141 
 142         __mbrtowc = NULL;
 143         __mbsinit = NULL;
 144         __mbsnrtowcs = __mbsnrtowcs_std;
 145         __wcrtomb = NULL;
 146         __wcsnrtombs = __wcsnrtombs_std;
 147 
 148         if (strcmp(rl->__encoding, "NONE") == 0)
 149                 ret = _none_init(rl);
 150         else if (strcmp(rl->__encoding, "UTF-8") == 0)
 151                 ret = _UTF8_init(rl);
 152         else if (strcmp(rl->__encoding, "EUC-CN") == 0)
 153                 ret = _EUC_CN_init(rl);
 154         else if (strcmp(rl->__encoding, "EUC-JP") == 0)
 155                 ret = _EUC_JP_init(rl);
 156         else if (strcmp(rl->__encoding, "EUC-KR") == 0)
 157                 ret = _EUC_KR_init(rl);
 158         else if (strcmp(rl->__encoding, "EUC-TW") == 0)
 159                 ret = _EUC_TW_init(rl);
 160         else if (strcmp(rl->__encoding, "GB18030") == 0)
 161                 ret = _GB18030_init(rl);
 162         else if (strcmp(rl->__encoding, "GB2312") == 0)
 163                 ret = _GB2312_init(rl);
 164         else if (strcmp(rl->__encoding, "GBK") == 0)
 165                 ret = _GBK_init(rl);
 166         else if (strcmp(rl->__encoding, "BIG5") == 0)
 167                 ret = _BIG5_init(rl);
 168         else if (strcmp(rl->__encoding, "MSKanji") == 0)
 169                 ret = _MSKanji_init(rl);
 170         else
 171                 ret = EINVAL;
 172 
 173         if (ret == 0) {
 174                 if (CachedRuneLocale != NULL) {
 175                         free(CachedRuneLocale);
 176                 }
 177                 CachedRuneLocale = _CurrentRuneLocale;
 178                 Cached__mbrtowc = __mbrtowc;
 179                 Cached__mbsinit = __mbsinit;
 180                 Cached__mbsnrtowcs = __mbsnrtowcs;
 181                 Cached__wcrtomb = __wcrtomb;
 182                 Cached__wcsnrtombs = __wcsnrtombs;
 183                 (void) strcpy(ctype_encoding, encoding);
 184 
 185                 /*
 186                  * We need to overwrite the _ctype array.  This requires
 187                  * some finagling.  This is because references to it may
 188                  * have been baked into applications.
 189                  *
 190                  * Note that it is interesting that toupper/tolower only
 191                  * produce defined results when the input is representable
 192                  * as a byte.
 193                  */
 194 
 195                 /*
 196                  * The top half is the type mask array.  Because we
 197                  * want to support both legacy Solaris code (which have
 198                  * mask valeus baked in to them), and we want to be able
 199                  * to import locale files from other sources (FreeBSD)
 200                  * which probably uses different masks, we have to perform
 201                  * a conversion here.  Ugh.  Note that the _CTYPE definitions
 202                  * we use from FreeBSD are richer than the Solaris legacy.
 203                  *
 204                  * We have to cope with these limitations though, because the
 205                  * inadequate Solaris definitions were baked into binaries.
 206                  */
 207                 for (int i = 0; i < _CACHED_RUNES; i++) {
 208                         /* ctype can only encode the lower 8 bits. */
 209                         __ctype[i+1] = rl->__runetype[i] & 0xff;
 210                         __ctype_mask[i] = rl->__runetype[i];
 211                 }
 212 
 213                 /* The bottom half is the toupper/lower array */
 214                 for (int i = 0; i < _CACHED_RUNES; i++) {
 215                         __ctype[258 + i] = i;
 216                         if (rl->__mapupper[i] && rl->__mapupper[i] != i)
 217                                 __ctype[258+i] = rl->__mapupper[i];
 218                         if (rl->__maplower[i] && rl->__maplower[i] != i)
 219                                 __ctype[258+i] = rl->__maplower[i];
 220 
 221                         /* Don't forget these annoyances either! */
 222                         __trans_upper[i] = rl->__mapupper[i];
 223                         __trans_lower[i] = rl->__maplower[i];
 224                 }
 225 
 226                 /*
 227                  * Note that we expect the init code will have populated
 228                  * the CSWIDTH array (__ctype[514-520]) properly.
 229                  */
 230         } else {
 231                 __mbrtowc = old__mbrtowc;
 232                 __mbsinit = old__mbsinit;
 233                 __mbsnrtowcs = old__mbsnrtowcs;
 234                 __wcrtomb = old__wcrtomb;
 235                 __wcsnrtombs = old__wcsnrtombs;
 236                 free(rl);
 237         }
 238 
 239         return (ret);
 240 }
 241 
 242 int
 243 __wrap_setrunelocale(const char *locale)
 244 {
 245         int ret = __setrunelocale(locale);
 246 
 247         if (ret != 0) {
 248                 errno = ret;
 249                 return (_LDP_ERROR);
 250         }
 251         return (_LDP_LOADED);
 252 }
   1 /*
   2  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   3  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   4  * Copyright (c) 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Paul Borman at Krystal Technologies.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND


  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34 
  35 #include "lint.h"
  36 #include "file64.h"
  37 #include <errno.h>
  38 #include <limits.h>
  39 #include <string.h>
  40 #include <stdio.h>
  41 #include <stdlib.h>
  42 #include <unistd.h>
  43 #include <wchar.h>
  44 #include "runetype.h"
  45 #include "ldpart.h"
  46 #include "mblocal.h"
  47 #include "setlocale.h"
  48 #include "_ctype.h"
  49 #include "lctype.h"
  50 #include "localeimpl.h"
  51 
/* Reads the rune locale stored at the given path; NULL means it failed. */
extern _RuneLocale      *_Read_RuneMagi(const char *);

  53 
  54 struct lc_ctype lc_ctype_posix = {
  55         .lc_mbrtowc = __mbrtowc_ascii,
  56         .lc_mbsinit = __mbsinit_ascii,
  57         .lc_mbsnrtowcs = __mbsnrtowcs_ascii,
  58         .lc_wcrtomb = __wcrtomb_ascii,
  59         .lc_wcsnrtombs = __wcsnrtombs_ascii,
  60         .lc_is_ascii = 1,
  61         .lc_max_mblen = 1,
  62         .lc_trans_upper = _DefaultRuneLocale.__mapupper,
  63         .lc_trans_lower = _DefaultRuneLocale.__maplower,
  64         .lc_ctype_mask = _DefaultRuneLocale.__runetype,
  65 };
  66 
  67 struct locdata __posix_ctype_locdata = {
  68         .l_lname = "C",
  69         .l_data = { &lc_ctype_posix, &_DefaultRuneLocale }
  70 };



























  71 
  72 
  73 /*
  74  * Table of initializers for encodings.  When you add a new encoding type,
  75  * this table should be updated.
  76  */
  77 static struct {
  78         const char *e_name;
  79         void (*e_init)(struct lc_ctype *);
  80 } encodings[] = {
  81         { "NONE", _none_init },
  82         { "UTF-8",      _UTF8_init },
  83         { "EUC-CN",     _EUC_CN_init },
  84         { "EUC-JP",     _EUC_JP_init },
  85         { "EUC-KR",     _EUC_KR_init },
  86         { "EUC-TW",     _EUC_TW_init },
  87         { "GB18030",    _GB18030_init },
  88         { "GB2312",     _GB2312_init },
  89         { "GBK",        _GBK_init },
  90         { "BIG5",       _BIG5_init },
  91         { "MSKanji",    _MSKanji_init },
  92         { NULL,         NULL }
  93 };
  94 

  95 
  96 struct locdata *
  97 __lc_ctype_load(const char *name)
  98 {
  99         struct locdata *ldata;
 100         struct lc_ctype *lct;
 101         _RuneLocale *rl;
 102         int i;
 103         char path[PATH_MAX];
 104 
 105         if ((ldata = __locdata_alloc(name, sizeof (*lct))) == NULL)
 106                 return (NULL);
 107         lct = ldata->l_data[0];

 108         /*














 109          * Slurp the locale file into the cache.
 110          */
 111 
 112         (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE/LCL_DATA",
 113             _PathLocale, name);
 114 
 115         if ((rl = _Read_RuneMagi(path)) == NULL) {
 116                 __locdata_free(ldata);
 117                 errno = EINVAL;
 118                 return (NULL);



 119         }
 120         ldata->l_data[1] = rl;
 121 
 122         lct->lc_mbrtowc = NULL;
 123         lct->lc_mbsinit = NULL;
 124         lct->lc_mbsnrtowcs = NULL;
 125         lct->lc_wcrtomb = NULL;
 126         lct->lc_wcsnrtombs = NULL;
 127         lct->lc_ctype_mask = rl->__runetype;
 128         lct->lc_trans_upper = rl->__mapupper;
 129         lct->lc_trans_lower = rl->__maplower;
 130 
 131         /* set up the function pointers */
 132         for (i = 0; encodings[i].e_name != NULL; i++) {
 133                 int l = strlen(encodings[i].e_name);
 134                 if ((strncmp(rl->__encoding, encodings[i].e_name, l) == 0) &&
 135                     (rl->__encoding[l] == '\0' || rl->__encoding[l] == '@')) {
 136                         encodings[i].e_init(lct);
 137                         break;



























 138                 }


































 139         }
 140         if (encodings[i].e_name == NULL) {
 141                 __locdata_free(ldata);
 142                 errno = EINVAL;
 143                 return (NULL);








 144         }
 145 












 146 
 147         return (ldata);












 148 }