Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libc/port/locale/setrunelocale.c
          +++ new/usr/src/lib/libc/port/locale/setrunelocale.c
   1    1  /*
        2 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   2    3   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
   3    4   * Copyright (c) 1993
   4    5   *      The Regents of the University of California.  All rights reserved.
   5    6   *
   6    7   * This code is derived from software contributed to Berkeley by
   7    8   * Paul Borman at Krystal Technologies.
   8    9   *
   9   10   * Redistribution and use in source and binary forms, with or without
  10   11   * modification, are permitted provided that the following conditions
  11   12   * are met:
↓ open down ↓ 26 lines elided ↑ open up ↑
  38   39  #include <string.h>
  39   40  #include <stdio.h>
  40   41  #include <stdlib.h>
  41   42  #include <unistd.h>
  42   43  #include <wchar.h>
  43   44  #include "runetype.h"
  44   45  #include "ldpart.h"
  45   46  #include "mblocal.h"
  46   47  #include "setlocale.h"
  47   48  #include "_ctype.h"
  48      -#include "../i18n/_locale.h"
       49 +#include "lctype.h"
       50 +#include "localeimpl.h"
  49   51  
  50      -extern _RuneLocale      *_Read_RuneMagi(FILE *);
  51      -extern unsigned char    __ctype_C[];
       52 +extern _RuneLocale      *_Read_RuneMagi(const char *);
  52   53  
  53      -static int              __setrunelocale(const char *);
       54 +struct lc_ctype lc_ctype_posix = {
       55 +        .lc_mbrtowc = __mbrtowc_ascii,
       56 +        .lc_mbsinit = __mbsinit_ascii,
       57 +        .lc_mbsnrtowcs = __mbsnrtowcs_ascii,
       58 +        .lc_wcrtomb = __wcrtomb_ascii,
       59 +        .lc_wcsnrtombs = __wcsnrtombs_ascii,
       60 +        .lc_is_ascii = 1,
       61 +        .lc_max_mblen = 1,
       62 +        .lc_trans_upper = _DefaultRuneLocale.__mapupper,
       63 +        .lc_trans_lower = _DefaultRuneLocale.__maplower,
       64 +        .lc_ctype_mask = _DefaultRuneLocale.__runetype,
       65 +};
  54   66  
  55      -static int
  56      -__setrunelocale(const char *encoding)
  57      -{
  58      -        FILE *fp;
  59      -        char name[PATH_MAX];
  60      -        _RuneLocale *rl;
  61      -        int saverr, ret;
  62      -        size_t (*old__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  63      -            const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  64      -        size_t (*old__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  65      -            mbstate_t *_RESTRICT_KYWD);
  66      -        int (*old__mbsinit)(const mbstate_t *);
  67      -        size_t (*old__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  68      -            const char **_RESTRICT_KYWD, size_t, size_t,
  69      -            mbstate_t *_RESTRICT_KYWD);
  70      -        size_t (*old__wcsnrtombs)(char *_RESTRICT_KYWD,
  71      -            const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  72      -            mbstate_t *_RESTRICT_KYWD);
  73      -        static char ctype_encoding[ENCODING_LEN + 1];
  74      -        static _RuneLocale *CachedRuneLocale;
  75      -        static size_t (*Cached__mbrtowc)(wchar_t *_RESTRICT_KYWD,
  76      -            const char *_RESTRICT_KYWD, size_t, mbstate_t *_RESTRICT_KYWD);
  77      -        static size_t (*Cached__wcrtomb)(char *_RESTRICT_KYWD, wchar_t,
  78      -            mbstate_t *_RESTRICT_KYWD);
  79      -        static int (*Cached__mbsinit)(const mbstate_t *);
  80      -        static size_t (*Cached__mbsnrtowcs)(wchar_t *_RESTRICT_KYWD,
  81      -            const char **_RESTRICT_KYWD, size_t, size_t,
  82      -            mbstate_t *_RESTRICT_KYWD);
  83      -        static size_t (*Cached__wcsnrtombs)(char *_RESTRICT_KYWD,
  84      -            const wchar_t **_RESTRICT_KYWD, size_t, size_t,
  85      -            mbstate_t *_RESTRICT_KYWD);
       67 +struct locdata __posix_ctype_locdata = {
       68 +        .l_lname = "C",
       69 +        .l_refcnt = (uint32_t)-1,
       70 +        .l_data = { &lc_ctype_posix, &_DefaultRuneLocale }
       71 +};
  86   72  
  87      -        /*
  88      -         * The "C" and "POSIX" locale are always here.
  89      -         */
  90      -        if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
  91      -                int i;
  92   73  
  93      -                (void) memcpy(__ctype, __ctype_C, SZ_TOTAL);
       74 +/*
       75 + * Table of initializers for encodings.  When you add a new encoding type,
       76 + * this table should be updated.
       77 + */
       78 +static struct {
       79 +        const char *e_name;
       80 +        void (*e_init)(struct lc_ctype *);
       81 +} encodings[] = {
       82 +        { "NONE", _none_init },
       83 +        { "UTF-8",      _UTF8_init },
       84 +        { "EUC-CN",     _EUC_CN_init },
       85 +        { "EUC-JP",     _EUC_JP_init },
       86 +        { "EUC-KR",     _EUC_KR_init },
       87 +        { "EUC-TW",     _EUC_TW_init },
       88 +        { "GB18030",    _GB18030_init },
       89 +        { "GB2312",     _GB2312_init },
       90 +        { "GBK",        _GBK_init },
       91 +        { "BIG5",       _BIG5_init },
       92 +        { "MSKanji",    _MSKanji_init },
       93 +        { NULL,         NULL }
       94 +};
  94   95  
  95      -                for (i = 0; i < _CACHED_RUNES; i++) {
  96      -                        __ctype_mask[i] = _DefaultRuneLocale.__runetype[i];
  97      -                        __trans_upper[i] = _DefaultRuneLocale.__mapupper[i];
  98      -                        __trans_lower[i] = _DefaultRuneLocale.__maplower[i];
  99      -                }
 100   96  
 101      -                (void) _none_init(&_DefaultRuneLocale);
 102      -                return (0);
 103      -        }
       97 +struct locdata *
       98 +__lc_ctype_load(const char *name)
       99 +{
      100 +        struct locdata *ldata;
      101 +        struct lc_ctype *lct;
      102 +        _RuneLocale *rl;
      103 +        int i;
      104 +        char path[PATH_MAX];
 104  105  
      106 +        if ((ldata = __locdata_alloc(name, sizeof (*lct))) == NULL)
      107 +                return (NULL);
      108 +        lct = ldata->l_data[0];
 105  109          /*
 106      -         * If the locale name is the same as our cache, use the cache.
 107      -         */
 108      -        if (CachedRuneLocale != NULL &&
 109      -            strcmp(encoding, ctype_encoding) == 0) {
 110      -                _CurrentRuneLocale = CachedRuneLocale;
 111      -                __mbrtowc = Cached__mbrtowc;
 112      -                __mbsinit = Cached__mbsinit;
 113      -                __mbsnrtowcs = Cached__mbsnrtowcs;
 114      -                __wcrtomb = Cached__wcrtomb;
 115      -                __wcsnrtombs = Cached__wcsnrtombs;
 116      -                return (0);
 117      -        }
 118      -
 119      -        /*
 120  110           * Slurp the locale file into the cache.
 121  111           */
 122  112  
 123      -        (void) snprintf(name, sizeof (name), "%s/%s/LC_CTYPE/LCL_DATA",
 124      -            _PathLocale, encoding);
      113 +        (void) snprintf(path, sizeof (path), "%s/%s/LC_CTYPE/LCL_DATA",
      114 +            _PathLocale, name);
 125  115  
 126      -        if ((fp = fopen(name, "r")) == NULL)
 127      -                return (errno == 0 ? ENOENT : errno);
 128      -
 129      -        if ((rl = _Read_RuneMagi(fp)) == NULL) {
 130      -                saverr = (errno == 0 ? EINVAL : errno);
 131      -                (void) fclose(fp);
 132      -                return (saverr);
      116 +        if ((rl = _Read_RuneMagi(path)) == NULL) {
      117 +                __locdata_release(ldata);
      118 +                errno = EINVAL;
      119 +                return (NULL);
 133  120          }
 134      -        (void) fclose(fp);
      121 +        ldata->l_data[1] = rl;
 135  122  
 136      -        old__mbrtowc = __mbrtowc;
 137      -        old__mbsinit = __mbsinit;
 138      -        old__mbsnrtowcs = __mbsnrtowcs;
 139      -        old__wcrtomb = __wcrtomb;
 140      -        old__wcsnrtombs = __wcsnrtombs;
      123 +        lct->lc_mbrtowc = NULL;
      124 +        lct->lc_mbsinit = NULL;
      125 +        lct->lc_mbsnrtowcs = NULL;
      126 +        lct->lc_wcrtomb = NULL;
      127 +        lct->lc_wcsnrtombs = NULL;
      128 +        lct->lc_ctype_mask = rl->__runetype;
      129 +        lct->lc_trans_upper = rl->__mapupper;
      130 +        lct->lc_trans_lower = rl->__maplower;
 141  131  
 142      -        __mbrtowc = NULL;
 143      -        __mbsinit = NULL;
 144      -        __mbsnrtowcs = __mbsnrtowcs_std;
 145      -        __wcrtomb = NULL;
 146      -        __wcsnrtombs = __wcsnrtombs_std;
 147      -
 148      -        if (strcmp(rl->__encoding, "NONE") == 0)
 149      -                ret = _none_init(rl);
 150      -        else if (strcmp(rl->__encoding, "UTF-8") == 0)
 151      -                ret = _UTF8_init(rl);
 152      -        else if (strcmp(rl->__encoding, "EUC-CN") == 0)
 153      -                ret = _EUC_CN_init(rl);
 154      -        else if (strcmp(rl->__encoding, "EUC-JP") == 0)
 155      -                ret = _EUC_JP_init(rl);
 156      -        else if (strcmp(rl->__encoding, "EUC-KR") == 0)
 157      -                ret = _EUC_KR_init(rl);
 158      -        else if (strcmp(rl->__encoding, "EUC-TW") == 0)
 159      -                ret = _EUC_TW_init(rl);
 160      -        else if (strcmp(rl->__encoding, "GB18030") == 0)
 161      -                ret = _GB18030_init(rl);
 162      -        else if (strcmp(rl->__encoding, "GB2312") == 0)
 163      -                ret = _GB2312_init(rl);
 164      -        else if (strcmp(rl->__encoding, "GBK") == 0)
 165      -                ret = _GBK_init(rl);
 166      -        else if (strcmp(rl->__encoding, "BIG5") == 0)
 167      -                ret = _BIG5_init(rl);
 168      -        else if (strcmp(rl->__encoding, "MSKanji") == 0)
 169      -                ret = _MSKanji_init(rl);
 170      -        else
 171      -                ret = EINVAL;
 172      -
 173      -        if (ret == 0) {
 174      -                if (CachedRuneLocale != NULL) {
 175      -                        free(CachedRuneLocale);
      132 +        /* set up the function pointers */
      133 +        for (i = 0; encodings[i].e_name != NULL; i++) {
      134 +                int l = strlen(encodings[i].e_name);
      135 +                if ((strncmp(rl->__encoding, encodings[i].e_name, l) == 0) &&
      136 +                    (rl->__encoding[l] == '\0' || rl->__encoding[l] == '@')) {
      137 +                        encodings[i].e_init(lct);
      138 +                        break;
 176  139                  }
 177      -                CachedRuneLocale = _CurrentRuneLocale;
 178      -                Cached__mbrtowc = __mbrtowc;
 179      -                Cached__mbsinit = __mbsinit;
 180      -                Cached__mbsnrtowcs = __mbsnrtowcs;
 181      -                Cached__wcrtomb = __wcrtomb;
 182      -                Cached__wcsnrtombs = __wcsnrtombs;
 183      -                (void) strcpy(ctype_encoding, encoding);
 184      -
 185      -                /*
 186      -                 * We need to overwrite the _ctype array.  This requires
 187      -                 * some finagling.  This is because references to it may
 188      -                 * have been baked into applications.
 189      -                 *
 190      -                 * Note that it is interesting that toupper/tolower only
 191      -                 * produce defined results when the input is representable
 192      -                 * as a byte.
 193      -                 */
 194      -
 195      -                /*
 196      -                 * The top half is the type mask array.  Because we
 197      -                 * want to support both legacy Solaris code (which have
 198      -                 * mask valeus baked in to them), and we want to be able
 199      -                 * to import locale files from other sources (FreeBSD)
 200      -                 * which probably uses different masks, we have to perform
 201      -                 * a conversion here.  Ugh.  Note that the _CTYPE definitions
 202      -                 * we use from FreeBSD are richer than the Solaris legacy.
 203      -                 *
 204      -                 * We have to cope with these limitations though, because the
 205      -                 * inadequate Solaris definitions were baked into binaries.
 206      -                 */
 207      -                for (int i = 0; i < _CACHED_RUNES; i++) {
 208      -                        /* ctype can only encode the lower 8 bits. */
 209      -                        __ctype[i+1] = rl->__runetype[i] & 0xff;
 210      -                        __ctype_mask[i] = rl->__runetype[i];
 211      -                }
 212      -
 213      -                /* The bottom half is the toupper/lower array */
 214      -                for (int i = 0; i < _CACHED_RUNES; i++) {
 215      -                        __ctype[258 + i] = i;
 216      -                        if (rl->__mapupper[i] && rl->__mapupper[i] != i)
 217      -                                __ctype[258+i] = rl->__mapupper[i];
 218      -                        if (rl->__maplower[i] && rl->__maplower[i] != i)
 219      -                                __ctype[258+i] = rl->__maplower[i];
 220      -
 221      -                        /* Don't forget these annoyances either! */
 222      -                        __trans_upper[i] = rl->__mapupper[i];
 223      -                        __trans_lower[i] = rl->__maplower[i];
 224      -                }
 225      -
 226      -                /*
 227      -                 * Note that we expect the init code will have populated
 228      -                 * the CSWIDTH array (__ctype[514-520]) properly.
 229      -                 */
 230      -        } else {
 231      -                __mbrtowc = old__mbrtowc;
 232      -                __mbsinit = old__mbsinit;
 233      -                __mbsnrtowcs = old__mbsnrtowcs;
 234      -                __wcrtomb = old__wcrtomb;
 235      -                __wcsnrtombs = old__wcsnrtombs;
 236      -                free(rl);
 237  140          }
      141 +        if (encodings[i].e_name == NULL) {
      142 +                __locdata_release(ldata);
      143 +                errno = EINVAL;
      144 +                return (NULL);
      145 +        }
 238  146  
 239      -        return (ret);
 240      -}
 241  147  
 242      -int
 243      -__wrap_setrunelocale(const char *locale)
 244      -{
 245      -        int ret = __setrunelocale(locale);
 246      -
 247      -        if (ret != 0) {
 248      -                errno = ret;
 249      -                return (_LDP_ERROR);
 250      -        }
 251      -        return (_LDP_LOADED);
      148 +        return (ldata);
 252  149  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX