Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libc/port/locale/iswctype.c
          +++ new/usr/src/lib/libc/port/locale/iswctype.c
   1    1  /*
        2 + * Copyright 2014 Garrett D'Amore <garrett@damore.org>
   2    3   * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3    4   * Copyright (c) 1989, 1993
   4    5   *      The Regents of the University of California.  All rights reserved.
   5    6   * (c) UNIX System Laboratories, Inc.
   6    7   * All or some portions of this file are derived from material licensed
   7    8   * to the University of California by American Telephone and Telegraph
   8    9   * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   9   10   * the permission of UNIX System Laboratories, Inc.
  10   11   *
  11   12   * This code is derived from software contributed to Berkeley by
↓ open down ↓ 19 lines elided ↑ open up ↑
  31   32   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32   33   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33   34   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34   35   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35   36   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36   37   * SUCH DAMAGE.
  37   38   */
  38   39  
  39   40  #include "lint.h"
  40   41  #include <wctype.h>
       42 +#include <locale.h>
  41   43  #include "runefile.h"
  42   44  #include "runetype.h"
       45 +#include "localeimpl.h"
  43   46  #include "_ctype.h"
  44   47  
  45   48  /*
  46      - * We removed: iswascii, iswhexnumber, and iswnumber, as
  47      - * these are not present on Solaris.  Note that the standard requires
  48      - * iswascii to be a macro, so it is defined in our headers.
       49 + * Note that the standard requires iswascii to be a macro, so it is defined
       50 + * in our headers.
  49   51   *
  50      - * We renamed (per Solaris) iswideogram, iswspecial, iswspecial to the
  51      - * equivalent values without "w".  We added a new isnumber, that looks
  52      - * for non-ASCII numbers.
       52 + * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial to the
       53 + * equivalent values without "w".  The Solaris specific function isenglish()
       54 + * is here, but does not get an isw* equivalent.
       55 + *
       56 + * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
       57 + * only return true for characters in the portable set.  While the assumption
       58 + * is not technically correct, it turns out that for all of our locales this
       59 + * is true.  iswhexnumber is aliased to iswxdigit.
  53   60   */
  54   61  
  55   62  static int
  56      -__istype(wint_t c, unsigned int f)
       63 +__istype_l(locale_t loc, wint_t c, unsigned int f)
  57   64  {
  58   65          unsigned int rt;
  59   66  
  60      -        /* Fast path for single byte locales */
  61   67          if (c < 0 || c >= _CACHED_RUNES)
  62      -                rt =  ___runetype(c);
       68 +                rt = __runetype(loc->runelocale, c);
  63   69          else
  64      -                rt = _CurrentRuneLocale->__runetype[c];
       70 +                rt = loc->runelocale->__runetype[c];
  65   71          return (rt & f);
  66   72  }
  67   73  
  68   74  static int
  69      -__isctype(wint_t c, unsigned int f)
       75 +__istype(wint_t c, unsigned int f)
  70   76  {
  71      -        unsigned int rt;
       77 +        return (__istype_l(uselocale(NULL), c, f));
       78 +}
  72   79  
  73      -        /* Fast path for single byte locales */
  74      -        if (c < 0 || c >= _CACHED_RUNES)
  75      -                return (0);
  76      -        else
  77      -                rt = _CurrentRuneLocale->__runetype[c];
  78      -        return (rt & f);
       80 +int
       81 +iswctype_l(wint_t wc, wctype_t class, locale_t loc)
       82 +{
       83 +        if (iswascii(wc))
       84 +                return (__ctype_mask[wc] & class);
       85 +        return (__istype_l(loc, wc, class));
  79   86  }
  80   87  
  81   88  #undef iswctype
  82   89  int
  83   90  iswctype(wint_t wc, wctype_t class)
  84   91  {
       92 +        /*
       93 +         * Note that we don't just call iswctype_l because we optimize for
       94 +         * the iswascii() case, so that most of the time we have no need to
       95 +         * call uselocale().
       96 +         */
       97 +        if (iswascii(wc))
       98 +                return (__ctype_mask[wc] & class);
  85   99          return (__istype(wc, class));
  86  100  }
  87  101  
      102 +/*
      103 + * This is a legacy version, baked into binaries.
      104 + */
  88  105  #undef _iswctype
  89  106  unsigned
  90  107  _iswctype(wchar_t wc, int class)
  91  108  {
      109 +        if (iswascii(wc))
      110 +                return (__ctype_mask[wc] & class);
  92  111          return (__istype((wint_t)wc, (unsigned int)class));
  93  112  }
  94  113  
  95      -#undef iswalnum
  96      -int
  97      -iswalnum(wint_t wc)
  98      -{
  99      -        return (__istype(wc, _CTYPE_A|_CTYPE_D));
      114 +#define DEFN_ISWTYPE(type, mask)                \
      115 +int                                             \
      116 +isw##type##_l(wint_t wc, locale_t loc)          \
      117 +{                                               \
      118 +        return (iswascii(wc) ?                  \
      119 +                (__ctype_mask[wc] & (mask)) :   \
      120 +                __istype_l(loc, wc, mask));     \
      121 +}                                               \
      122 +                                                \
      123 +int                                             \
      124 +isw##type(wint_t wc)                            \
      125 +{                                               \
      126 +        return (iswascii(wc) ?                  \
      127 +                (__ctype_mask[wc] & (mask)) :   \
      128 +                __istype(wc, mask));            \
 100  129  }
 101  130  
 102      -#undef iswalpha
 103      -int
 104      -iswalpha(wint_t wc)
 105      -{
 106      -        return (__istype(wc, _CTYPE_A));
 107      -}
      131 +/* kill off any macros */
      132 +#undef  iswalnum
      133 +#undef  iswalpha
      134 +#undef  iswblank
 108  135  
 109      -#undef iswblank
 110      -int
 111      -iswblank(wint_t wc)
 112      -{
 113      -        return (__istype(wc, _CTYPE_B));
 114      -}
      136 +DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
      137 +DEFN_ISWTYPE(alpha, _CTYPE_A)
      138 +DEFN_ISWTYPE(blank, _CTYPE_B)
      139 +DEFN_ISWTYPE(cntrl, _CTYPE_C)
      140 +DEFN_ISWTYPE(digit, _CTYPE_D)
      141 +DEFN_ISWTYPE(graph, _CTYPE_G)
      142 +DEFN_ISWTYPE(lower, _CTYPE_L)
      143 +DEFN_ISWTYPE(upper, _CTYPE_U)
      144 +DEFN_ISWTYPE(print, _CTYPE_R)
      145 +DEFN_ISWTYPE(punct, _CTYPE_P)
      146 +DEFN_ISWTYPE(space, _CTYPE_S)
      147 +DEFN_ISWTYPE(xdigit, _CTYPE_X)
      148 +DEFN_ISWTYPE(ideogram, _CTYPE_I)
      149 +DEFN_ISWTYPE(phonogram, _CTYPE_Q)
      150 +DEFN_ISWTYPE(special, _CTYPE_T)
      151 +DEFN_ISWTYPE(number, _CTYPE_N)
 115  152  
 116      -#undef iswcntrl
 117      -int
 118      -iswcntrl(wint_t wc)
 119      -{
 120      -        return (__istype(wc, _CTYPE_C));
 121      -}
 122  153  
 123      -#undef iswdigit
 124      -int
 125      -iswdigit(wint_t wc)
 126      -{
 127      -        return (__isctype(wc, _CTYPE_D));
 128      -}
      154 +#undef iswhexnumber
      155 +#pragma weak iswhexnumber = iswxdigit
      156 +#pragma weak iswhexnumber_l = iswxdigit_l
 129  157  
 130      -#undef iswgraph
 131      -int
 132      -iswgraph(wint_t wc)
 133      -{
 134      -        return (__istype(wc, _CTYPE_G));
 135      -}
 136      -
 137  158  #undef isideogram
 138      -int
 139      -isideogram(wint_t wc)
 140      -{
 141      -        return (__istype(wc, _CTYPE_I));
 142      -}
      159 +#pragma weak isideogram = iswideogram
 143  160  
 144      -#undef iswlower
 145      -int
 146      -iswlower(wint_t wc)
 147      -{
 148      -        return (__istype(wc, _CTYPE_L));
 149      -}
 150      -
 151  161  #undef isphonogram
 152      -int
 153      -isphonogram(wint_t wc)
 154      -{
 155      -        return (__istype(wc, _CTYPE_Q));
 156      -}
      162 +#pragma weak isphonogram = iswphonogram
 157  163  
 158      -#undef iswprint
 159      -int
 160      -iswprint(wint_t wc)
 161      -{
 162      -        return (__istype(wc, _CTYPE_R));
 163      -}
 164      -
 165      -#undef iswpunct
 166      -int
 167      -iswpunct(wint_t wc)
 168      -{
 169      -        return (__istype(wc, _CTYPE_P));
 170      -}
 171      -
 172      -#undef iswspace
 173      -int
 174      -iswspace(wint_t wc)
 175      -{
 176      -        return (__istype(wc, _CTYPE_S));
 177      -}
 178      -
 179      -#undef iswupper
 180      -int
 181      -iswupper(wint_t wc)
 182      -{
 183      -        return (__istype(wc, _CTYPE_U));
 184      -}
 185      -
 186      -#undef iswxdigit
 187      -int
 188      -iswxdigit(wint_t wc)
 189      -{
 190      -        return (__isctype(wc, _CTYPE_X));
 191      -}
 192      -
 193      -#undef isenglish
 194      -int
 195      -isenglish(wint_t wc)
 196      -{
 197      -        return (__istype(wc, _CTYPE_E));
 198      -}
 199      -
 200  164  #undef isspecial
 201      -int
 202      -isspecial(wint_t wc)
 203      -{
 204      -        return (__istype(wc, _CTYPE_T));
 205      -}
      165 +#pragma weak isspecial = iswspecial
 206  166  
 207  167  #undef isnumber
 208      -int
 209      -isnumber(wint_t wc)
 210      -{
 211      -        return (__istype(wc, _CTYPE_N));
 212      -}
      168 +#pragma weak isnumber = iswnumber
 213  169  
 214  170  /*
 215  171   * FreeBSD has iswrune() for use by external programs, and this is used by
 216  172   * the "tr" program.  As that program is part of our consolidation, we
 217  173   * provide an _ILLUMOS_PRIVATE version of this function that we can use.
 218  174   *
 219  175   * No programs that are not part of the illumos stack itself should use
 220  176   * this function -- programs that do reference it will not be portable to
 221  177   * other versions of SunOS or Solaris.
 222  178   */
 223  179  int
 224  180  __iswrune(wint_t wc)
 225  181  {
 226  182          /*
 227  183           * Note, FreeBSD ignored the low order byte, as they encode their
 228  184           * ctype values differently.  We can't do that (ctype is baked into
 229  185           * applications), but instead can just check if *any* bit is set in
 230  186           * the ctype.  Any bit being set indicates it's a valid rune.
      187 +         *
      188 +         * NB: For ASCII all positions except NUL are runes.
 231  189           */
 232      -        return (__istype(wc, 0xffffffffU));
      190 +        return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU));
      191 +}
      192 +
      193 +/*
      194 + * isenglish is a Solaris legacy.  No isw* equivalent.  Note that this most
      195 + * likely doesn't work, as the locale data we have doesn't include it.  It
      196 + * specifically is only valid for non-ASCII characters.  We're not sure this
      197 + * is in actual use in the wild.
      198 + */
      199 +#undef isenglish
      200 +int
      201 +isenglish(wint_t wc)
      202 +{
      203 +        return (__istype(wc, _CTYPE_E));
 233  204  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX