Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD
   1 /*

   2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1989, 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  * (c) UNIX System Laboratories, Inc.
   6  * All or some portions of this file are derived from material licensed
   7  * to the University of California by American Telephone and Telegraph
   8  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   9  * the permission of UNIX System Laboratories, Inc.
  10  *
  11  * This code is derived from software contributed to Berkeley by
  12  * Paul Borman at Krystal Technologies.
  13  *
  14  * Redistribution and use in source and binary forms, with or without
  15  * modification, are permitted provided that the following conditions
  16  * are met:
  17  * 1. Redistributions of source code must retain the above copyright
  18  *    notice, this list of conditions and the following disclaimer.
  19  * 2. Redistributions in binary form must reproduce the above copyright
  20  *    notice, this list of conditions and the following disclaimer in the
  21  *    documentation and/or other materials provided with the distribution.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  */
  38 
  39 #include "lint.h"
  40 #include <wctype.h>

  41 #include "runefile.h"
  42 #include "runetype.h"

  43 #include "_ctype.h"
  44 
  45 /*
  46  * We removed: iswascii, iswhexnumber, and iswnumber, as
  47  * these are not present on Solaris.  Note that the standard requires
  48  * iswascii to be a macro, so it is defined in our headers.
  49  *
  50  * We renamed (per Solaris) iswideogram, iswphonogram, iswspecial to the
  51  * equivalent values without "w".  We added a new isnumber, that looks
  52  * for non-ASCII numbers.





  53  */
  54 
  55 static int
  56 __istype(wint_t c, unsigned int f)
  57 {
             /*
              * Return the bits of c's rune type that are selected by f, using
              * _CurrentRuneLocale.  A non-zero result means at least one of
              * the requested bits is set.
              */
  58         unsigned int rt;
  59 
  60         /* Only [0, _CACHED_RUNES) is in the table; others need a lookup */
  61         if (c < 0 || c >= _CACHED_RUNES)
  62                 rt =  ___runetype(c);
  63         else
  64                 rt = _CurrentRuneLocale->__runetype[c];
  65         return (rt & f);
  66 }
  67 
  68 static int
  69 __isctype(wint_t c, unsigned int f)
  70 {
             /*
              * Return the bits of c's rune type that are selected by f, but
              * only for values held in the cached table.  Anything outside
              * [0, _CACHED_RUNES) yields 0 without any further lookup.
              */
  71         unsigned int rt;

  72 
  73         /* Values outside the cached table never match */
  74         if (c < 0 || c >= _CACHED_RUNES)
  75                 return (0);
  76         else
  77                 rt = _CurrentRuneLocale->__runetype[c];
  78         return (rt & f);
  79 }
  80 
  81 #undef iswctype
  82 int
  83 iswctype(wint_t wc, wctype_t class)
  84 {
             /* Non-zero if wc's rune type has any of the bits in class set. */






  85         return (__istype(wc, class));
  86 }
  87 



  88 #undef _iswctype
  89 unsigned
  90 _iswctype(wchar_t wc, int class)
  91 {
             /* Same test as __istype(), taking wchar_t/int arguments. */

  92         return (__istype((wint_t)wc, (unsigned int)class));
  93 }
  94 
  95 #undef iswalnum
  96 int
  97 iswalnum(wint_t wc)
  98 {
             /* Non-zero if wc's rune type has _CTYPE_A or _CTYPE_D set. */
  99         return (__istype(wc, _CTYPE_A|_CTYPE_D));










 100 }
 101 


 102 #undef iswalpha
 103 int
 104 iswalpha(wint_t wc)
 105 {
             /* Non-zero if wc's rune type has _CTYPE_A set. */
 106         return (__istype(wc, _CTYPE_A));
 107 }
 108 
 109 #undef iswblank
 110 int
 111 iswblank(wint_t wc)
 112 {
             /* Non-zero if wc's rune type has _CTYPE_B set. */
 113         return (__istype(wc, _CTYPE_B));
 114 }
 115 
 116 #undef iswcntrl
 117 int
 118 iswcntrl(wint_t wc)
 119 {
             /* Non-zero if wc's rune type has _CTYPE_C set. */
 120         return (__istype(wc, _CTYPE_C));
 121 }










 122 
 123 #undef iswdigit
 124 int
 125 iswdigit(wint_t wc)
 126 {
             /*
              * Non-zero if wc's rune type has _CTYPE_D set.  Goes through
              * __isctype(), so only values in [0, _CACHED_RUNES) can match.
              */
 127         return (__isctype(wc, _CTYPE_D));
 128 }
 129 
 130 #undef iswgraph
 131 int
 132 iswgraph(wint_t wc)
 133 {
             /* Non-zero if wc's rune type has _CTYPE_G set. */
 134         return (__istype(wc, _CTYPE_G));
 135 }
 136 
 137 #undef isideogram
 138 int
 139 isideogram(wint_t wc)
 140 {
             /* Non-zero if wc's rune type has _CTYPE_I set. */
 141         return (__istype(wc, _CTYPE_I));
 142 }
 143 
 144 #undef iswlower
 145 int
 146 iswlower(wint_t wc)
 147 {
             /* Non-zero if wc's rune type has _CTYPE_L set. */
 148         return (__istype(wc, _CTYPE_L));
 149 }
 150 
 151 #undef isphonogram
 152 int
 153 isphonogram(wint_t wc)
 154 {
             /* Non-zero if wc's rune type has _CTYPE_Q set. */
 155         return (__istype(wc, _CTYPE_Q));
 156 }
 157 
 158 #undef iswprint
 159 int
 160 iswprint(wint_t wc)
 161 {
             /* Non-zero if wc's rune type has _CTYPE_R set. */
 162         return (__istype(wc, _CTYPE_R));
 163 }
 164 
 165 #undef iswpunct
 166 int
 167 iswpunct(wint_t wc)
 168 {
             /* Non-zero if wc's rune type has _CTYPE_P set. */
 169         return (__istype(wc, _CTYPE_P));
 170 }
 171 
 172 #undef iswspace
 173 int
 174 iswspace(wint_t wc)
 175 {
             /* Non-zero if wc's rune type has _CTYPE_S set. */
 176         return (__istype(wc, _CTYPE_S));
 177 }
 178 
 179 #undef iswupper
 180 int
 181 iswupper(wint_t wc)
 182 {
             /* Non-zero if wc's rune type has _CTYPE_U set. */
 183         return (__istype(wc, _CTYPE_U));
 184 }
 185 
 186 #undef iswxdigit
 187 int
 188 iswxdigit(wint_t wc)
 189 {
             /*
              * Non-zero if wc's rune type has _CTYPE_X set.  Goes through
              * __isctype(), so only values in [0, _CACHED_RUNES) can match.
              */
 190         return (__isctype(wc, _CTYPE_X));
 191 }
 192 
 193 #undef isenglish
 194 int
 195 isenglish(wint_t wc)
 196 {
             /* Non-zero if wc's rune type has _CTYPE_E set. */
 197         return (__istype(wc, _CTYPE_E));
 198 }
 199 
 200 #undef isspecial
 201 int
 202 isspecial(wint_t wc)
 203 {
             /* Non-zero if wc's rune type has _CTYPE_T set. */
 204         return (__istype(wc, _CTYPE_T));
 205 }
 206 
 207 #undef isnumber
 208 int
 209 isnumber(wint_t wc)
 210 {
             /* Non-zero if wc's rune type has _CTYPE_N set. */
 211         return (__istype(wc, _CTYPE_N));
 212 }
 213 
 214 /*
 215  * FreeBSD has iswrune() for use by external programs, and this is used by
 216  * the "tr" program.  As that program is part of our consolidation, we
 217  * provide an _ILLUMOS_PRIVATE version of this function that we can use.
 218  *
 219  * No programs that are not part of the illumos stack itself should use
 220  * this function -- programs that do reference it will not be portable to
 221  * other versions of SunOS or Solaris.
 222  */
 223 int
 224 __iswrune(wint_t wc)
 225 {
 226         /*
 227          * Note, FreeBSD ignored the low order byte, as they encode their
 228          * ctype values differently.  We can't do that (ctype is baked into
 229          * applications), but instead can just check if *any* bit is set in
 230          * the ctype.  Any bit being set indicates it's a valid rune.
              *
              * The all-ones mask makes __istype() return every bit of the type.
 231          */
 232         return (__istype(wc, 0xffffffffU));













 233 }
   1 /*
   2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
   3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   4  * Copyright (c) 1989, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  * (c) UNIX System Laboratories, Inc.
   7  * All or some portions of this file are derived from material licensed
   8  * to the University of California by American Telephone and Telegraph
   9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  10  * the permission of UNIX System Laboratories, Inc.
  11  *
  12  * This code is derived from software contributed to Berkeley by
  13  * Paul Borman at Krystal Technologies.
  14  *
  15  * Redistribution and use in source and binary forms, with or without
  16  * modification, are permitted provided that the following conditions
  17  * are met:
  18  * 1. Redistributions of source code must retain the above copyright
  19  *    notice, this list of conditions and the following disclaimer.
  20  * 2. Redistributions in binary form must reproduce the above copyright
  21  *    notice, this list of conditions and the following disclaimer in the
  22  *    documentation and/or other materials provided with the distribution.
  23  * 4. Neither the name of the University nor the names of its contributors
  24  *    may be used to endorse or promote products derived from this software
  25  *    without specific prior written permission.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  37  * SUCH DAMAGE.
  38  */
  39 
  40 #include "lint.h"
  41 #include <wctype.h>
  42 #include <locale.h>
  43 #include "runefile.h"
  44 #include "runetype.h"
  45 #include "localeimpl.h"
  46 #include "_ctype.h"
  47 
  48 /*
  49  * Note that the standard requires iswascii to be a macro, so it is defined
  50  * in our headers.

  51  *
  52  * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial to the
  53  * equivalent values without "w".  The Solaris specific function isenglish()
  54  * is here, but does not get an isw* equivalent.
  55  *
  56  * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
  57  * only return true for characters in the portable set.  While the assumption
  58  * is not technically correct, it turns out that for all of our locales this
  59  * is true.  iswhexnumber is aliased to iswxdigit.
  60  */
  61 
  62 static int
  63 __istype_l(locale_t loc, wint_t c, unsigned int f)
  64 {
             /*
              * Return the bits of c's rune type that are selected by f, using
              * the rune locale attached to loc.  A non-zero result means at
              * least one of the requested bits is set.
              */
  65         unsigned int rt;
  66 
             /* Only [0, _CACHED_RUNES) is in the table; others need a lookup */
  67         if (c < 0 || c >= _CACHED_RUNES)
  68                 rt = __runetype(loc->runelocale, c);
  69         else
  70                 rt = loc->runelocale->__runetype[c];
  71         return (rt & f);
  72 }
  73 
  74 static int
  75 __istype(wint_t c, unsigned int f)
  76 {
             /*
              * Same test as __istype_l(), against the locale returned by
              * uselocale(NULL), which queries the current locale without
              * changing it.
              */
  77         return (__istype_l(uselocale(NULL), c, f));
  78 }
  79 
  80 int
  81 iswctype_l(wint_t wc, wctype_t class, locale_t loc)
  82 {
             /*
              * Non-zero if wc has any of the ctype bits in class.  ASCII
              * values are answered from __ctype_mask without consulting loc;
              * everything else is looked up in loc's rune locale.
              */
  83         if (iswascii(wc))
  84                 return (__ctype_mask[wc] & class);
  85         return (__istype_l(loc, wc, class));
  86 }
  87 
  88 #undef iswctype
  89 int
  90 iswctype(wint_t wc, wctype_t class)
  91 {
  92         /*
              * Non-zero if wc has any of the ctype bits in class.  ASCII
              * values are answered from __ctype_mask; everything else goes
              * through __istype().
              *
  93          * Note that we don't just call iswctype_l because we optimize for
  94          * the iswascii() case, so that most of the time we have no need to
  95          * call uselocale().
  96          */
  97         if (iswascii(wc))
  98                 return (__ctype_mask[wc] & class);
  99         return (__istype(wc, class));
 100 }
 101 
 102 /*
 103  * This is a legacy version, baked into binaries.
 104  */
 105 #undef _iswctype
 106 unsigned
 107 _iswctype(wchar_t wc, int class)
 108 {
             /*
              * Same test as iswctype(), taking wchar_t/int arguments: ASCII
              * is answered from __ctype_mask, the rest via __istype().
              */
 109         if (iswascii(wc))
 110                 return (__ctype_mask[wc] & class);
 111         return (__istype((wint_t)wc, (unsigned int)class));
 112 }
 113 
     /*
      * DEFN_ISWTYPE(type, mask) emits two functions: isw<type>_l(wc, loc) and
      * isw<type>(wc).  Both return the bits of wc's ctype selected by mask.
      * ASCII values are answered from __ctype_mask; anything else goes through
      * __istype_l() with the supplied locale, or __istype() for the
      * non-_l variant.
      */
 114 #define DEFN_ISWTYPE(type, mask)                \
 115 int                                             \
 116 isw##type##_l(wint_t wc, locale_t loc)          \
 117 {                                               \
 118         return (iswascii(wc) ?                  \
 119                 (__ctype_mask[wc] & (mask)) :       \
 120                 __istype_l(loc, wc, mask));     \
 121 }                                               \
 122                                                 \
 123 int                                             \
 124 isw##type(wint_t wc)                            \
 125 {                                               \
 126         return (iswascii(wc) ?                  \
 127                 (__ctype_mask[wc] & (mask)) :       \
 128                 __istype(wc, mask));            \
 129 }
 130 
 131 /* kill off any macros */
 132 #undef  iswalnum
 133 #undef  iswalpha






 134 #undef  iswblank





 135 
     /*
      * Each line defines isw<type>() and isw<type>_l(); the second argument is
      * the _CTYPE_* bit (or bits) that the character's ctype must include.
      */
 136 DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
 137 DEFN_ISWTYPE(alpha, _CTYPE_A)
 138 DEFN_ISWTYPE(blank, _CTYPE_B)
 139 DEFN_ISWTYPE(cntrl, _CTYPE_C)
 140 DEFN_ISWTYPE(digit, _CTYPE_D)
 141 DEFN_ISWTYPE(graph, _CTYPE_G)
 142 DEFN_ISWTYPE(lower, _CTYPE_L)
 143 DEFN_ISWTYPE(upper, _CTYPE_U)
 144 DEFN_ISWTYPE(print, _CTYPE_R)
 145 DEFN_ISWTYPE(punct, _CTYPE_P)
 146 DEFN_ISWTYPE(space, _CTYPE_S)
 147 DEFN_ISWTYPE(xdigit, _CTYPE_X)
 148 DEFN_ISWTYPE(ideogram, _CTYPE_I)
 149 DEFN_ISWTYPE(phonogram, _CTYPE_Q)
 150 DEFN_ISWTYPE(special, _CTYPE_T)
 151 DEFN_ISWTYPE(number, _CTYPE_N)
 152 






 153 
 154 #undef iswhexnumber
 155 #pragma weak iswhexnumber = iswxdigit
 156 #pragma weak iswhexnumber_l = iswxdigit_l



 157 
 158 #undef isideogram
 159 #pragma weak isideogram = iswideogram




 160 







 161 #undef isphonogram
 162 #pragma weak isphonogram = iswphonogram




 163 










































 164 #undef isspecial
 165 #pragma weak isspecial = iswspecial




 166 
 167 #undef isnumber
 168 #pragma weak isnumber = iswnumber




 169 
 170 /*
 171  * FreeBSD has iswrune() for use by external programs, and this is used by
 172  * the "tr" program.  As that program is part of our consolidation, we
 173  * provide an _ILLUMOS_PRIVATE version of this function that we can use.
 174  *
 175  * No programs that are not part of the illumos stack itself should use
 176  * this function -- programs that do reference it will not be portable to
 177  * other versions of SunOS or Solaris.
 178  */
 179 int
 180 __iswrune(wint_t wc)
 181 {
 182         /*
 183          * Note, FreeBSD ignored the low order byte, as they encode their
 184          * ctype values differently.  We can't do that (ctype is baked into
 185          * applications), but instead can just check if *any* bit is set in
 186          * the ctype.  Any bit being set indicates it's a valid rune.
 187          *
 188          * NB: For ASCII all positions except NUL are runes.
              *
              * So: 0 for wc == 0, 1 for any other ASCII value, and otherwise
              * whatever bits __istype() reports for an all-ones mask.
 189          */
 190         return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU));
 191 }
 192 
 193 /*
 194  * isenglish is a Solaris legacy.  No isw* equivalent.  Note that this most
 195  * likely doesn't work, as the locale data we have doesn't include it.  It
 196  * specifically is only valid for non-ASCII characters.  We're not sure this
 197  * is in actual use in the wild.
 198  */
 199 #undef isenglish
 200 int
 201 isenglish(wint_t wc)
 202 {
             /*
              * Non-zero if wc's ctype has _CTYPE_E set.  There is no ASCII
              * shortcut here: every value goes through __istype().
              */
 203         return (__istype(wc, _CTYPE_E));
 204 }