1 /*
   2  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
   3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   4  * Copyright (c) 1989, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  * (c) UNIX System Laboratories, Inc.
   7  * All or some portions of this file are derived from material licensed
   8  * to the University of California by American Telephone and Telegraph
   9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  10  * the permission of UNIX System Laboratories, Inc.
  11  *
  12  * This code is derived from software contributed to Berkeley by
  13  * Paul Borman at Krystal Technologies.
  14  *
  15  * Redistribution and use in source and binary forms, with or without
  16  * modification, are permitted provided that the following conditions
  17  * are met:
  18  * 1. Redistributions of source code must retain the above copyright
  19  *    notice, this list of conditions and the following disclaimer.
  20  * 2. Redistributions in binary form must reproduce the above copyright
  21  *    notice, this list of conditions and the following disclaimer in the
  22  *    documentation and/or other materials provided with the distribution.
  23  * 4. Neither the name of the University nor the names of its contributors
  24  *    may be used to endorse or promote products derived from this software
  25  *    without specific prior written permission.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  37  * SUCH DAMAGE.
  38  */
  39 
  40 #include "lint.h"
  41 #include <wctype.h>
  42 #include <locale.h>
  43 #include "runefile.h"
  44 #include "runetype.h"
  45 #include "localeimpl.h"
  46 #include "_ctype.h"
  47 
  48 /*
  49  * Note that the standard requires iswascii to be a macro, so it is defined
  50  * in our headers.
  51  *
 * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial and
 * iswnumber to the equivalent names without "w".  The Solaris specific
 * function isenglish() is here, but does not get an isw* equivalent.
  55  *
  56  * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
  57  * only return true for characters in the portable set.  While the assumption
  58  * is not technically correct, it turns out that for all of our locales this
  59  * is true.  iswhexnumber is aliased to iswxdigit.
  60  */
  61 
  62 static int
  63 __istype_l(locale_t loc, wint_t c, unsigned int f)
  64 {
  65         unsigned int rt;
  66 
  67         if (c < 0 || c >= _CACHED_RUNES)
  68                 rt = __runetype(loc->runelocale, c);
  69         else
  70                 rt = loc->runelocale->__runetype[c];
  71         return (rt & f);
  72 }
  73 
  74 static int
  75 __istype(wint_t c, unsigned int f)
  76 {
  77         return (__istype_l(uselocale(NULL), c, f));
  78 }
  79 
  80 int
  81 iswctype_l(wint_t wc, wctype_t class, locale_t loc)
  82 {
  83         if (iswascii(wc))
  84                 return (__ctype_mask[wc] & class);
  85         return (__istype_l(loc, wc, class));
  86 }
  87 
  88 #undef iswctype
  89 int
  90 iswctype(wint_t wc, wctype_t class)
  91 {
  92         /*
  93          * Note that we don't just call iswctype_l because we optimize for
  94          * the iswascii() case, so that most of the time we have no need to
  95          * call uselocale().
  96          */
  97         if (iswascii(wc))
  98                 return (__ctype_mask[wc] & class);
  99         return (__istype(wc, class));
 100 }
 101 
 102 /*
 103  * This is a legacy version, baked into binaries.
 104  */
 105 #undef _iswctype
 106 unsigned
 107 _iswctype(wchar_t wc, int class)
 108 {
 109         if (iswascii(wc))
 110                 return (__ctype_mask[wc] & class);
 111         return (__istype((wint_t)wc, (unsigned int)class));
 112 }
 113 
 114 #define DEFN_ISWTYPE(type, mask)                \
 115 int                                             \
 116 isw##type##_l(wint_t wc, locale_t loc)          \
 117 {                                               \
 118         return (iswascii(wc) ?                  \
 119                 (__ctype_mask[wc] & (mask)) :       \
 120                 __istype_l(loc, wc, mask));     \
 121 }                                               \
 122                                                 \
 123 int                                             \
 124 isw##type(wint_t wc)                            \
 125 {                                               \
 126         return (iswascii(wc) ?                  \
 127                 (__ctype_mask[wc] & (mask)) :       \
 128                 __istype(wc, mask));            \
 129 }
 130 
 131 /* kill off any macros */
 132 #undef  iswalnum
 133 #undef  iswalpha
 134 #undef  iswblank
 135 
 136 DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
 137 DEFN_ISWTYPE(alpha, _CTYPE_A)
 138 DEFN_ISWTYPE(blank, _CTYPE_B)
 139 DEFN_ISWTYPE(cntrl, _CTYPE_C)
 140 DEFN_ISWTYPE(digit, _CTYPE_D)
 141 DEFN_ISWTYPE(graph, _CTYPE_D)
 142 DEFN_ISWTYPE(lower, _CTYPE_L)
 143 DEFN_ISWTYPE(upper, _CTYPE_U)
 144 DEFN_ISWTYPE(print, _CTYPE_R)
 145 DEFN_ISWTYPE(punct, _CTYPE_P)
 146 DEFN_ISWTYPE(space, _CTYPE_S)
 147 DEFN_ISWTYPE(xdigit, _CTYPE_X)
 148 DEFN_ISWTYPE(ideogram, _CTYPE_I)
 149 DEFN_ISWTYPE(phonogram, _CTYPE_Q)
 150 DEFN_ISWTYPE(special, _CTYPE_T)
 151 DEFN_ISWTYPE(number, _CTYPE_N)
 152 
 153 
/*
 * Alternate names, provided as weak aliases of the isw* functions
 * generated above.  Any macro of the same name is removed first.
 */

/* iswhexnumber and iswhexnumber_l are other names for iswxdigit{,_l}. */
#undef iswhexnumber
#pragma weak iswhexnumber = iswxdigit
#pragma weak iswhexnumber_l = iswxdigit_l

/* The names without "w" below alias their isw* counterparts. */
#undef isideogram
#pragma weak isideogram = iswideogram

#undef isphonogram
#pragma weak isphonogram = iswphonogram

#undef isspecial
#pragma weak isspecial = iswspecial

#undef isnumber
#pragma weak isnumber = iswnumber
 169 
 170 /*
 171  * FreeBSD has iswrune() for use by external programs, and this is used by
 172  * the "tr" program.  As that program is part of our consolidation, we
 173  * provide an _ILLUMOS_PRIVATE version of this function that we can use.
 174  *
 175  * No programs that are not part of the illumos stack itself should use
 176  * this function -- programs that do reference will not be portable to
 177  * other versions of SunOS or Solaris.
 178  */
 179 int
 180 __iswrune(wint_t wc)
 181 {
 182         /*
 183          * Note, FreeBSD ignored the low order byte, as they encode their
 184          * ctype values differently.  We can't do that (ctype is baked into
 185          * applications), but instead can just check if *any* bit is set in
 186          * the ctype.  Any bit being set indicates its a valid rune.
 187          *
 188          * NB: For ASCII all positions except NULL are runes.
 189          */
 190         return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU));
 191 }
 192 
 193 /*
 194  * isenglish is a Solaris legacy.  No isw* equivalent.  Note that this most
 195  * likely doesn't work, as the locale data we have doesn't include it.  It
 196  * specifically is only valid for non-ASCII characters.  We're not sure this
 197  * is in actual use in the wild.
 198  */
 199 #undef isenglish
 200 int
 201 isenglish(wint_t wc)
 202 {
 203         return (__istype(wc, _CTYPE_E));
 204 }