Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/iswctype.c
+++ new/usr/src/lib/libc/port/locale/iswctype.c
1 1 /*
2 + * Copyright 2014 Garrett D'Amore <garrett@damore.org>
2 3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
3 4 * Copyright (c) 1989, 1993
4 5 * The Regents of the University of California. All rights reserved.
5 6 * (c) UNIX System Laboratories, Inc.
6 7 * All or some portions of this file are derived from material licensed
7 8 * to the University of California by American Telephone and Telegraph
8 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
9 10 * the permission of UNIX System Laboratories, Inc.
10 11 *
11 12 * This code is derived from software contributed to Berkeley by
12 13 * Paul Borman at Krystal Technologies.
13 14 *
14 15 * Redistribution and use in source and binary forms, with or without
15 16 * modification, are permitted provided that the following conditions
16 17 * are met:
17 18 * 1. Redistributions of source code must retain the above copyright
18 19 * notice, this list of conditions and the following disclaimer.
19 20 * 2. Redistributions in binary form must reproduce the above copyright
20 21 * notice, this list of conditions and the following disclaimer in the
21 22 * documentation and/or other materials provided with the distribution.
22 23 * 4. Neither the name of the University nor the names of its contributors
23 24 * may be used to endorse or promote products derived from this software
24 25 * without specific prior written permission.
25 26 *
26 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
31 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 37 * SUCH DAMAGE.
37 38 */
38 39
39 40 #include "lint.h"
40 41 #include <wctype.h>
42 +#include <locale.h>
41 43 #include "runefile.h"
42 44 #include "runetype.h"
45 +#include "localeimpl.h"
43 46 #include "_ctype.h"
44 47
45 48 /*
46 - * We removed: iswascii, iswhexnumber, and iswnumber, as
47 - * these are not present on Solaris. Note that the standard requires
48 - * iswascii to be a macro, so it is defined in our headers.
49 + * Note that the standard requires iswascii to be a macro, so it is defined
50 + * in our headers.
49 51 *
50 - * We renamed (per Solaris) iswideogram, iswspecial, iswspecial to the
51 - * equivalent values without "w". We added a new isnumber, that looks
52 - * for non-ASCII numbers.
52 + * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial to the
53 + * equivalent values without "w". The Solaris specific function isenglish()
54 + * is here, but does not get an isw* equivalent.
55 + *
56 + * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
57 + * only return true for characters in the portable set. While the assumption
58 + * is not technically correct, it turns out that for all of our locales this
59 + * is true. iswhexnumber is aliased to iswxdigit.
53 60 */
54 61
55 62 static int
56 -__istype(wint_t c, unsigned int f)
63 +__istype_l(locale_t loc, wint_t c, unsigned int f)
57 64 {
58 65 unsigned int rt;
59 66
60 - /* Fast path for single byte locales */
61 67 if (c < 0 || c >= _CACHED_RUNES)
62 - rt = ___runetype(c);
68 + rt = __runetype(loc->runelocale, c);
63 69 else
64 - rt = _CurrentRuneLocale->__runetype[c];
70 + rt = loc->runelocale->__runetype[c];
65 71 return (rt & f);
66 72 }
67 73
68 74 static int
69 -__isctype(wint_t c, unsigned int f)
75 +__istype(wint_t c, unsigned int f)
70 76 {
71 - unsigned int rt;
77 + return (__istype_l(uselocale(NULL), c, f));
78 +}
72 79
73 - /* Fast path for single byte locales */
74 - if (c < 0 || c >= _CACHED_RUNES)
75 - return (0);
76 - else
77 - rt = _CurrentRuneLocale->__runetype[c];
78 - return (rt & f);
80 +int
81 +iswctype_l(wint_t wc, wctype_t class, locale_t loc)
82 +{
83 + if (iswascii(wc))
84 + return (__ctype_mask[wc] & class);
85 + return (__istype_l(loc, wc, class));
79 86 }
80 87
81 88 #undef iswctype
82 89 int
83 90 iswctype(wint_t wc, wctype_t class)
84 91 {
92 + /*
93 + * Note that we don't just call iswctype_l because we optimize for
94 + * the iswascii() case, so that most of the time we have no need to
95 + * call uselocale().
96 + */
97 + if (iswascii(wc))
98 + return (__ctype_mask[wc] & class);
85 99 return (__istype(wc, class));
86 100 }
87 101
102 +/*
103 + * This is a legacy version, baked into binaries.
104 + */
88 105 #undef _iswctype
89 106 unsigned
90 107 _iswctype(wchar_t wc, int class)
91 108 {
109 + if (iswascii(wc))
110 + return (__ctype_mask[wc] & class);
92 111 return (__istype((wint_t)wc, (unsigned int)class));
93 112 }
94 113
95 -#undef iswalnum
96 -int
97 -iswalnum(wint_t wc)
98 -{
99 - return (__istype(wc, _CTYPE_A|_CTYPE_D));
114 +#define DEFN_ISWTYPE(type, mask) \
115 +int \
116 +isw##type##_l(wint_t wc, locale_t loc) \
117 +{ \
118 + return (iswascii(wc) ? \
119 + (__ctype_mask[wc] & (mask)) : \
120 + __istype_l(loc, wc, mask)); \
121 +} \
122 + \
123 +int \
124 +isw##type(wint_t wc) \
125 +{ \
126 + return (iswascii(wc) ? \
127 + (__ctype_mask[wc] & (mask)) : \
128 + __istype(wc, mask)); \
100 129 }
101 130
102 -#undef iswalpha
103 -int
104 -iswalpha(wint_t wc)
105 -{
106 - return (__istype(wc, _CTYPE_A));
107 -}
131 +/* kill off any macros */
132 +#undef iswalnum
133 +#undef iswalpha
134 +#undef iswblank
108 135
109 -#undef iswblank
110 -int
111 -iswblank(wint_t wc)
112 -{
113 - return (__istype(wc, _CTYPE_B));
114 -}
136 +DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
137 +DEFN_ISWTYPE(alpha, _CTYPE_A)
138 +DEFN_ISWTYPE(blank, _CTYPE_B)
139 +DEFN_ISWTYPE(cntrl, _CTYPE_C)
140 +DEFN_ISWTYPE(digit, _CTYPE_D)
141 +DEFN_ISWTYPE(graph, _CTYPE_G)
142 +DEFN_ISWTYPE(lower, _CTYPE_L)
143 +DEFN_ISWTYPE(upper, _CTYPE_U)
144 +DEFN_ISWTYPE(print, _CTYPE_R)
145 +DEFN_ISWTYPE(punct, _CTYPE_P)
146 +DEFN_ISWTYPE(space, _CTYPE_S)
147 +DEFN_ISWTYPE(xdigit, _CTYPE_X)
148 +DEFN_ISWTYPE(ideogram, _CTYPE_I)
149 +DEFN_ISWTYPE(phonogram, _CTYPE_Q)
150 +DEFN_ISWTYPE(special, _CTYPE_T)
151 +DEFN_ISWTYPE(number, _CTYPE_N)
115 152
116 -#undef iswcntrl
117 -int
118 -iswcntrl(wint_t wc)
119 -{
120 - return (__istype(wc, _CTYPE_C));
121 -}
122 153
123 -#undef iswdigit
124 -int
125 -iswdigit(wint_t wc)
126 -{
127 - return (__isctype(wc, _CTYPE_D));
128 -}
154 +#undef iswhexnumber
155 +#pragma weak iswhexnumber = iswxdigit
156 +#pragma weak iswhexnumber_l = iswxdigit_l
129 157
130 -#undef iswgraph
131 -int
132 -iswgraph(wint_t wc)
133 -{
134 - return (__istype(wc, _CTYPE_G));
135 -}
136 -
137 158 #undef isideogram
138 -int
139 -isideogram(wint_t wc)
140 -{
141 - return (__istype(wc, _CTYPE_I));
142 -}
159 +#pragma weak isideogram = iswideogram
143 160
144 -#undef iswlower
145 -int
146 -iswlower(wint_t wc)
147 -{
148 - return (__istype(wc, _CTYPE_L));
149 -}
150 -
151 161 #undef isphonogram
152 -int
153 -isphonogram(wint_t wc)
154 -{
155 - return (__istype(wc, _CTYPE_Q));
156 -}
162 +#pragma weak isphonogram = iswphonogram
157 163
158 -#undef iswprint
159 -int
160 -iswprint(wint_t wc)
161 -{
162 - return (__istype(wc, _CTYPE_R));
163 -}
164 -
165 -#undef iswpunct
166 -int
167 -iswpunct(wint_t wc)
168 -{
169 - return (__istype(wc, _CTYPE_P));
170 -}
171 -
172 -#undef iswspace
173 -int
174 -iswspace(wint_t wc)
175 -{
176 - return (__istype(wc, _CTYPE_S));
177 -}
178 -
179 -#undef iswupper
180 -int
181 -iswupper(wint_t wc)
182 -{
183 - return (__istype(wc, _CTYPE_U));
184 -}
185 -
186 -#undef iswxdigit
187 -int
188 -iswxdigit(wint_t wc)
189 -{
190 - return (__isctype(wc, _CTYPE_X));
191 -}
192 -
193 -#undef isenglish
194 -int
195 -isenglish(wint_t wc)
196 -{
197 - return (__istype(wc, _CTYPE_E));
198 -}
199 -
200 164 #undef isspecial
201 -int
202 -isspecial(wint_t wc)
203 -{
204 - return (__istype(wc, _CTYPE_T));
205 -}
165 +#pragma weak isspecial = iswspecial
206 166
207 167 #undef isnumber
208 -int
209 -isnumber(wint_t wc)
210 -{
211 - return (__istype(wc, _CTYPE_N));
212 -}
168 +#pragma weak isnumber = iswnumber
213 169
214 170 /*
215 171 * FreeBSD has iswrune() for use by external programs, and this is used by
216 172 * the "tr" program. As that program is part of our consolidation, we
217 173 * provide an _ILLUMOS_PRIVATE version of this function that we can use.
218 174 *
219 175 * No programs that are not part of the illumos stack itself should use
220 176 * this function -- programs that do reference it will not be portable to
221 177 * other versions of SunOS or Solaris.
222 178 */
223 179 int
224 180 __iswrune(wint_t wc)
225 181 {
226 182 /*
227 183 * Note, FreeBSD ignored the low order byte, as they encode their
228 184 * ctype values differently. We can't do that (ctype is baked into
229 185 * applications), but instead can just check if *any* bit is set in
230 186 * the ctype. Any bit being set indicates it's a valid rune.
187 + *
188 + * NB: For ASCII all positions except NUL are runes.
231 189 */
232 - return (__istype(wc, 0xffffffffU));
190 + return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU));
191 +}
192 +
193 +/*
194 + * isenglish is a Solaris legacy. No isw* equivalent. Note that this most
195 + * likely doesn't work, as the locale data we have doesn't include it. It
196 + * specifically is only valid for non-ASCII characters. We're not sure this
197 + * is in actual use in the wild.
198 + */
199 +#undef isenglish
200 +int
201 +isenglish(wint_t wc)
202 +{
203 + return (__istype(wc, _CTYPE_E));
233 204 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX