Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/fnmatch.c
+++ new/usr/src/lib/libc/port/locale/fnmatch.c
1 1 /*
2 2 * Copyright (c) 1989, 1993, 1994
3 3 * The Regents of the University of California. All rights reserved.
4 4 *
5 5 * This code is derived from software contributed to Berkeley by
6 6 * Guido van Rossum.
7 7 *
8 8 * Redistribution and use in source and binary forms, with or without
9 9 * modification, are permitted provided that the following conditions
10 10 * are met:
11 11 * 1. Redistributions of source code must retain the above copyright
12 12 * notice, this list of conditions and the following disclaimer.
13 13 * 2. Redistributions in binary form must reproduce the above copyright
14 14 * notice, this list of conditions and the following disclaimer in the
15 15 * documentation and/or other materials provided with the distribution.
16 16 * 4. Neither the name of the University nor the names of its contributors
17 17 * may be used to endorse or promote products derived from this software
18 18 * without specific prior written permission.
19 19 *
20 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 30 * SUCH DAMAGE.
31 31 */
32 32
33 33 /*
34 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
34 35 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
35 36 * Use is subject to license terms.
36 37 */
37 38
38 39 /*
39 40 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
40 41 * Compares a filename or pathname to a pattern.
41 42 */
42 43
43 44 /*
44 45 * Some notes on multibyte character support:
45 46 * 1. Patterns with illegal byte sequences match nothing.
46 47 * 2. Illegal byte sequences in the "string" argument are handled by treating
47 48 * them as single-byte characters with a value of the first byte of the
48 49 * sequence cast to wchar_t.
49 50 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
50 51 * used for most, but not all, conversions. Further work will be required
51 52 * to support state-dependent encodings.
52 53 */
53 54
54 55 #include "lint.h"
55 56 #include <fnmatch.h>
56 57 #include <limits.h>
57 58 #include <string.h>
58 59 #include <wchar.h>
60 +#include <xlocale.h>
59 61 #include <wctype.h>
60 -
62 +#include "localeimpl.h"
61 63 #include "collate.h"
62 64
63 65 #define EOS '\0'
64 66
65 67 #define RANGE_MATCH 1
66 68 #define RANGE_NOMATCH 0
67 69 #define RANGE_ERROR (-1)
68 70
69 -static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
71 +static int rangematch(const char *, wchar_t, int, char **, mbstate_t *,
72 + locale_t);
70 73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
71 - mbstate_t);
74 + mbstate_t, locale_t);
72 75
73 76 int
74 77 fnmatch(pattern, string, flags)
75 78 const char *pattern, *string;
76 79 int flags;
77 80 {
81 + locale_t loc = uselocale(NULL);
78 82 static const mbstate_t initial = { 0 };
79 83
80 - return (fnmatch1(pattern, string, string, flags, initial, initial));
84 + return (fnmatch1(pattern, string, string, flags, initial, initial,
85 + loc));
81 86 }
82 87
83 88 static int
84 89 fnmatch1(const char *pattern, const char *string, const char *stringstart,
85 - int flags, mbstate_t patmbs, mbstate_t strmbs)
90 + int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc)
86 91 {
87 92 char *newp;
88 93 char c;
89 94 wchar_t pc, sc;
90 95 size_t pclen, sclen;
91 96
92 97 for (;;) {
93 - pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
98 + pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
94 99 if (pclen == (size_t)-1 || pclen == (size_t)-2)
95 100 return (FNM_NOMATCH);
96 101 pattern += pclen;
97 - sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
102 + sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
98 103 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
99 104 sc = (unsigned char)*string;
100 105 sclen = 1;
101 106 (void) memset(&strmbs, 0, sizeof (strmbs));
102 107 }
103 108 switch (pc) {
104 109 case EOS:
105 110 /*
106 111 * Removed FNM_LEADING_DIR, as it is not present
107 112 * on Solaris.
108 113 */
109 114 return (sc == EOS ? 0 : FNM_NOMATCH);
110 115 case '?':
111 116 if (sc == EOS)
112 117 return (FNM_NOMATCH);
113 118 if (sc == '/' && (flags & FNM_PATHNAME))
114 119 return (FNM_NOMATCH);
115 120 if (sc == '.' && (flags & FNM_PERIOD) &&
116 121 (string == stringstart ||
117 122 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
118 123 return (FNM_NOMATCH);
119 124 string += sclen;
120 125 break;
121 126 case '*':
122 127 c = *pattern;
123 128 /* Collapse multiple stars. */
124 129 while (c == '*')
125 130 c = *++pattern;
126 131
127 132 if (sc == '.' && (flags & FNM_PERIOD) &&
128 133 (string == stringstart ||
129 134 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
130 135 return (FNM_NOMATCH);
131 136
132 137 /* Optimize for pattern with * at end or before /. */
133 138 if (c == EOS)
134 139 if (flags & FNM_PATHNAME)
135 140 return (strchr(string, '/') == NULL ?
136 141 0 : FNM_NOMATCH);
137 142 else
138 143 return (0);
139 144 else if (c == '/' && flags & FNM_PATHNAME) {
140 145 if ((string = strchr(string, '/')) == NULL)
141 146 return (FNM_NOMATCH);
142 147 break;
143 148 }
144 149
145 150 /* General case, use recursion. */
146 151 while (sc != EOS) {
147 152 if (!fnmatch1(pattern, string, stringstart,
148 - flags, patmbs, strmbs))
153 + flags, patmbs, strmbs, loc))
149 154 return (0);
150 - sclen = mbrtowc(&sc, string, MB_LEN_MAX,
151 - &strmbs);
155 + sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
156 + &strmbs, loc);
152 157 if (sclen == (size_t)-1 ||
153 158 sclen == (size_t)-2) {
154 159 sc = (unsigned char)*string;
155 160 sclen = 1;
156 161 (void) memset(&strmbs, 0,
157 162 sizeof (strmbs));
158 163 }
159 164 if (sc == '/' && flags & FNM_PATHNAME)
160 165 break;
161 166 string += sclen;
162 167 }
163 168 return (FNM_NOMATCH);
164 169 case '[':
165 170 if (sc == EOS)
166 171 return (FNM_NOMATCH);
167 172 if (sc == '/' && (flags & FNM_PATHNAME))
168 173 return (FNM_NOMATCH);
169 174 if (sc == '.' && (flags & FNM_PERIOD) &&
170 175 (string == stringstart ||
171 176 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
172 177 return (FNM_NOMATCH);
173 178
174 179 switch (rangematch(pattern, sc, flags, &newp,
175 - &patmbs)) {
180 + &patmbs, loc)) {
176 181 case RANGE_ERROR:
177 182 goto norm;
178 183 case RANGE_MATCH:
179 184 pattern = newp;
180 185 break;
181 186 case RANGE_NOMATCH:
182 187 return (FNM_NOMATCH);
183 188 }
184 189 string += sclen;
185 190 break;
186 191 case '\\':
187 192 if (!(flags & FNM_NOESCAPE)) {
188 - pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
189 - &patmbs);
193 + pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
194 + &patmbs, loc);
190 195 if (pclen == (size_t)-1 || pclen == (size_t)-2)
191 196 return (FNM_NOMATCH);
192 197 if (pclen == 0)
193 198 pc = '\\';
194 199 pattern += pclen;
195 200 }
196 201 /* FALLTHROUGH */
197 202 default:
198 203 norm:
199 204 if (pc == sc)
200 205 string += sclen;
201 206
202 207 else if ((flags & FNM_IGNORECASE) &&
203 - (towlower(pc) == towlower(sc)))
208 + (towlower_l(pc, loc) == towlower_l(sc, loc)))
204 209 string += sclen;
205 210 else
206 211 return (FNM_NOMATCH);
207 212
208 213 break;
209 214 }
210 215 }
211 216 /* NOTREACHED */
212 217 }
213 218
214 219 static int
215 -rangematch(pattern, test, flags, newp, patmbs)
216 - const char *pattern;
217 - wchar_t test;
218 - int flags;
219 - char **newp;
220 - mbstate_t *patmbs;
220 +rangematch(const char *pattern, wchar_t test, int flags, char **newp,
221 + mbstate_t *patmbs, locale_t loc)
221 222 {
222 223 int negate, ok;
223 224 wchar_t c, c2;
224 225 size_t pclen;
225 226 const char *origpat;
226 227
227 228 /*
228 229 * A bracket expression starting with an unquoted circumflex
229 230 * character produces unspecified results (IEEE 1003.2-1992,
230 231 * 3.13.2). This implementation treats it like '!', for
231 232 * consistency with the regular expression syntax.
232 233 * J.T. Conklin (conklin@ngai.kaleida.com)
233 234 */
234 235 if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
235 236 ++pattern;
236 237
237 238 if (flags & FNM_IGNORECASE)
238 - test = towlower(test);
239 + test = towlower_l(test, loc);
239 240
240 241 /*
241 242 * A right bracket shall lose its special meaning and represent
242 243 * itself in a bracket expression if it occurs first in the list.
243 244 * -- POSIX.2 2.8.3.2
244 245 */
245 246 ok = 0;
246 247 origpat = pattern;
247 248 for (;;) {
248 249 if (*pattern == ']' && pattern > origpat) {
249 250 pattern++;
250 251 break;
251 252 } else if (*pattern == '\0') {
252 253 return (RANGE_ERROR);
253 254 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
254 255 return (RANGE_NOMATCH);
255 256 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
256 257 pattern++;
257 - pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
258 + pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
258 259 if (pclen == (size_t)-1 || pclen == (size_t)-2)
259 260 return (RANGE_NOMATCH);
260 261 pattern += pclen;
261 262
262 263 if (flags & FNM_IGNORECASE)
263 - c = towlower(c);
264 + c = towlower_l(c, loc);
264 265
265 266 if (*pattern == '-' && *(pattern + 1) != EOS &&
266 267 *(pattern + 1) != ']') {
267 268 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
268 269 if (*pattern != EOS)
269 270 pattern++;
270 - pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
271 + pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs,
272 + loc);
271 273 if (pclen == (size_t)-1 || pclen == (size_t)-2)
272 274 return (RANGE_NOMATCH);
273 275 pattern += pclen;
274 276 if (c2 == EOS)
275 277 return (RANGE_ERROR);
276 278
277 279 if (flags & FNM_IGNORECASE)
278 - c2 = towlower(c2);
280 + c2 = towlower_l(c2, loc);
279 281
280 - if (_collate_load_error ?
282 + if (loc->collate->lc_is_posix ?
281 283 c <= test && test <= c2 :
282 - _collate_range_cmp(c, test) <= 0 &&
283 - _collate_range_cmp(test, c2) <= 0)
284 + _collate_range_cmp(c, test, loc) <= 0 &&
285 + _collate_range_cmp(test, c2, loc) <= 0)
284 286 ok = 1;
285 287 } else if (c == test)
286 288 ok = 1;
287 289 }
288 290
289 291 *newp = (char *)pattern;
290 292 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
291 293 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX