14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
35 * Use is subject to license terms.
36 */
37
38 /*
39 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
40 * Compares a filename or pathname to a pattern.
41 */
42
43 /*
44 * Some notes on multibyte character support:
45 * 1. Patterns with illegal byte sequences match nothing.
46 * 2. Illegal byte sequences in the "string" argument are handled by treating
47 * them as single-byte characters with a value of the first byte of the
48 * sequence cast to wchar_t.
49 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
50 * used for most, but not all, conversions. Further work will be required
51 * to support state-dependent encodings.
52 */
53
54 #include "lint.h"
55 #include <fnmatch.h>
56 #include <limits.h>
57 #include <string.h>
58 #include <wchar.h>
59 #include <wctype.h>
60
61 #include "collate.h"
62
63 #define EOS '\0'
64
65 #define RANGE_MATCH 1
66 #define RANGE_NOMATCH 0
67 #define RANGE_ERROR (-1)
68
69 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
70 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
71 mbstate_t);
72
/*
 * fnmatch: match `string' against the shell-style `pattern'.
 *
 * pattern	the pattern to match against
 * string	the filename or pathname being tested
 * flags	FNM_* flags that modify the matching rules
 *
 * Returns 0 when the string matches and FNM_NOMATCH when it does not.
 * All of the work is done by fnmatch1(); this wrapper only supplies the
 * start of the string (needed for the leading-period test) and a zeroed
 * multibyte conversion state for both the pattern and the string.
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/* Passed by value, so every call starts from the initial state. */
	static const mbstate_t initial = { 0 };

	return (fnmatch1(pattern, string, string, flags, initial, initial));
}
82
/*
 * fnmatch1: matching engine called by fnmatch() and, for the general
 * wildcard case, recursively by itself.
 *
 * pattern	current position in the pattern
 * string	current position in the string
 * stringstart	beginning of the whole string; consulted by the FNM_PERIOD
 *		leading-period tests
 * flags	FNM_* flags
 * patmbs	conversion state for the pattern (received by value, so a
 *		recursive call works on its own copy)
 * strmbs	conversion state for the string (likewise by value)
 *
 * Each loop iteration decodes one wide character from the pattern (pc) and
 * one from the string (sc) and dispatches on pc.  Returns 0 on a match and
 * FNM_NOMATCH otherwise.
 */
83 static int
84 fnmatch1(const char *pattern, const char *string, const char *stringstart,
85 int flags, mbstate_t patmbs, mbstate_t strmbs)
86 {
87 char *newp;
88 char c;
89 wchar_t pc, sc;
90 size_t pclen, sclen;
91
92 for (;;) {
/* Decode the next pattern character; an undecodable pattern never matches. */
93 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
94 if (pclen == (size_t)-1 || pclen == (size_t)-2)
95 return (FNM_NOMATCH);
96 pattern += pclen;
/*
 * Decode the next string character.  If that fails, use the raw byte as a
 * one-byte character and reset the string conversion state.
 */
97 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
98 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
99 sc = (unsigned char)*string;
100 sclen = 1;
101 (void) memset(&strmbs, 0, sizeof (strmbs));
102 }
103 switch (pc) {
104 case EOS:
105 /*
106 * Removed FNM_LEADING_DIR, as it is not present
107 * on Solaris.
108 */
109 return (sc == EOS ? 0 : FNM_NOMATCH);
110 case '?':
111 if (sc == EOS)
112 return (FNM_NOMATCH);
/* With FNM_PATHNAME, '?' does not match a slash. */
113 if (sc == '/' && (flags & FNM_PATHNAME))
114 return (FNM_NOMATCH);
/*
 * With FNM_PERIOD, a '.' at the very start of the string, or directly
 * after a '/' when FNM_PATHNAME is also set, is tested for here.
 */
115 if (sc == '.' && (flags & FNM_PERIOD) &&
116 (string == stringstart ||
117 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
/*
 * NOTE(review): the tail of the leading-period test is repeated below and
 * `c' is read further down without any visible assignment.  The lines that
 * end the '?' case and begin the wildcard case appear to be missing from
 * this text -- confirm against the complete file before changing anything
 * in this region.
 */
128 (string == stringstart ||
129 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
130 return (FNM_NOMATCH);
131
132 /* Optimize for pattern with * at end or before /. */
133 if (c == EOS)
134 if (flags & FNM_PATHNAME)
135 return (strchr(string, '/') == NULL ?
136 0 : FNM_NOMATCH);
137 else
138 return (0);
139 else if (c == '/' && flags & FNM_PATHNAME) {
140 if ((string = strchr(string, '/')) == NULL)
141 return (FNM_NOMATCH);
142 break;
143 }
144
145 /* General case, use recursion. */
146 while (sc != EOS) {
/* A zero return means the rest of the pattern matched from here. */
147 if (!fnmatch1(pattern, string, stringstart,
148 flags, patmbs, strmbs))
149 return (0);
/* Otherwise step over one string character and try again. */
150 sclen = mbrtowc(&sc, string, MB_LEN_MAX,
151 &strmbs);
152 if (sclen == (size_t)-1 ||
153 sclen == (size_t)-2) {
154 sc = (unsigned char)*string;
155 sclen = 1;
156 (void) memset(&strmbs, 0,
157 sizeof (strmbs));
158 }
/* Under FNM_PATHNAME, do not advance past a slash. */
159 if (sc == '/' && flags & FNM_PATHNAME)
160 break;
161 string += sclen;
162 }
163 return (FNM_NOMATCH);
164 case '[':
165 if (sc == EOS)
166 return (FNM_NOMATCH);
/* With FNM_PATHNAME, a bracket expression does not match a slash. */
167 if (sc == '/' && (flags & FNM_PATHNAME))
168 return (FNM_NOMATCH);
/* Same leading-period rule as for '?'. */
169 if (sc == '.' && (flags & FNM_PERIOD) &&
170 (string == stringstart ||
171 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
172 return (FNM_NOMATCH);
173
174 switch (rangematch(pattern, sc, flags, &newp,
175 &patmbs)) {
176 case RANGE_ERROR:
/* rangematch() reported RANGE_ERROR: compare '[' as an ordinary character. */
177 goto norm;
178 case RANGE_MATCH:
/* Continue with the pattern position that rangematch() stored in newp. */
179 pattern = newp;
180 break;
181 case RANGE_NOMATCH:
182 return (FNM_NOMATCH);
183 }
184 string += sclen;
185 break;
186 case '\\':
/*
 * Unless FNM_NOESCAPE is set, the character after the backslash is the one
 * that gets compared; a backslash at the very end of the pattern stands
 * for itself.
 */
187 if (!(flags & FNM_NOESCAPE)) {
188 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
189 &patmbs);
190 if (pclen == (size_t)-1 || pclen == (size_t)-2)
191 return (FNM_NOMATCH);
192 if (pclen == 0)
193 pc = '\\';
194 pattern += pclen;
195 }
196 /* FALLTHROUGH */
197 default:
198 norm:
/* Literal comparison; case-insensitive as well when FNM_IGNORECASE is set. */
199 if (pc == sc)
200 string += sclen;
201
202 else if ((flags & FNM_IGNORECASE) &&
203 (towlower(pc) == towlower(sc)))
204 string += sclen;
205 else
206 return (FNM_NOMATCH);
207
208 break;
209 }
210 }
211 /* NOTREACHED */
212 }
213
214 static int
215 rangematch(pattern, test, flags, newp, patmbs)
216 const char *pattern;
217 wchar_t test;
218 int flags;
219 char **newp;
220 mbstate_t *patmbs;
221 {
222 int negate, ok;
223 wchar_t c, c2;
224 size_t pclen;
225 const char *origpat;
226
227 /*
228 * A bracket expression starting with an unquoted circumflex
229 * character produces unspecified results (IEEE 1003.2-1992,
230 * 3.13.2). This implementation treats it like '!', for
231 * consistency with the regular expression syntax.
232 * J.T. Conklin (conklin@ngai.kaleida.com)
233 */
234 if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
235 ++pattern;
236
237 if (flags & FNM_IGNORECASE)
238 test = towlower(test);
239
240 /*
241 * A right bracket shall lose its special meaning and represent
242 * itself in a bracket expression if it occurs first in the list.
243 * -- POSIX.2 2.8.3.2
244 */
245 ok = 0;
246 origpat = pattern;
247 for (;;) {
248 if (*pattern == ']' && pattern > origpat) {
249 pattern++;
250 break;
251 } else if (*pattern == '\0') {
252 return (RANGE_ERROR);
253 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
254 return (RANGE_NOMATCH);
255 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
256 pattern++;
257 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
258 if (pclen == (size_t)-1 || pclen == (size_t)-2)
259 return (RANGE_NOMATCH);
260 pattern += pclen;
261
262 if (flags & FNM_IGNORECASE)
263 c = towlower(c);
264
265 if (*pattern == '-' && *(pattern + 1) != EOS &&
266 *(pattern + 1) != ']') {
267 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
268 if (*pattern != EOS)
269 pattern++;
270 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
271 if (pclen == (size_t)-1 || pclen == (size_t)-2)
272 return (RANGE_NOMATCH);
273 pattern += pclen;
274 if (c2 == EOS)
275 return (RANGE_ERROR);
276
277 if (flags & FNM_IGNORECASE)
278 c2 = towlower(c2);
279
280 if (_collate_load_error ?
281 c <= test && test <= c2 :
282 _collate_range_cmp(c, test) <= 0 &&
283 _collate_range_cmp(test, c2) <= 0)
284 ok = 1;
285 } else if (c == test)
286 ok = 1;
287 }
288
289 *newp = (char *)pattern;
290 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
291 }
|
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright 2013 Garrett D'Amore <garrett@damore.org>
35 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
36 * Use is subject to license terms.
37 */
38
39 /*
40 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
41 * Compares a filename or pathname to a pattern.
42 */
43
44 /*
45 * Some notes on multibyte character support:
46 * 1. Patterns with illegal byte sequences match nothing.
47 * 2. Illegal byte sequences in the "string" argument are handled by treating
48 * them as single-byte characters with a value of the first byte of the
49 * sequence cast to wchar_t.
50 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
51 * used for most, but not all, conversions. Further work will be required
52 * to support state-dependent encodings.
53 */
54
55 #include "lint.h"
56 #include <fnmatch.h>
57 #include <limits.h>
58 #include <string.h>
59 #include <wchar.h>
60 #include <xlocale.h>
61 #include <wctype.h>
62 #include "localeimpl.h"
63 #include "collate.h"
64
65 #define EOS '\0'
66
67 #define RANGE_MATCH 1
68 #define RANGE_NOMATCH 0
69 #define RANGE_ERROR (-1)
70
71 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *,
72 locale_t);
73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
74 mbstate_t, locale_t);
75
76 int
77 fnmatch(pattern, string, flags)
78 const char *pattern, *string;
79 int flags;
80 {
81 locale_t loc = uselocale(NULL);
82 static const mbstate_t initial = { 0 };
83
84 return (fnmatch1(pattern, string, string, flags, initial, initial,
85 loc));
86 }
87
/*
 * fnmatch1: matching engine called by fnmatch() and, for the general
 * wildcard case, recursively by itself.
 *
 * pattern	current position in the pattern
 * string	current position in the string
 * stringstart	beginning of the whole string; consulted by the FNM_PERIOD
 *		leading-period tests
 * flags	FNM_* flags
 * patmbs	conversion state for the pattern (received by value, so a
 *		recursive call works on its own copy)
 * strmbs	conversion state for the string (likewise by value)
 * loc		locale handed to mbrtowc_l(), towlower_l() and rangematch()
 *
 * Each loop iteration decodes one wide character from the pattern (pc) and
 * one from the string (sc) and dispatches on pc.  Returns 0 on a match and
 * FNM_NOMATCH otherwise.
 */
88 static int
89 fnmatch1(const char *pattern, const char *string, const char *stringstart,
90 int flags, mbstate_t patmbs, mbstate_t strmbs, locale_t loc)
91 {
92 char *newp;
93 char c;
94 wchar_t pc, sc;
95 size_t pclen, sclen;
96
97 for (;;) {
/* Decode the next pattern character; an undecodable pattern never matches. */
98 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX, &patmbs, loc);
99 if (pclen == (size_t)-1 || pclen == (size_t)-2)
100 return (FNM_NOMATCH);
101 pattern += pclen;
/*
 * Decode the next string character.  If that fails, use the raw byte as a
 * one-byte character and reset the string conversion state.
 */
102 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX, &strmbs, loc);
103 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
104 sc = (unsigned char)*string;
105 sclen = 1;
106 (void) memset(&strmbs, 0, sizeof (strmbs));
107 }
108 switch (pc) {
109 case EOS:
110 /*
111 * Removed FNM_LEADING_DIR, as it is not present
112 * on Solaris.
113 */
114 return (sc == EOS ? 0 : FNM_NOMATCH);
115 case '?':
116 if (sc == EOS)
117 return (FNM_NOMATCH);
/* With FNM_PATHNAME, '?' does not match a slash. */
118 if (sc == '/' && (flags & FNM_PATHNAME))
119 return (FNM_NOMATCH);
/*
 * With FNM_PERIOD, a '.' at the very start of the string, or directly
 * after a '/' when FNM_PATHNAME is also set, is tested for here.
 */
120 if (sc == '.' && (flags & FNM_PERIOD) &&
121 (string == stringstart ||
122 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
/*
 * NOTE(review): the tail of the leading-period test is repeated below and
 * `c' is read further down without any visible assignment.  The lines that
 * end the '?' case and begin the wildcard case appear to be missing from
 * this text -- confirm against the complete file before changing anything
 * in this region.
 */
133 (string == stringstart ||
134 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
135 return (FNM_NOMATCH);
136
137 /* Optimize for pattern with * at end or before /. */
138 if (c == EOS)
139 if (flags & FNM_PATHNAME)
140 return (strchr(string, '/') == NULL ?
141 0 : FNM_NOMATCH);
142 else
143 return (0);
144 else if (c == '/' && flags & FNM_PATHNAME) {
145 if ((string = strchr(string, '/')) == NULL)
146 return (FNM_NOMATCH);
147 break;
148 }
149
150 /* General case, use recursion. */
151 while (sc != EOS) {
/* A zero return means the rest of the pattern matched from here. */
152 if (!fnmatch1(pattern, string, stringstart,
153 flags, patmbs, strmbs, loc))
154 return (0);
/* Otherwise step over one string character and try again. */
155 sclen = mbrtowc_l(&sc, string, MB_LEN_MAX,
156 &strmbs, loc);
157 if (sclen == (size_t)-1 ||
158 sclen == (size_t)-2) {
159 sc = (unsigned char)*string;
160 sclen = 1;
161 (void) memset(&strmbs, 0,
162 sizeof (strmbs));
163 }
/* Under FNM_PATHNAME, do not advance past a slash. */
164 if (sc == '/' && flags & FNM_PATHNAME)
165 break;
166 string += sclen;
167 }
168 return (FNM_NOMATCH);
169 case '[':
170 if (sc == EOS)
171 return (FNM_NOMATCH);
/* With FNM_PATHNAME, a bracket expression does not match a slash. */
172 if (sc == '/' && (flags & FNM_PATHNAME))
173 return (FNM_NOMATCH);
/* Same leading-period rule as for '?'. */
174 if (sc == '.' && (flags & FNM_PERIOD) &&
175 (string == stringstart ||
176 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
177 return (FNM_NOMATCH);
178
179 switch (rangematch(pattern, sc, flags, &newp,
180 &patmbs, loc)) {
181 case RANGE_ERROR:
/* rangematch() reported RANGE_ERROR: compare '[' as an ordinary character. */
182 goto norm;
183 case RANGE_MATCH:
/* Continue with the pattern position that rangematch() stored in newp. */
184 pattern = newp;
185 break;
186 case RANGE_NOMATCH:
187 return (FNM_NOMATCH);
188 }
189 string += sclen;
190 break;
191 case '\\':
/*
 * Unless FNM_NOESCAPE is set, the character after the backslash is the one
 * that gets compared; a backslash at the very end of the pattern stands
 * for itself.
 */
192 if (!(flags & FNM_NOESCAPE)) {
193 pclen = mbrtowc_l(&pc, pattern, MB_LEN_MAX,
194 &patmbs, loc);
195 if (pclen == (size_t)-1 || pclen == (size_t)-2)
196 return (FNM_NOMATCH);
197 if (pclen == 0)
198 pc = '\\';
199 pattern += pclen;
200 }
201 /* FALLTHROUGH */
202 default:
203 norm:
/* Literal comparison; case-insensitive as well when FNM_IGNORECASE is set. */
204 if (pc == sc)
205 string += sclen;
206
207 else if ((flags & FNM_IGNORECASE) &&
208 (towlower_l(pc, loc) == towlower_l(sc, loc)))
209 string += sclen;
210 else
211 return (FNM_NOMATCH);
212
213 break;
214 }
215 }
216 /* NOTREACHED */
217 }
218
219 static int
220 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
221 mbstate_t *patmbs, locale_t loc)
222 {
223 int negate, ok;
224 wchar_t c, c2;
225 size_t pclen;
226 const char *origpat;
227
228 /*
229 * A bracket expression starting with an unquoted circumflex
230 * character produces unspecified results (IEEE 1003.2-1992,
231 * 3.13.2). This implementation treats it like '!', for
232 * consistency with the regular expression syntax.
233 * J.T. Conklin (conklin@ngai.kaleida.com)
234 */
235 if ((negate = (*pattern == '!' || *pattern == '^')) != 0)
236 ++pattern;
237
238 if (flags & FNM_IGNORECASE)
239 test = towlower_l(test, loc);
240
241 /*
242 * A right bracket shall lose its special meaning and represent
243 * itself in a bracket expression if it occurs first in the list.
244 * -- POSIX.2 2.8.3.2
245 */
246 ok = 0;
247 origpat = pattern;
248 for (;;) {
249 if (*pattern == ']' && pattern > origpat) {
250 pattern++;
251 break;
252 } else if (*pattern == '\0') {
253 return (RANGE_ERROR);
254 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
255 return (RANGE_NOMATCH);
256 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
257 pattern++;
258 pclen = mbrtowc_l(&c, pattern, MB_LEN_MAX, patmbs, loc);
259 if (pclen == (size_t)-1 || pclen == (size_t)-2)
260 return (RANGE_NOMATCH);
261 pattern += pclen;
262
263 if (flags & FNM_IGNORECASE)
264 c = towlower_l(c, loc);
265
266 if (*pattern == '-' && *(pattern + 1) != EOS &&
267 *(pattern + 1) != ']') {
268 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
269 if (*pattern != EOS)
270 pattern++;
271 pclen = mbrtowc_l(&c2, pattern, MB_LEN_MAX, patmbs,
272 loc);
273 if (pclen == (size_t)-1 || pclen == (size_t)-2)
274 return (RANGE_NOMATCH);
275 pattern += pclen;
276 if (c2 == EOS)
277 return (RANGE_ERROR);
278
279 if (flags & FNM_IGNORECASE)
280 c2 = towlower_l(c2, loc);
281
282 if (loc->collate->lc_is_posix ?
283 c <= test && test <= c2 :
284 _collate_range_cmp(c, test, loc) <= 0 &&
285 _collate_range_cmp(test, c2, loc) <= 0)
286 ok = 1;
287 } else if (c == test)
288 ok = 1;
289 }
290
291 *newp = (char *)pattern;
292 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
293 }
|