Print this page
1097 glob(3c) needs to support non-POSIX options
3341 The sftp command should use the native glob()
@@ -18,282 +18,1210 @@
*
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
+ * Copyright (c) 2012 Gary Mills
*/
+/* $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */
/*
- * This code is MKS code ported to Solaris originally with minimum
- * modifications so that upgrades from MKS would readily integrate.
- * The MKS basis for this modification was:
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
*
- * $Id: glob.c 1.31 1994/04/07 22:50:43 mark
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
*
- * Additional modifications have been made to this code to make it
- * 64-bit clean.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
/*
- * glob, globfree -- POSIX.2 compatible file name expansion routines.
+ * glob(3) -- a superset of the one defined in POSIX 1003.2.
*
- * Copyright 1985, 1991 by Mortice Kern Systems Inc. All rights reserved.
+ * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
*
- * Written by Eric Gisin.
+ * Optional extra services, controlled by flags not defined by POSIX:
+ *
+ * GLOB_QUOTE:
+ * Escaping convention: \ inhibits any special meaning the following
+ * character might have (except \ at end of string is retained).
+ * GLOB_MAGCHAR:
+ * Set in gl_flags if pattern contained a globbing character.
+ * GLOB_NOMAGIC:
+ * Same as GLOB_NOCHECK, but it will only append pattern if it did
+ * not contain any magic characters. [Used in csh style globbing]
+ * GLOB_ALTDIRFUNC:
+ * Use alternately specified directory access functions.
+ * GLOB_TILDE:
+ * expand ~user/foo to the /home/dir/of/user/foo
+ * GLOB_BRACE:
+ * expand {1,2}{a,b} to 1a 1b 2a 2b
+ * gl_matchc:
+ * Number of matches in the current invocation of glob.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/param.h>
+#include <sys/stat.h>
-#pragma weak _glob = glob
-#pragma weak _globfree = globfree
-
-#include "lint.h"
-#include <stdio.h>
-#include <unistd.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <glob.h>
#include <limits.h>
+#include <pwd.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <glob.h>
-#include <errno.h>
-#include <fnmatch.h>
+#include <unistd.h>
+#include <wchar.h>
-#define GLOB__CHECK 0x80 /* stat generated paths */
+#include "charclass.h"
-#define INITIAL 8 /* initial pathv allocation */
-#define NULLCPP ((char **)0) /* Null char ** */
-#define NAME_MAX 1024 /* something large */
+#define DOLLAR '$'
+#define DOT '.'
+#define EOS '\0'
+#define LBRACKET '['
+#define NOT '!'
+#define QUESTION '?'
+#define QUOTE '\\'
+#define RANGE '-'
+#define RBRACKET ']'
+#define SEP '/'
+#define STAR '*'
+#define TILDE '~'
+#define UNDERSCORE '_'
+#define LBRACE '{'
+#define RBRACE '}'
+#define SLASH '/'
+#define COMMA ','
+#define COLON ':'
-static int globit(size_t, const char *, glob_t *, int,
- int (*)(const char *, int), char **);
-static int pstrcmp(const void *, const void *);
-static int append(glob_t *, const char *);
+#define M_QUOTE 0x800000
+#define M_PROTECT 0x400000
+typedef struct Char_s {
+ wchar_t wc;
+ uint_t at;
+} Char;
+
+#define M_ALL '*' /* Plus M_QUOTE */
+#define M_END ']' /* Plus M_QUOTE */
+#define M_NOT '!' /* Plus M_QUOTE */
+#define M_ONE '?' /* Plus M_QUOTE */
+#define M_RNG '-' /* Plus M_QUOTE */
+#define M_SET '[' /* Plus M_QUOTE */
+#define M_CLASS ':' /* Plus M_QUOTE */
+#define ismeta(c) (((c).at&M_QUOTE) != 0)
+
+#define GLOB_LIMIT_MALLOC 65536
+#define GLOB_LIMIT_STAT 2048
+#define GLOB_LIMIT_READDIR 16384
+
+/* Limit of recursion during matching attempts. */
+#define GLOB_LIMIT_RECUR 64
+
+struct glob_lim {
+ size_t glim_malloc;
+ size_t glim_stat;
+ size_t glim_readdir;
+};
+
+struct glob_path_stat {
+ char *gps_path;
+ struct stat *gps_stat;
+};
+
+static int compare(const void *, const void *);
+static int compare_gps(const void *, const void *);
+static int g_Ctoc(const Char *, char *, uint_t);
+static int g_lstat(Char *, struct stat *, glob_t *);
+static DIR *g_opendir(Char *, glob_t *);
+static Char *g_strchr(const Char *, wchar_t);
+static int g_stat(Char *, struct stat *, glob_t *);
+static int glob0(const Char *, glob_t *, struct glob_lim *,
+ int (*)(const char *, int));
+static int glob1(Char *, Char *, glob_t *, struct glob_lim *,
+ int (*)(const char *, int));
+static int glob2(Char *, Char *, Char *, Char *, Char *, Char *,
+ glob_t *, struct glob_lim *,
+ int (*)(const char *, int));
+static int glob3(Char *, Char *, Char *, Char *, Char *,
+ Char *, Char *, glob_t *, struct glob_lim *,
+ int (*)(const char *, int));
+static int globextend(const Char *, glob_t *, struct glob_lim *,
+ struct stat *);
+static
+const Char *globtilde(const Char *, Char *, size_t, glob_t *);
+static int globexp1(const Char *, glob_t *, struct glob_lim *,
+ int (*)(const char *, int));
+static int globexp2(const Char *, const Char *, glob_t *,
+ struct glob_lim *, int (*)(const char *, int));
+static int match(Char *, Char *, Char *, int);
+#ifdef DEBUG
+static void qprintf(const char *, Char *);
+#endif
+
+int
+glob(const char *pattern, int flags, int (*errfunc)(const char *, int),
+ glob_t *pglob)
+{
+ const char *patnext;
+ size_t n;
+ wchar_t c;
+ Char *bufnext, *bufend, patbuf[MAXPATHLEN];
+ struct glob_lim limit = { 0, 0, 0 };
+
+ if (strnlen(pattern, PATH_MAX) == PATH_MAX)
+ return (GLOB_NOMATCH);
+
+ patnext = pattern;
+ if (!(flags & GLOB_APPEND)) {
+ pglob->gl_pathc = 0;
+ pglob->gl_pathv = NULL;
+ if ((flags & GLOB_KEEPSTAT) != 0)
+ pglob->gl_statv = NULL;
+ if (!(flags & GLOB_DOOFFS))
+ pglob->gl_offs = 0;
+ }
+ pglob->gl_flags = flags & ~GLOB_MAGCHAR;
+ pglob->gl_matchc = 0;
+
+ if (pglob->gl_offs < 0 || pglob->gl_pathc < 0 ||
+ pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX ||
+ pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1)
+ return (GLOB_NOSPACE);
+
+ bufnext = patbuf;
+ bufend = bufnext + MAXPATHLEN - 1;
+ if (flags & GLOB_NOESCAPE) {
+ while (bufnext < bufend) {
+ if ((n = mbtowc(&c, patnext, PATH_MAX)) > 0) {
+ patnext += n;
+ bufnext->at = 0;
+ (bufnext++)->wc = c;
+ } else if (n == 0) {
+ break;
+ } else {
+ return (GLOB_NOMATCH);
+ }
+ }
+ } else {
+ /* Protect the quoted characters. */
+ while (bufnext < bufend) {
+ if ((n = mbtowc(&c, patnext, PATH_MAX)) > 0) {
+ patnext += n;
+ if (c == QUOTE) {
+ n = mbtowc(&c, patnext, PATH_MAX);
+ if (n < 0)
+ return (GLOB_NOMATCH);
+ if (n > 0)
+ patnext += n;
+ if (n == 0)
+ c = QUOTE;
+ bufnext->at = M_PROTECT;
+ (bufnext++)->wc = c;
+ } else {
+ bufnext->at = 0;
+ (bufnext++)->wc = c;
+ }
+ } else if (n == 0) {
+ break;
+ } else {
+ return (GLOB_NOMATCH);
+ }
+ }
+ }
+ bufnext->at = 0;
+ bufnext->wc = EOS;
+
+ if (flags & GLOB_BRACE)
+ return (globexp1(patbuf, pglob, &limit, errfunc));
+ else
+ return (glob0(patbuf, pglob, &limit, errfunc));
+}
+
/*
- * Free all space consumed by glob.
+ * Expand recursively a glob {} pattern. When there is no more expansion
+ * invoke the standard globbing routine to glob the rest of the magic
+ * characters
*/
-void
-globfree(glob_t *gp)
+static int
+globexp1(const Char *pattern, glob_t *pglob, struct glob_lim *limitp,
+ int (*errfunc)(const char *, int))
{
- size_t i;
+ const Char* ptr = pattern;
- if (gp->gl_pathv == 0)
- return;
+ /* Protect a single {}, for find(1), like csh */
+ if (pattern[0].wc == LBRACE && pattern[1].wc == RBRACE &&
+ pattern[2].wc == EOS)
+ return (glob0(pattern, pglob, limitp, errfunc));
- for (i = gp->gl_offs; i < gp->gl_offs + gp->gl_pathc; ++i)
- free(gp->gl_pathv[i]);
- free((void *)gp->gl_pathv);
+ if ((ptr = (const Char *) g_strchr(ptr, LBRACE)) != NULL)
+ return (globexp2(ptr, pattern, pglob, limitp, errfunc));
- gp->gl_pathc = 0;
- gp->gl_pathv = NULLCPP;
+ return (glob0(pattern, pglob, limitp, errfunc));
}
+
/*
- * Do filename expansion.
+ * Recursive brace globbing helper. Tries to expand a single brace.
+ * If it succeeds then it invokes globexp1 with the new pattern.
+ * If it fails then it tries to glob the rest of the pattern and returns.
*/
-int
-glob(const char *pattern, int flags,
- int (*errfn)(const char *, int), glob_t *gp)
+static int
+globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
+ struct glob_lim *limitp, int (*errfunc)(const char *, int))
{
- int rv;
- size_t i;
- size_t ipathc;
- char *path;
+ int i, rv;
+ Char *lm, *ls;
+ const Char *pe, *pm, *pl;
+ Char patbuf[MAXPATHLEN];
- if ((flags & GLOB_DOOFFS) == 0)
- gp->gl_offs = 0;
+ /* copy part up to the brace */
+ for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
+ ;
+ lm->at = 0;
+ lm->wc = EOS;
+ ls = lm;
- if (!(flags & GLOB_APPEND)) {
- gp->gl_pathc = 0;
- gp->gl_pathn = gp->gl_offs + INITIAL;
- gp->gl_pathv = (char **)malloc(sizeof (char *) * gp->gl_pathn);
+ /* Find the balanced brace */
+ for (i = 0, pe = ++ptr; pe->wc != EOS; pe++)
+ if (pe->wc == LBRACKET) {
+ /* Ignore everything between [] */
+ for (pm = pe++; pe->wc != RBRACKET &&
+ pe->wc != EOS; pe++)
+ ;
+ if (pe->wc == EOS) {
+ /*
+ * We could not find a matching RBRACKET.
+ * Ignore and just look for RBRACE
+ */
+ pe = pm;
+ }
+ } else if (pe->wc == LBRACE) {
+ i++;
+ } else if (pe->wc == RBRACE) {
+ if (i == 0)
+ break;
+ i--;
+ }
- if (gp->gl_pathv == NULLCPP)
- return (GLOB_NOSPACE);
- gp->gl_pathp = gp->gl_pathv + gp->gl_offs;
+ /* Non matching braces; just glob the pattern */
+ if (i != 0 || pe->wc == EOS)
+ return (glob0(patbuf, pglob, limitp, errfunc));
- for (i = 0; i < gp->gl_offs; ++i)
- gp->gl_pathv[i] = NULL;
+ for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
+ switch (pm->wc) {
+ case LBRACKET:
+ /* Ignore everything between [] */
+ for (pl = pm++; pm->wc != RBRACKET && pm->wc != EOS;
+ pm++)
+ ;
+ if (pm->wc == EOS) {
+ /*
+ * We could not find a matching RBRACKET.
+ * Ignore and just look for RBRACE
+ */
+ pm = pl;
}
+ break;
- if ((path = malloc(strlen(pattern)+1)) == NULL)
- return (GLOB_NOSPACE);
+ case LBRACE:
+ i++;
+ break;
- ipathc = gp->gl_pathc;
- rv = globit(0, pattern, gp, flags, errfn, &path);
+ case RBRACE:
+ if (i) {
+ i--;
+ break;
+ }
+ /* FALLTHROUGH */
+ case COMMA:
+ if (i && pm->wc == COMMA)
+ break;
+ else {
+ /* Append the current string */
+ for (lm = ls; (pl < pm); *lm++ = *pl++)
+ ;
- if (rv == GLOB_ABORTED) {
/*
- * User's error function returned non-zero, or GLOB_ERR was
- * set, and we encountered a directory we couldn't search.
+ * Append the rest of the pattern after the
+ * closing brace
*/
- free(path);
- return (GLOB_ABORTED);
+ for (pl = pe + 1;
+ (*lm++ = *pl++).wc != EOS; /* */)
+ ;
+
+ /* Expand the current pattern */
+ rv = globexp1(patbuf, pglob, limitp, errfunc);
+ if (rv && rv != GLOB_NOMATCH)
+ return (rv);
+
+ /* move after the comma, to the next string */
+ pl = pm + 1;
}
+ break;
- i = gp->gl_pathc - ipathc;
- if (i >= 1 && !(flags & GLOB_NOSORT)) {
- qsort((char *)(gp->gl_pathp+ipathc), i, sizeof (char *),
- pstrcmp);
+ default:
+ break;
}
- if (i == 0) {
- if (flags & GLOB_NOCHECK)
- (void) append(gp, pattern);
- else
- rv = GLOB_NOMATCH;
}
- gp->gl_pathp[gp->gl_pathc] = NULL;
- free(path);
-
- return (rv);
+ return (0);
}
+
/*
- * Recursive routine to match glob pattern, and walk directories.
+ * expand tilde from the passwd file.
*/
-int
-globit(size_t dend, const char *sp, glob_t *gp, int flags,
- int (*errfn)(const char *, int), char **path)
+static const Char *
+globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
{
+ struct passwd *pwd;
+ char *h;
+ const Char *p;
+ Char *b, *eb, *q;
size_t n;
- size_t m;
- ssize_t end = 0; /* end of expanded directory */
- char *pat = (char *)sp; /* pattern component */
- char *dp = (*path) + dend;
- int expand = 0; /* path has pattern */
- char *cp;
- struct stat64 sb;
- DIR *dirp;
- struct dirent64 *d;
- int err;
+ wchar_t c;
- for (;;)
- switch (*dp++ = *(unsigned char *)sp++) {
- case '\0': /* end of source path */
- if (expand)
- goto Expand;
- else {
- if (!(flags & GLOB_NOCHECK) ||
- flags & (GLOB__CHECK|GLOB_MARK))
- if (stat64(*path, &sb) < 0) {
- return (0);
+ if (pattern->wc != TILDE || !(pglob->gl_flags & GLOB_TILDE))
+ return (pattern);
+
+ /* Copy up to the end of the string or / */
+ eb = &patbuf[patbuf_len - 1];
+ for (p = pattern + 1, q = patbuf;
+ q < eb && p->wc != EOS && p->wc != SLASH; *q++ = *p++)
+ ;
+
+ q->at = 0;
+ q->wc = EOS;
+
+ /* What to do if patbuf is full? */
+
+ if (patbuf[0].wc == EOS) {
+ /*
+ * handle a plain ~ or ~/ by expanding $HOME
+ * first and then trying the password file
+ */
+ if (issetugid() != 0 || (h = getenv("HOME")) == NULL) {
+ if ((pwd = getpwuid(getuid())) == NULL)
+ return (pattern);
+ else
+ h = pwd->pw_dir;
}
- if (flags & GLOB_MARK && S_ISDIR(sb.st_mode)) {
- *dp = '\0';
- *--dp = '/';
+ } else {
+ /*
+ * Expand a ~user
+ */
+ if ((pwd = getpwnam((char *)patbuf)) == NULL)
+ return (pattern);
+ else
+ h = pwd->pw_dir;
}
- if (append(gp, *path) < 0) {
- return (GLOB_NOSPACE);
+
+ /* Copy the home directory */
+ for (b = patbuf; b < eb && *h != EOS; b++) {
+ if ((n = mbtowc(&c, h, PATH_MAX)) > 0) {
+ h += n;
+ b->at = 0;
+ b->wc = c;
+ } else {
+ break;
}
- return (0);
}
- /*NOTREACHED*/
- case '*':
- case '?':
- case '[':
- case '\\':
- ++expand;
+ /* Append the rest of the pattern */
+ while (b < eb && (*b++ = *p++).wc != EOS)
+ ;
+ b->at = 0;
+ b->wc = EOS;
+
+ return (patbuf);
+}
+
+static int
+g_charclass(const Char **patternp, Char **bufnextp)
+{
+ const Char *pattern = *patternp + 1;
+ Char *bufnext = *bufnextp;
+ const Char *colon;
+ struct cclass *cc;
+ size_t len;
+
+ if ((colon = g_strchr(pattern, COLON)) == NULL ||
+ colon[1].wc != RBRACKET)
+ return (1); /* not a character class */
+
+ len = (size_t)(colon - pattern);
+ for (cc = cclasses; cc->name != NULL; cc++) {
+ wchar_t w;
+ const Char *s1 = pattern;
+ const char *s2 = cc->name;
+ size_t n = len;
+
+ /* Are the strings the same? */
+ while (n > 0 && (w = (s1++)->wc) == *s2 && w != EOS) {
+ n--;
+ s2++;
+ }
+ if (n == 0 && *s2 == EOS)
break;
+ }
+ if (cc->name == NULL)
+ return (-1); /* invalid character class */
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_CLASS;
+ bufnext->at = (cc - &cclasses[0]);
+ (bufnext++)->wc = COLON;
+ *bufnextp = bufnext;
+ *patternp += len + 3;
- case '/':
- if (expand)
- goto Expand;
- end = dp - *path;
- pat = (char *)sp;
+ return (0);
+}
+
+/*
+ * The main glob() routine: compiles the pattern (optionally processing
+ * quotes), calls glob1() to do the real pattern matching, and finally
+ * sorts the list (unless unsorted operation is requested). Returns 0
+ * if things went well, nonzero if errors occurred. It is not an error
+ * to find no matches.
+ */
+static int
+glob0(const Char *pattern, glob_t *pglob, struct glob_lim *limitp,
+ int (*errfunc)(const char *, int))
+{
+ const Char *qpatnext;
+ int err, oldpathc;
+ wchar_t c;
+ int a;
+ Char *bufnext, patbuf[MAXPATHLEN];
+
+ qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
+ oldpathc = pglob->gl_pathc;
+ bufnext = patbuf;
+
+ /* We don't need to check for buffer overflow any more. */
+ while ((a = qpatnext->at), (c = (qpatnext++)->wc) != EOS) {
+ switch (c) {
+ case LBRACKET:
+ if (a != 0) {
+ bufnext->at = a;
+ (bufnext++)->wc = c;
break;
+ }
+ a = qpatnext->at;
+ c = qpatnext->wc;
+ if (a == 0 && c == NOT)
+ ++qpatnext;
+ if (qpatnext->wc == EOS ||
+ g_strchr(qpatnext+1, RBRACKET) == NULL) {
+ bufnext->at = 0;
+ (bufnext++)->wc = LBRACKET;
+ if (a == 0 && c == NOT)
+ --qpatnext;
+ break;
+ }
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_SET;
+ if (a == 0 && c == NOT) {
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_NOT;
+ }
+ a = qpatnext->at;
+ c = (qpatnext++)->wc;
+ do {
+ if (a == 0 && c == LBRACKET &&
+ qpatnext->wc == COLON) {
+ do {
+ err = g_charclass(&qpatnext,
+ &bufnext);
+ if (err)
+ break;
+ a = qpatnext->at;
+ c = (qpatnext++)->wc;
+ } while (a == 0 && c == LBRACKET &&
+ qpatnext->wc == COLON);
+ if (err == -1 &&
+ !(pglob->gl_flags & GLOB_NOCHECK))
+ return (GLOB_NOMATCH);
+ if (a == 0 && c == RBRACKET)
+ break;
+ }
+ bufnext->at = a;
+ (bufnext++)->wc = c;
+ if (qpatnext->at == 0 &&
+ qpatnext->wc == RANGE) {
+ a = qpatnext[1].at;
+ c = qpatnext[1].wc;
+ if (qpatnext[1].at != 0 ||
+ qpatnext[1].wc != RBRACKET) {
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_RNG;
+ bufnext->at = a;
+ (bufnext++)->wc = c;
+ qpatnext += 2;
+ }
+ }
+ a = qpatnext->at;
+ c = (qpatnext++)->wc;
+ } while (a != 0 || c != RBRACKET);
+ pglob->gl_flags |= GLOB_MAGCHAR;
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_END;
+ break;
+ case QUESTION:
+ if (a != 0) {
+ bufnext->at = a;
+ (bufnext++)->wc = c;
+ break;
+ }
+ pglob->gl_flags |= GLOB_MAGCHAR;
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_ONE;
+ break;
+ case STAR:
+ if (a != 0) {
+ bufnext->at = a;
+ (bufnext++)->wc = c;
+ break;
+ }
+ pglob->gl_flags |= GLOB_MAGCHAR;
+ /*
+ * collapse adjacent stars to one,
+ * to avoid exponential behavior
+ */
+ if (bufnext == patbuf ||
+ bufnext[-1].at != M_QUOTE ||
+ bufnext[-1].wc != M_ALL) {
+ bufnext->at = M_QUOTE;
+ (bufnext++)->wc = M_ALL;
+ }
+ break;
+ default:
+ bufnext->at = a;
+ (bufnext++)->wc = c;
+ break;
+ }
+ }
+ bufnext->at = 0;
+ bufnext->wc = EOS;
+#ifdef DEBUG
+ qprintf("glob0:glob1:patbuf", patbuf);
+#endif
- Expand:
- /* determine directory and open it */
- (*path)[end] = '\0';
- dirp = opendir(**path == '\0' ? "." : *path);
- if (dirp == NULL) {
- if (errfn != 0 && errfn(*path, errno) != 0 ||
- flags&GLOB_ERR) {
- return (GLOB_ABORTED);
+ if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc))
+ != 0)
+ return (err);
+
+ /*
+ * If there was no match we are going to append the pattern
+ * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
+ * and the pattern did not contain any magic characters
+ * GLOB_NOMAGIC is there just for compatibility with csh.
+ */
+ if (pglob->gl_pathc == oldpathc) {
+ if ((pglob->gl_flags & GLOB_NOCHECK) ||
+ ((pglob->gl_flags & GLOB_NOMAGIC) &&
+ !(pglob->gl_flags & GLOB_MAGCHAR)))
+ return (globextend(pattern, pglob, limitp, NULL));
+ else
+ return (GLOB_NOMATCH);
}
+ if (!(pglob->gl_flags & GLOB_NOSORT)) {
+ if ((pglob->gl_flags & GLOB_KEEPSTAT)) {
+ /* Keep the paths and stat info synced during sort */
+ struct glob_path_stat *path_stat;
+ int i;
+ int n = pglob->gl_pathc - oldpathc;
+ int o = pglob->gl_offs + oldpathc;
+
+ if ((path_stat = calloc(n, sizeof (*path_stat))) ==
+ NULL)
+ return (GLOB_NOSPACE);
+ for (i = 0; i < n; i++) {
+ path_stat[i].gps_path = pglob->gl_pathv[o + i];
+ path_stat[i].gps_stat = pglob->gl_statv[o + i];
+ }
+ qsort(path_stat, n, sizeof (*path_stat), compare_gps);
+ for (i = 0; i < n; i++) {
+ pglob->gl_pathv[o + i] = path_stat[i].gps_path;
+ pglob->gl_statv[o + i] = path_stat[i].gps_stat;
+ }
+ free(path_stat);
+ } else {
+ qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
+ pglob->gl_pathc - oldpathc, sizeof (char *),
+ compare);
+ }
+ }
return (0);
+}
+
+static int
+compare(const void *p, const void *q)
+{
+ return (strcmp(*(char **)p, *(char **)q));
+}
+
+static int
+compare_gps(const void *_p, const void *_q)
+{
+ const struct glob_path_stat *p = (const struct glob_path_stat *)_p;
+ const struct glob_path_stat *q = (const struct glob_path_stat *)_q;
+
+ return (strcmp(p->gps_path, q->gps_path));
+}
+
+static int
+glob1(Char *pattern, Char *pattern_last, glob_t *pglob,
+ struct glob_lim *limitp, int (*errfunc)(const char *, int))
+{
+ Char pathbuf[MAXPATHLEN];
+
+ /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
+ if (pattern->wc == EOS)
+ return (0);
+ return (glob2(pathbuf, pathbuf+MAXPATHLEN-1,
+ pathbuf, pathbuf+MAXPATHLEN-1,
+ pattern, pattern_last, pglob, limitp, errfunc));
+}
+
+/*
+ * The functions glob2 and glob3 are mutually recursive; there is one level
+ * of recursion for each segment in the pattern that contains one or more
+ * meta characters.
+ */
+static int
+glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
+ Char *pattern, Char *pattern_last, glob_t *pglob,
+ struct glob_lim *limitp, int (*errfunc)(const char *, int))
+{
+ struct stat sb;
+ Char *p, *q;
+ int anymeta;
+
+ /*
+ * Loop over pattern segments until end of pattern or until
+ * segment with meta character found.
+ */
+ for (anymeta = 0; ; ) {
+ if (pattern->wc == EOS) { /* End of pattern? */
+ pathend->at = 0;
+ pathend->wc = EOS;
+
+ if ((pglob->gl_flags & GLOB_LIMIT) &&
+ limitp->glim_stat++ >= GLOB_LIMIT_STAT) {
+ errno = 0;
+ pathend->at = 0;
+ (pathend++)->wc = SEP;
+ pathend->at = 0;
+ pathend->wc = EOS;
+ return (GLOB_NOSPACE);
}
+ if (g_lstat(pathbuf, &sb, pglob))
+ return (0);
- /* extract pattern component */
- n = sp - pat;
- if ((cp = malloc(n)) == NULL) {
- (void) closedir(dirp);
+ if (((pglob->gl_flags & GLOB_MARK) &&
+ (pathend[-1].at != 0 ||
+ pathend[-1].wc != SEP)) &&
+ (S_ISDIR(sb.st_mode) ||
+ (S_ISLNK(sb.st_mode) &&
+ (g_stat(pathbuf, &sb, pglob) == 0) &&
+ S_ISDIR(sb.st_mode)))) {
+ if (pathend+1 > pathend_last)
return (GLOB_NOSPACE);
+ pathend->at = 0;
+ (pathend++)->wc = SEP;
+ pathend->at = 0;
+ pathend->wc = EOS;
}
- pat = memcpy(cp, pat, n);
- pat[n-1] = '\0';
- if (*--sp != '\0')
- flags |= GLOB__CHECK;
+ ++pglob->gl_matchc;
+ return (globextend(pathbuf, pglob, limitp, &sb));
+ }
- /* expand path to max. expansion */
- n = dp - *path;
- *path = realloc(*path,
- strlen(*path) + NAME_MAX + strlen(sp) + 1);
- if (*path == NULL) {
- (void) closedir(dirp);
- free(pat);
+ /* Find end of next segment, copy tentatively to pathend. */
+ q = pathend;
+ p = pattern;
+ while (p->wc != EOS && p->wc != SEP) {
+ if (ismeta(*p))
+ anymeta = 1;
+ if (q+1 > pathend_last)
return (GLOB_NOSPACE);
+ *q++ = *p++;
}
- dp = (*path) + n;
- /* read directory and match entries */
+ if (!anymeta) { /* No expansion, do next segment. */
+ pathend = q;
+ pattern = p;
+ while (pattern->wc == SEP) {
+ if (pathend+1 > pathend_last)
+ return (GLOB_NOSPACE);
+ *pathend++ = *pattern++;
+ }
+ } else {
+ /* Need expansion, recurse. */
+ return (glob3(pathbuf, pathbuf_last, pathend,
+ pathend_last, pattern, p, pattern_last,
+ pglob, limitp, errfunc));
+ }
+ }
+ /* NOTREACHED */
+}
+
+static int
+glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
+ Char *pattern, Char *restpattern, Char *restpattern_last, glob_t *pglob,
+ struct glob_lim *limitp, int (*errfunc)(const char *, int))
+{
+ struct dirent *dp;
+ DIR *dirp;
+ int err;
+ char buf[MAXPATHLEN];
+
+ /*
+ * The readdirfunc declaration can't be prototyped, because it is
+ * assigned, below, to two functions which are prototyped in glob.h
+ * and dirent.h as taking pointers to differently typed opaque
+ * structures.
+ */
+ struct dirent *(*readdirfunc)(void *);
+
+ if (pathend > pathend_last)
+ return (GLOB_NOSPACE);
+ pathend->at = 0;
+ pathend->wc = EOS;
+ errno = 0;
+
+ if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
+ /* TODO: don't call for ENOENT or ENOTDIR? */
+ if (errfunc) {
+ if (g_Ctoc(pathbuf, buf, sizeof (buf)))
+ return (GLOB_ABORTED);
+ if (errfunc(buf, errno) ||
+ pglob->gl_flags & GLOB_ERR)
+ return (GLOB_ABORTED);
+ }
+ return (0);
+ }
+
err = 0;
- while ((d = readdir64(dirp)) != NULL) {
- cp = d->d_name;
- if ((flags&GLOB_NOESCAPE)
- ? fnmatch(pat, cp, FNM_PERIOD|FNM_NOESCAPE)
- : fnmatch(pat, cp, FNM_PERIOD))
+
+ /* Search directory for matching names. */
+ if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+ readdirfunc = pglob->gl_readdir;
+ else
+ readdirfunc = (struct dirent *(*)(void *))readdir;
+ while ((dp = (*readdirfunc)(dirp))) {
+ char *sc;
+ Char *dc;
+ size_t n;
+ wchar_t w;
+
+ if ((pglob->gl_flags & GLOB_LIMIT) &&
+ limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) {
+ errno = 0;
+ pathend->at = 0;
+ (pathend++)->wc = SEP;
+ pathend->at = 0;
+ pathend->wc = EOS;
+ err = GLOB_NOSPACE;
+ break;
+ }
+
+ /* Initial DOT must be matched literally. */
+ if (dp->d_name[0] == DOT && pattern->wc != DOT)
continue;
+ dc = pathend;
+ sc = dp->d_name;
+ while (dc < pathend_last) {
+ if ((n = mbtowc(&w, sc, MB_LEN_MAX)) <= 0) {
+ sc += 1;
+ dc->at = 0;
+ dc->wc = EOS;
+ } else {
+ sc += n;
+ dc->at = 0;
+ dc->wc = w;
+ }
+ dc++;
+ if (n <= 0)
+ break;
+ }
+ if (dc >= pathend_last) {
+ dc->at = 0;
+ dc->wc = EOS;
+ err = GLOB_NOSPACE;
+ break;
+ }
+ if (n < 0) {
+ dc->at = 0;
+ dc->wc = EOS;
+ continue;
+ }
- n = strlen(cp);
- (void) memcpy((*path) + end, cp, n);
- m = dp - *path;
- err = globit(end+n, sp, gp, flags, errfn, path);
- dp = (*path) + m; /* globit can move path */
- if (err != 0)
+ if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
+ pathend->at = 0;
+ pathend->wc = EOS;
+ continue;
+ }
+ err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
+ restpattern, restpattern_last, pglob, limitp,
+ errfunc);
+ if (err)
break;
}
- (void) closedir(dirp);
- free(pat);
+ if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+ (*pglob->gl_closedir)(dirp);
+ else
+ closedir(dirp);
return (err);
- }
- /* NOTREACHED */
}
+
/*
- * Comparison routine for two name arguments, called by qsort.
+ * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
+ * add the new item, and update gl_pathc.
+ *
+ * This assumes the BSD realloc, which only copies the block when its size
+ * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
+ * behavior.
+ *
+ * Return 0 if new item added, error code if memory couldn't be allocated.
+ *
+ * Invariant of the glob_t structure:
+ * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
+ * gl_pathv points to (gl_offs + gl_pathc + 1) items.
*/
-int
-pstrcmp(const void *npp1, const void *npp2)
+static int
+globextend(const Char *path, glob_t *pglob, struct glob_lim *limitp,
+ struct stat *sb)
{
- return (strcoll(*(char **)npp1, *(char **)npp2));
+ char **pathv;
+ ssize_t i;
+ size_t newn, len;
+ char *copy = NULL;
+ const Char *p;
+ struct stat **statv;
+ char junk[MB_LEN_MAX];
+ int n;
+
+ newn = 2 + pglob->gl_pathc + pglob->gl_offs;
+ if (pglob->gl_offs >= INT_MAX ||
+ pglob->gl_pathc >= INT_MAX ||
+ newn >= INT_MAX ||
+ SIZE_MAX / sizeof (*pathv) <= newn ||
+ SIZE_MAX / sizeof (*statv) <= newn) {
+ nospace:
+ for (i = pglob->gl_offs; i < (ssize_t)(newn - 2); i++) {
+ if (pglob->gl_pathv && pglob->gl_pathv[i])
+ free(pglob->gl_pathv[i]);
+ if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
+ pglob->gl_pathv && pglob->gl_pathv[i])
+ free(pglob->gl_statv[i]);
+ }
+ if (pglob->gl_pathv) {
+ free(pglob->gl_pathv);
+ pglob->gl_pathv = NULL;
+ }
+ if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
+ pglob->gl_statv) {
+ free(pglob->gl_statv);
+ pglob->gl_statv = NULL;
+ }
+ return (GLOB_NOSPACE);
+ }
+
+ pathv = realloc(pglob->gl_pathv, newn * sizeof (*pathv));
+ if (pathv == NULL)
+ goto nospace;
+ if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
+ /* first time around -- clear initial gl_offs items */
+ pathv += pglob->gl_offs;
+ for (i = pglob->gl_offs; --i >= 0; )
+ *--pathv = NULL;
+ }
+ pglob->gl_pathv = pathv;
+
+ if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
+ statv = realloc(pglob->gl_statv, newn * sizeof (*statv));
+ if (statv == NULL)
+ goto nospace;
+ if (pglob->gl_statv == NULL && pglob->gl_offs > 0) {
+ /* first time around -- clear initial gl_offs items */
+ statv += pglob->gl_offs;
+ for (i = pglob->gl_offs; --i >= 0; )
+ *--statv = NULL;
+ }
+ pglob->gl_statv = statv;
+ if (sb == NULL)
+ statv[pglob->gl_offs + pglob->gl_pathc] = NULL;
+ else {
+ limitp->glim_malloc += sizeof (**statv);
+ if ((pglob->gl_flags & GLOB_LIMIT) &&
+ limitp->glim_malloc >= GLOB_LIMIT_MALLOC) {
+ errno = 0;
+ return (GLOB_NOSPACE);
+ }
+ if ((statv[pglob->gl_offs + pglob->gl_pathc] =
+ malloc(sizeof (**statv))) == NULL)
+ goto copy_error;
+ memcpy(statv[pglob->gl_offs + pglob->gl_pathc], sb,
+ sizeof (*sb));
+ }
+ statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL;
+ }
+
+ len = MB_LEN_MAX;
+ p = path;
+ while ((n = wctomb(junk, p->wc)) > 0) {
+ len += n;
+ if ((p++)->wc == EOS)
+ break;
+ }
+
+ limitp->glim_malloc += len;
+ if ((copy = malloc(len)) != NULL) {
+ if (g_Ctoc(path, copy, len)) {
+ free(copy);
+ return (GLOB_NOSPACE);
+ }
+ pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
+ }
+ pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
+
+ if ((pglob->gl_flags & GLOB_LIMIT) &&
+ (newn * sizeof (*pathv)) + limitp->glim_malloc >
+ GLOB_LIMIT_MALLOC) {
+ errno = 0;
+ return (GLOB_NOSPACE);
+ }
+ copy_error:
+ return (copy == NULL ? GLOB_NOSPACE : 0);
}
+
/*
- * Add a new matched filename to the glob_t structure, increasing the
- * size of that array, as required.
+ * pattern matching function for filenames. Each occurrence of the *
+ * pattern causes a recursion level.
*/
-int
-append(glob_t *gp, const char *str)
+static int
+match(Char *name, Char *pat, Char *patend, int recur)
{
- char *cp;
+ int ok, negate_range;
+ Char c, k;
- if ((cp = malloc(strlen(str)+1)) == NULL)
- return (GLOB_NOSPACE);
- gp->gl_pathp[gp->gl_pathc++] = strcpy(cp, str);
+ if (recur-- == 0)
+ return (1);
- if ((gp->gl_pathc + gp->gl_offs) >= gp->gl_pathn) {
- gp->gl_pathn *= 2;
- gp->gl_pathv = (char **)realloc((void *)gp->gl_pathv,
- gp->gl_pathn * sizeof (char *));
- if (gp->gl_pathv == NULLCPP)
- return (GLOB_NOSPACE);
- gp->gl_pathp = gp->gl_pathv + gp->gl_offs;
+ while (pat < patend) {
+ c = *pat++;
+ switch (c.wc) {
+ case M_ALL:
+ if (c.at != M_QUOTE) {
+ k = *name++;
+ if (k.at != c.at || k.wc != c.wc)
+ return (0);
+ break;
}
+ while (pat < patend && pat->at == M_QUOTE &&
+ pat->wc == M_ALL)
+ pat++; /* eat consecutive '*' */
+ if (pat == patend)
+ return (1);
+ do {
+ if (match(name, pat, patend, recur))
+ return (1);
+ } while ((name++)->wc != EOS);
return (0);
+ case M_ONE:
+ if (c.at != M_QUOTE) {
+ k = *name++;
+ if (k.at != c.at || k.wc != c.wc)
+ return (0);
+ break;
+ }
+ if ((name++)->wc == EOS)
+ return (0);
+ break;
+ case M_SET:
+ if (c.at != M_QUOTE) {
+ k = *name++;
+ if (k.at != c.at || k.wc != c.wc)
+ return (0);
+ break;
+ }
+ ok = 0;
+ if ((k = *name++).wc == EOS)
+ return (0);
+ if ((negate_range = (pat->at == M_QUOTE &&
+ pat->wc == M_NOT)) != 0)
+ ++pat;
+ while (((c = *pat++).at != M_QUOTE) || c.wc != M_END) {
+ if (c.at == M_QUOTE && c.wc == M_CLASS) {
+ Char idx;
+
+ idx.at = pat->at;
+ idx.wc = pat->wc;
+ if (idx.at < NCCLASSES &&
+ cclasses[idx.at].isctype(k.wc))
+ ok = 1;
+ ++pat;
+ }
+ if (pat->at == M_QUOTE && pat->wc == M_RNG) {
+ if (c.wc <= k.wc && k.wc <= pat[1].wc)
+ ok = 1;
+ pat += 2;
+ } else if (c.wc == k.wc)
+ ok = 1;
+ }
+ if (ok == negate_range)
+ return (0);
+ break;
+ default:
+ k = *name++;
+ if (k.at != c.at || k.wc != c.wc)
+ return (0);
+ break;
+ }
+ }
+ return (name->wc == EOS);
}
+
+/* Free allocated data belonging to a glob_t structure. */
+void
+globfree(glob_t *pglob)
+{
+ int i;
+ char **pp;
+
+ if (pglob->gl_pathv != NULL) {
+ pp = pglob->gl_pathv + pglob->gl_offs;
+ for (i = pglob->gl_pathc; i--; ++pp)
+ if (*pp)
+ free(*pp);
+ free(pglob->gl_pathv);
+ pglob->gl_pathv = NULL;
+ }
+ if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
+ pglob->gl_statv != NULL) {
+ for (i = 0; i < pglob->gl_pathc; i++) {
+ if (pglob->gl_statv[i] != NULL)
+ free(pglob->gl_statv[i]);
+ }
+ free(pglob->gl_statv);
+ pglob->gl_statv = NULL;
+ }
+}
+
+static DIR *
+g_opendir(Char *str, glob_t *pglob)
+{
+ char buf[MAXPATHLEN];
+
+ if (str->wc == EOS)
+ strlcpy(buf, ".", sizeof (buf));
+ else {
+ if (g_Ctoc(str, buf, sizeof (buf)))
+ return (NULL);
+ }
+
+ if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+ return ((*pglob->gl_opendir)(buf));
+
+ return (opendir(buf));
+}
+
+static int
+g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
+{
+ char buf[MAXPATHLEN];
+
+ if (g_Ctoc(fn, buf, sizeof (buf)))
+ return (-1);
+ if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+ return ((*pglob->gl_lstat)(buf, sb));
+ return (lstat(buf, sb));
+}
+
+static int
+g_stat(Char *fn, struct stat *sb, glob_t *pglob)
+{
+ char buf[MAXPATHLEN];
+
+ if (g_Ctoc(fn, buf, sizeof (buf)))
+ return (-1);
+ if (pglob->gl_flags & GLOB_ALTDIRFUNC)
+ return ((*pglob->gl_stat)(buf, sb));
+ return (stat(buf, sb));
+}
+
+static Char *
+g_strchr(const Char *str, wchar_t ch)
+{
+ do {
+ if (str->at == 0 && str->wc == ch)
+ return ((Char *)str);
+ } while ((str++)->wc != EOS);
+ return (NULL);
+}
+
+static int
+g_Ctoc(const Char *str, char *buf, uint_t len)
+{
+ int n;
+ wchar_t w;
+
+ while (len >= MB_LEN_MAX) {
+ w = (str++)->wc;
+ if ((n = wctomb(buf, w)) > 0) {
+ len -= n;
+ buf += n;
+ }
+ if (n < 0)
+ break;
+ if (w == EOS)
+ return (0);
+ }
+ return (1);
+}
+
+#ifdef DEBUG
+static void
+qprintf(const char *str, Char *s)
+{
+ Char *p;
+
+ (void) printf("%s:\n", str);
+ for (p = s; p->wc != EOS; p++)
+ (void) printf("%wc", p->wc);
+ (void) printf("\n");
+ for (p = s; p->wc != EOS; p++)
+ (void) printf("%c", p->at & M_PROTECT ? '"' : ' ');
+ (void) printf("\n");
+ for (p = s; p->wc != EOS; p++)
+ (void) printf("%c", ismeta(*p) ? '_' : ' ');
+ (void) printf("\n");
+}
+#endif