1 /*
   2  * Copyright (c) 2012 Gary Mills
   3  */
   4 /*      $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */
   5 /*
   6  * Copyright (c) 1989, 1993
   7  *      The Regents of the University of California.  All rights reserved.
   8  *
   9  * This code is derived from software contributed to Berkeley by
  10  * Guido van Rossum.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. Neither the name of the University nor the names of its contributors
  21  *    may be used to endorse or promote products derived from this software
  22  *    without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  */
  36 
  37 /*
  38  * glob(3) -- a superset of the one defined in POSIX 1003.2.
  39  *
  40  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
  41  *
  42  * Optional extra services, controlled by flags not defined by POSIX:
  43  *
  44  * GLOB_QUOTE:
  45  *      Escaping convention: \ inhibits any special meaning the following
  46  *      character might have (except \ at end of string is retained).
  47  * GLOB_MAGCHAR:
  48  *      Set in gl_flags if pattern contained a globbing character.
  49  * GLOB_NOMAGIC:
  50  *      Same as GLOB_NOCHECK, but it will only append pattern if it did
  51  *      not contain any magic characters.  [Used in csh style globbing]
  52  * GLOB_ALTDIRFUNC:
  53  *      Use alternately specified directory access functions.
  54  * GLOB_TILDE:
  55  *      expand ~user/foo to the /home/dir/of/user/foo
  56  * GLOB_BRACE:
  57  *      expand {1,2}{a,b} to 1a 1b 2a 2b
  58  * gl_matchc:
  59  *      Number of matches in the current invocation of glob.
  60  */
  61 
  62 #include <sys/param.h>
  63 #include <sys/stat.h>
  64 
  65 #include <ctype.h>
  66 #include <dirent.h>
  67 #include <errno.h>
  68 #include <glob.h>
  69 #include <limits.h>
  70 #include <pwd.h>
  71 #include <stdio.h>
  72 #include <stdlib.h>
  73 #include <string.h>
  74 #include <unistd.h>
  75 #include <wchar.h>
  76 #include <wctype.h>
  77 
  78 #define DOLLAR          '$'
  79 #define DOT             '.'
  80 #define EOS             '\0'
  81 #define LBRACKET        '['
  82 #define NOT             '!'
  83 #define QUESTION        '?'
  84 #define QUOTE           '\\'
  85 #define RANGE           '-'
  86 #define RBRACKET        ']'
  87 #define SEP             '/'
  88 #define STAR            '*'
  89 #define TILDE           '~'
  90 #define UNDERSCORE      '_'
  91 #define LBRACE          '{'
  92 #define RBRACE          '}'
  93 #define SLASH           '/'
  94 #define COMMA           ','
  95 #define COLON           ':'
  96 
  97 #define M_QUOTE         0x800000
  98 #define M_PROTECT       0x400000
  99 
 100 typedef struct wcat {
 101         wchar_t w_wc;
 102         uint_t w_at;
 103 } wcat_t;
 104 
 105 #define M_ALL           '*'     /* Plus M_QUOTE */
 106 #define M_END           ']'     /* Plus M_QUOTE */
 107 #define M_NOT           '!'     /* Plus M_QUOTE */
 108 #define M_ONE           '?'     /* Plus M_QUOTE */
 109 #define M_RNG           '-'     /* Plus M_QUOTE */
 110 #define M_SET           '['     /* Plus M_QUOTE */
 111 #define M_CLASS         ':'     /* Plus M_QUOTE */
 112 #define ismeta(c)       (((c).w_at&M_QUOTE) != 0)
 113 
 114 #define GLOB_LIMIT_MALLOC       65536
 115 #define GLOB_LIMIT_STAT         2048
 116 #define GLOB_LIMIT_READDIR      16384
 117 
 118 /* Limit of recursion during matching attempts. */
 119 #define GLOB_LIMIT_RECUR        64
 120 
 121 struct glob_lim {
 122         size_t  glim_malloc;
 123         size_t  glim_stat;
 124         size_t  glim_readdir;
 125 };
 126 
 127 struct glob_path_stat {
 128         char            *gps_path;
 129         struct stat     *gps_stat;
 130 };
 131 
 132 static int       compare(const void *, const void *);
 133 static int       compare_gps(const void *, const void *);
 134 static int       g_Ctoc(const wcat_t *, char *, uint_t);
 135 static int       g_lstat(wcat_t *, struct stat *, glob_t *);
 136 static DIR      *g_opendir(wcat_t *, glob_t *);
 137 static wcat_t   *g_strchr(const wcat_t *, wchar_t);
 138 static int       g_stat(wcat_t *, struct stat *, glob_t *);
 139 static int       glob0(const wcat_t *, glob_t *, struct glob_lim *,
 140                         int (*)(const char *, int));
 141 static int       glob1(wcat_t *, wcat_t *, glob_t *, struct glob_lim *,
 142                         int (*)(const char *, int));
 143 static int       glob2(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *,
 144                         wcat_t *, glob_t *, struct glob_lim *,
 145                         int (*)(const char *, int));
 146 static int       glob3(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *,
 147                         wcat_t *, wcat_t *, glob_t *, struct glob_lim *,
 148                         int (*)(const char *, int));
 149 static int       globextend(const wcat_t *, glob_t *, struct glob_lim *,
 150                     struct stat *);
 151 static
 152 const wcat_t    *globtilde(const wcat_t *, wcat_t *, size_t, glob_t *);
 153 static int       globexp1(const wcat_t *, glob_t *, struct glob_lim *,
 154                     int (*)(const char *, int));
 155 static int       globexp2(const wcat_t *, const wcat_t *, glob_t *,
 156                     struct glob_lim *, int (*)(const char *, int));
 157 static int       match(wcat_t *, wcat_t *, wcat_t *, int);
 158 #ifdef DEBUG
 159 static void      qprintf(const char *, wcat_t *);
 160 #endif
 161 
 162 int
 163 glob(const char *pattern, int flags, int (*errfunc)(const char *, int),
 164     glob_t *pglob)
 165 {
 166         const char *patnext;
 167         size_t n;
 168         wchar_t c;
 169         wcat_t *bufnext, *bufend, patbuf[MAXPATHLEN];
 170         struct glob_lim limit = { 0, 0, 0 };
 171 
 172         if (strnlen(pattern, PATH_MAX) == PATH_MAX)
 173                 return (GLOB_NOMATCH);
 174 
 175         patnext = pattern;
 176         if (!(flags & GLOB_APPEND)) {
 177                 pglob->gl_pathc = 0;
 178                 pglob->gl_pathv = NULL;
 179                 if ((flags & GLOB_KEEPSTAT) != 0)
 180                         pglob->gl_statv = NULL;
 181                 if (!(flags & GLOB_DOOFFS))
 182                         pglob->gl_offs = 0;
 183         }
 184         pglob->gl_flags = flags & ~GLOB_MAGCHAR;
 185         pglob->gl_matchc = 0;
 186 
 187         if (pglob->gl_offs < 0 || pglob->gl_pathc < 0 ||
 188             pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX ||
 189             pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1)
 190                 return (GLOB_NOSPACE);
 191 
 192         bufnext = patbuf;
 193         bufend = bufnext + MAXPATHLEN - 1;
 194         if (flags & GLOB_NOESCAPE) {
 195                 while (bufnext < bufend) {
 196                         if ((n = mbtowc(&c, patnext, MB_LEN_MAX)) > 0) {
 197                                 patnext += n;
 198                                 bufnext->w_at = 0;
 199                                 (bufnext++)->w_wc = c;
 200                         } else if (n == 0) {
 201                                 break;
 202                         } else {
 203                                 return (GLOB_NOMATCH);
 204                         }
 205                 }
 206         } else {
 207                 /* Protect the quoted characters. */
 208                 while (bufnext < bufend) {
 209                         if ((n = mbtowc(&c, patnext, MB_LEN_MAX)) > 0) {
 210                                 patnext += n;
 211                                 if (c == QUOTE) {
 212                                         n = mbtowc(&c, patnext, MB_LEN_MAX);
 213                                         if (n < 0)
 214                                                 return (GLOB_NOMATCH);
 215                                         if (n > 0)
 216                                                 patnext += n;
 217                                         if (n == 0)
 218                                                 c = QUOTE;
 219                                         bufnext->w_at = M_PROTECT;
 220                                         (bufnext++)->w_wc = c;
 221                                 } else {
 222                                         bufnext->w_at = 0;
 223                                         (bufnext++)->w_wc = c;
 224                                 }
 225                         } else if (n == 0) {
 226                                 break;
 227                         } else {
 228                                 return (GLOB_NOMATCH);
 229                         }
 230                 }
 231         }
 232         bufnext->w_at = 0;
 233         bufnext->w_wc = EOS;
 234 
 235         if (flags & GLOB_BRACE)
 236                 return (globexp1(patbuf, pglob, &limit, errfunc));
 237         else
 238                 return (glob0(patbuf, pglob, &limit, errfunc));
 239 }
 240 
 241 /*
 242  * Expand recursively a glob {} pattern. When there is no more expansion
 243  * invoke the standard globbing routine to glob the rest of the magic
 244  * characters
 245  */
 246 static int
 247 globexp1(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
 248     int (*errfunc)(const char *, int))
 249 {
 250         const wcat_t *ptr = pattern;
 251 
 252         /* Protect a single {}, for find(1), like csh */
 253         if (pattern[0].w_wc == LBRACE && pattern[1].w_wc == RBRACE &&
 254             pattern[2].w_wc == EOS)
 255                 return (glob0(pattern, pglob, limitp, errfunc));
 256 
 257         if ((ptr = (const wcat_t *) g_strchr(ptr, LBRACE)) != NULL)
 258                 return (globexp2(ptr, pattern, pglob, limitp, errfunc));
 259 
 260         return (glob0(pattern, pglob, limitp, errfunc));
 261 }
 262 
 263 
 264 /*
 265  * Recursive brace globbing helper. Tries to expand a single brace.
 266  * If it succeeds then it invokes globexp1 with the new pattern.
 267  * If it fails then it tries to glob the rest of the pattern and returns.
 268  */
 269 static int
 270 globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob,
 271     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 272 {
 273         int     i, rv;
 274         wcat_t   *lm, *ls;
 275         const wcat_t *pe, *pm, *pl;
 276         wcat_t    patbuf[MAXPATHLEN];
 277 
 278         /* copy part up to the brace */
 279         for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
 280                 ;
 281         lm->w_at = 0;
 282         lm->w_wc = EOS;
 283         ls = lm;
 284 
 285         /* Find the balanced brace */
 286         for (i = 0, pe = ++ptr; pe->w_wc != EOS; pe++)
 287                 if (pe->w_wc == LBRACKET) {
 288                         /* Ignore everything between [] */
 289                         for (pm = pe++; pe->w_wc != RBRACKET &&
 290                             pe->w_wc != EOS; pe++)
 291                                 ;
 292                         if (pe->w_wc == EOS) {
 293                                 /*
 294                                  * We could not find a matching RBRACKET.
 295                                  * Ignore and just look for RBRACE
 296                                  */
 297                                 pe = pm;
 298                         }
 299                 } else if (pe->w_wc == LBRACE) {
 300                         i++;
 301                 } else if (pe->w_wc == RBRACE) {
 302                         if (i == 0)
 303                                 break;
 304                         i--;
 305                 }
 306 
 307         /* Non matching braces; just glob the pattern */
 308         if (i != 0 || pe->w_wc == EOS)
 309                 return (glob0(patbuf, pglob, limitp, errfunc));
 310 
 311         for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
 312                 switch (pm->w_wc) {
 313                 case LBRACKET:
 314                         /* Ignore everything between [] */
 315                         for (pl = pm++; pm->w_wc != RBRACKET && pm->w_wc != EOS;
 316                             pm++)
 317                                 ;
 318                         if (pm->w_wc == EOS) {
 319                                 /*
 320                                  * We could not find a matching RBRACKET.
 321                                  * Ignore and just look for RBRACE
 322                                  */
 323                                 pm = pl;
 324                         }
 325                         break;
 326 
 327                 case LBRACE:
 328                         i++;
 329                         break;
 330 
 331                 case RBRACE:
 332                         if (i) {
 333                                 i--;
 334                                 break;
 335                         }
 336                         /* FALLTHROUGH */
 337                 case COMMA:
 338                         if (i && pm->w_wc == COMMA)
 339                                 break;
 340                         else {
 341                                 /* Append the current string */
 342                                 for (lm = ls; (pl < pm); *lm++ = *pl++)
 343                                         ;
 344 
 345                                 /*
 346                                  * Append the rest of the pattern after the
 347                                  * closing brace
 348                                  */
 349                                 for (pl = pe + 1;
 350                                     (*lm++ = *pl++).w_wc != EOS; /* */)
 351                                         ;
 352 
 353                                 /* Expand the current pattern */
 354                                 rv = globexp1(patbuf, pglob, limitp, errfunc);
 355                                 if (rv && rv != GLOB_NOMATCH)
 356                                         return (rv);
 357 
 358                                 /* move after the comma, to the next string */
 359                                 pl = pm + 1;
 360                         }
 361                         break;
 362 
 363                 default:
 364                         break;
 365                 }
 366         }
 367         return (0);
 368 }
 369 
 370 
 371 
 372 /*
 373  * expand tilde from the passwd file.
 374  */
 375 static const wcat_t *
 376 globtilde(const wcat_t *pattern, wcat_t *patbuf, size_t patbuf_len,
 377     glob_t *pglob)
 378 {
 379         struct passwd *pwd;
 380         char *h;
 381         const wcat_t *p;
 382         wcat_t *b, *eb, *q;
 383         size_t n;
 384         wchar_t c;
 385 
 386         if (pattern->w_wc != TILDE || !(pglob->gl_flags & GLOB_TILDE))
 387                 return (pattern);
 388 
 389         /* Copy up to the end of the string or / */
 390         eb = &patbuf[patbuf_len - 1];
 391         for (p = pattern + 1, q = patbuf;
 392             q < eb && p->w_wc != EOS && p->w_wc != SLASH; *q++ = *p++)
 393                 ;
 394 
 395         q->w_at = 0;
 396         q->w_wc = EOS;
 397 
 398         /* What to do if patbuf is full? */
 399 
 400         if (patbuf[0].w_wc == EOS) {
 401                 /*
 402                  * handle a plain ~ or ~/ by expanding $HOME
 403                  * first and then trying the password file
 404                  */
 405                 if (issetugid() != 0)
 406                         return (pattern);
 407                 if ((h = getenv("HOME")) == NULL) {
 408                         if ((pwd = getpwuid(getuid())) == NULL)
 409                                 return (pattern);
 410                         else
 411                                 h = pwd->pw_dir;
 412                 }
 413         } else {
 414                 /*
 415                  * Expand a ~user
 416                  */
 417                 if ((pwd = getpwnam((char *)patbuf)) == NULL)
 418                         return (pattern);
 419                 else
 420                         h = pwd->pw_dir;
 421         }
 422 
 423         /* Copy the home directory */
 424         for (b = patbuf; b < eb && *h != EOS; b++) {
 425                 if ((n = mbtowc(&c, h, MB_LEN_MAX)) > 0) {
 426                         h += n;
 427                         b->w_at = 0;
 428                         b->w_wc = c;
 429                 } else if (n < 0) {
 430                         return (pattern);
 431                 } else {
 432                         break;
 433                 }
 434         }
 435 
 436         /* Append the rest of the pattern */
 437         while (b < eb && (*b++ = *p++).w_wc != EOS)
 438                 ;
 439         b->w_at = 0;
 440         b->w_wc = EOS;
 441 
 442         return (patbuf);
 443 }
 444 
 445 static int
 446 g_charclass(const wcat_t **patternp, wcat_t **bufnextp)
 447 {
 448         const wcat_t *pattern = *patternp + 1;
 449         wcat_t *bufnext = *bufnextp;
 450         const wcat_t *colon;
 451         char cbuf[MB_LEN_MAX + 32];
 452         wctype_t cc;
 453         size_t len;
 454 
 455         if ((colon = g_strchr(pattern, COLON)) == NULL ||
 456             colon[1].w_wc != RBRACKET)
 457                 return (1);     /* not a character class */
 458 
 459         len = (size_t)(colon - pattern);
 460         if (len + MB_LEN_MAX + 1 > sizeof (cbuf))
 461                 return (-1);    /* invalid character class */
 462         {
 463                 wchar_t w;
 464                 const wcat_t *s1 = pattern;
 465                 char *s2 = cbuf;
 466                 size_t n = len;
 467 
 468                 /* Copy the string. */
 469                 while (n > 0) {
 470                         w = (s1++)->w_wc;
 471                         /* Character class names must be ASCII. */
 472                         if (iswascii(w)) {
 473                                 n--;
 474                                 *s2++ = w;
 475                         } else {
 476                                 return (-1);    /* invalid character class */
 477                         }
 478                 }
 479                 *s2 = EOS;
 480         }
 481         if ((cc = wctype(cbuf)) == 0)
 482                 return (-1);    /* invalid character class */
 483         bufnext->w_at = M_QUOTE;
 484         (bufnext++)->w_wc = M_CLASS;
 485         bufnext->w_at = 0;
 486         (bufnext++)->w_wc = cc;
 487         *bufnextp = bufnext;
 488         *patternp += len + 3;
 489 
 490         return (0);
 491 }
 492 
 493 /*
 494  * The main glob() routine: compiles the pattern (optionally processing
 495  * quotes), calls glob1() to do the real pattern matching, and finally
 496  * sorts the list (unless unsorted operation is requested).  Returns 0
 497  * if things went well, nonzero if errors occurred.  It is not an error
 498  * to find no matches.
 499  */
 500 static int
 501 glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp,
 502     int (*errfunc)(const char *, int))
 503 {
 504         const wcat_t *qpatnext;
 505         int err, oldpathc;
 506         wchar_t c;
 507         int a;
 508         wcat_t *bufnext, patbuf[MAXPATHLEN];
 509 
 510         qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
 511         oldpathc = pglob->gl_pathc;
 512         bufnext = patbuf;
 513 
 514         /*
 515          * We don't need to check for buffer overflow any more.
 516          * The pattern has already been copied to an internal buffer.
 517          */
 518         while ((a = qpatnext->w_at), (c = (qpatnext++)->w_wc) != EOS) {
 519                 switch (c) {
 520                 case LBRACKET:
 521                         if (a != 0) {
 522                                 bufnext->w_at = a;
 523                                 (bufnext++)->w_wc = c;
 524                                 break;
 525                         }
 526                         a = qpatnext->w_at;
 527                         c = qpatnext->w_wc;
 528                         if (a == 0 && c == NOT)
 529                                 ++qpatnext;
 530                         if (qpatnext->w_wc == EOS ||
 531                             g_strchr(qpatnext+1, RBRACKET) == NULL) {
 532                                 bufnext->w_at = 0;
 533                                 (bufnext++)->w_wc = LBRACKET;
 534                                 if (a == 0 && c == NOT)
 535                                         --qpatnext;
 536                                 break;
 537                         }
 538                         bufnext->w_at = M_QUOTE;
 539                         (bufnext++)->w_wc = M_SET;
 540                         if (a == 0 && c == NOT) {
 541                                 bufnext->w_at = M_QUOTE;
 542                                 (bufnext++)->w_wc = M_NOT;
 543                         }
 544                         a = qpatnext->w_at;
 545                         c = (qpatnext++)->w_wc;
 546                         do {
 547                                 if (a == 0 && c == LBRACKET &&
 548                                     qpatnext->w_wc == COLON) {
 549                                         do {
 550                                                 err = g_charclass(&qpatnext,
 551                                                     &bufnext);
 552                                                 if (err)
 553                                                         break;
 554                                                 a = qpatnext->w_at;
 555                                                 c = (qpatnext++)->w_wc;
 556                                         } while (a == 0 && c == LBRACKET &&
 557                                             qpatnext->w_wc == COLON);
 558                                         if (err == -1 &&
 559                                             !(pglob->gl_flags & GLOB_NOCHECK))
 560                                                 return (GLOB_NOMATCH);
 561                                         if (a == 0 && c == RBRACKET)
 562                                                 break;
 563                                 }
 564                                 bufnext->w_at = a;
 565                                 (bufnext++)->w_wc = c;
 566                                 if (qpatnext->w_at == 0 &&
 567                                     qpatnext->w_wc == RANGE) {
 568                                         a = qpatnext[1].w_at;
 569                                         c = qpatnext[1].w_wc;
 570                                         if (qpatnext[1].w_at != 0 ||
 571                                             qpatnext[1].w_wc != RBRACKET) {
 572                                                 bufnext->w_at = M_QUOTE;
 573                                                 (bufnext++)->w_wc = M_RNG;
 574                                                 bufnext->w_at = a;
 575                                                 (bufnext++)->w_wc = c;
 576                                                 qpatnext += 2;
 577                                         }
 578                                 }
 579                                 a = qpatnext->w_at;
 580                                 c = (qpatnext++)->w_wc;
 581                         } while (a != 0 || c != RBRACKET);
 582                         pglob->gl_flags |= GLOB_MAGCHAR;
 583                         bufnext->w_at = M_QUOTE;
 584                         (bufnext++)->w_wc = M_END;
 585                         break;
 586                 case QUESTION:
 587                         if (a != 0) {
 588                                 bufnext->w_at = a;
 589                                 (bufnext++)->w_wc = c;
 590                                 break;
 591                         }
 592                         pglob->gl_flags |= GLOB_MAGCHAR;
 593                         bufnext->w_at = M_QUOTE;
 594                         (bufnext++)->w_wc = M_ONE;
 595                         break;
 596                 case STAR:
 597                         if (a != 0) {
 598                                 bufnext->w_at = a;
 599                                 (bufnext++)->w_wc = c;
 600                                 break;
 601                         }
 602                         pglob->gl_flags |= GLOB_MAGCHAR;
 603                         /*
 604                          * collapse adjacent stars to one,
 605                          * to avoid exponential behavior
 606                          */
 607                         if (bufnext == patbuf ||
 608                             bufnext[-1].w_at != M_QUOTE ||
 609                             bufnext[-1].w_wc != M_ALL) {
 610                                 bufnext->w_at = M_QUOTE;
 611                                 (bufnext++)->w_wc = M_ALL;
 612                         }
 613                         break;
 614                 default:
 615                         bufnext->w_at = a;
 616                         (bufnext++)->w_wc = c;
 617                         break;
 618                 }
 619         }
 620         bufnext->w_at = 0;
 621         bufnext->w_wc = EOS;
 622 #ifdef DEBUG
 623         qprintf("glob0:glob1:patbuf", patbuf);
 624 #endif
 625 
 626         if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc))
 627             != 0)
 628                 return (err);
 629 
 630         /*
 631          * If there was no match we are going to append the pattern
 632          * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
 633          * and the pattern did not contain any magic characters
 634          * GLOB_NOMAGIC is there just for compatibility with csh.
 635          */
 636         if (pglob->gl_pathc == oldpathc) {
 637                 if ((pglob->gl_flags & GLOB_NOCHECK) ||
 638                     ((pglob->gl_flags & GLOB_NOMAGIC) &&
 639                     !(pglob->gl_flags & GLOB_MAGCHAR)))
 640                         return (globextend(pattern, pglob, limitp, NULL));
 641                 else
 642                         return (GLOB_NOMATCH);
 643         }
 644         if (!(pglob->gl_flags & GLOB_NOSORT)) {
 645                 if ((pglob->gl_flags & GLOB_KEEPSTAT)) {
 646                         /* Keep the paths and stat info synced during sort */
 647                         struct glob_path_stat *path_stat;
 648                         int i;
 649                         int n = pglob->gl_pathc - oldpathc;
 650                         int o = pglob->gl_offs + oldpathc;
 651 
 652                         if ((path_stat = calloc(n, sizeof (*path_stat))) ==
 653                             NULL)
 654                                 return (GLOB_NOSPACE);
 655                         for (i = 0; i < n; i++) {
 656                                 path_stat[i].gps_path = pglob->gl_pathv[o + i];
 657                                 path_stat[i].gps_stat = pglob->gl_statv[o + i];
 658                         }
 659                         qsort(path_stat, n, sizeof (*path_stat), compare_gps);
 660                         for (i = 0; i < n; i++) {
 661                                 pglob->gl_pathv[o + i] = path_stat[i].gps_path;
 662                                 pglob->gl_statv[o + i] = path_stat[i].gps_stat;
 663                         }
 664                         free(path_stat);
 665                 } else {
 666                         qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
 667                             pglob->gl_pathc - oldpathc, sizeof (char *),
 668                             compare);
 669                 }
 670         }
 671         return (0);
 672 }
 673 
 674 static int
 675 compare(const void *p, const void *q)
 676 {
 677         return (strcmp(*(char **)p, *(char **)q));
 678 }
 679 
 680 static int
 681 compare_gps(const void *_p, const void *_q)
 682 {
 683         const struct glob_path_stat *p = (const struct glob_path_stat *)_p;
 684         const struct glob_path_stat *q = (const struct glob_path_stat *)_q;
 685 
 686         return (strcmp(p->gps_path, q->gps_path));
 687 }
 688 
 689 static int
 690 glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob,
 691     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 692 {
 693         wcat_t pathbuf[MAXPATHLEN];
 694 
 695         /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
 696         if (pattern->w_wc == EOS)
 697                 return (0);
 698         return (glob2(pathbuf, pathbuf+MAXPATHLEN-1,
 699             pathbuf, pathbuf+MAXPATHLEN-1,
 700             pattern, pattern_last, pglob, limitp, errfunc));
 701 }
 702 
 703 /*
 704  * The functions glob2 and glob3 are mutually recursive; there is one level
 705  * of recursion for each segment in the pattern that contains one or more
 706  * meta characters.
 707  */
 708 static int
 709 glob2(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
 710     wcat_t *pathend_last, wcat_t *pattern, wcat_t *pattern_last,
 711     glob_t *pglob, struct glob_lim *limitp, int (*errfunc)(const char *, int))
 712 {
 713         struct stat sb;
 714         wcat_t *p, *q;
 715         int anymeta;
 716 
 717         /*
 718          * Loop over pattern segments until end of pattern or until
 719          * segment with meta character found.
 720          */
 721         for (anymeta = 0; ; ) {
 722                 if (pattern->w_wc == EOS) {          /* End of pattern? */
 723                         pathend->w_at = 0;
 724                         pathend->w_wc = EOS;
 725 
 726                         if ((pglob->gl_flags & GLOB_LIMIT) &&
 727                             limitp->glim_stat++ >= GLOB_LIMIT_STAT) {
 728                                 errno = 0;
 729                                 pathend->w_at = 0;
 730                                 (pathend++)->w_wc = SEP;
 731                                 pathend->w_at = 0;
 732                                 pathend->w_wc = EOS;
 733                                 return (GLOB_NOSPACE);
 734                         }
 735                         if (g_lstat(pathbuf, &sb, pglob))
 736                                 return (0);
 737 
 738                         if (((pglob->gl_flags & GLOB_MARK) &&
 739                             (pathend[-1].w_at != 0 ||
 740                             pathend[-1].w_wc != SEP)) &&
 741                             (S_ISDIR(sb.st_mode) ||
 742                             (S_ISLNK(sb.st_mode) &&
 743                             (g_stat(pathbuf, &sb, pglob) == 0) &&
 744                             S_ISDIR(sb.st_mode)))) {
 745                                 if (pathend+1 > pathend_last)
 746                                         return (GLOB_NOSPACE);
 747                                 pathend->w_at = 0;
 748                                 (pathend++)->w_wc = SEP;
 749                                 pathend->w_at = 0;
 750                                 pathend->w_wc = EOS;
 751                         }
 752                         ++pglob->gl_matchc;
 753                         return (globextend(pathbuf, pglob, limitp, &sb));
 754                 }
 755 
 756                 /* Find end of next segment, copy tentatively to pathend. */
 757                 q = pathend;
 758                 p = pattern;
 759                 while (p->w_wc != EOS && p->w_wc != SEP) {
 760                         if (ismeta(*p))
 761                                 anymeta = 1;
 762                         if (q+1 > pathend_last)
 763                                 return (GLOB_NOSPACE);
 764                         *q++ = *p++;
 765                 }
 766 
 767                 if (!anymeta) {         /* No expansion, do next segment. */
 768                         pathend = q;
 769                         pattern = p;
 770                         while (pattern->w_wc == SEP) {
 771                                 if (pathend+1 > pathend_last)
 772                                         return (GLOB_NOSPACE);
 773                                 *pathend++ = *pattern++;
 774                         }
 775                 } else  {
 776                         /* Need expansion, recurse. */
 777                         return (glob3(pathbuf, pathbuf_last, pathend,
 778                             pathend_last, pattern, p, pattern_last,
 779                             pglob, limitp, errfunc));
 780                 }
 781         }
 782         /* NOTREACHED */
 783 }
 784 
 785 static int
 786 glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend,
 787     wcat_t *pathend_last, wcat_t *pattern, wcat_t *restpattern,
 788     wcat_t *restpattern_last, glob_t *pglob, struct glob_lim *limitp,
 789     int (*errfunc)(const char *, int))
 790 {
 791         struct dirent *dp;
 792         DIR *dirp;
 793         int err;
 794         char buf[MAXPATHLEN];
 795 
 796         /*
 797          * The readdirfunc declaration can't be prototyped, because it is
 798          * assigned, below, to two functions which are prototyped in glob.h
 799          * and dirent.h as taking pointers to differently typed opaque
 800          * structures.
 801          */
 802         struct dirent *(*readdirfunc)(void *);
 803 
 804         if (pathend > pathend_last)
 805                 return (GLOB_NOSPACE);
 806         pathend->w_at = 0;
 807         pathend->w_wc = EOS;
 808         errno = 0;
 809 
 810         if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
 811                 /* TODO: don't call for ENOENT or ENOTDIR? */
 812                 if (errfunc) {
 813                         if (g_Ctoc(pathbuf, buf, sizeof (buf)))
 814                                 return (GLOB_ABORTED);
 815                         if (errfunc(buf, errno) ||
 816                             pglob->gl_flags & GLOB_ERR)
 817                                 return (GLOB_ABORTED);
 818                 }
 819                 return (0);
 820         }
 821 
 822         err = 0;
 823 
 824         /* Search directory for matching names. */
 825         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
 826                 readdirfunc = pglob->gl_readdir;
 827         else
 828                 readdirfunc = (struct dirent *(*)(void *))readdir;
 829         while ((dp = (*readdirfunc)(dirp))) {
 830                 char *sc;
 831                 wcat_t *dc;
 832                 size_t n;
 833                 wchar_t w;
 834 
 835                 if ((pglob->gl_flags & GLOB_LIMIT) &&
 836                     limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) {
 837                         errno = 0;
 838                         pathend->w_at = 0;
 839                         (pathend++)->w_wc = SEP;
 840                         pathend->w_at = 0;
 841                         pathend->w_wc = EOS;
 842                         err = GLOB_NOSPACE;
 843                         break;
 844                 }
 845 
 846                 /* Initial DOT must be matched literally. */
 847                 if (dp->d_name[0] == DOT && pattern->w_wc != DOT)
 848                         continue;
 849                 dc = pathend;
 850                 sc = dp->d_name;
 851                 while (dc < pathend_last) {
 852                         if ((n = mbtowc(&w, sc, MB_LEN_MAX)) <= 0) {
 853                                 sc += 1;
 854                                 dc->w_at = 0;
 855                                 dc->w_wc = EOS;
 856                         } else {
 857                                 sc += n;
 858                                 dc->w_at = 0;
 859                                 dc->w_wc = w;
 860                         }
 861                         dc++;
 862                         if (n <= 0)
 863                                 break;
 864                 }
 865                 if (dc >= pathend_last) {
 866                         dc->w_at = 0;
 867                         dc->w_wc = EOS;
 868                         err = GLOB_NOSPACE;
 869                         break;
 870                 }
 871                 if (n < 0) {
 872                         err = GLOB_NOMATCH;
 873                         break;
 874                 }
 875 
 876                 if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
 877                         pathend->w_at = 0;
 878                         pathend->w_wc = EOS;
 879                         continue;
 880                 }
 881                 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
 882                     restpattern, restpattern_last, pglob, limitp,
 883                     errfunc);
 884                 if (err)
 885                         break;
 886         }
 887 
 888         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
 889                 (*pglob->gl_closedir)(dirp);
 890         else
 891                 closedir(dirp);
 892         return (err);
 893 }
 894 
 895 
 896 /*
 897  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
 898  * add the new item, and update gl_pathc.
 899  *
 900  * This assumes the BSD realloc, which only copies the block when its size
 901  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
 902  * behavior.
 903  *
 904  * Return 0 if new item added, error code if memory couldn't be allocated.
 905  *
 906  * Invariant of the glob_t structure:
 907  *      Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
 908  *      gl_pathv points to (gl_offs + gl_pathc + 1) items.
 909  */
 910 static int
 911 globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp,
 912     struct stat *sb)
 913 {
 914         char **pathv;
 915         ssize_t i;
 916         size_t newn, len;
 917         char *copy = NULL;
 918         const wcat_t *p;
 919         struct stat **statv;
 920         char junk[MB_LEN_MAX];
 921         int n;
 922 
 923         newn = 2 + pglob->gl_pathc + pglob->gl_offs;
 924         if (pglob->gl_offs >= INT_MAX ||
 925             pglob->gl_pathc >= INT_MAX ||
 926             newn >= INT_MAX ||
 927             SIZE_MAX / sizeof (*pathv) <= newn ||
 928             SIZE_MAX / sizeof (*statv) <= newn) {
 929         nospace:
 930                 for (i = pglob->gl_offs; i < (ssize_t)(newn - 2); i++) {
 931                         if (pglob->gl_pathv && pglob->gl_pathv[i])
 932                                 free(pglob->gl_pathv[i]);
 933                         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
 934                             pglob->gl_statv && pglob->gl_statv[i])
 935                                 free(pglob->gl_statv[i]);
 936                 }
 937                 if (pglob->gl_pathv) {
 938                         free(pglob->gl_pathv);
 939                         pglob->gl_pathv = NULL;
 940                 }
 941                 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
 942                     pglob->gl_statv) {
 943                         free(pglob->gl_statv);
 944                         pglob->gl_statv = NULL;
 945                 }
 946                 return (GLOB_NOSPACE);
 947         }
 948 
 949         pathv = realloc(pglob->gl_pathv, newn * sizeof (*pathv));
 950         if (pathv == NULL)
 951                 goto nospace;
 952         if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
 953                 /* first time around -- clear initial gl_offs items */
 954                 pathv += pglob->gl_offs;
 955                 for (i = pglob->gl_offs; --i >= 0; )
 956                         *--pathv = NULL;
 957         }
 958         pglob->gl_pathv = pathv;
 959 
 960         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
 961                 statv = realloc(pglob->gl_statv, newn * sizeof (*statv));
 962                 if (statv == NULL)
 963                         goto nospace;
 964                 if (pglob->gl_statv == NULL && pglob->gl_offs > 0) {
 965                         /* first time around -- clear initial gl_offs items */
 966                         statv += pglob->gl_offs;
 967                         for (i = pglob->gl_offs; --i >= 0; )
 968                                 *--statv = NULL;
 969                 }
 970                 pglob->gl_statv = statv;
 971                 if (sb == NULL)
 972                         statv[pglob->gl_offs + pglob->gl_pathc] = NULL;
 973                 else {
 974                         limitp->glim_malloc += sizeof (**statv);
 975                         if ((pglob->gl_flags & GLOB_LIMIT) &&
 976                             limitp->glim_malloc >= GLOB_LIMIT_MALLOC) {
 977                                 errno = 0;
 978                                 return (GLOB_NOSPACE);
 979                         }
 980                         if ((statv[pglob->gl_offs + pglob->gl_pathc] =
 981                             malloc(sizeof (**statv))) == NULL)
 982                                 goto copy_error;
 983                         memcpy(statv[pglob->gl_offs + pglob->gl_pathc], sb,
 984                             sizeof (*sb));
 985                 }
 986                 statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL;
 987         }
 988 
 989         len = MB_LEN_MAX;
 990         p = path;
 991         while ((n = wctomb(junk, p->w_wc)) > 0) {
 992                 len += n;
 993                 if ((p++)->w_wc == EOS)
 994                         break;
 995         }
 996         if (n < 0)
 997                 return (GLOB_NOMATCH);
 998 
 999         limitp->glim_malloc += len;
1000         if ((copy = malloc(len)) != NULL) {
1001                 if (g_Ctoc(path, copy, len)) {
1002                         free(copy);
1003                         return (GLOB_NOSPACE);
1004                 }
1005                 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
1006         }
1007         pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
1008 
1009         if ((pglob->gl_flags & GLOB_LIMIT) &&
1010             (newn * sizeof (*pathv)) + limitp->glim_malloc >
1011             GLOB_LIMIT_MALLOC) {
1012                 errno = 0;
1013                 return (GLOB_NOSPACE);
1014         }
1015         copy_error:
1016         return (copy == NULL ? GLOB_NOSPACE : 0);
1017 }
1018 
1019 
1020 /*
1021  * pattern matching function for filenames.  Each occurrence of the *
1022  * pattern causes a recursion level.
1023  */
1024 static int
1025 match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur)
1026 {
1027         int ok, negate_range;
1028         wcat_t c, k;
1029 
1030         if (recur-- == 0)
1031                 return (1);
1032 
1033         while (pat < patend) {
1034                 c = *pat++;
1035                 switch (c.w_wc) {
1036                 case M_ALL:
1037                         if (c.w_at != M_QUOTE) {
1038                                 k = *name++;
1039                                 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1040                                         return (0);
1041                                 break;
1042                         }
1043                         while (pat < patend && pat->w_at == M_QUOTE &&
1044                             pat->w_wc == M_ALL)
1045                                 pat++;  /* eat consecutive '*' */
1046                         if (pat == patend)
1047                                 return (1);
1048                         do {
1049                                 if (match(name, pat, patend, recur))
1050                                         return (1);
1051                         } while ((name++)->w_wc != EOS);
1052                         return (0);
1053                 case M_ONE:
1054                         if (c.w_at != M_QUOTE) {
1055                                 k = *name++;
1056                                 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1057                                         return (0);
1058                                 break;
1059                         }
1060                         if ((name++)->w_wc == EOS)
1061                                 return (0);
1062                         break;
1063                 case M_SET:
1064                         if (c.w_at != M_QUOTE) {
1065                                 k = *name++;
1066                                 if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1067                                         return (0);
1068                                 break;
1069                         }
1070                         ok = 0;
1071                         if ((k = *name++).w_wc == EOS)
1072                                 return (0);
1073                         if ((negate_range = (pat->w_at == M_QUOTE &&
1074                             pat->w_wc == M_NOT)) != 0)
1075                                 ++pat;
1076                         while (((c = *pat++).w_at != M_QUOTE) ||
1077                             c.w_wc != M_END) {
1078                                 if (c.w_at == M_QUOTE && c.w_wc == M_CLASS) {
1079                                         wcat_t cc;
1080 
1081                                         cc.w_at = pat->w_at;
1082                                         cc.w_wc = pat->w_wc;
1083                                         if (iswctype(k.w_wc, cc.w_wc))
1084                                                 ok = 1;
1085                                         ++pat;
1086                                 }
1087                                 if (pat->w_at == M_QUOTE &&
1088                                     pat->w_wc == M_RNG) {
1089                                         if (c.w_wc <= k.w_wc &&
1090                                             k.w_wc <= pat[1].w_wc)
1091                                                 ok = 1;
1092                                         pat += 2;
1093                                 } else if (c.w_wc == k.w_wc)
1094                                         ok = 1;
1095                         }
1096                         if (ok == negate_range)
1097                                 return (0);
1098                         break;
1099                 default:
1100                         k = *name++;
1101                         if (k.w_at != c.w_at || k.w_wc != c.w_wc)
1102                                 return (0);
1103                         break;
1104                 }
1105         }
1106         return (name->w_wc == EOS);
1107 }
1108 
1109 /* Free allocated data belonging to a glob_t structure. */
1110 void
1111 globfree(glob_t *pglob)
1112 {
1113         int i;
1114         char **pp;
1115 
1116         if (pglob->gl_pathv != NULL) {
1117                 pp = pglob->gl_pathv + pglob->gl_offs;
1118                 for (i = pglob->gl_pathc; i--; ++pp)
1119                         if (*pp)
1120                                 free(*pp);
1121                 free(pglob->gl_pathv);
1122                 pglob->gl_pathv = NULL;
1123         }
1124         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
1125             pglob->gl_statv != NULL) {
1126                 for (i = 0; i < pglob->gl_pathc; i++) {
1127                         if (pglob->gl_statv[i] != NULL)
1128                                 free(pglob->gl_statv[i]);
1129                 }
1130                 free(pglob->gl_statv);
1131                 pglob->gl_statv = NULL;
1132         }
1133 }
1134 
1135 static DIR *
1136 g_opendir(wcat_t *str, glob_t *pglob)
1137 {
1138         char buf[MAXPATHLEN];
1139 
1140         if (str->w_wc == EOS)
1141                 strlcpy(buf, ".", sizeof (buf));
1142         else {
1143                 if (g_Ctoc(str, buf, sizeof (buf)))
1144                         return (NULL);
1145         }
1146 
1147         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1148                 return ((*pglob->gl_opendir)(buf));
1149 
1150         return (opendir(buf));
1151 }
1152 
1153 static int
1154 g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob)
1155 {
1156         char buf[MAXPATHLEN];
1157 
1158         if (g_Ctoc(fn, buf, sizeof (buf)))
1159                 return (-1);
1160         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1161                 return ((*pglob->gl_lstat)(buf, sb));
1162         return (lstat(buf, sb));
1163 }
1164 
1165 static int
1166 g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob)
1167 {
1168         char buf[MAXPATHLEN];
1169 
1170         if (g_Ctoc(fn, buf, sizeof (buf)))
1171                 return (-1);
1172         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1173                 return ((*pglob->gl_stat)(buf, sb));
1174         return (stat(buf, sb));
1175 }
1176 
1177 static wcat_t *
1178 g_strchr(const wcat_t *str, wchar_t ch)
1179 {
1180         do {
1181                 if (str->w_at == 0 && str->w_wc == ch)
1182                         return ((wcat_t *)str);
1183         } while ((str++)->w_wc != EOS);
1184         return (NULL);
1185 }
1186 
1187 static int
1188 g_Ctoc(const wcat_t *str, char *buf, uint_t len)
1189 {
1190         int n;
1191         wchar_t w;
1192 
1193         while (len >= MB_LEN_MAX) {
1194                 w = (str++)->w_wc;
1195                 if ((n = wctomb(buf, w)) > 0) {
1196                         len -= n;
1197                         buf += n;
1198                 }
1199                 if (n < 0)
1200                         break;
1201                 if (w == EOS)
1202                         return (0);
1203         }
1204         return (1);
1205 }
1206 
1207 #ifdef DEBUG
1208 static void
1209 qprintf(const char *str, wcat_t *s)
1210 {
1211         wcat_t *p;
1212 
1213         (void) printf("%s:\n", str);
1214         for (p = s; p->w_wc != EOS; p++)
1215                 (void) printf("%wc", p->w_wc);
1216         (void) printf("\n");
1217         for (p = s; p->w_wc != EOS; p++)
1218                 (void) printf("%c", p->w_at & M_PROTECT ? '"' : ' ');
1219         (void) printf("\n");
1220         for (p = s; p->w_wc != EOS; p++)
1221                 (void) printf("%c", ismeta(*p) ? '_' : ' ');
1222         (void) printf("\n");
1223 }
1224 #endif