1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2012 Gary Mills
  24  */
  25 
  26 /*      $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */
  27 /*
  28  * Copyright (c) 1989, 1993
  29  *      The Regents of the University of California.  All rights reserved.
  30  *
  31  * This code is derived from software contributed to Berkeley by
  32  * Guido van Rossum.
  33  *
  34  * Redistribution and use in source and binary forms, with or without
  35  * modification, are permitted provided that the following conditions
  36  * are met:
  37  * 1. Redistributions of source code must retain the above copyright
  38  *    notice, this list of conditions and the following disclaimer.
  39  * 2. Redistributions in binary form must reproduce the above copyright
  40  *    notice, this list of conditions and the following disclaimer in the
  41  *    documentation and/or other materials provided with the distribution.
  42  * 3. Neither the name of the University nor the names of its contributors
  43  *    may be used to endorse or promote products derived from this software
  44  *    without specific prior written permission.
  45  *
  46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  56  * SUCH DAMAGE.
  57  */
  58 
  59 /*
  60  * glob(3) -- a superset of the one defined in POSIX 1003.2.
  61  *
  62  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
  63  *
  64  * Optional extra services, controlled by flags not defined by POSIX:
  65  *
  66  * GLOB_QUOTE:
  67  *      Escaping convention: \ inhibits any special meaning the following
  68  *      character might have (except \ at end of string is retained).
  69  * GLOB_MAGCHAR:
  70  *      Set in gl_flags if pattern contained a globbing character.
  71  * GLOB_NOMAGIC:
  72  *      Same as GLOB_NOCHECK, but it will only append pattern if it did
  73  *      not contain any magic characters.  [Used in csh style globbing]
  74  * GLOB_ALTDIRFUNC:
  75  *      Use alternately specified directory access functions.
  76  * GLOB_TILDE:
  77  *      expand ~user/foo to the /home/dir/of/user/foo
  78  * GLOB_BRACE:
  79  *      expand {1,2}{a,b} to 1a 1b 2a 2b
  80  * gl_matchc:
  81  *      Number of matches in the current invocation of glob.
  82  */
  83 
  84 #include <sys/param.h>
  85 #include <sys/stat.h>
  86 
  87 #include <ctype.h>
  88 #include <dirent.h>
  89 #include <errno.h>
  90 #include <glob.h>
  91 #include <limits.h>
  92 #include <pwd.h>
  93 #include <stdio.h>
  94 #include <stdlib.h>
  95 #include <string.h>
  96 #include <unistd.h>
  97 #include <wchar.h>
  98 
  99 #include "charclass.h"
 100 
 101 #define DOLLAR          '$'
 102 #define DOT             '.'
 103 #define EOS             '\0'
 104 #define LBRACKET        '['
 105 #define NOT             '!'
 106 #define QUESTION        '?'
 107 #define QUOTE           '\\'
 108 #define RANGE           '-'
 109 #define RBRACKET        ']'
 110 #define SEP             '/'
 111 #define STAR            '*'
 112 #define TILDE           '~'
 113 #define UNDERSCORE      '_'
 114 #define LBRACE          '{'
 115 #define RBRACE          '}'
 116 #define SLASH           '/'
 117 #define COMMA           ','
 118 #define COLON           ':'
 119 
 120 #define M_QUOTE         0x800000
 121 #define M_PROTECT       0x400000
 122 
 123 typedef struct Char_s {
 124         wchar_t wc;
 125         uint_t at;
 126 } Char;
 127 
 128 #define M_ALL           '*'     /* Plus M_QUOTE */
 129 #define M_END           ']'     /* Plus M_QUOTE */
 130 #define M_NOT           '!'     /* Plus M_QUOTE */
 131 #define M_ONE           '?'     /* Plus M_QUOTE */
 132 #define M_RNG           '-'     /* Plus M_QUOTE */
 133 #define M_SET           '['     /* Plus M_QUOTE */
 134 #define M_CLASS         ':'     /* Plus M_QUOTE */
 135 #define ismeta(c)       (((c).at&M_QUOTE) != 0)
 136 
 137 #define GLOB_LIMIT_MALLOC       65536
 138 #define GLOB_LIMIT_STAT         2048
 139 #define GLOB_LIMIT_READDIR      16384
 140 
 141 /* Limit of recursion during matching attempts. */
 142 #define GLOB_LIMIT_RECUR        64
 143 
 144 struct glob_lim {
 145         size_t  glim_malloc;
 146         size_t  glim_stat;
 147         size_t  glim_readdir;
 148 };
 149 
 150 struct glob_path_stat {
 151         char            *gps_path;
 152         struct stat     *gps_stat;
 153 };
 154 
 155 static int       compare(const void *, const void *);
 156 static int       compare_gps(const void *, const void *);
 157 static int       g_Ctoc(const Char *, char *, uint_t);
 158 static int       g_lstat(Char *, struct stat *, glob_t *);
 159 static DIR      *g_opendir(Char *, glob_t *);
 160 static Char     *g_strchr(const Char *, wchar_t);
 161 static int       g_strncmp(const Char *, const char *, size_t);
 162 static int       g_stat(Char *, struct stat *, glob_t *);
 163 static int       glob0(const Char *, glob_t *, struct glob_lim *,
 164                         int (*)(const char *, int));
 165 static int       glob1(Char *, Char *, glob_t *, struct glob_lim *,
 166                         int (*)(const char *, int));
 167 static int       glob2(Char *, Char *, Char *, Char *, Char *, Char *,
 168                         glob_t *, struct glob_lim *,
 169                         int (*)(const char *, int));
 170 static int       glob3(Char *, Char *, Char *, Char *, Char *,
 171                         Char *, Char *, glob_t *, struct glob_lim *,
 172                         int (*)(const char *, int));
 173 static int       globextend(const Char *, glob_t *, struct glob_lim *,
 174                     struct stat *);
 175 static
 176 const Char      *globtilde(const Char *, Char *, size_t, glob_t *);
 177 static int       globexp1(const Char *, glob_t *, struct glob_lim *,
 178                     int (*)(const char *, int));
 179 static int       globexp2(const Char *, const Char *, glob_t *,
 180                     struct glob_lim *, int (*)(const char *, int));
 181 static int       match(Char *, Char *, Char *, int);
 182 #ifdef DEBUG
 183 static void      qprintf(const char *, Char *);
 184 #endif
 185 
 186 int
 187 glob(const char *pattern, int flags, int (*errfunc)(const char *, int),
 188     glob_t *pglob)
 189 {
 190         const char *patnext;
 191         size_t n;
 192         wchar_t c;
 193         Char *bufnext, *bufend, patbuf[MAXPATHLEN];
 194         struct glob_lim limit = { 0, 0, 0 };
 195 
 196         if (strnlen(pattern, PATH_MAX) == PATH_MAX)
 197                 return (GLOB_NOMATCH);
 198 
 199         patnext = pattern;
 200         if (!(flags & GLOB_APPEND)) {
 201                 pglob->gl_pathc = 0;
 202                 pglob->gl_pathv = NULL;
 203                 if ((flags & GLOB_KEEPSTAT) != 0)
 204                         pglob->gl_statv = NULL;
 205                 if (!(flags & GLOB_DOOFFS))
 206                         pglob->gl_offs = 0;
 207         }
 208         pglob->gl_flags = flags & ~GLOB_MAGCHAR;
 209         pglob->gl_matchc = 0;
 210 
 211         if (pglob->gl_offs < 0 || pglob->gl_pathc < 0 ||
 212             pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX ||
 213             pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1)
 214                 return (GLOB_NOSPACE);
 215 
 216         bufnext = patbuf;
 217         bufend = bufnext + MAXPATHLEN - 1;
 218         if (flags & GLOB_NOESCAPE) {
 219                 while (bufnext < bufend &&
 220                     (n = mbtowc(&c, patnext, PATH_MAX)) > 0) {
 221                         patnext += n;
 222                         bufnext->at = 0;
 223                         (bufnext++)->wc = c;
 224                 }
 225         } else {
 226                 /* Protect the quoted characters. */
 227                 while (bufnext < bufend &&
 228                     (n = mbtowc(&c, patnext, PATH_MAX)) > 0) {
 229                         patnext += n;
 230                         if (c == QUOTE) {
 231                                 n = mbtowc(&c, patnext, PATH_MAX);
 232                                 if (n > 0)
 233                                         patnext += n;
 234                                 if (n == 0) {
 235                                         c = QUOTE;
 236                                 }
 237                                 bufnext->at = M_PROTECT;
 238                                 (bufnext++)->wc = c;
 239                         } else {
 240                                 bufnext->at = 0;
 241                                 (bufnext++)->wc = c;
 242                         }
 243                 }
 244         }
 245         bufnext->at = 0;
 246         bufnext->wc = EOS;
 247 
 248         if (flags & GLOB_BRACE)
 249                 return (globexp1(patbuf, pglob, &limit, errfunc));
 250         else
 251                 return (glob0(patbuf, pglob, &limit, errfunc));
 252 }
 253 
 254 /*
 255  * Expand recursively a glob {} pattern. When there is no more expansion
 256  * invoke the standard globbing routine to glob the rest of the magic
 257  * characters
 258  */
 259 static int
 260 globexp1(const Char *pattern, glob_t *pglob, struct glob_lim *limitp,
 261     int (*errfunc)(const char *, int))
 262 {
 263         const Char* ptr = pattern;
 264 
 265         /* Protect a single {}, for find(1), like csh */
 266         if (pattern[0].wc == LBRACE && pattern[1].wc == RBRACE &&
 267             pattern[2].wc == EOS)
 268                 return (glob0(pattern, pglob, limitp, errfunc));
 269 
 270         if ((ptr = (const Char *) g_strchr(ptr, LBRACE)) != NULL)
 271                 return (globexp2(ptr, pattern, pglob, limitp, errfunc));
 272 
 273         return (glob0(pattern, pglob, limitp, errfunc));
 274 }
 275 
 276 
 277 /*
 278  * Recursive brace globbing helper. Tries to expand a single brace.
 279  * If it succeeds then it invokes globexp1 with the new pattern.
 280  * If it fails then it tries to glob the rest of the pattern and returns.
 281  */
 282 static int
 283 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob,
 284     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 285 {
 286         int     i, rv;
 287         Char   *lm, *ls;
 288         const Char *pe, *pm, *pl;
 289         Char    patbuf[MAXPATHLEN];
 290 
 291         /* copy part up to the brace */
 292         for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
 293                 ;
 294         lm->at = 0;
 295         lm->wc = EOS;
 296         ls = lm;
 297 
 298         /* Find the balanced brace */
 299         for (i = 0, pe = ++ptr; pe->wc != EOS; pe++)
 300                 if (pe->wc == LBRACKET) {
 301                         /* Ignore everything between [] */
 302                         for (pm = pe++; pe->wc != RBRACKET &&
 303                             pe->wc != EOS; pe++)
 304                                 ;
 305                         if (pe->wc == EOS) {
 306                                 /*
 307                                  * We could not find a matching RBRACKET.
 308                                  * Ignore and just look for RBRACE
 309                                  */
 310                                 pe = pm;
 311                         }
 312                 } else if (pe->wc == LBRACE) {
 313                         i++;
 314                 } else if (pe->wc == RBRACE) {
 315                         if (i == 0)
 316                                 break;
 317                         i--;
 318                 }
 319 
 320         /* Non matching braces; just glob the pattern */
 321         if (i != 0 || pe->wc == EOS)
 322                 return (glob0(patbuf, pglob, limitp, errfunc));
 323 
 324         for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
 325                 switch (pm->wc) {
 326                 case LBRACKET:
 327                         /* Ignore everything between [] */
 328                         for (pl = pm++; pm->wc != RBRACKET && pm->wc != EOS;
 329                             pm++)
 330                                 ;
 331                         if (pm->wc == EOS) {
 332                                 /*
 333                                  * We could not find a matching RBRACKET.
 334                                  * Ignore and just look for RBRACE
 335                                  */
 336                                 pm = pl;
 337                         }
 338                         break;
 339 
 340                 case LBRACE:
 341                         i++;
 342                         break;
 343 
 344                 case RBRACE:
 345                         if (i) {
 346                                 i--;
 347                                 break;
 348                         }
 349                         /* FALLTHROUGH */
 350                 case COMMA:
 351                         if (i && pm->wc == COMMA)
 352                                 break;
 353                         else {
 354                                 /* Append the current string */
 355                                 for (lm = ls; (pl < pm); *lm++ = *pl++)
 356                                         ;
 357 
 358                                 /*
 359                                  * Append the rest of the pattern after the
 360                                  * closing brace
 361                                  */
 362                                 for (pl = pe + 1;
 363                                     (*lm++ = *pl++).wc != EOS; /* */)
 364                                         ;
 365 
 366                                 /* Expand the current pattern */
 367                                 rv = globexp1(patbuf, pglob, limitp, errfunc);
 368                                 if (rv && rv != GLOB_NOMATCH)
 369                                         return (rv);
 370 
 371                                 /* move after the comma, to the next string */
 372                                 pl = pm + 1;
 373                         }
 374                         break;
 375 
 376                 default:
 377                         break;
 378                 }
 379         }
 380         return (0);
 381 }
 382 
 383 
 384 
 385 /*
 386  * expand tilde from the passwd file.
 387  */
 388 static const Char *
 389 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
 390 {
 391         struct passwd *pwd;
 392         char *h;
 393         const Char *p;
 394         Char *b, *eb, *q;
 395         size_t n;
 396         wchar_t c;
 397 
 398         if (pattern->wc != TILDE || !(pglob->gl_flags & GLOB_TILDE))
 399                 return (pattern);
 400 
 401         /* Copy up to the end of the string or / */
 402         eb = &patbuf[patbuf_len - 1];
 403         for (p = pattern + 1, q = patbuf;
 404             q < eb && p->wc != EOS && p->wc != SLASH; *q++ = *p++)
 405                 ;
 406 
 407         q->at = 0;
 408         q->wc = EOS;
 409 
 410 #if 0
 411         if (q == eb)
 412                 return (what);
 413 #endif
 414 
 415         if (patbuf[0].wc == EOS) {
 416                 /*
 417                  * handle a plain ~ or ~/ by expanding $HOME
 418                  * first and then trying the password file
 419                  */
 420                 if (issetugid() != 0 || (h = getenv("HOME")) == NULL) {
 421                         if ((pwd = getpwuid(getuid())) == NULL)
 422                                 return (pattern);
 423                         else
 424                                 h = pwd->pw_dir;
 425                 }
 426         } else {
 427                 /*
 428                  * Expand a ~user
 429                  */
 430                 if ((pwd = getpwnam((char *)patbuf)) == NULL)
 431                         return (pattern);
 432                 else
 433                         h = pwd->pw_dir;
 434         }
 435 
 436         /* Copy the home directory */
 437         for (b = patbuf; b < eb && *h != EOS; b++) {
 438                 if ((n = mbtowc(&c, h, PATH_MAX)) > 0) {
 439                         h += n;
 440                         b->at = 0;
 441                         b->wc = c;
 442                 } else {
 443                         break;
 444                 }
 445         }
 446 
 447         /* Append the rest of the pattern */
 448         while (b < eb && (*b++ = *p++).wc != EOS)
 449                 ;
 450         b->at = 0;
 451         b->wc = EOS;
 452 
 453         return (patbuf);
 454 }
 455 
 456 static int
 457 g_strncmp(const Char *s1, const char *s2, size_t n)
 458 {
 459         int rv = 0;
 460         int r;
 461         wchar_t c;
 462 
 463         while (n > 0) {
 464                 if ((r = mbtowc(&c, s2, n)) > 0) {
 465                         n -= r;
 466                         s2 += r;
 467                 } else {
 468                         rv = s1->wc;
 469                         break;
 470                 }
 471                 rv = s1->wc - c;
 472                 if (rv != 0)
 473                         break;
 474                 if ((s1++)->wc == EOS)
 475                         break;
 476         }
 477         return (rv);
 478 }
 479 
 480 static int
 481 g_charclass(const Char **patternp, Char **bufnextp)
 482 {
 483         const Char *pattern = *patternp + 1;
 484         Char *bufnext = *bufnextp;
 485         const Char *colon;
 486         struct cclass *cc;
 487         size_t len;
 488 
 489         if ((colon = g_strchr(pattern, COLON)) == NULL ||
 490             colon[1].wc != RBRACKET)
 491                 return (1);     /* not a character class */
 492 
 493         len = (size_t)(colon - pattern);
 494         for (cc = cclasses; cc->name != NULL; cc++) {
 495                 if (!g_strncmp(pattern, cc->name, len) && cc->name[len] == EOS)
 496                         break;
 497         }
 498         if (cc->name == NULL)
 499                 return (-1);    /* invalid character class */
 500         bufnext->at = M_QUOTE;
 501         (bufnext++)->wc = M_CLASS;
 502         bufnext->at = (cc - &cclasses[0]);
 503         (bufnext++)->wc = COLON;
 504         *bufnextp = bufnext;
 505         *patternp += len + 3;
 506 
 507         return (0);
 508 }
 509 
 510 /*
 511  * The main glob() routine: compiles the pattern (optionally processing
 512  * quotes), calls glob1() to do the real pattern matching, and finally
 513  * sorts the list (unless unsorted operation is requested).  Returns 0
 514  * if things went well, nonzero if errors occurred.  It is not an error
 515  * to find no matches.
 516  */
 517 static int
 518 glob0(const Char *pattern, glob_t *pglob, struct glob_lim *limitp,
 519     int (*errfunc)(const char *, int))
 520 {
 521         const Char *qpatnext;
 522         int err, oldpathc;
 523         wchar_t c;
 524         int a;
 525         Char *bufnext, patbuf[MAXPATHLEN];
 526 
 527         qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
 528         oldpathc = pglob->gl_pathc;
 529         bufnext = patbuf;
 530 
 531         /* We don't need to check for buffer overflow any more. */
 532         while ((a = qpatnext->at), (c = (qpatnext++)->wc) != EOS) {
 533                 switch (c) {
 534                 case LBRACKET:
 535                         if (a != 0) {
 536                                 bufnext->at = a;
 537                                 (bufnext++)->wc = c;
 538                                 break;
 539                         }
 540                         a = qpatnext->at;
 541                         c = qpatnext->wc;
 542                         if (a == 0 && c == NOT)
 543                                 ++qpatnext;
 544                         if (qpatnext->wc == EOS ||
 545                             g_strchr(qpatnext+1, RBRACKET) == NULL) {
 546                                 bufnext->at = 0;
 547                                 (bufnext++)->wc = LBRACKET;
 548                                 if (a == 0 && c == NOT)
 549                                         --qpatnext;
 550                                 break;
 551                         }
 552                         bufnext->at = M_QUOTE;
 553                         (bufnext++)->wc = M_SET;
 554                         if (a == 0 && c == NOT) {
 555                                 bufnext->at = M_QUOTE;
 556                                 (bufnext++)->wc = M_NOT;
 557                         }
 558                         a = qpatnext->at;
 559                         c = (qpatnext++)->wc;
 560                         do {
 561                                 if (a == 0 && c == LBRACKET &&
 562                                     qpatnext->wc == COLON) {
 563                                         do {
 564                                                 err = g_charclass(&qpatnext,
 565                                                     &bufnext);
 566                                                 if (err)
 567                                                         break;
 568                                                 a = qpatnext->at;
 569                                                 c = (qpatnext++)->wc;
 570                                         } while (a == 0 && c == LBRACKET &&
 571                                             qpatnext->wc == COLON);
 572                                         if (err == -1 &&
 573                                             !(pglob->gl_flags & GLOB_NOCHECK))
 574                                                 return (GLOB_NOMATCH);
 575                                         if (a == 0 && c == RBRACKET)
 576                                                 break;
 577                                 }
 578                                 bufnext->at = a;
 579                                 (bufnext++)->wc = c;
 580                                 if (qpatnext->at == 0 &&
 581                                     qpatnext->wc == RANGE) {
 582                                         a = qpatnext[1].at;
 583                                         c = qpatnext[1].wc;
 584                                         if (qpatnext[1].at != 0 ||
 585                                             qpatnext[1].wc != RBRACKET) {
 586                                                 bufnext->at = M_QUOTE;
 587                                                 (bufnext++)->wc = M_RNG;
 588                                                 bufnext->at = a;
 589                                                 (bufnext++)->wc = c;
 590                                                 qpatnext += 2;
 591                                         }
 592                                 }
 593                                 a = qpatnext->at;
 594                                 c = (qpatnext++)->wc;
 595                         } while (a != 0 || c != RBRACKET);
 596                         pglob->gl_flags |= GLOB_MAGCHAR;
 597                         bufnext->at = M_QUOTE;
 598                         (bufnext++)->wc = M_END;
 599                         break;
 600                 case QUESTION:
 601                         if (a != 0) {
 602                                 bufnext->at = a;
 603                                 (bufnext++)->wc = c;
 604                                 break;
 605                         }
 606                         pglob->gl_flags |= GLOB_MAGCHAR;
 607                         bufnext->at = M_QUOTE;
 608                         (bufnext++)->wc = M_ONE;
 609                         break;
 610                 case STAR:
 611                         if (a != 0) {
 612                                 bufnext->at = a;
 613                                 (bufnext++)->wc = c;
 614                                 break;
 615                         }
 616                         pglob->gl_flags |= GLOB_MAGCHAR;
 617                         /*
 618                          * collapse adjacent stars to one,
 619                          * to avoid exponential behavior
 620                          */
 621                         if (bufnext == patbuf ||
 622                             bufnext[-1].at != M_QUOTE ||
 623                             bufnext[-1].wc != M_ALL) {
 624                                 bufnext->at = M_QUOTE;
 625                                 (bufnext++)->wc = M_ALL;
 626                         }
 627                         break;
 628                 default:
 629                         bufnext->at = a;
 630                         (bufnext++)->wc = c;
 631                         break;
 632                 }
 633         }
 634         bufnext->at = 0;
 635         bufnext->wc = EOS;
 636 #ifdef DEBUG
 637         qprintf("glob0:glob1:patbuf", patbuf);
 638 #endif
 639 
 640         if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc))
 641             != 0)
 642                 return (err);
 643 
 644         /*
 645          * If there was no match we are going to append the pattern
 646          * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
 647          * and the pattern did not contain any magic characters
 648          * GLOB_NOMAGIC is there just for compatibility with csh.
 649          */
 650         if (pglob->gl_pathc == oldpathc) {
 651                 if ((pglob->gl_flags & GLOB_NOCHECK) ||
 652                     ((pglob->gl_flags & GLOB_NOMAGIC) &&
 653                     !(pglob->gl_flags & GLOB_MAGCHAR)))
 654                         return (globextend(pattern, pglob, limitp, NULL));
 655                 else
 656                         return (GLOB_NOMATCH);
 657         }
 658         if (!(pglob->gl_flags & GLOB_NOSORT)) {
 659                 if ((pglob->gl_flags & GLOB_KEEPSTAT)) {
 660                         /* Keep the paths and stat info synced during sort */
 661                         struct glob_path_stat *path_stat;
 662                         int i;
 663                         int n = pglob->gl_pathc - oldpathc;
 664                         int o = pglob->gl_offs + oldpathc;
 665 
 666                         if ((path_stat = calloc(n, sizeof (*path_stat))) ==
 667                             NULL)
 668                                 return (GLOB_NOSPACE);
 669                         for (i = 0; i < n; i++) {
 670                                 path_stat[i].gps_path = pglob->gl_pathv[o + i];
 671                                 path_stat[i].gps_stat = pglob->gl_statv[o + i];
 672                         }
 673                         qsort(path_stat, n, sizeof (*path_stat), compare_gps);
 674                         for (i = 0; i < n; i++) {
 675                                 pglob->gl_pathv[o + i] = path_stat[i].gps_path;
 676                                 pglob->gl_statv[o + i] = path_stat[i].gps_stat;
 677                         }
 678                         free(path_stat);
 679                 } else {
 680                         qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
 681                             pglob->gl_pathc - oldpathc, sizeof (char *),
 682                             compare);
 683                 }
 684         }
 685         return (0);
 686 }
 687 
 688 static int
 689 compare(const void *p, const void *q)
 690 {
 691         return (strcmp(*(char **)p, *(char **)q));
 692 }
 693 
 694 static int
 695 compare_gps(const void *_p, const void *_q)
 696 {
 697         const struct glob_path_stat *p = (const struct glob_path_stat *)_p;
 698         const struct glob_path_stat *q = (const struct glob_path_stat *)_q;
 699 
 700         return (strcmp(p->gps_path, q->gps_path));
 701 }
 702 
 703 static int
 704 glob1(Char *pattern, Char *pattern_last, glob_t *pglob,
 705     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 706 {
 707         Char pathbuf[MAXPATHLEN];
 708 
 709         /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
 710         if (pattern->wc == EOS)
 711                 return (0);
 712         return (glob2(pathbuf, pathbuf+MAXPATHLEN-1,
 713             pathbuf, pathbuf+MAXPATHLEN-1,
 714             pattern, pattern_last, pglob, limitp, errfunc));
 715 }
 716 
 717 /*
 718  * The functions glob2 and glob3 are mutually recursive; there is one level
 719  * of recursion for each segment in the pattern that contains one or more
 720  * meta characters.
 721  */
 722 static int
 723 glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
 724     Char *pattern, Char *pattern_last, glob_t *pglob,
 725     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 726 {
 727         struct stat sb;
 728         Char *p, *q;
 729         int anymeta;
 730 
 731         /*
 732          * Loop over pattern segments until end of pattern or until
 733          * segment with meta character found.
 734          */
 735         for (anymeta = 0; ; ) {
 736                 if (pattern->wc == EOS) {            /* End of pattern? */
 737                         pathend->at = 0;
 738                         pathend->wc = EOS;
 739 
 740                         if ((pglob->gl_flags & GLOB_LIMIT) &&
 741                             limitp->glim_stat++ >= GLOB_LIMIT_STAT) {
 742                                 errno = 0;
 743                                 pathend->at = 0;
 744                                 (pathend++)->wc = SEP;
 745                                 pathend->at = 0;
 746                                 pathend->wc = EOS;
 747                                 return (GLOB_NOSPACE);
 748                         }
 749                         if (g_lstat(pathbuf, &sb, pglob))
 750                                 return (0);
 751 
 752                         if (((pglob->gl_flags & GLOB_MARK) &&
 753                             (pathend[-1].at != 0 ||
 754                             pathend[-1].wc != SEP)) &&
 755                             (S_ISDIR(sb.st_mode) ||
 756                             (S_ISLNK(sb.st_mode) &&
 757                             (g_stat(pathbuf, &sb, pglob) == 0) &&
 758                             S_ISDIR(sb.st_mode)))) {
 759                                 if (pathend+1 > pathend_last)
 760                                         return (1);
 761                                 pathend->at = 0;
 762                                 (pathend++)->wc = SEP;
 763                                 pathend->at = 0;
 764                                 pathend->wc = EOS;
 765                         }
 766                         ++pglob->gl_matchc;
 767                         return (globextend(pathbuf, pglob, limitp, &sb));
 768                 }
 769 
 770                 /* Find end of next segment, copy tentatively to pathend. */
 771                 q = pathend;
 772                 p = pattern;
 773                 while (p->wc != EOS && p->wc != SEP) {
 774                         if (ismeta(*p))
 775                                 anymeta = 1;
 776                         if (q+1 > pathend_last)
 777                                 return (1);
 778                         *q++ = *p++;
 779                 }
 780 
 781                 if (!anymeta) {         /* No expansion, do next segment. */
 782                         pathend = q;
 783                         pattern = p;
 784                         while (pattern->wc == SEP) {
 785                                 if (pathend+1 > pathend_last)
 786                                         return (1);
 787                                 *pathend++ = *pattern++;
 788                         }
 789                 } else  {
 790                         /* Need expansion, recurse. */
 791                         return (glob3(pathbuf, pathbuf_last, pathend,
 792                             pathend_last, pattern, p, pattern_last,
 793                             pglob, limitp, errfunc));
 794                 }
 795         }
 796         /* NOTREACHED */
 797 }
 798 
 799 static int
 800 glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
 801     Char *pattern, Char *restpattern, Char *restpattern_last, glob_t *pglob,
 802     struct glob_lim *limitp, int (*errfunc)(const char *, int))
 803 {
 804         struct dirent *dp;
 805         DIR *dirp;
 806         int err;
 807         char buf[MAXPATHLEN];
 808 
 809         /*
 810          * The readdirfunc declaration can't be prototyped, because it is
 811          * assigned, below, to two functions which are prototyped in glob.h
 812          * and dirent.h as taking pointers to differently typed opaque
 813          * structures.
 814          */
 815         struct dirent *(*readdirfunc)(void *);
 816 
 817         if (pathend > pathend_last)
 818                 return (1);
 819         pathend->at = 0;
 820         pathend->wc = EOS;
 821         errno = 0;
 822 
 823         if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
 824                 /* TODO: don't call for ENOENT or ENOTDIR? */
 825                 if (errfunc) {
 826                         if (g_Ctoc(pathbuf, buf, sizeof (buf)))
 827                                 return (GLOB_ABORTED);
 828                         if (errfunc(buf, errno) ||
 829                             pglob->gl_flags & GLOB_ERR)
 830                                 return (GLOB_ABORTED);
 831                 }
 832                 return (0);
 833         }
 834 
 835         err = 0;
 836 
 837         /* Search directory for matching names. */
 838         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
 839                 readdirfunc = pglob->gl_readdir;
 840         else
 841                 readdirfunc = (struct dirent *(*)(void *))readdir;
 842         while ((dp = (*readdirfunc)(dirp))) {
 843                 char *sc;
 844                 Char *dc;
 845                 size_t n;
 846                 wchar_t c;
 847 
 848                 if ((pglob->gl_flags & GLOB_LIMIT) &&
 849                     limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) {
 850                         errno = 0;
 851                         pathend->at = 0;
 852                         (pathend++)->wc = SEP;
 853                         pathend->at = 0;
 854                         pathend->wc = EOS;
 855                         err = GLOB_NOSPACE;
 856                         break;
 857                 }
 858 
 859                 /* Initial DOT must be matched literally. */
 860                 if (dp->d_name[0] == DOT && pattern->wc != DOT)
 861                         continue;
 862                 dc = pathend;
 863                 sc = dp->d_name;
 864                 while (dc < pathend_last) {
 865                         if ((n = mbtowc(&c, sc, PATH_MAX)) <= 0) {
 866                                 sc += 1;
 867                                 dc->at = 0;
 868                                 dc->wc = EOS;
 869                         } else {
 870                                 sc += n;
 871                                 dc->at = 0;
 872                                 dc->wc = c;
 873                         }
 874                         dc++;
 875                         if (n <= 0)
 876                                 break;
 877                 }
 878                 if (dc >= pathend_last) {
 879                         dc->at = 0;
 880                         dc->wc = EOS;
 881                         err = 1;
 882                         break;
 883                 }
 884 
 885                 if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) {
 886                         pathend->at = 0;
 887                         pathend->wc = EOS;
 888                         continue;
 889                 }
 890                 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
 891                     restpattern, restpattern_last, pglob, limitp,
 892                     errfunc);
 893                 if (err)
 894                         break;
 895         }
 896 
 897         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
 898                 (*pglob->gl_closedir)(dirp);
 899         else
 900                 closedir(dirp);
 901         return (err);
 902 }
 903 
 904 
 905 /*
 906  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
 907  * add the new item, and update gl_pathc.
 908  *
 909  * This assumes the BSD realloc, which only copies the block when its size
 910  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
 911  * behavior.
 912  *
 913  * Return 0 if new item added, error code if memory couldn't be allocated.
 914  *
 915  * Invariant of the glob_t structure:
 916  *      Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
 917  *      gl_pathv points to (gl_offs + gl_pathc + 1) items.
 918  */
 919 static int
 920 globextend(const Char *path, glob_t *pglob, struct glob_lim *limitp,
 921     struct stat *sb)
 922 {
 923         char **pathv;
 924         ssize_t i;
 925         size_t newn, len;
 926         char *copy = NULL;
 927         const Char *p;
 928         struct stat **statv;
 929         char junk[MB_CUR_MAX];
 930         int n;
 931 
 932         newn = 2 + pglob->gl_pathc + pglob->gl_offs;
 933         if (pglob->gl_offs >= INT_MAX ||
 934             pglob->gl_pathc >= INT_MAX ||
 935             newn >= INT_MAX ||
 936             SIZE_MAX / sizeof (*pathv) <= newn ||
 937             SIZE_MAX / sizeof (*statv) <= newn) {
 938         nospace:
 939                 for (i = pglob->gl_offs; i < (ssize_t)(newn - 2); i++) {
 940                         if (pglob->gl_pathv && pglob->gl_pathv[i])
 941                                 free(pglob->gl_pathv[i]);
 942                         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
 943                             pglob->gl_pathv && pglob->gl_pathv[i])
 944                                 free(pglob->gl_statv[i]);
 945                 }
 946                 if (pglob->gl_pathv) {
 947                         free(pglob->gl_pathv);
 948                         pglob->gl_pathv = NULL;
 949                 }
 950                 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
 951                     pglob->gl_statv) {
 952                         free(pglob->gl_statv);
 953                         pglob->gl_statv = NULL;
 954                 }
 955                 return (GLOB_NOSPACE);
 956         }
 957 
 958         pathv = realloc(pglob->gl_pathv, newn * sizeof (*pathv));
 959         if (pathv == NULL)
 960                 goto nospace;
 961         if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
 962                 /* first time around -- clear initial gl_offs items */
 963                 pathv += pglob->gl_offs;
 964                 for (i = pglob->gl_offs; --i >= 0; )
 965                         *--pathv = NULL;
 966         }
 967         pglob->gl_pathv = pathv;
 968 
 969         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) {
 970                 statv = realloc(pglob->gl_statv, newn * sizeof (*statv));
 971                 if (statv == NULL)
 972                         goto nospace;
 973                 if (pglob->gl_statv == NULL && pglob->gl_offs > 0) {
 974                         /* first time around -- clear initial gl_offs items */
 975                         statv += pglob->gl_offs;
 976                         for (i = pglob->gl_offs; --i >= 0; )
 977                                 *--statv = NULL;
 978                 }
 979                 pglob->gl_statv = statv;
 980                 if (sb == NULL)
 981                         statv[pglob->gl_offs + pglob->gl_pathc] = NULL;
 982                 else {
 983                         limitp->glim_malloc += sizeof (**statv);
 984                         if ((pglob->gl_flags & GLOB_LIMIT) &&
 985                             limitp->glim_malloc >= GLOB_LIMIT_MALLOC) {
 986                                 errno = 0;
 987                                 return (GLOB_NOSPACE);
 988                         }
 989                         if ((statv[pglob->gl_offs + pglob->gl_pathc] =
 990                             malloc(sizeof (**statv))) == NULL)
 991                                 goto copy_error;
 992                         memcpy(statv[pglob->gl_offs + pglob->gl_pathc], sb,
 993                             sizeof (*sb));
 994                 }
 995                 statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL;
 996         }
 997 
 998         len = MB_CUR_MAX;
 999         p = path;
1000         while ((n = wctomb(junk, p->wc)) > 0) {
1001                 len += n;
1002                 if ((p++)->wc == EOS)
1003                         break;
1004         }
1005 
1006         limitp->glim_malloc += len;
1007         if ((copy = malloc(len)) != NULL) {
1008                 if (g_Ctoc(path, copy, len)) {
1009                         free(copy);
1010                         return (GLOB_NOSPACE);
1011                 }
1012                 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
1013         }
1014         pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
1015 
1016         if ((pglob->gl_flags & GLOB_LIMIT) &&
1017             (newn * sizeof (*pathv)) + limitp->glim_malloc >
1018             GLOB_LIMIT_MALLOC) {
1019                 errno = 0;
1020                 return (GLOB_NOSPACE);
1021         }
1022         copy_error:
1023         return (copy == NULL ? GLOB_NOSPACE : 0);
1024 }
1025 
1026 
1027 /*
1028  * pattern matching function for filenames.  Each occurrence of the *
1029  * pattern causes a recursion level.
1030  */
1031 static int
1032 match(Char *name, Char *pat, Char *patend, int recur)
1033 {
1034         int ok, negate_range;
1035         Char c, k;
1036 
1037         if (recur-- == 0)
1038                 return (GLOB_NOSPACE);
1039 
1040         while (pat < patend) {
1041                 c = *pat++;
1042                 switch (c.wc) {
1043                 case M_ALL:
1044                         if (c.at != M_QUOTE) {
1045                                 k = *name++;
1046                                 if (k.at != c.at || k.wc != c.wc)
1047                                         return (0);
1048                                 break;
1049                         }
1050                         while (pat < patend && pat->at == M_QUOTE &&
1051                             pat->wc == M_ALL)
1052                                 pat++;  /* eat consecutive '*' */
1053                         if (pat == patend)
1054                                 return (1);
1055                         do {
1056                                 if (match(name, pat, patend, recur))
1057                                         return (1);
1058                         } while ((name++)->wc != EOS);
1059                         return (0);
1060                 case M_ONE:
1061                         if (c.at != M_QUOTE) {
1062                                 k = *name++;
1063                                 if (k.at != c.at || k.wc != c.wc)
1064                                         return (0);
1065                                 break;
1066                         }
1067                         if ((name++)->wc == EOS)
1068                                 return (0);
1069                         break;
1070                 case M_SET:
1071                         if (c.at != M_QUOTE) {
1072                                 k = *name++;
1073                                 if (k.at != c.at || k.wc != c.wc)
1074                                         return (0);
1075                                 break;
1076                         }
1077                         ok = 0;
1078                         if ((k = *name++).wc == EOS)
1079                                 return (0);
1080                         if ((negate_range = (pat->at == M_QUOTE &&
1081                             pat->wc == M_NOT)) != 0)
1082                                 ++pat;
1083                         while (((c = *pat++).at != M_QUOTE) || c.wc != M_END) {
1084                                 if (c.at == M_QUOTE && c.wc == M_CLASS) {
1085                                         Char idx;
1086 
1087                                         idx.at = pat->at;
1088                                         idx.wc = pat->wc;
1089                                         if (idx.at < NCCLASSES &&
1090                                             cclasses[idx.at].isctype(k.wc))
1091                                                 ok = 1;
1092                                         ++pat;
1093                                 }
1094                                 if (pat->at == M_QUOTE && pat->wc == M_RNG) {
1095                                         if (c.wc <= k.wc && k.wc <= pat[1].wc)
1096                                                 ok = 1;
1097                                         pat += 2;
1098                                 } else if (c.wc == k.wc)
1099                                         ok = 1;
1100                         }
1101                         if (ok == negate_range)
1102                                 return (0);
1103                         break;
1104                 default:
1105                         k = *name++;
1106                         if (k.at != c.at || k.wc != c.wc)
1107                                 return (0);
1108                         break;
1109                 }
1110         }
1111         return (name->wc == EOS);
1112 }
1113 
1114 /* Free allocated data belonging to a glob_t structure. */
1115 void
1116 globfree(glob_t *pglob)
1117 {
1118         int i;
1119         char **pp;
1120 
1121         if (pglob->gl_pathv != NULL) {
1122                 pp = pglob->gl_pathv + pglob->gl_offs;
1123                 for (i = pglob->gl_pathc; i--; ++pp)
1124                         if (*pp)
1125                                 free(*pp);
1126                 free(pglob->gl_pathv);
1127                 pglob->gl_pathv = NULL;
1128         }
1129         if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 &&
1130             pglob->gl_statv != NULL) {
1131                 for (i = 0; i < pglob->gl_pathc; i++) {
1132                         if (pglob->gl_statv[i] != NULL)
1133                                 free(pglob->gl_statv[i]);
1134                 }
1135                 free(pglob->gl_statv);
1136                 pglob->gl_statv = NULL;
1137         }
1138 }
1139 
1140 static DIR *
1141 g_opendir(Char *str, glob_t *pglob)
1142 {
1143         char buf[MAXPATHLEN];
1144 
1145         if (str->wc == EOS)
1146                 strlcpy(buf, ".", sizeof (buf));
1147         else {
1148                 if (g_Ctoc(str, buf, sizeof (buf)))
1149                         return (NULL);
1150         }
1151 
1152         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1153                 return ((*pglob->gl_opendir)(buf));
1154 
1155         return (opendir(buf));
1156 }
1157 
1158 static int
1159 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
1160 {
1161         char buf[MAXPATHLEN];
1162 
1163         if (g_Ctoc(fn, buf, sizeof (buf)))
1164                 return (-1);
1165         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1166                 return ((*pglob->gl_lstat)(buf, sb));
1167         return (lstat(buf, sb));
1168 }
1169 
1170 static int
1171 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
1172 {
1173         char buf[MAXPATHLEN];
1174 
1175         if (g_Ctoc(fn, buf, sizeof (buf)))
1176                 return (-1);
1177         if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1178                 return ((*pglob->gl_stat)(buf, sb));
1179         return (stat(buf, sb));
1180 }
1181 
1182 static Char *
1183 g_strchr(const Char *str, wchar_t ch)
1184 {
1185         do {
1186                 if (str->at == 0 && str->wc == ch)
1187                         return ((Char *)str);
1188         } while ((str++)->wc != EOS);
1189         return (NULL);
1190 }
1191 
1192 static int
1193 g_Ctoc(const Char *str, char *buf, uint_t len)
1194 {
1195         int n;
1196         wchar_t w;
1197 
1198         while (len >= MB_CUR_MAX) {
1199                 w = (str++)->wc;
1200                 if ((n = wctomb(buf, w)) > 0) {
1201                         len -= n;
1202                         buf += n;
1203                 }
1204                 if (w == EOS)
1205                         return (0);
1206         }
1207         return (1);
1208 }
1209 
1210 #ifdef DEBUG
1211 static void
1212 qprintf(const char *str, Char *s)
1213 {
1214         Char *p;
1215 
1216         (void) printf("%s:\n", str);
1217         for (p = s; p->wc != EOS; p++)
1218                 (void) printf("%wc", p->wc);
1219         (void) printf("\n");
1220         for (p = s; p->wc != EOS; p++)
1221                 (void) printf("%c", p->at & M_PROTECT ? '"' : ' ');
1222         (void) printf("\n");
1223         for (p = s; p->wc != EOS; p++)
1224                 (void) printf("%c", ismeta(*p) ? '_' : ' ');
1225         (void) printf("\n");
1226 }
1227 #endif