iltre Wdiff usr/src/lib/libc/port/regex/regcomp.c

Print this page

9083 replace regex implementation with tre

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libc/port/regex/regcomp.c
          +++ new/usr/src/lib/libc/port/regex/regcomp.c
   1    1  /*
   2      - * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   3      - * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   4      - * Copyright 2012 Milan Jurik. All rights reserved.
   5      - * Copyright (c) 1992, 1993, 1994 Henry Spencer.
   6      - * Copyright (c) 1992, 1993, 1994
   7      - *      The Regents of the University of California.  All rights reserved.
        2 + * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
        3 + * All rights reserved.
   8    4   *
   9      - * This code is derived from software contributed to Berkeley by
  10      - * Henry Spencer.
  11      - *
  12    5   * Redistribution and use in source and binary forms, with or without
  13    6   * modification, are permitted provided that the following conditions
  14    7   * are met:
        8 + *
  15    9   * 1. Redistributions of source code must retain the above copyright
  16   10   *    notice, this list of conditions and the following disclaimer.
       11 + *
  17   12   * 2. Redistributions in binary form must reproduce the above copyright
  18   13   *    notice, this list of conditions and the following disclaimer in the
  19   14   *    documentation and/or other materials provided with the distribution.
  20      - * 3. Neither the name of the University nor the names of its contributors
  21      - *    may be used to endorse or promote products derived from this software
  22      - *    without specific prior written permission.
  23   15   *
  24      - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25      - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26      - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27      - * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28      - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29      - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30      - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31      - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32      - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33      - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34      - * SUCH DAMAGE.
       16 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
       17 + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       18 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       19 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
       20 + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       21 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       22 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       23 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       24 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       25 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       26 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35   27   */
  36   28  
  37      -#include "lint.h"
  38      -#include "file64.h"
  39      -#include <sys/types.h>
  40      -#include <stdio.h>
       29 +/*
       30 +  tre_regcomp.c - TRE POSIX compatible regex compilation functions.
       31 +*/
       32 +
  41   33  #include <string.h>
  42      -#include <ctype.h>
  43      -#include <limits.h>
       34 +#include <errno.h>
  44   35  #include <stdlib.h>
  45      -#include <regex.h>
  46      -#include <wchar.h>
  47      -#include <wctype.h>
  48   36  
  49      -#include "../locale/runetype.h"
  50      -#include "../locale/collate.h"
       37 +#include "tre.h"
       38 +#include "tre-internal.h"
  51   39  
  52      -#include "utils.h"
  53      -#include "regex2.h"
  54      -
  55      -#include "cname.h"
  56      -#include "../locale/mblocal.h"
  57      -
  58      -/*
  59      - * parse structure, passed up and down to avoid global variables and
  60      - * other clumsinesses
  61      - */
  62      -struct parse {
  63      -        const char *next;       /* next character in RE */
  64      -        const char *end;        /* end of string (-> NUL normally) */
  65      -        int error;              /* has an error been seen? */
  66      -        sop *strip;             /* malloced strip */
  67      -        sopno ssize;            /* malloced strip size (allocated) */
  68      -        sopno slen;             /* malloced strip length (used) */
  69      -        int ncsalloc;           /* number of csets allocated */
  70      -        struct re_guts *g;
  71      -#define NPAREN  10              /* we need to remember () 1-9 for back refs */
  72      -        sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
  73      -        sopno pend[NPAREN];     /* -> ) ([0] unused) */
  74      -};
  75      -
  76      -/* ========= begin header generated by ./mkh ========= */
  77      -#ifdef __cplusplus
  78      -extern "C" {
  79      -#endif
  80      -
  81      -/* === regcomp.c === */
  82      -static void p_ere(struct parse *p, int stop);
  83      -static void p_ere_exp(struct parse *p);
  84      -static void p_str(struct parse *p);
  85      -static void p_bre(struct parse *p, int end1, int end2);
  86      -static int p_simp_re(struct parse *p, int starordinary);
  87      -static int p_count(struct parse *p);
  88      -static void p_bracket(struct parse *p);
  89      -static void p_b_term(struct parse *p, cset *cs);
  90      -static void p_b_cclass(struct parse *p, cset *cs);
  91      -static void p_b_eclass(struct parse *p, cset *cs);
  92      -static wint_t p_b_symbol(struct parse *p);
  93      -static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
  94      -static wint_t othercase(wint_t ch);
  95      -static void bothcases(struct parse *p, wint_t ch);
  96      -static void ordinary(struct parse *p, wint_t ch);
  97      -static void nonnewline(struct parse *p);
  98      -static void repeat(struct parse *p, sopno start, int from, int to);
  99      -static int seterr(struct parse *p, int e);
 100      -static cset *allocset(struct parse *p);
 101      -static void freeset(struct parse *p, cset *cs);
 102      -static void CHadd(struct parse *p, cset *cs, wint_t ch);
 103      -static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
 104      -static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
 105      -static wint_t singleton(cset *cs);
 106      -static sopno dupl(struct parse *p, sopno start, sopno finish);
 107      -static void doemit(struct parse *p, sop op, size_t opnd);
 108      -static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
 109      -static void dofwd(struct parse *p, sopno pos, sop value);
 110      -static int enlarge(struct parse *p, sopno size);
 111      -static void stripsnug(struct parse *p, struct re_guts *g);
 112      -static void findmust(struct parse *p, struct re_guts *g);
 113      -static int altoffset(sop *scan, int offset);
 114      -static void computejumps(struct parse *p, struct re_guts *g);
 115      -static void computematchjumps(struct parse *p, struct re_guts *g);
 116      -static sopno pluscount(struct parse *p, struct re_guts *g);
 117      -static wint_t wgetnext(struct parse *p);
 118      -
 119      -#ifdef __cplusplus
 120      -}
 121      -#endif
 122      -/* ========= end header generated by ./mkh ========= */
 123      -
 124      -static char nuls[10];           /* place to point scanner in event of error */
 125      -
 126      -/*
 127      - * macros for use with parse structure
 128      - * BEWARE:  these know that the parse structure is named `p' !!!
 129      - */
 130      -#define PEEK()  (*p->next)
 131      -#define PEEK2() (*(p->next+1))
 132      -#define MORE()  (p->next < p->end)
 133      -#define MORE2() (p->next+1 < p->end)
 134      -#define SEE(c)  (MORE() && PEEK() == (c))
 135      -#define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
 136      -#define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
 137      -#define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 138      -#define NEXT()  (p->next++)
 139      -#define NEXT2() (p->next += 2)
 140      -#define NEXTn(n)        (p->next += (n))
 141      -#define GETNEXT()       (*p->next++)
 142      -#define WGETNEXT()      wgetnext(p)
 143      -#define SETERROR(e)     ((void)seterr(p, (e)))
 144      -#define REQUIRE(co, e)  ((co) || seterr(p, e))
 145      -#define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
 146      -#define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
 147      -#define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
 148      -#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
 149      -#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
 150      -#define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
 151      -#define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
 152      -#define HERE()          (p->slen)
 153      -#define THERE()         (p->slen - 1)
 154      -#define THERETHERE()    (p->slen - 2)
 155      -#define DROP(n) (p->slen -= (n))
 156      -
 157      -#ifndef NDEBUG
 158      -static int never = 0;           /* for use in asserts; shuts lint up */
 159      -#else
 160      -#define never   0               /* some <assert.h>s have bugs too */
 161      -#endif
 162      -
 163      -/*
 164      - * regcomp - interface for parser and compilation
 165      - */
 166      -int                             /* 0 success, otherwise REG_something */
 167      -regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD pattern,
 168      -    int cflags)
       40 +int
       41 +tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
       42 +    locale_t loc)
 169   43  {
 170      -        struct parse pa;
 171      -        struct re_guts *g;
 172      -        struct parse *p = &pa;
 173      -        int i;
 174      -        size_t len;
 175      -        size_t maxlen;
 176      -#ifdef REDEBUG
 177      -#define GOODFLAGS(f)    (f)
 178      -#else
 179      -#define GOODFLAGS(f)    ((f)&~REG_DUMP)
 180      -#endif
       44 +  int ret;
       45 +  tre_char_t *wregex;
       46 +  size_t wlen;
 181   47  
 182      -        /* We had REG_INVARG, but we don't have that on Solaris. */
 183      -        cflags = GOODFLAGS(cflags);
 184      -        if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
 185      -                return (REG_EFATAL);
       48 +  wregex = malloc(sizeof(tre_char_t) * (n + 1));
       49 +  if (wregex == NULL)
       50 +    return REG_ESPACE;
 186   51  
 187      -        if (cflags&REG_PEND) {
 188      -                if (preg->re_endp < pattern)
 189      -                        return (REG_EFATAL);
 190      -                len = preg->re_endp - pattern;
 191      -        } else
 192      -                len = strlen(pattern);
       52 +  /* If the current locale uses the standard single byte encoding of
       53 +     characters, we don't do a multibyte string conversion.  If we did,
       54 +     many applications which use the default locale would break since
       55 +     the default "C" locale uses the 7-bit ASCII character set, and
       56 +     all characters with the eighth bit set would be considered invalid. */
       57 +  if (TRE_MB_CUR_MAX_L(loc) == 1)
       58 +    {
       59 +      unsigned int i;
       60 +      const unsigned char *str = (const unsigned char *)regex;
       61 +      tre_char_t *wstr = wregex;
 193   62  
 194      -        /* do the mallocs early so failure handling is easy */
 195      -        g = (struct re_guts *)malloc(sizeof (struct re_guts));
 196      -        if (g == NULL)
 197      -                return (REG_ESPACE);
 198      -        /*
 199      -         * Limit the pattern space to avoid a 32-bit overflow on buffer
 200      -         * extension.  Also avoid any signed overflow in case of conversion
 201      -         * so make the real limit based on a 31-bit overflow.
 202      -         *
 203      -         * Likely not applicable on 64-bit systems but handle the case
 204      -         * generically (who are we to stop people from using ~715MB+
 205      -         * patterns?).
 206      -         */
 207      -        maxlen = ((size_t)-1 >> 1) / sizeof (sop) * 2 / 3;
 208      -        if (len >= maxlen) {
 209      -                free((char *)g);
 210      -                return (REG_ESPACE);
 211      -        }
 212      -        p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
 213      -        assert(p->ssize >= len);
       63 +      for (i = 0; i < n; i++)
       64 +        *(wstr++) = *(str++);
       65 +      wlen = n;
       66 +    }
       67 +  else
       68 +    {
       69 +      size_t consumed;
       70 +      tre_char_t *wcptr = wregex;
       71 +      mbstate_t state;
       72 +      memset(&state, '\0', sizeof(state));
       73 +      while (n > 0)
       74 +        {
       75 +          consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
 214   76  
 215      -        p->strip = (sop *)malloc(p->ssize * sizeof (sop));
 216      -        p->slen = 0;
 217      -        if (p->strip == NULL) {
 218      -                free((char *)g);
 219      -                return (REG_ESPACE);
 220      -        }
 221      -
 222      -        /* set things up */
 223      -        p->g = g;
 224      -        p->next = pattern;      /* convenience; we do not modify it */
 225      -        p->end = p->next + len;
 226      -        p->error = 0;
 227      -        p->ncsalloc = 0;
 228      -        for (i = 0; i < NPAREN; i++) {
 229      -                p->pbegin[i] = 0;
 230      -                p->pend[i] = 0;
 231      -        }
 232      -        g->sets = NULL;
 233      -        g->ncsets = 0;
 234      -        g->cflags = cflags;
 235      -        g->iflags = 0;
 236      -        g->nbol = 0;
 237      -        g->neol = 0;
 238      -        g->must = NULL;
 239      -        g->moffset = -1;
 240      -        g->charjump = NULL;
 241      -        g->matchjump = NULL;
 242      -        g->mlen = 0;
 243      -        g->nsub = 0;
 244      -        g->backrefs = 0;
 245      -
 246      -        /* do it */
 247      -        EMIT(OEND, 0);
 248      -        g->firststate = THERE();
 249      -        if (cflags&REG_EXTENDED)
 250      -                p_ere(p, OUT);
 251      -        else if (cflags&REG_NOSPEC)
 252      -                p_str(p);
 253      -        else
 254      -                p_bre(p, OUT, OUT);
 255      -        EMIT(OEND, 0);
 256      -        g->laststate = THERE();
 257      -
 258      -        /* tidy up loose ends and fill things in */
 259      -        stripsnug(p, g);
 260      -        findmust(p, g);
 261      -        /*
 262      -         * only use Boyer-Moore algorithm if the pattern is bigger
 263      -         * than three characters
 264      -         */
 265      -        if (g->mlen > 3) {
 266      -                computejumps(p, g);
 267      -                computematchjumps(p, g);
 268      -                if (g->matchjump == NULL && g->charjump != NULL) {
 269      -                        free(g->charjump);
 270      -                        g->charjump = NULL;
       77 +          switch (consumed)
       78 +            {
       79 +            case 0:
       80 +              if (*regex == '\0')
       81 +                consumed = 1;
       82 +              else
       83 +                {
       84 +                  free(wregex);
       85 +                  return REG_BADPAT;
 271   86                  }
       87 +              break;
       88 +            case (size_t)-1:
       89 +            case (size_t)-2:
       90 +              DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
       91 +              free(wregex);
       92 +              return REG_ILLSEQ;
       93 +            }
       94 +          regex += consumed;
       95 +          n -= consumed;
       96 +          wcptr++;
 272   97          }
 273      -        g->nplus = pluscount(p, g);
 274      -        g->magic = MAGIC2;
 275      -        preg->re_nsub = g->nsub;
 276      -        preg->re_g = g;
 277      -        preg->re_magic = MAGIC1;
 278      -#ifndef REDEBUG
 279      -        /* not debugging, so can't rely on the assert() in regexec() */
 280      -        if (g->iflags&BAD)
 281      -                SETERROR(REG_EFATAL);
 282      -#endif
       98 +      wlen = wcptr - wregex;
       99 +    }
 283  100  
 284      -        /* win or lose, we're done */
 285      -        if (p->error != 0)      /* lose */
 286      -                regfree(preg);
 287      -        return (p->error);
 288      -}
      101 +  wregex[wlen] = L'\0';
      102 +  ret = tre_compile(preg, wregex, wlen, cflags, loc);
      103 +  free(wregex);
 289  104  
 290      -/*
 291      - * p_ere - ERE parser top level, concatenation and alternation
 292      - */
 293      -static void
 294      -p_ere(struct parse *p,
 295      -    int stop)           /* character this ERE should end at */
 296      -{
 297      -        char c;
 298      -        sopno prevback;
 299      -        sopno prevfwd;
 300      -        sopno conc;
 301      -        int first = 1;          /* is this the first alternative? */
 302      -
 303      -        for (;;) {
 304      -                /* do a bunch of concatenated expressions */
 305      -                conc = HERE();
 306      -                while (MORE() && (c = PEEK()) != '|' && c != stop)
 307      -                        p_ere_exp(p);
 308      -                /* require nonempty */
 309      -                (void) REQUIRE(HERE() != conc, REG_BADPAT);
 310      -
 311      -                if (!EAT('|'))
 312      -                        break;          /* NOTE BREAK OUT */
 313      -
 314      -                if (first) {
 315      -                        INSERT(OCH_, conc);     /* offset is wrong */
 316      -                        prevfwd = conc;
 317      -                        prevback = conc;
 318      -                        first = 0;
 319      -                }
 320      -                ASTERN(OOR1, prevback);
 321      -                prevback = THERE();
 322      -                AHEAD(prevfwd);                 /* fix previous offset */
 323      -                prevfwd = HERE();
 324      -                EMIT(OOR2, 0);                  /* offset is very wrong */
 325      -        }
 326      -
 327      -        if (!first) {           /* tail-end fixups */
 328      -                AHEAD(prevfwd);
 329      -                ASTERN(O_CH, prevback);
 330      -        }
 331      -
 332      -        assert(!MORE() || SEE(stop));
      105 +  return ret;
 333  106  }
 334  107  
 335      -/*
 336      - * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
 337      - */
 338      -static void
 339      -p_ere_exp(struct parse *p)
      108 +int
      109 +tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
 340  110  {
 341      -        char c;
 342      -        wint_t wc;
 343      -        sopno pos;
 344      -        int count;
 345      -        int count2;
 346      -        sopno subno;
 347      -        int wascaret = 0;
 348      -
 349      -        assert(MORE());         /* caller should have ensured this */
 350      -        c = GETNEXT();
 351      -
 352      -        pos = HERE();
 353      -        switch (c) {
 354      -        case '(':
 355      -                (void) REQUIRE(MORE(), REG_EPAREN);
 356      -                p->g->nsub++;
 357      -                subno = p->g->nsub;
 358      -                if (subno < NPAREN)
 359      -                        p->pbegin[subno] = HERE();
 360      -                EMIT(OLPAREN, subno);
 361      -                if (!SEE(')'))
 362      -                        p_ere(p, ')');
 363      -                if (subno < NPAREN) {
 364      -                        p->pend[subno] = HERE();
 365      -                        assert(p->pend[subno] != 0);
 366      -                }
 367      -                EMIT(ORPAREN, subno);
 368      -                (void) MUSTEAT(')', REG_EPAREN);
 369      -                break;
 370      -#ifndef POSIX_MISTAKE
 371      -        case ')':               /* happens only if no current unmatched ( */
 372      -                /*
 373      -                 * You may ask, why the ifndef?  Because I didn't notice
 374      -                 * this until slightly too late for 1003.2, and none of the
 375      -                 * other 1003.2 regular-expression reviewers noticed it at
 376      -                 * all.  So an unmatched ) is legal POSIX, at least until
 377      -                 * we can get it fixed.
 378      -                 */
 379      -                SETERROR(REG_EPAREN);
 380      -                break;
 381      -#endif
 382      -        case '^':
 383      -                EMIT(OBOL, 0);
 384      -                p->g->iflags |= USEBOL;
 385      -                p->g->nbol++;
 386      -                wascaret = 1;
 387      -                break;
 388      -        case '$':
 389      -                EMIT(OEOL, 0);
 390      -                p->g->iflags |= USEEOL;
 391      -                p->g->neol++;
 392      -                break;
 393      -        case '|':
 394      -                SETERROR(REG_BADPAT);
 395      -                break;
 396      -        case '*':
 397      -        case '+':
 398      -        case '?':
 399      -        case '{':
 400      -                SETERROR(REG_BADRPT);
 401      -                break;
 402      -        case '.':
 403      -                if (p->g->cflags&REG_NEWLINE)
 404      -                        nonnewline(p);
 405      -                else
 406      -                        EMIT(OANY, 0);
 407      -                break;
 408      -        case '[':
 409      -                p_bracket(p);
 410      -                break;
 411      -        case '\\':
 412      -                (void) REQUIRE(MORE(), REG_EESCAPE);
 413      -                wc = WGETNEXT();
 414      -                switch (wc) {
 415      -                case '<':
 416      -                        EMIT(OBOW, 0);
 417      -                        break;
 418      -                case '>':
 419      -                        EMIT(OEOW, 0);
 420      -                        break;
 421      -                default:
 422      -                        ordinary(p, wc);
 423      -                        break;
 424      -                }
 425      -                break;
 426      -        default:
 427      -                if (p->error != 0)
 428      -                        return;
 429      -                p->next--;
 430      -                wc = WGETNEXT();
 431      -                ordinary(p, wc);
 432      -                break;
 433      -        }
 434      -
 435      -        if (!MORE())
 436      -                return;
 437      -        c = PEEK();
 438      -        /* we call { a repetition if followed by a digit */
 439      -        if (!(c == '*' || c == '+' || c == '?' || c == '{'))
 440      -                return;         /* no repetition, we're done */
 441      -        else if (c == '{')
 442      -                (void) REQUIRE(MORE2() && \
 443      -                    (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
 444      -        NEXT();
 445      -
 446      -        (void) REQUIRE(!wascaret, REG_BADRPT);
 447      -        switch (c) {
 448      -        case '*':       /* implemented as +? */
 449      -                /* this case does not require the (y|) trick, noKLUDGE */
 450      -                INSERT(OPLUS_, pos);
 451      -                ASTERN(O_PLUS, pos);
 452      -                INSERT(OQUEST_, pos);
 453      -                ASTERN(O_QUEST, pos);
 454      -                break;
 455      -        case '+':
 456      -                INSERT(OPLUS_, pos);
 457      -                ASTERN(O_PLUS, pos);
 458      -                break;
 459      -        case '?':
 460      -                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
 461      -                INSERT(OCH_, pos);              /* offset slightly wrong */
 462      -                ASTERN(OOR1, pos);              /* this one's right */
 463      -                AHEAD(pos);                     /* fix the OCH_ */
 464      -                EMIT(OOR2, 0);                  /* offset very wrong... */
 465      -                AHEAD(THERE());                 /* ...so fix it */
 466      -                ASTERN(O_CH, THERETHERE());
 467      -                break;
 468      -        case '{':
 469      -                count = p_count(p);
 470      -                if (EAT(',')) {
 471      -                        if (isdigit((uch)PEEK())) {
 472      -                                count2 = p_count(p);
 473      -                                (void) REQUIRE(count <= count2, REG_BADBR);
 474      -                        } else          /* single number with comma */
 475      -                                count2 = INFINITY;
 476      -                } else          /* just a single number */
 477      -                        count2 = count;
 478      -                repeat(p, pos, count, count2);
 479      -                if (!EAT('}')) {        /* error heuristics */
 480      -                        while (MORE() && PEEK() != '}')
 481      -                                NEXT();
 482      -                        (void) REQUIRE(MORE(), REG_EBRACE);
 483      -                        SETERROR(REG_BADBR);
 484      -                }
 485      -                break;
 486      -        }
 487      -
 488      -        if (!MORE())
 489      -                return;
 490      -        c = PEEK();
 491      -        if (!(c == '*' || c == '+' || c == '?' ||
 492      -            (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
 493      -                return;
 494      -        SETERROR(REG_BADRPT);
      111 +  return tre_regncomp_l(preg, regex, n, cflags, uselocale((locale_t)0));
 495  112  }
 496  113  
 497      -/*
 498      - * p_str - string (no metacharacters) "parser"
 499      - */
 500      -static void
 501      -p_str(struct parse *p)
      114 +int
      115 +tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
 502  116  {
 503      -        (void) REQUIRE(MORE(), REG_BADPAT);
 504      -        while (MORE())
 505      -                ordinary(p, WGETNEXT());
 506      -}
      117 +  size_t len;
 507  118  
 508      -/*
 509      - * p_bre - BRE parser top level, anchoring and concatenation
 510      - * Giving end1 as OUT essentially eliminates the end1/end2 check.
 511      - *
 512      - * This implementation is a bit of a kludge, in that a trailing $ is first
 513      - * taken as an ordinary character and then revised to be an anchor.
 514      - * The amount of lookahead needed to avoid this kludge is excessive.
 515      - */
 516      -static void
 517      -p_bre(struct parse *p,
 518      -    int end1,           /* first terminating character */
 519      -    int end2)           /* second terminating character */
 520      -{
 521      -        sopno start = HERE();
 522      -        int first = 1;                  /* first subexpression? */
 523      -        int wasdollar = 0;
 524      -
 525      -        if (EAT('^')) {
 526      -                EMIT(OBOL, 0);
 527      -                p->g->iflags |= USEBOL;
 528      -                p->g->nbol++;
 529      -        }
 530      -        while (MORE() && !SEETWO(end1, end2)) {
 531      -                wasdollar = p_simp_re(p, first);
 532      -                first = 0;
 533      -        }
 534      -        if (wasdollar) {        /* oops, that was a trailing anchor */
 535      -                DROP(1);
 536      -                EMIT(OEOL, 0);
 537      -                p->g->iflags |= USEEOL;
 538      -                p->g->neol++;
 539      -        }
 540      -
 541      -        (void) REQUIRE(HERE() != start, REG_BADPAT);    /* require nonempty */
      119 +  if (cflags & REG_PEND)
      120 +    {
      121 +      if ((const char *)(preg->re_endp) < regex)
      122 +        return REG_INVARG;
      123 +      len = (const char *)(preg->re_endp) - regex;
      124 +    }
      125 +  else
      126 +    len = strlen(regex);
      127 +  return tre_regncomp_l(preg, regex, len, cflags, loc);
 542  128  }
 543  129  
 544      -/*
 545      - * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 546      - */
 547      -static int                      /* was the simple RE an unbackslashed $? */
 548      -p_simp_re(struct parse *p,
 549      -    int starordinary)   /* is a leading * an ordinary character? */
      130 +int
      131 +regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD regex,
      132 +    int cflags)
 550  133  {
 551      -        int c;
 552      -        int count;
 553      -        int count2;
 554      -        sopno pos;
 555      -        int i;
 556      -        wint_t wc;
 557      -        sopno subno;
 558      -#define BACKSL  (1<<CHAR_BIT)
 559      -
 560      -        pos = HERE();           /* repetition op, if any, covers from here */
 561      -
 562      -        assert(MORE());         /* caller should have ensured this */
 563      -        c = GETNEXT();
 564      -        if (c == '\\') {
 565      -                (void) REQUIRE(MORE(), REG_EESCAPE);
 566      -                c = BACKSL | GETNEXT();
 567      -        }
 568      -        switch (c) {
 569      -        case '.':
 570      -                if (p->g->cflags&REG_NEWLINE)
 571      -                        nonnewline(p);
 572      -                else
 573      -                        EMIT(OANY, 0);
 574      -                break;
 575      -        case '[':
 576      -                p_bracket(p);
 577      -                break;
 578      -        case BACKSL|'<':
 579      -                EMIT(OBOW, 0);
 580      -                break;
 581      -        case BACKSL|'>':
 582      -                EMIT(OEOW, 0);
 583      -                break;
 584      -        case BACKSL|'{':
 585      -                SETERROR(REG_BADRPT);
 586      -                break;
 587      -        case BACKSL|'(':
 588      -                p->g->nsub++;
 589      -                subno = p->g->nsub;
 590      -                if (subno < NPAREN)
 591      -                        p->pbegin[subno] = HERE();
 592      -                EMIT(OLPAREN, subno);
 593      -                /* the MORE here is an error heuristic */
 594      -                if (MORE() && !SEETWO('\\', ')'))
 595      -                        p_bre(p, '\\', ')');
 596      -                if (subno < NPAREN) {
 597      -                        p->pend[subno] = HERE();
 598      -                        assert(p->pend[subno] != 0);
 599      -                }
 600      -                EMIT(ORPAREN, subno);
 601      -                (void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
 602      -                break;
 603      -        case BACKSL|')':        /* should not get here -- must be user */
 604      -                SETERROR(REG_EPAREN);
 605      -                break;
 606      -        case BACKSL|'1':
 607      -        case BACKSL|'2':
 608      -        case BACKSL|'3':
 609      -        case BACKSL|'4':
 610      -        case BACKSL|'5':
 611      -        case BACKSL|'6':
 612      -        case BACKSL|'7':
 613      -        case BACKSL|'8':
 614      -        case BACKSL|'9':
 615      -                i = (c&~BACKSL) - '0';
 616      -                assert(i < NPAREN);
 617      -                if (p->pend[i] != 0) {
 618      -                        assert(i <= p->g->nsub);
 619      -                        EMIT(OBACK_, i);
 620      -                        assert(p->pbegin[i] != 0);
 621      -                        assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
 622      -                        assert(OP(p->strip[p->pend[i]]) == ORPAREN);
 623      -                        (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
 624      -                        EMIT(O_BACK, i);
 625      -                } else
 626      -                        SETERROR(REG_ESUBREG);
 627      -                p->g->backrefs = 1;
 628      -                break;
 629      -        case '*':
 630      -                (void) REQUIRE(starordinary, REG_BADRPT);
 631      -                /* FALLTHROUGH */
 632      -        default:
 633      -                if (p->error != 0)
 634      -                        return (0);     /* Definitely not $... */
 635      -                p->next--;
 636      -                wc = WGETNEXT();
 637      -                ordinary(p, wc);
 638      -                break;
 639      -        }
 640      -
 641      -        if (EAT('*')) {         /* implemented as +? */
 642      -                /* this case does not require the (y|) trick, noKLUDGE */
 643      -                INSERT(OPLUS_, pos);
 644      -                ASTERN(O_PLUS, pos);
 645      -                INSERT(OQUEST_, pos);
 646      -                ASTERN(O_QUEST, pos);
 647      -        } else if (EATTWO('\\', '{')) {
 648      -                count = p_count(p);
 649      -                if (EAT(',')) {
 650      -                        if (MORE() && isdigit((uch)PEEK())) {
 651      -                                count2 = p_count(p);
 652      -                                (void) REQUIRE(count <= count2, REG_BADBR);
 653      -                        } else          /* single number with comma */
 654      -                                count2 = INFINITY;
 655      -                } else          /* just a single number */
 656      -                        count2 = count;
 657      -                repeat(p, pos, count, count2);
 658      -                if (!EATTWO('\\', '}')) {       /* error heuristics */
 659      -                        while (MORE() && !SEETWO('\\', '}'))
 660      -                                NEXT();
 661      -                        (void) REQUIRE(MORE(), REG_EBRACE);
 662      -                        SETERROR(REG_BADBR);
 663      -                }
 664      -        } else if (c == '$')    /* $ (but not \$) ends it */
 665      -                return (1);
 666      -
 667      -        return (0);
      134 +  return tre_regcomp_l(preg, regex, cflags, uselocale((locale_t)0));
 668  135  }
 669  136  
 670      -/*
 671      - * p_count - parse a repetition count
 672      - */
 673      -static int                      /* the value */
 674      -p_count(struct parse *p)
      137 +void
      138 +regfree(regex_t *preg)
 675  139  {
 676      -        int count = 0;
 677      -        int ndigits = 0;
 678      -
 679      -        while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
 680      -                count = count*10 + (GETNEXT() - '0');
 681      -                ndigits++;
 682      -        }
 683      -
 684      -        (void) REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
 685      -        return (count);
 686      -}
 687      -
 688      -/*
 689      - * p_bracket - parse a bracketed character list
 690      - */
 691      -static void
 692      -p_bracket(struct parse *p)
 693      -{
 694      -        cset *cs;
 695      -        wint_t ch;
 696      -
 697      -        /* Dept of Truly Sickening Special-Case Kludges */
 698      -        if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
 699      -                EMIT(OBOW, 0);
 700      -                NEXTn(6);
 701      -                return;
 702      -        }
 703      -        if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
 704      -                EMIT(OEOW, 0);
 705      -                NEXTn(6);
 706      -                return;
 707      -        }
 708      -
 709      -        if ((cs = allocset(p)) == NULL)
 710      -                return;
 711      -
 712      -        if (p->g->cflags&REG_ICASE)
 713      -                cs->icase = 1;
 714      -        if (EAT('^'))
 715      -                cs->invert = 1;
 716      -        if (EAT(']'))
 717      -                CHadd(p, cs, ']');
 718      -        else if (EAT('-'))
 719      -                CHadd(p, cs, '-');
 720      -        while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
 721      -                p_b_term(p, cs);
 722      -        if (EAT('-'))
 723      -                CHadd(p, cs, '-');
 724      -        (void) MUSTEAT(']', REG_EBRACK);
 725      -
 726      -        if (p->error != 0)      /* don't mess things up further */
 727      -                return;
 728      -
 729      -        if (cs->invert && p->g->cflags&REG_NEWLINE)
 730      -                cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
 731      -
 732      -        if ((ch = singleton(cs)) != OUT) {      /* optimize singleton sets */
 733      -                ordinary(p, ch);
 734      -                freeset(p, cs);
 735      -        } else
 736      -                EMIT(OANYOF, (int)(cs - p->g->sets));
 737      -}
 738      -
 739      -/*
 740      - * p_b_term - parse one term of a bracketed character list
 741      - */
 742      -static void
 743      -p_b_term(struct parse *p, cset *cs)
 744      -{
 745      -        char c;
 746      -        wint_t start, finish;
 747      -        wint_t i;
 748      -        locale_t loc = uselocale(NULL);
 749      -
 750      -        /* classify what we've got */
 751      -        switch ((MORE()) ? PEEK() : '\0') {
 752      -        case '[':
 753      -                c = (MORE2()) ? PEEK2() : '\0';
 754      -                break;
 755      -        case '-':
 756      -                SETERROR(REG_ERANGE);
 757      -                return;                 /* NOTE RETURN */
 758      -        default:
 759      -                c = '\0';
 760      -                break;
 761      -        }
 762      -
 763      -        switch (c) {
 764      -        case ':':               /* character class */
 765      -                NEXT2();
 766      -                (void) REQUIRE(MORE(), REG_EBRACK);
 767      -                c = PEEK();
 768      -                (void) REQUIRE(c != '-' && c != ']', REG_ECTYPE);
 769      -                p_b_cclass(p, cs);
 770      -                (void) REQUIRE(MORE(), REG_EBRACK);
 771      -                (void) REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
 772      -                break;
 773      -        case '=':               /* equivalence class */
 774      -                NEXT2();
 775      -                (void) REQUIRE(MORE(), REG_EBRACK);
 776      -                c = PEEK();
 777      -                (void) REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
 778      -                p_b_eclass(p, cs);
 779      -                (void) REQUIRE(MORE(), REG_EBRACK);
 780      -                (void) REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
 781      -                break;
 782      -        default:                /* symbol, ordinary character, or range */
 783      -                start = p_b_symbol(p);
 784      -                if (SEE('-') && MORE2() && PEEK2() != ']') {
 785      -                        /* range */
 786      -                        NEXT();
 787      -                        if (EAT('-'))
 788      -                                finish = '-';
 789      -                        else
 790      -                                finish = p_b_symbol(p);
 791      -                } else
 792      -                        finish = start;
 793      -                if (start == finish)
 794      -                        CHadd(p, cs, start);
 795      -                else {
 796      -                        if (loc->collate->lc_is_posix) {
 797      -                                (void) REQUIRE((uch)start <= (uch)finish,
 798      -                                    REG_ERANGE);
 799      -                                CHaddrange(p, cs, start, finish);
 800      -                        } else {
 801      -                                (void) REQUIRE(_collate_range_cmp(start,
 802      -                                    finish, loc) <= 0, REG_ERANGE);
 803      -                                for (i = 0; i <= UCHAR_MAX; i++) {
 804      -                                        if (_collate_range_cmp(start, i, loc)
 805      -                                            <= 0 &&
 806      -                                            _collate_range_cmp(i, finish, loc)
 807      -                                            <= 0)
 808      -                                                CHadd(p, cs, i);
 809      -                                }
 810      -                        }
 811      -                }
 812      -                break;
 813      -        }
 814      -}
 815      -
 816      -/*
 817      - * p_b_cclass - parse a character-class name and deal with it
 818      - */
 819      -static void
 820      -p_b_cclass(struct parse *p, cset *cs)
 821      -{
 822      -        const char *sp = p->next;
 823      -        size_t len;
 824      -        wctype_t wct;
 825      -        char clname[16];
 826      -
 827      -        while (MORE() && isalpha((uch)PEEK()))
 828      -                NEXT();
 829      -        len = p->next - sp;
 830      -        if (len >= sizeof (clname) - 1) {
 831      -                SETERROR(REG_ECTYPE);
 832      -                return;
 833      -        }
 834      -        (void) memcpy(clname, sp, len);
 835      -        clname[len] = '\0';
 836      -        if ((wct = wctype(clname)) == 0) {
 837      -                SETERROR(REG_ECTYPE);
 838      -                return;
 839      -        }
 840      -        CHaddtype(p, cs, wct);
 841      -}
 842      -
 843      -/*
 844      - * p_b_eclass - parse an equivalence-class name and deal with it
 845      - *
 846      - * This implementation is incomplete. xxx
 847      - */
 848      -static void
 849      -p_b_eclass(struct parse *p, cset *cs)
 850      -{
 851      -        wint_t c;
 852      -
 853      -        c = p_b_coll_elem(p, '=');
 854      -        CHadd(p, cs, c);
 855      -}
 856      -
 857      -/*
 858      - * p_b_symbol - parse a character or [..]ed multicharacter collating symbol
 859      - */
 860      -static wint_t                   /* value of symbol */
 861      -p_b_symbol(struct parse *p)
 862      -{
 863      -        wint_t value;
 864      -
 865      -        (void) REQUIRE(MORE(), REG_EBRACK);
 866      -        if (!EATTWO('[', '.'))
 867      -                return (WGETNEXT());
 868      -
 869      -        /* collating symbol */
 870      -        value = p_b_coll_elem(p, '.');
 871      -        (void) REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
 872      -        return (value);
 873      -}
 874      -
 875      -/*
 876      - * p_b_coll_elem - parse a collating-element name and look it up
 877      - */
 878      -static wint_t                   /* value of collating element */
 879      -p_b_coll_elem(struct parse *p,
 880      -    wint_t endc)                /* name ended by endc,']' */
 881      -{
 882      -        const char *sp = p->next;
 883      -        struct cname *cp;
 884      -        mbstate_t mbs;
 885      -        wchar_t wc;
 886      -        size_t clen, len;
 887      -
 888      -        while (MORE() && !SEETWO(endc, ']'))
 889      -                NEXT();
 890      -        if (!MORE()) {
 891      -                SETERROR(REG_EBRACK);
 892      -                return (0);
 893      -        }
 894      -        len = p->next - sp;
 895      -        for (cp = cnames; cp->name != NULL; cp++)
 896      -                if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
 897      -                        return (cp->code);      /* known name */
 898      -        (void) memset(&mbs, 0, sizeof (mbs));
 899      -        if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
 900      -                return (wc);                    /* single character */
 901      -        else if (clen == (size_t)-1 || clen == (size_t)-2)
 902      -                SETERROR(REG_ECHAR);
 903      -        else
 904      -                SETERROR(REG_ECOLLATE);         /* neither */
 905      -        return (0);
 906      -}
 907      -
 908      -/*
 909      - * othercase - return the case counterpart of an alphabetic
 910      - */
 911      -static wint_t                   /* if no counterpart, return ch */
 912      -othercase(wint_t ch)
 913      -{
 914      -        assert(iswalpha(ch));
 915      -        if (iswupper(ch))
 916      -                return (towlower(ch));
 917      -        else if (iswlower(ch))
 918      -                return (towupper(ch));
 919      -        else                    /* peculiar, but could happen */
 920      -                return (ch);
 921      -}
 922      -
 923      -/*
 924      - * bothcases - emit a dualcase version of a two-case character
 925      - *
 926      - * Boy, is this implementation ever a kludge...
 927      - */
 928      -static void
 929      -bothcases(struct parse *p, wint_t ch)
 930      -{
 931      -        const char *oldnext = p->next;
 932      -        const char *oldend = p->end;
 933      -        char bracket[3 + MB_LEN_MAX];
 934      -        size_t n;
 935      -        mbstate_t mbs;
 936      -
 937      -        assert(othercase(ch) != ch);    /* p_bracket() would recurse */
 938      -        p->next = bracket;
 939      -        (void) memset(&mbs, 0, sizeof (mbs));
 940      -        n = wcrtomb(bracket, ch, &mbs);
 941      -        assert(n != (size_t)-1);
 942      -        bracket[n] = ']';
 943      -        bracket[n + 1] = '\0';
 944      -        p->end = bracket+n+1;
 945      -        p_bracket(p);
 946      -        assert(p->next == p->end);
 947      -        p->next = oldnext;
 948      -        p->end = oldend;
 949      -}
 950      -
 951      -/*
 952      - * ordinary - emit an ordinary character
 953      - */
 954      -static void
 955      -ordinary(struct parse *p, wint_t ch)
 956      -{
 957      -        cset *cs;
 958      -
 959      -        if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
 960      -                bothcases(p, ch);
 961      -        else if ((ch & OPDMASK) == ch)
 962      -                EMIT(OCHAR, ch);
 963      -        else {
 964      -                /*
 965      -                 * Kludge: character is too big to fit into an OCHAR operand.
 966      -                 * Emit a singleton set.
 967      -                 */
 968      -                if ((cs = allocset(p)) == NULL)
 969      -                        return;
 970      -                CHadd(p, cs, ch);
 971      -                EMIT(OANYOF, (int)(cs - p->g->sets));
 972      -        }
 973      -}
 974      -
 975      -/*
 976      - * nonnewline - emit REG_NEWLINE version of OANY
 977      - *
 978      - * Boy, is this implementation ever a kludge...
 979      - */
 980      -static void
 981      -nonnewline(struct parse *p)
 982      -{
 983      -        const char *oldnext = p->next;
 984      -        const char *oldend = p->end;
 985      -        char bracket[4];
 986      -
 987      -        p->next = bracket;
 988      -        p->end = bracket+3;
 989      -        bracket[0] = '^';
 990      -        bracket[1] = '\n';
 991      -        bracket[2] = ']';
 992      -        bracket[3] = '\0';
 993      -        p_bracket(p);
 994      -        assert(p->next == bracket+3);
 995      -        p->next = oldnext;
 996      -        p->end = oldend;
 997      -}
 998      -
 999      -/*
1000      - * repeat - generate code for a bounded repetition, recursively if needed
1001      - */
1002      -static void
1003      -repeat(struct parse *p,
1004      -    sopno start,                /* operand from here to end of strip */
1005      -    int from,                   /* repeated from this number */
1006      -    int to)                     /* to this number of times (maybe INFINITY) */
1007      -{
1008      -        sopno finish = HERE();
1009      -#define N       2
1010      -#define INF     3
1011      -#define REP(f, t)       ((f)*8 + (t))
1012      -#define MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
1013      -        sopno copy;
1014      -
1015      -        if (p->error != 0)      /* head off possible runaway recursion */
1016      -                return;
1017      -
1018      -        assert(from <= to);
1019      -
1020      -        switch (REP(MAP(from), MAP(to))) {
1021      -        case REP(0, 0):                 /* must be user doing this */
1022      -                DROP(finish-start);     /* drop the operand */
1023      -                break;
1024      -        case REP(0, 1):                 /* as x{1,1}? */
1025      -        case REP(0, N):                 /* as x{1,n}? */
1026      -        case REP(0, INF):               /* as x{1,}? */
1027      -                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1028      -                INSERT(OCH_, start);            /* offset is wrong... */
1029      -                repeat(p, start+1, 1, to);
1030      -                ASTERN(OOR1, start);
1031      -                AHEAD(start);                   /* ... fix it */
1032      -                EMIT(OOR2, 0);
1033      -                AHEAD(THERE());
1034      -                ASTERN(O_CH, THERETHERE());
1035      -                break;
1036      -        case REP(1, 1):                 /* trivial case */
1037      -                /* done */
1038      -                break;
1039      -        case REP(1, N):                 /* as x?x{1,n-1} */
1040      -                /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1041      -                INSERT(OCH_, start);
1042      -                ASTERN(OOR1, start);
1043      -                AHEAD(start);
1044      -                EMIT(OOR2, 0);                  /* offset very wrong... */
1045      -                AHEAD(THERE());                 /* ...so fix it */
1046      -                ASTERN(O_CH, THERETHERE());
1047      -                copy = dupl(p, start+1, finish+1);
1048      -                assert(copy == finish+4);
1049      -                repeat(p, copy, 1, to-1);
1050      -                break;
1051      -        case REP(1, INF):               /* as x+ */
1052      -                INSERT(OPLUS_, start);
1053      -                ASTERN(O_PLUS, start);
1054      -                break;
1055      -        case REP(N, N):                 /* as xx{m-1,n-1} */
1056      -                copy = dupl(p, start, finish);
1057      -                repeat(p, copy, from-1, to-1);
1058      -                break;
1059      -        case REP(N, INF):               /* as xx{n-1,INF} */
1060      -                copy = dupl(p, start, finish);
1061      -                repeat(p, copy, from-1, to);
1062      -                break;
1063      -        default:                        /* "can't happen" */
1064      -                SETERROR(REG_EFATAL);   /* just in case */
1065      -                break;
1066      -        }
1067      -}
1068      -
1069      -/*
1070      - * wgetnext - helper function for WGETNEXT() macro. Gets the next wide
1071      - * character from the parse struct, signals a REG_ILLSEQ error if the
1072      - * character can't be converted. Returns the number of bytes consumed.
1073      - */
1074      -static wint_t
1075      -wgetnext(struct parse *p)
1076      -{
1077      -        mbstate_t mbs;
1078      -        wchar_t wc;
1079      -        size_t n;
1080      -
1081      -        (void) memset(&mbs, 0, sizeof (mbs));
1082      -        n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
1083      -        if (n == (size_t)-1 || n == (size_t)-2) {
1084      -                SETERROR(REG_ECHAR);
1085      -                return (0);
1086      -        }
1087      -        if (n == 0)
1088      -                n = 1;
1089      -        p->next += n;
1090      -        return (wc);
1091      -}
1092      -
1093      -/*
1094      - * seterr - set an error condition
1095      - */
1096      -static int                      /* useless but makes type checking happy */
1097      -seterr(struct parse *p, int e)
1098      -{
1099      -        if (p->error == 0)      /* keep earliest error condition */
1100      -                p->error = e;
1101      -        p->next = nuls;         /* try to bring things to a halt */
1102      -        p->end = nuls;
1103      -        return (0);             /* make the return value well-defined */
1104      -}
1105      -
1106      -/*
1107      - * allocset - allocate a set of characters for []
1108      - */
1109      -static cset *
1110      -allocset(struct parse *p)
1111      -{
1112      -        cset *cs, *ncs;
1113      -
1114      -        ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof (*ncs));
1115      -        if (ncs == NULL) {
1116      -                SETERROR(REG_ESPACE);
1117      -                return (NULL);
1118      -        }
1119      -        p->g->sets = ncs;
1120      -        cs = &p->g->sets[p->g->ncsets++];
1121      -        (void) memset(cs, 0, sizeof (*cs));
1122      -
1123      -        return (cs);
1124      -}
1125      -
1126      -/*
1127      - * freeset - free a now-unused set
1128      - */
1129      -static void
1130      -freeset(struct parse *p, cset *cs)
1131      -{
1132      -        cset *top = &p->g->sets[p->g->ncsets];
1133      -
1134      -        free(cs->wides);
1135      -        free(cs->ranges);
1136      -        free(cs->types);
1137      -        (void) memset(cs, 0, sizeof (*cs));
1138      -        if (cs == top-1)        /* recover only the easy case */
1139      -                p->g->ncsets--;
1140      -}
1141      -
1142      -/*
1143      - * singleton - Determine whether a set contains only one character,
1144      - * returning it if so, otherwise returning OUT.
1145      - */
1146      -static wint_t
1147      -singleton(cset *cs)
1148      -{
1149      -        wint_t i, s, n;
1150      -
1151      -        for (i = n = 0; i < NC; i++)
1152      -                if (CHIN(cs, i)) {
1153      -                        n++;
1154      -                        s = i;
1155      -                }
1156      -        if (n == 1)
1157      -                return (s);
1158      -        if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
1159      -            cs->icase == 0)
1160      -                return (cs->wides[0]);
1161      -        /* Don't bother handling the other cases. */
1162      -        return (OUT);
1163      -}
1164      -
1165      -/*
1166      - * CHadd - add character to character set.
1167      - */
1168      -static void
1169      -CHadd(struct parse *p, cset *cs, wint_t ch)
1170      -{
1171      -        wint_t nch, *newwides;
1172      -        assert(ch >= 0);
1173      -        if (ch < NC)
1174      -                cs->bmp[ch >> 3] |= 1 << (ch & 7);
1175      -        else {
1176      -                newwides = realloc(cs->wides, (cs->nwides + 1) *
1177      -                    sizeof (*cs->wides));
1178      -                if (newwides == NULL) {
1179      -                        SETERROR(REG_ESPACE);
1180      -                        return;
1181      -                }
1182      -                cs->wides = newwides;
1183      -                cs->wides[cs->nwides++] = ch;
1184      -        }
1185      -        if (cs->icase) {
1186      -                if ((nch = towlower(ch)) < NC)
1187      -                        cs->bmp[nch >> 3] |= 1 << (nch & 7);
1188      -                if ((nch = towupper(ch)) < NC)
1189      -                        cs->bmp[nch >> 3] |= 1 << (nch & 7);
1190      -        }
1191      -}
1192      -
1193      -/*
1194      - * CHaddrange - add all characters in the range [min,max] to a character set.
1195      - */
1196      -static void
1197      -CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
1198      -{
1199      -        crange *newranges;
1200      -
1201      -        for (; min < NC && min <= max; min++)
1202      -                CHadd(p, cs, min);
1203      -        if (min >= max)
1204      -                return;
1205      -        newranges = realloc(cs->ranges, (cs->nranges + 1) *
1206      -            sizeof (*cs->ranges));
1207      -        if (newranges == NULL) {
1208      -                SETERROR(REG_ESPACE);
1209      -                return;
1210      -        }
1211      -        cs->ranges = newranges;
1212      -        cs->ranges[cs->nranges].min = min;
1213      -        cs->ranges[cs->nranges].max = max;
1214      -        cs->nranges++;
1215      -}
1216      -
1217      -/*
1218      - * CHaddtype - add all characters of a certain type to a character set.
1219      - */
1220      -static void
1221      -CHaddtype(struct parse *p, cset *cs, wctype_t wct)
1222      -{
1223      -        wint_t i;
1224      -        wctype_t *newtypes;
1225      -
1226      -        for (i = 0; i < NC; i++)
1227      -                if (iswctype(i, wct))
1228      -                        CHadd(p, cs, i);
1229      -        newtypes = realloc(cs->types, (cs->ntypes + 1) *
1230      -            sizeof (*cs->types));
1231      -        if (newtypes == NULL) {
1232      -                SETERROR(REG_ESPACE);
1233      -                return;
1234      -        }
1235      -        cs->types = newtypes;
1236      -        cs->types[cs->ntypes++] = wct;
1237      -}
1238      -
1239      -/*
1240      - * dupl - emit a duplicate of a bunch of sops
1241      - */
1242      -static sopno                    /* start of duplicate */
1243      -dupl(struct parse *p,
1244      -    sopno start,                /* from here */
1245      -    sopno finish)               /* to this less one */
1246      -{
1247      -        sopno ret = HERE();
1248      -        sopno len = finish - start;
1249      -
1250      -        assert(finish >= start);
1251      -        if (len == 0)
1252      -                return (ret);
1253      -        if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
1254      -                return (ret);
1255      -        assert(p->ssize >= p->slen + len);
1256      -        (void) memcpy((char *)(p->strip + p->slen),
1257      -            (char *)(p->strip + start), (size_t)len*sizeof (sop));
1258      -        p->slen += len;
1259      -        return (ret);
1260      -}
1261      -
1262      -/*
1263      - * doemit - emit a strip operator
1264      - *
1265      - * It might seem better to implement this as a macro with a function as
1266      - * hard-case backup, but it's just too big and messy unless there are
1267      - * some changes to the data structures.  Maybe later.
1268      - */
1269      -static void
1270      -doemit(struct parse *p, sop op, size_t opnd)
1271      -{
1272      -        /* avoid making error situations worse */
1273      -        if (p->error != 0)
1274      -                return;
1275      -
1276      -        /* deal with oversize operands ("can't happen", more or less) */
1277      -        assert(opnd < 1<<OPSHIFT);
1278      -
1279      -        /* deal with undersized strip */
1280      -        if (p->slen >= p->ssize)
1281      -                if (!enlarge(p, (p->ssize+1) / 2 * 3))  /* +50% */
1282      -                        return;
1283      -
1284      -        /* finally, it's all reduced to the easy case */
1285      -        p->strip[p->slen++] = SOP(op, opnd);
1286      -}
1287      -
1288      -/*
1289      - * doinsert - insert a sop into the strip
1290      - */
1291      -static void
1292      -doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1293      -{
1294      -        sopno sn;
1295      -        sop s;
1296      -        int i;
1297      -
1298      -        /* avoid making error situations worse */
1299      -        if (p->error != 0)
1300      -                return;
1301      -
1302      -        sn = HERE();
1303      -        EMIT(op, opnd);         /* do checks, ensure space */
1304      -        assert(HERE() == sn+1);
1305      -        s = p->strip[sn];
1306      -
1307      -        /* adjust paren pointers */
1308      -        assert(pos > 0);
1309      -        for (i = 1; i < NPAREN; i++) {
1310      -                if (p->pbegin[i] >= pos) {
1311      -                        p->pbegin[i]++;
1312      -                }
1313      -                if (p->pend[i] >= pos) {
1314      -                        p->pend[i]++;
1315      -                }
1316      -        }
1317      -
1318      -        (void) memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1319      -            (HERE()-pos-1)*sizeof (sop));
1320      -        p->strip[pos] = s;
1321      -}
1322      -
1323      -/*
1324      - * dofwd - complete a forward reference
1325      - */
1326      -static void
1327      -dofwd(struct parse *p, sopno pos, sop value)
1328      -{
1329      -        /* avoid making error situations worse */
1330      -        if (p->error != 0)
1331      -                return;
1332      -
1333      -        assert(value < 1<<OPSHIFT);
1334      -        p->strip[pos] = OP(p->strip[pos]) | value;
1335      -}
1336      -
1337      -/*
1338      - * enlarge - enlarge the strip
1339      - */
1340      -static int
1341      -enlarge(struct parse *p, sopno size)
1342      -{
1343      -        sop *sp;
1344      -
1345      -        if (p->ssize >= size)
1346      -                return (1);
1347      -
1348      -        sp = (sop *)realloc(p->strip, size*sizeof (sop));
1349      -        if (sp == NULL) {
1350      -                SETERROR(REG_ESPACE);
1351      -                return (0);
1352      -        }
1353      -        p->strip = sp;
1354      -        p->ssize = size;
1355      -        return (1);
1356      -}
1357      -
1358      -/*
1359      - * stripsnug - compact the strip
1360      - */
1361      -static void
1362      -stripsnug(struct parse *p, struct re_guts *g)
1363      -{
1364      -        g->nstates = p->slen;
1365      -        g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof (sop));
1366      -        if (g->strip == NULL) {
1367      -                SETERROR(REG_ESPACE);
1368      -                g->strip = p->strip;
1369      -        }
1370      -}
1371      -
1372      -/*
1373      - * findmust - fill in must and mlen with longest mandatory literal string
1374      - *
1375      - * This algorithm could do fancy things like analyzing the operands of |
1376      - * for common subsequences.  Someday.  This code is simple and finds most
1377      - * of the interesting cases.
1378      - *
1379      - * Note that must and mlen got initialized during setup.
1380      - */
1381      -static void
1382      -findmust(struct parse *p, struct re_guts *g)
1383      -{
1384      -        sop *scan;
1385      -        sop *start = NULL;
1386      -        sop *newstart = NULL;
1387      -        sopno newlen;
1388      -        sop s;
1389      -        char *cp;
1390      -        int offset;
1391      -        char buf[MB_LEN_MAX];
1392      -        size_t clen;
1393      -        mbstate_t mbs;
1394      -        locale_t loc = uselocale(NULL);
1395      -
1396      -        /* avoid making error situations worse */
1397      -        if (p->error != 0)
1398      -                return;
1399      -
1400      -        /*
1401      -         * It's not generally safe to do a ``char'' substring search on
1402      -         * multibyte character strings, but it's safe for at least
1403      -         * UTF-8 (see RFC 3629).
1404      -         */
1405      -        if (MB_CUR_MAX > 1 &&
1406      -            strcmp(loc->runelocale->__encoding, "UTF-8") != 0)
1407      -                return;
1408      -
1409      -        /* find the longest OCHAR sequence in strip */
1410      -        newlen = 0;
1411      -        offset = 0;
1412      -        g->moffset = 0;
1413      -        scan = g->strip + 1;
1414      -        do {
1415      -                s = *scan++;
1416      -                switch (OP(s)) {
1417      -                case OCHAR:             /* sequence member */
1418      -                        if (newlen == 0) {              /* new sequence */
1419      -                                (void) memset(&mbs, 0, sizeof (mbs));
1420      -                                newstart = scan - 1;
1421      -                        }
1422      -                        clen = wcrtomb(buf, OPND(s), &mbs);
1423      -                        if (clen == (size_t)-1)
1424      -                                goto toohard;
1425      -                        newlen += clen;
1426      -                        break;
1427      -                case OPLUS_:            /* things that don't break one */
1428      -                case OLPAREN:
1429      -                case ORPAREN:
1430      -                        break;
1431      -                case OQUEST_:           /* things that must be skipped */
1432      -                case OCH_:
1433      -                        offset = altoffset(scan, offset);
1434      -                        scan--;
1435      -                        do {
1436      -                                scan += OPND(s);
1437      -                                s = *scan;
1438      -                                /* assert() interferes w debug printouts */
1439      -                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1440      -                                    OP(s) != OOR2) {
1441      -                                        g->iflags |= BAD;
1442      -                                        return;
1443      -                                }
1444      -                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
1445      -                        /* FALLTHROUGH */
1446      -                case OBOW:              /* things that break a sequence */
1447      -                case OEOW:
1448      -                case OBOL:
1449      -                case OEOL:
1450      -                case O_QUEST:
1451      -                case O_CH:
1452      -                case OEND:
1453      -                        if (newlen > g->mlen) {         /* ends one */
1454      -                                start = newstart;
1455      -                                g->mlen = newlen;
1456      -                                if (offset > -1) {
1457      -                                        g->moffset += offset;
1458      -                                        offset = newlen;
1459      -                                } else
1460      -                                        g->moffset = offset;
1461      -                        } else {
1462      -                                if (offset > -1)
1463      -                                        offset += newlen;
1464      -                        }
1465      -                        newlen = 0;
1466      -                        break;
1467      -                case OANY:
1468      -                        if (newlen > g->mlen) {         /* ends one */
1469      -                                start = newstart;
1470      -                                g->mlen = newlen;
1471      -                                if (offset > -1) {
1472      -                                        g->moffset += offset;
1473      -                                        offset = newlen;
1474      -                                } else
1475      -                                        g->moffset = offset;
1476      -                        } else {
1477      -                                if (offset > -1)
1478      -                                        offset += newlen;
1479      -                        }
1480      -                        if (offset > -1)
1481      -                                offset++;
1482      -                        newlen = 0;
1483      -                        break;
1484      -                case OANYOF:            /* may or may not invalidate offset */
1485      -                        /* First, everything as OANY */
1486      -                        if (newlen > g->mlen) {         /* ends one */
1487      -                                start = newstart;
1488      -                                g->mlen = newlen;
1489      -                                if (offset > -1) {
1490      -                                        g->moffset += offset;
1491      -                                        offset = newlen;
1492      -                                } else
1493      -                                        g->moffset = offset;
1494      -                        } else {
1495      -                                if (offset > -1)
1496      -                                        offset += newlen;
1497      -                        }
1498      -                        if (offset > -1)
1499      -                                offset++;
1500      -                        newlen = 0;
1501      -                        break;
1502      -                toohard:
1503      -                default:
1504      -                        /*
1505      -                         * Anything here makes it impossible or too hard
1506      -                         * to calculate the offset -- so we give up;
1507      -                         * save the last known good offset, in case the
1508      -                         * must sequence doesn't occur later.
1509      -                         */
1510      -                        if (newlen > g->mlen) {         /* ends one */
1511      -                                start = newstart;
1512      -                                g->mlen = newlen;
1513      -                                if (offset > -1)
1514      -                                        g->moffset += offset;
1515      -                                else
1516      -                                        g->moffset = offset;
1517      -                        }
1518      -                        offset = -1;
1519      -                        newlen = 0;
1520      -                        break;
1521      -                }
1522      -        } while (OP(s) != OEND);
1523      -
1524      -        if (g->mlen == 0) {             /* there isn't one */
1525      -                g->moffset = -1;
1526      -                return;
1527      -        }
1528      -
1529      -        /* turn it into a character string */
1530      -        g->must = malloc((size_t)g->mlen + 1);
1531      -        if (g->must == NULL) {          /* argh; just forget it */
1532      -                g->mlen = 0;
1533      -                g->moffset = -1;
1534      -                return;
1535      -        }
1536      -        cp = g->must;
1537      -        scan = start;
1538      -        (void) memset(&mbs, 0, sizeof (mbs));
1539      -        while (cp < g->must + g->mlen) {
1540      -                while (OP(s = *scan++) != OCHAR)
1541      -                        continue;
1542      -                clen = wcrtomb(cp, OPND(s), &mbs);
1543      -                assert(clen != (size_t)-1);
1544      -                cp += clen;
1545      -        }
1546      -        assert(cp == g->must + g->mlen);
1547      -        *cp++ = '\0';           /* just on general principles */
1548      -}
1549      -
1550      -/*
1551      - * altoffset - choose biggest offset among multiple choices
1552      - *
1553      - * Compute, recursively if necessary, the largest offset among multiple
1554      - * re paths.
1555      - */
1556      -static int
1557      -altoffset(sop *scan, int offset)
1558      -{
1559      -        int largest;
1560      -        int try;
1561      -        sop s;
1562      -
1563      -        /* If we gave up already on offsets, return */
1564      -        if (offset == -1)
1565      -                return (-1);
1566      -
1567      -        largest = 0;
1568      -        try = 0;
1569      -        s = *scan++;
1570      -        while (OP(s) != O_QUEST && OP(s) != O_CH) {
1571      -                switch (OP(s)) {
1572      -                case OOR1:
1573      -                        if (try > largest)
1574      -                                largest = try;
1575      -                        try = 0;
1576      -                        break;
1577      -                case OQUEST_:
1578      -                case OCH_:
1579      -                        try = altoffset(scan, try);
1580      -                        if (try == -1)
1581      -                                return (-1);
1582      -                        scan--;
1583      -                        do {
1584      -                                scan += OPND(s);
1585      -                                s = *scan;
1586      -                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1587      -                                    OP(s) != OOR2)
1588      -                                        return (-1);
1589      -                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
1590      -                        /*
1591      -                         * We must skip to the next position, or we'll
1592      -                         * leave altoffset() too early.
1593      -                         */
1594      -                        scan++;
1595      -                        break;
1596      -                case OANYOF:
1597      -                case OCHAR:
1598      -                case OANY:
1599      -                        try++;
1600      -                        /*FALLTHRU*/
1601      -                case OBOW:
1602      -                case OEOW:
1603      -                case OLPAREN:
1604      -                case ORPAREN:
1605      -                case OOR2:
1606      -                        break;
1607      -                default:
1608      -                        try = -1;
1609      -                        break;
1610      -                }
1611      -                if (try == -1)
1612      -                        return (-1);
1613      -                s = *scan++;
1614      -        }
1615      -
1616      -        if (try > largest)
1617      -                largest = try;
1618      -
1619      -        return (largest+offset);
1620      -}
1621      -
1622      -/*
1623      - * computejumps - compute char jumps for BM scan
1624      - *
1625      - * This algorithm assumes g->must exists and is has size greater than
1626      - * zero. It's based on the algorithm found on Computer Algorithms by
1627      - * Sara Baase.
1628      - *
1629      - * A char jump is the number of characters one needs to jump based on
1630      - * the value of the character from the text that was mismatched.
1631      - */
1632      -static void
1633      -computejumps(struct parse *p, struct re_guts *g)
1634      -{
1635      -        int ch;
1636      -        int mindex;
1637      -
1638      -        /* Avoid making errors worse */
1639      -        if (p->error != 0)
1640      -                return;
1641      -
1642      -        g->charjump = (int *)malloc((NC + 1) * sizeof (int));
1643      -        if (g->charjump == NULL)        /* Not a fatal error */
1644      -                return;
1645      -        /* Adjust for signed chars, if necessary */
1646      -        g->charjump = &g->charjump[-(CHAR_MIN)];
1647      -
1648      -        /*
1649      -         * If the character does not exist in the pattern, the jump
1650      -         * is equal to the number of characters in the pattern.
1651      -         */
1652      -        for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
1653      -                g->charjump[ch] = g->mlen;
1654      -
1655      -        /*
1656      -         * If the character does exist, compute the jump that would
1657      -         * take us to the last character in the pattern equal to it
1658      -         * (notice that we match right to left, so that last character
1659      -         * is the first one that would be matched).
1660      -         */
1661      -        for (mindex = 0; mindex < g->mlen; mindex++)
1662      -                g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
1663      -}
1664      -
1665      -/*
1666      - * computematchjumps - compute match jumps for BM scan
1667      - *
1668      - * This algorithm assumes g->must exists and is has size greater than
1669      - * zero. It's based on the algorithm found on Computer Algorithms by
1670      - * Sara Baase.
1671      - *
1672      - * A match jump is the number of characters one needs to advance based
1673      - * on the already-matched suffix.
1674      - * Notice that all values here are minus (g->mlen-1), because of the way
1675      - * the search algorithm works.
1676      - */
1677      -static void
1678      -computematchjumps(struct parse *p, struct re_guts *g)
1679      -{
1680      -        int mindex;             /* General "must" iterator */
1681      -        int suffix;             /* Keeps track of matching suffix */
1682      -        int ssuffix;            /* Keeps track of suffixes' suffix */
1683      -        int *pmatches;
1684      -                                /*
1685      -                                 * pmatches[k] points to the next i
1686      -                                 * such that i+1...mlen is a substring
1687      -                                 * of k+1...k+mlen-i-1
1688      -                                 */
1689      -
1690      -        /* Avoid making errors worse */
1691      -        if (p->error != 0)
1692      -                return;
1693      -
1694      -        pmatches = (int *)malloc(g->mlen * sizeof (unsigned int));
1695      -        if (pmatches == NULL) {
1696      -                g->matchjump = NULL;
1697      -                return;
1698      -        }
1699      -
1700      -        g->matchjump = (int *)malloc(g->mlen * sizeof (unsigned int));
1701      -        if (g->matchjump == NULL) {     /* Not a fatal error */
1702      -                free(pmatches);
1703      -                return;
1704      -        }
1705      -
1706      -        /* Set maximum possible jump for each character in the pattern */
1707      -        for (mindex = 0; mindex < g->mlen; mindex++)
1708      -                g->matchjump[mindex] = 2*g->mlen - mindex - 1;
1709      -
1710      -        /* Compute pmatches[] */
1711      -        for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
1712      -            mindex--, suffix--) {
1713      -                pmatches[mindex] = suffix;
1714      -
1715      -                /*
1716      -                 * If a mismatch is found, interrupting the substring,
1717      -                 * compute the matchjump for that position. If no
1718      -                 * mismatch is found, then a text substring mismatched
1719      -                 * against the suffix will also mismatch against the
1720      -                 * substring.
1721      -                 */
1722      -                while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) {
1723      -                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
1724      -                            g->mlen - mindex - 1);
1725      -                        suffix = pmatches[suffix];
1726      -                }
1727      -        }
1728      -
1729      -        /*
1730      -         * Compute the matchjump up to the last substring found to jump
1731      -         * to the beginning of the largest must pattern prefix matching
1732      -         * it's own suffix.
1733      -         */
1734      -        for (mindex = 0; mindex <= suffix; mindex++)
1735      -                g->matchjump[mindex] = MIN(g->matchjump[mindex],
1736      -                    g->mlen + suffix - mindex);
1737      -
1738      -        ssuffix = pmatches[suffix];
1739      -        while (suffix < g->mlen) {
1740      -                while (suffix <= ssuffix && suffix < g->mlen) {
1741      -                        g->matchjump[suffix] = MIN(g->matchjump[suffix],
1742      -                            g->mlen + ssuffix - suffix);
1743      -                        suffix++;
1744      -                }
1745      -                if (suffix < g->mlen)
1746      -                        ssuffix = pmatches[ssuffix];
1747      -        }
1748      -
1749      -        free(pmatches);
1750      -}
1751      -
1752      -/*
1753      - * pluscount - count + nesting
1754      - */
1755      -static sopno                    /* nesting depth */
1756      -pluscount(struct parse *p, struct re_guts *g)
1757      -{
1758      -        sop *scan;
1759      -        sop s;
1760      -        sopno plusnest = 0;
1761      -        sopno maxnest = 0;
1762      -
1763      -        if (p->error != 0)
1764      -                return (0);     /* there may not be an OEND */
1765      -
1766      -        scan = g->strip + 1;
1767      -        do {
1768      -                s = *scan++;
1769      -                switch (OP(s)) {
1770      -                case OPLUS_:
1771      -                        plusnest++;
1772      -                        break;
1773      -                case O_PLUS:
1774      -                        if (plusnest > maxnest)
1775      -                                maxnest = plusnest;
1776      -                        plusnest--;
1777      -                        break;
1778      -                }
1779      -        } while (OP(s) != OEND);
1780      -        if (plusnest != 0)
1781      -                g->iflags |= BAD;
1782      -        return (maxnest);
      140 +  tre_free(preg);
1783  141  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX