iltre Cdiff usr/src/lib/libc/port/regex/regcomp.c

Print this page

9083 replace regex implementation with tre


*** 1,1783 ****
  /*
!  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
!  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
!  * Copyright 2012 Milan Jurik. All rights reserved.
!  * Copyright (c) 1992, 1993, 1994 Henry Spencer.
!  * Copyright (c) 1992, 1993, 1994
!  *      The Regents of the University of California.  All rights reserved.
   *
-  * This code is derived from software contributed to Berkeley by
-  * Henry Spencer.
-  *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in the
   *    documentation and/or other materials provided with the distribution.
-  * 3. Neither the name of the University nor the names of its contributors
-  *    may be used to endorse or promote products derived from this software
-  *    without specific prior written permission.
   *
!  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
!  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
!  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
!  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
!  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
!  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
!  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
!  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
!  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
!  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
!  * SUCH DAMAGE.
   */
  
! #include "lint.h"
! #include "file64.h"
! #include <sys/types.h>
! #include <stdio.h>
  #include <string.h>
! #include <ctype.h>
! #include <limits.h>
  #include <stdlib.h>
- #include <regex.h>
- #include <wchar.h>
- #include <wctype.h>
  
! #include "../locale/runetype.h"
! #include "../locale/collate.h"
  
! #include "utils.h"
! #include "regex2.h"
! 
! #include "cname.h"
! #include "../locale/mblocal.h"
! 
! /*
!  * parse structure, passed up and down to avoid global variables and
!  * other clumsinesses
!  */
! struct parse {
!         const char *next;       /* next character in RE */
!         const char *end;        /* end of string (-> NUL normally) */
!         int error;              /* has an error been seen? */
!         sop *strip;             /* malloced strip */
!         sopno ssize;            /* malloced strip size (allocated) */
!         sopno slen;             /* malloced strip length (used) */
!         int ncsalloc;           /* number of csets allocated */
!         struct re_guts *g;
! #define NPAREN  10              /* we need to remember () 1-9 for back refs */
!         sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
!         sopno pend[NPAREN];     /* -> ) ([0] unused) */
! };
! 
! /* ========= begin header generated by ./mkh ========= */
! #ifdef __cplusplus
! extern "C" {
! #endif
! 
! /* === regcomp.c === */
! static void p_ere(struct parse *p, int stop);
! static void p_ere_exp(struct parse *p);
! static void p_str(struct parse *p);
! static void p_bre(struct parse *p, int end1, int end2);
! static int p_simp_re(struct parse *p, int starordinary);
! static int p_count(struct parse *p);
! static void p_bracket(struct parse *p);
! static void p_b_term(struct parse *p, cset *cs);
! static void p_b_cclass(struct parse *p, cset *cs);
! static void p_b_eclass(struct parse *p, cset *cs);
! static wint_t p_b_symbol(struct parse *p);
! static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
! static wint_t othercase(wint_t ch);
! static void bothcases(struct parse *p, wint_t ch);
! static void ordinary(struct parse *p, wint_t ch);
! static void nonnewline(struct parse *p);
! static void repeat(struct parse *p, sopno start, int from, int to);
! static int seterr(struct parse *p, int e);
! static cset *allocset(struct parse *p);
! static void freeset(struct parse *p, cset *cs);
! static void CHadd(struct parse *p, cset *cs, wint_t ch);
! static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
! static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
! static wint_t singleton(cset *cs);
! static sopno dupl(struct parse *p, sopno start, sopno finish);
! static void doemit(struct parse *p, sop op, size_t opnd);
! static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
! static void dofwd(struct parse *p, sopno pos, sop value);
! static int enlarge(struct parse *p, sopno size);
! static void stripsnug(struct parse *p, struct re_guts *g);
! static void findmust(struct parse *p, struct re_guts *g);
! static int altoffset(sop *scan, int offset);
! static void computejumps(struct parse *p, struct re_guts *g);
! static void computematchjumps(struct parse *p, struct re_guts *g);
! static sopno pluscount(struct parse *p, struct re_guts *g);
! static wint_t wgetnext(struct parse *p);
! 
! #ifdef __cplusplus
! }
! #endif
! /* ========= end header generated by ./mkh ========= */
! 
! static char nuls[10];           /* place to point scanner in event of error */
! 
! /*
!  * macros for use with parse structure
!  * BEWARE:  these know that the parse structure is named `p' !!!
!  */
! #define PEEK()  (*p->next)
! #define PEEK2() (*(p->next+1))
! #define MORE()  (p->next < p->end)
! #define MORE2() (p->next+1 < p->end)
! #define SEE(c)  (MORE() && PEEK() == (c))
! #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
! #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
! #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
! #define NEXT()  (p->next++)
! #define NEXT2() (p->next += 2)
! #define NEXTn(n)        (p->next += (n))
! #define GETNEXT()       (*p->next++)
! #define WGETNEXT()      wgetnext(p)
! #define SETERROR(e)     ((void)seterr(p, (e)))
! #define REQUIRE(co, e)  ((co) || seterr(p, e))
! #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
! #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
! #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
! #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
! #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
! #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
! #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
! #define HERE()          (p->slen)
! #define THERE()         (p->slen - 1)
! #define THERETHERE()    (p->slen - 2)
! #define DROP(n) (p->slen -= (n))
! 
! #ifndef NDEBUG
! static int never = 0;           /* for use in asserts; shuts lint up */
! #else
! #define never   0               /* some <assert.h>s have bugs too */
! #endif
! 
! /*
!  * regcomp - interface for parser and compilation
!  */
! int                             /* 0 success, otherwise REG_something */
! regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD pattern,
!     int cflags)
  {
!         struct parse pa;
!         struct re_guts *g;
!         struct parse *p = &pa;
!         int i;
!         size_t len;
!         size_t maxlen;
! #ifdef REDEBUG
! #define GOODFLAGS(f)    (f)
! #else
! #define GOODFLAGS(f)    ((f)&~REG_DUMP)
! #endif
  
!         /* We had REG_INVARG, but we don't have that on Solaris. */
!         cflags = GOODFLAGS(cflags);
!         if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
!                 return (REG_EFATAL);
  
!         if (cflags&REG_PEND) {
!                 if (preg->re_endp < pattern)
!                         return (REG_EFATAL);
!                 len = preg->re_endp - pattern;
!         } else
!                 len = strlen(pattern);
  
!         /* do the mallocs early so failure handling is easy */
!         g = (struct re_guts *)malloc(sizeof (struct re_guts));
!         if (g == NULL)
!                 return (REG_ESPACE);
!         /*
!          * Limit the pattern space to avoid a 32-bit overflow on buffer
!          * extension.  Also avoid any signed overflow in case of conversion
!          * so make the real limit based on a 31-bit overflow.
!          *
!          * Likely not applicable on 64-bit systems but handle the case
!          * generically (who are we to stop people from using ~715MB+
!          * patterns?).
!          */
!         maxlen = ((size_t)-1 >> 1) / sizeof (sop) * 2 / 3;
!         if (len >= maxlen) {
!                 free((char *)g);
!                 return (REG_ESPACE);
          }
-         p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
-         assert(p->ssize >= len);
- 
-         p->strip = (sop *)malloc(p->ssize * sizeof (sop));
-         p->slen = 0;
-         if (p->strip == NULL) {
-                 free((char *)g);
-                 return (REG_ESPACE);
-         }
- 
-         /* set things up */
-         p->g = g;
-         p->next = pattern;      /* convenience; we do not modify it */
-         p->end = p->next + len;
-         p->error = 0;
-         p->ncsalloc = 0;
-         for (i = 0; i < NPAREN; i++) {
-                 p->pbegin[i] = 0;
-                 p->pend[i] = 0;
-         }
-         g->sets = NULL;
-         g->ncsets = 0;
-         g->cflags = cflags;
-         g->iflags = 0;
-         g->nbol = 0;
-         g->neol = 0;
-         g->must = NULL;
-         g->moffset = -1;
-         g->charjump = NULL;
-         g->matchjump = NULL;
-         g->mlen = 0;
-         g->nsub = 0;
-         g->backrefs = 0;
- 
-         /* do it */
-         EMIT(OEND, 0);
-         g->firststate = THERE();
-         if (cflags&REG_EXTENDED)
-                 p_ere(p, OUT);
-         else if (cflags&REG_NOSPEC)
-                 p_str(p);
          else
!                 p_bre(p, OUT, OUT);
!         EMIT(OEND, 0);
!         g->laststate = THERE();
  
!         /* tidy up loose ends and fill things in */
!         stripsnug(p, g);
!         findmust(p, g);
!         /*
!          * only use Boyer-Moore algorithm if the pattern is bigger
!          * than three characters
!          */
!         if (g->mlen > 3) {
!                 computejumps(p, g);
!                 computematchjumps(p, g);
!                 if (g->matchjump == NULL && g->charjump != NULL) {
!                         free(g->charjump);
!                         g->charjump = NULL;
!                 }
!         }
!         g->nplus = pluscount(p, g);
!         g->magic = MAGIC2;
!         preg->re_nsub = g->nsub;
!         preg->re_g = g;
!         preg->re_magic = MAGIC1;
! #ifndef REDEBUG
!         /* not debugging, so can't rely on the assert() in regexec() */
!         if (g->iflags&BAD)
!                 SETERROR(REG_EFATAL);
! #endif
! 
!         /* win or lose, we're done */
!         if (p->error != 0)      /* lose */
!                 regfree(preg);
!         return (p->error);
! }
! 
! /*
!  * p_ere - ERE parser top level, concatenation and alternation
!  */
! static void
! p_ere(struct parse *p,
!     int stop)           /* character this ERE should end at */
! {
!         char c;
!         sopno prevback;
!         sopno prevfwd;
!         sopno conc;
!         int first = 1;          /* is this the first alternative? */
! 
!         for (;;) {
!                 /* do a bunch of concatenated expressions */
!                 conc = HERE();
!                 while (MORE() && (c = PEEK()) != '|' && c != stop)
!                         p_ere_exp(p);
!                 /* require nonempty */
!                 (void) REQUIRE(HERE() != conc, REG_BADPAT);
! 
!                 if (!EAT('|'))
!                         break;          /* NOTE BREAK OUT */
! 
!                 if (first) {
!                         INSERT(OCH_, conc);     /* offset is wrong */
!                         prevfwd = conc;
!                         prevback = conc;
!                         first = 0;
!                 }
!                 ASTERN(OOR1, prevback);
!                 prevback = THERE();
!                 AHEAD(prevfwd);                 /* fix previous offset */
!                 prevfwd = HERE();
!                 EMIT(OOR2, 0);                  /* offset is very wrong */
!         }
! 
!         if (!first) {           /* tail-end fixups */
!                 AHEAD(prevfwd);
!                 ASTERN(O_CH, prevback);
!         }
! 
!         assert(!MORE() || SEE(stop));
! }
! 
! /*
!  * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
!  */
! static void
! p_ere_exp(struct parse *p)
! {
!         char c;
!         wint_t wc;
!         sopno pos;
!         int count;
!         int count2;
!         sopno subno;
!         int wascaret = 0;
! 
!         assert(MORE());         /* caller should have ensured this */
!         c = GETNEXT();
! 
!         pos = HERE();
!         switch (c) {
!         case '(':
!                 (void) REQUIRE(MORE(), REG_EPAREN);
!                 p->g->nsub++;
!                 subno = p->g->nsub;
!                 if (subno < NPAREN)
!                         p->pbegin[subno] = HERE();
!                 EMIT(OLPAREN, subno);
!                 if (!SEE(')'))
!                         p_ere(p, ')');
!                 if (subno < NPAREN) {
!                         p->pend[subno] = HERE();
!                         assert(p->pend[subno] != 0);
!                 }
!                 EMIT(ORPAREN, subno);
!                 (void) MUSTEAT(')', REG_EPAREN);
!                 break;
! #ifndef POSIX_MISTAKE
!         case ')':               /* happens only if no current unmatched ( */
!                 /*
!                  * You may ask, why the ifndef?  Because I didn't notice
!                  * this until slightly too late for 1003.2, and none of the
!                  * other 1003.2 regular-expression reviewers noticed it at
!                  * all.  So an unmatched ) is legal POSIX, at least until
!                  * we can get it fixed.
!                  */
!                 SETERROR(REG_EPAREN);
!                 break;
! #endif
!         case '^':
!                 EMIT(OBOL, 0);
!                 p->g->iflags |= USEBOL;
!                 p->g->nbol++;
!                 wascaret = 1;
!                 break;
!         case '$':
!                 EMIT(OEOL, 0);
!                 p->g->iflags |= USEEOL;
!                 p->g->neol++;
!                 break;
!         case '|':
!                 SETERROR(REG_BADPAT);
!                 break;
!         case '*':
!         case '+':
!         case '?':
!         case '{':
!                 SETERROR(REG_BADRPT);
!                 break;
!         case '.':
!                 if (p->g->cflags&REG_NEWLINE)
!                         nonnewline(p);
                  else
!                         EMIT(OANY, 0);
!                 break;
!         case '[':
!                 p_bracket(p);
!                 break;
!         case '\\':
!                 (void) REQUIRE(MORE(), REG_EESCAPE);
!                 wc = WGETNEXT();
!                 switch (wc) {
!                 case '<':
!                         EMIT(OBOW, 0);
!                         break;
!                 case '>':
!                         EMIT(OEOW, 0);
!                         break;
!                 default:
!                         ordinary(p, wc);
!                         break;
                  }
                  break;
!         default:
!                 if (p->error != 0)
!                         return;
!                 p->next--;
!                 wc = WGETNEXT();
!                 ordinary(p, wc);
!                 break;
          }
! 
!         if (!MORE())
!                 return;
!         c = PEEK();
!         /* we call { a repetition if followed by a digit */
!         if (!(c == '*' || c == '+' || c == '?' || c == '{'))
!                 return;         /* no repetition, we're done */
!         else if (c == '{')
!                 (void) REQUIRE(MORE2() && \
!                     (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
!         NEXT();
! 
!         (void) REQUIRE(!wascaret, REG_BADRPT);
!         switch (c) {
!         case '*':       /* implemented as +? */
!                 /* this case does not require the (y|) trick, noKLUDGE */
!                 INSERT(OPLUS_, pos);
!                 ASTERN(O_PLUS, pos);
!                 INSERT(OQUEST_, pos);
!                 ASTERN(O_QUEST, pos);
!                 break;
!         case '+':
!                 INSERT(OPLUS_, pos);
!                 ASTERN(O_PLUS, pos);
!                 break;
!         case '?':
!                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
!                 INSERT(OCH_, pos);              /* offset slightly wrong */
!                 ASTERN(OOR1, pos);              /* this one's right */
!                 AHEAD(pos);                     /* fix the OCH_ */
!                 EMIT(OOR2, 0);                  /* offset very wrong... */
!                 AHEAD(THERE());                 /* ...so fix it */
!                 ASTERN(O_CH, THERETHERE());
!                 break;
!         case '{':
!                 count = p_count(p);
!                 if (EAT(',')) {
!                         if (isdigit((uch)PEEK())) {
!                                 count2 = p_count(p);
!                                 (void) REQUIRE(count <= count2, REG_BADBR);
!                         } else          /* single number with comma */
!                                 count2 = INFINITY;
!                 } else          /* just a single number */
!                         count2 = count;
!                 repeat(p, pos, count, count2);
!                 if (!EAT('}')) {        /* error heuristics */
!                         while (MORE() && PEEK() != '}')
!                                 NEXT();
!                         (void) REQUIRE(MORE(), REG_EBRACE);
!                         SETERROR(REG_BADBR);
                  }
!                 break;
          }
  
!         if (!MORE())
!                 return;
!         c = PEEK();
!         if (!(c == '*' || c == '+' || c == '?' ||
!             (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
!                 return;
!         SETERROR(REG_BADRPT);
! }
  
! /*
!  * p_str - string (no metacharacters) "parser"
!  */
! static void
! p_str(struct parse *p)
! {
!         (void) REQUIRE(MORE(), REG_BADPAT);
!         while (MORE())
!                 ordinary(p, WGETNEXT());
  }
  
! /*
!  * p_bre - BRE parser top level, anchoring and concatenation
!  * Giving end1 as OUT essentially eliminates the end1/end2 check.
!  *
!  * This implementation is a bit of a kludge, in that a trailing $ is first
!  * taken as an ordinary character and then revised to be an anchor.
!  * The amount of lookahead needed to avoid this kludge is excessive.
!  */
! static void
! p_bre(struct parse *p,
!     int end1,           /* first terminating character */
!     int end2)           /* second terminating character */
  {
!         sopno start = HERE();
!         int first = 1;                  /* first subexpression? */
!         int wasdollar = 0;
! 
!         if (EAT('^')) {
!                 EMIT(OBOL, 0);
!                 p->g->iflags |= USEBOL;
!                 p->g->nbol++;
!         }
!         while (MORE() && !SEETWO(end1, end2)) {
!                 wasdollar = p_simp_re(p, first);
!                 first = 0;
!         }
!         if (wasdollar) {        /* oops, that was a trailing anchor */
!                 DROP(1);
!                 EMIT(OEOL, 0);
!                 p->g->iflags |= USEEOL;
!                 p->g->neol++;
!         }
! 
!         (void) REQUIRE(HERE() != start, REG_BADPAT);    /* require nonempty */
  }
  
! /*
!  * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
!  */
! static int                      /* was the simple RE an unbackslashed $? */
! p_simp_re(struct parse *p,
!     int starordinary)   /* is a leading * an ordinary character? */
  {
-         int c;
-         int count;
-         int count2;
-         sopno pos;
-         int i;
-         wint_t wc;
-         sopno subno;
- #define BACKSL  (1<<CHAR_BIT)
- 
-         pos = HERE();           /* repetition op, if any, covers from here */
- 
-         assert(MORE());         /* caller should have ensured this */
-         c = GETNEXT();
-         if (c == '\\') {
-                 (void) REQUIRE(MORE(), REG_EESCAPE);
-                 c = BACKSL | GETNEXT();
-         }
-         switch (c) {
-         case '.':
-                 if (p->g->cflags&REG_NEWLINE)
-                         nonnewline(p);
-                 else
-                         EMIT(OANY, 0);
-                 break;
-         case '[':
-                 p_bracket(p);
-                 break;
-         case BACKSL|'<':
-                 EMIT(OBOW, 0);
-                 break;
-         case BACKSL|'>':
-                 EMIT(OEOW, 0);
-                 break;
-         case BACKSL|'{':
-                 SETERROR(REG_BADRPT);
-                 break;
-         case BACKSL|'(':
-                 p->g->nsub++;
-                 subno = p->g->nsub;
-                 if (subno < NPAREN)
-                         p->pbegin[subno] = HERE();
-                 EMIT(OLPAREN, subno);
-                 /* the MORE here is an error heuristic */
-                 if (MORE() && !SEETWO('\\', ')'))
-                         p_bre(p, '\\', ')');
-                 if (subno < NPAREN) {
-                         p->pend[subno] = HERE();
-                         assert(p->pend[subno] != 0);
-                 }
-                 EMIT(ORPAREN, subno);
-                 (void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
-                 break;
-         case BACKSL|')':        /* should not get here -- must be user */
-                 SETERROR(REG_EPAREN);
-                 break;
-         case BACKSL|'1':
-         case BACKSL|'2':
-         case BACKSL|'3':
-         case BACKSL|'4':
-         case BACKSL|'5':
-         case BACKSL|'6':
-         case BACKSL|'7':
-         case BACKSL|'8':
-         case BACKSL|'9':
-                 i = (c&~BACKSL) - '0';
-                 assert(i < NPAREN);
-                 if (p->pend[i] != 0) {
-                         assert(i <= p->g->nsub);
-                         EMIT(OBACK_, i);
-                         assert(p->pbegin[i] != 0);
-                         assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
-                         assert(OP(p->strip[p->pend[i]]) == ORPAREN);
-                         (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
-                         EMIT(O_BACK, i);
-                 } else
-                         SETERROR(REG_ESUBREG);
-                 p->g->backrefs = 1;
-                 break;
-         case '*':
-                 (void) REQUIRE(starordinary, REG_BADRPT);
-                 /* FALLTHROUGH */
-         default:
-                 if (p->error != 0)
-                         return (0);     /* Definitely not $... */
-                 p->next--;
-                 wc = WGETNEXT();
-                 ordinary(p, wc);
-                 break;
-         }
- 
-         if (EAT('*')) {         /* implemented as +? */
-                 /* this case does not require the (y|) trick, noKLUDGE */
-                 INSERT(OPLUS_, pos);
-                 ASTERN(O_PLUS, pos);
-                 INSERT(OQUEST_, pos);
-                 ASTERN(O_QUEST, pos);
-         } else if (EATTWO('\\', '{')) {
-                 count = p_count(p);
-                 if (EAT(',')) {
-                         if (MORE() && isdigit((uch)PEEK())) {
-                                 count2 = p_count(p);
-                                 (void) REQUIRE(count <= count2, REG_BADBR);
-                         } else          /* single number with comma */
-                                 count2 = INFINITY;
-                 } else          /* just a single number */
-                         count2 = count;
-                 repeat(p, pos, count, count2);
-                 if (!EATTWO('\\', '}')) {       /* error heuristics */
-                         while (MORE() && !SEETWO('\\', '}'))
-                                 NEXT();
-                         (void) REQUIRE(MORE(), REG_EBRACE);
-                         SETERROR(REG_BADBR);
-                 }
-         } else if (c == '$')    /* $ (but not \$) ends it */
-                 return (1);
- 
-         return (0);
- }
- 
- /*
-  * p_count - parse a repetition count
-  */
- static int                      /* the value */
- p_count(struct parse *p)
- {
-         int count = 0;
-         int ndigits = 0;
- 
-         while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
-                 count = count*10 + (GETNEXT() - '0');
-                 ndigits++;
-         }
- 
-         (void) REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
-         return (count);
- }
- 
- /*
-  * p_bracket - parse a bracketed character list
-  */
- static void
- p_bracket(struct parse *p)
- {
-         cset *cs;
-         wint_t ch;
- 
-         /* Dept of Truly Sickening Special-Case Kludges */
-         if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
-                 EMIT(OBOW, 0);
-                 NEXTn(6);
-                 return;
-         }
-         if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
-                 EMIT(OEOW, 0);
-                 NEXTn(6);
-                 return;
-         }
- 
-         if ((cs = allocset(p)) == NULL)
-                 return;
- 
-         if (p->g->cflags&REG_ICASE)
-                 cs->icase = 1;
-         if (EAT('^'))
-                 cs->invert = 1;
-         if (EAT(']'))
-                 CHadd(p, cs, ']');
-         else if (EAT('-'))
-                 CHadd(p, cs, '-');
-         while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
-                 p_b_term(p, cs);
-         if (EAT('-'))
-                 CHadd(p, cs, '-');
-         (void) MUSTEAT(']', REG_EBRACK);
- 
-         if (p->error != 0)      /* don't mess things up further */
-                 return;
- 
-         if (cs->invert && p->g->cflags&REG_NEWLINE)
-                 cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
- 
-         if ((ch = singleton(cs)) != OUT) {      /* optimize singleton sets */
-                 ordinary(p, ch);
-                 freeset(p, cs);
-         } else
-                 EMIT(OANYOF, (int)(cs - p->g->sets));
- }
- 
- /*
-  * p_b_term - parse one term of a bracketed character list
-  */
- static void
- p_b_term(struct parse *p, cset *cs)
- {
-         char c;
-         wint_t start, finish;
-         wint_t i;
-         locale_t loc = uselocale(NULL);
- 
-         /* classify what we've got */
-         switch ((MORE()) ? PEEK() : '\0') {
-         case '[':
-                 c = (MORE2()) ? PEEK2() : '\0';
-                 break;
-         case '-':
-                 SETERROR(REG_ERANGE);
-                 return;                 /* NOTE RETURN */
-         default:
-                 c = '\0';
-                 break;
-         }
- 
-         switch (c) {
-         case ':':               /* character class */
-                 NEXT2();
-                 (void) REQUIRE(MORE(), REG_EBRACK);
-                 c = PEEK();
-                 (void) REQUIRE(c != '-' && c != ']', REG_ECTYPE);
-                 p_b_cclass(p, cs);
-                 (void) REQUIRE(MORE(), REG_EBRACK);
-                 (void) REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
-                 break;
-         case '=':               /* equivalence class */
-                 NEXT2();
-                 (void) REQUIRE(MORE(), REG_EBRACK);
-                 c = PEEK();
-                 (void) REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
-                 p_b_eclass(p, cs);
-                 (void) REQUIRE(MORE(), REG_EBRACK);
-                 (void) REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
-                 break;
-         default:                /* symbol, ordinary character, or range */
-                 start = p_b_symbol(p);
-                 if (SEE('-') && MORE2() && PEEK2() != ']') {
-                         /* range */
-                         NEXT();
-                         if (EAT('-'))
-                                 finish = '-';
-                         else
-                                 finish = p_b_symbol(p);
-                 } else
-                         finish = start;
-                 if (start == finish)
-                         CHadd(p, cs, start);
-                 else {
-                         if (loc->collate->lc_is_posix) {
-                                 (void) REQUIRE((uch)start <= (uch)finish,
-                                     REG_ERANGE);
-                                 CHaddrange(p, cs, start, finish);
-                         } else {
-                                 (void) REQUIRE(_collate_range_cmp(start,
-                                     finish, loc) <= 0, REG_ERANGE);
-                                 for (i = 0; i <= UCHAR_MAX; i++) {
-                                         if (_collate_range_cmp(start, i, loc)
-                                             <= 0 &&
-                                             _collate_range_cmp(i, finish, loc)
-                                             <= 0)
-                                                 CHadd(p, cs, i);
-                                 }
-                         }
-                 }
-                 break;
-         }
- }
- 
- /*
-  * p_b_cclass - parse a character-class name and deal with it
-  */
- static void
- p_b_cclass(struct parse *p, cset *cs)
- {
-         const char *sp = p->next;
          size_t len;
-         wctype_t wct;
-         char clname[16];
  
!         while (MORE() && isalpha((uch)PEEK()))
!                 NEXT();
!         len = p->next - sp;
!         if (len >= sizeof (clname) - 1) {
!                 SETERROR(REG_ECTYPE);
!                 return;
          }
-         (void) memcpy(clname, sp, len);
-         clname[len] = '\0';
-         if ((wct = wctype(clname)) == 0) {
-                 SETERROR(REG_ECTYPE);
-                 return;
-         }
-         CHaddtype(p, cs, wct);
- }
- 
- /*
-  * p_b_eclass - parse an equivalence-class name and deal with it
-  *
-  * This implementation is incomplete. xxx
-  */
- static void
- p_b_eclass(struct parse *p, cset *cs)
- {
-         wint_t c;
- 
-         c = p_b_coll_elem(p, '=');
-         CHadd(p, cs, c);
- }
- 
- /*
-  * p_b_symbol - parse a character or [..]ed multicharacter collating symbol
-  */
- static wint_t                   /* value of symbol */
- p_b_symbol(struct parse *p)
- {
-         wint_t value;
- 
-         (void) REQUIRE(MORE(), REG_EBRACK);
-         if (!EATTWO('[', '.'))
-                 return (WGETNEXT());
- 
-         /* collating symbol */
-         value = p_b_coll_elem(p, '.');
-         (void) REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
-         return (value);
- }
- 
- /*
-  * p_b_coll_elem - parse a collating-element name and look it up
-  */
- static wint_t                   /* value of collating element */
- p_b_coll_elem(struct parse *p,
-     wint_t endc)                /* name ended by endc,']' */
- {
-         const char *sp = p->next;
-         struct cname *cp;
-         mbstate_t mbs;
-         wchar_t wc;
-         size_t clen, len;
- 
-         while (MORE() && !SEETWO(endc, ']'))
-                 NEXT();
-         if (!MORE()) {
-                 SETERROR(REG_EBRACK);
-                 return (0);
-         }
-         len = p->next - sp;
-         for (cp = cnames; cp->name != NULL; cp++)
-                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
-                         return (cp->code);      /* known name */
-         (void) memset(&mbs, 0, sizeof (mbs));
-         if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
-                 return (wc);                    /* single character */
-         else if (clen == (size_t)-1 || clen == (size_t)-2)
-                 SETERROR(REG_ECHAR);
          else
!                 SETERROR(REG_ECOLLATE);         /* neither */
!         return (0);
  }
  
! /*
!  * othercase - return the case counterpart of an alphabetic
!  */
! static wint_t                   /* if no counterpart, return ch */
! othercase(wint_t ch)
  {
!         assert(iswalpha(ch));
!         if (iswupper(ch))
!                 return (towlower(ch));
!         else if (iswlower(ch))
!                 return (towupper(ch));
!         else                    /* peculiar, but could happen */
!                 return (ch);
  }
  
! /*
!  * bothcases - emit a dualcase version of a two-case character
!  *
!  * Boy, is this implementation ever a kludge...
!  */
! static void
! bothcases(struct parse *p, wint_t ch)
  {
!         const char *oldnext = p->next;
!         const char *oldend = p->end;
!         char bracket[3 + MB_LEN_MAX];
!         size_t n;
!         mbstate_t mbs;
! 
!         assert(othercase(ch) != ch);    /* p_bracket() would recurse */
!         p->next = bracket;
!         (void) memset(&mbs, 0, sizeof (mbs));
!         n = wcrtomb(bracket, ch, &mbs);
!         assert(n != (size_t)-1);
!         bracket[n] = ']';
!         bracket[n + 1] = '\0';
!         p->end = bracket+n+1;
!         p_bracket(p);
!         assert(p->next == p->end);
!         p->next = oldnext;
!         p->end = oldend;
! }
! 
! /*
!  * ordinary - emit an ordinary character
!  */
! static void
! ordinary(struct parse *p, wint_t ch)
! {
!         cset *cs;
! 
!         if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
!                 bothcases(p, ch);
!         else if ((ch & OPDMASK) == ch)
!                 EMIT(OCHAR, ch);
!         else {
!                 /*
!                  * Kludge: character is too big to fit into an OCHAR operand.
!                  * Emit a singleton set.
!                  */
!                 if ((cs = allocset(p)) == NULL)
!                         return;
!                 CHadd(p, cs, ch);
!                 EMIT(OANYOF, (int)(cs - p->g->sets));
!         }
! }
! 
! /*
!  * nonnewline - emit REG_NEWLINE version of OANY
!  *
!  * Boy, is this implementation ever a kludge...
!  */
! static void
! nonnewline(struct parse *p)
! {
!         const char *oldnext = p->next;
!         const char *oldend = p->end;
!         char bracket[4];
! 
!         p->next = bracket;
!         p->end = bracket+3;
!         bracket[0] = '^';
!         bracket[1] = '\n';
!         bracket[2] = ']';
!         bracket[3] = '\0';
!         p_bracket(p);
!         assert(p->next == bracket+3);
!         p->next = oldnext;
!         p->end = oldend;
! }
! 
! /*
!  * repeat - generate code for a bounded repetition, recursively if needed
!  */
! static void
! repeat(struct parse *p,
!     sopno start,                /* operand from here to end of strip */
!     int from,                   /* repeated from this number */
!     int to)                     /* to this number of times (maybe INFINITY) */
! {
!         sopno finish = HERE();
! #define N       2
! #define INF     3
! #define REP(f, t)       ((f)*8 + (t))
! #define MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
!         sopno copy;
! 
!         if (p->error != 0)      /* head off possible runaway recursion */
!                 return;
! 
!         assert(from <= to);
! 
!         switch (REP(MAP(from), MAP(to))) {
!         case REP(0, 0):                 /* must be user doing this */
!                 DROP(finish-start);     /* drop the operand */
!                 break;
!         case REP(0, 1):                 /* as x{1,1}? */
!         case REP(0, N):                 /* as x{1,n}? */
!         case REP(0, INF):               /* as x{1,}? */
!                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
!                 INSERT(OCH_, start);            /* offset is wrong... */
!                 repeat(p, start+1, 1, to);
!                 ASTERN(OOR1, start);
!                 AHEAD(start);                   /* ... fix it */
!                 EMIT(OOR2, 0);
!                 AHEAD(THERE());
!                 ASTERN(O_CH, THERETHERE());
!                 break;
!         case REP(1, 1):                 /* trivial case */
!                 /* done */
!                 break;
!         case REP(1, N):                 /* as x?x{1,n-1} */
!                 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
!                 INSERT(OCH_, start);
!                 ASTERN(OOR1, start);
!                 AHEAD(start);
!                 EMIT(OOR2, 0);                  /* offset very wrong... */
!                 AHEAD(THERE());                 /* ...so fix it */
!                 ASTERN(O_CH, THERETHERE());
!                 copy = dupl(p, start+1, finish+1);
!                 assert(copy == finish+4);
!                 repeat(p, copy, 1, to-1);
!                 break;
!         case REP(1, INF):               /* as x+ */
!                 INSERT(OPLUS_, start);
!                 ASTERN(O_PLUS, start);
!                 break;
!         case REP(N, N):                 /* as xx{m-1,n-1} */
!                 copy = dupl(p, start, finish);
!                 repeat(p, copy, from-1, to-1);
!                 break;
!         case REP(N, INF):               /* as xx{n-1,INF} */
!                 copy = dupl(p, start, finish);
!                 repeat(p, copy, from-1, to);
!                 break;
!         default:                        /* "can't happen" */
!                 SETERROR(REG_EFATAL);   /* just in case */
!                 break;
!         }
! }
! 
! /*
!  * wgetnext - helper function for WGETNEXT() macro. Gets the next wide
!  * character from the parse struct, signals a REG_ILLSEQ error if the
!  * character can't be converted. Returns the number of bytes consumed.
!  */
! static wint_t
! wgetnext(struct parse *p)
! {
!         mbstate_t mbs;
!         wchar_t wc;
!         size_t n;
! 
!         (void) memset(&mbs, 0, sizeof (mbs));
!         n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
!         if (n == (size_t)-1 || n == (size_t)-2) {
!                 SETERROR(REG_ECHAR);
!                 return (0);
!         }
!         if (n == 0)
!                 n = 1;
!         p->next += n;
!         return (wc);
! }
! 
! /*
!  * seterr - set an error condition
!  */
! static int                      /* useless but makes type checking happy */
! seterr(struct parse *p, int e)
! {
!         if (p->error == 0)      /* keep earliest error condition */
!                 p->error = e;
!         p->next = nuls;         /* try to bring things to a halt */
!         p->end = nuls;
!         return (0);             /* make the return value well-defined */
! }
! 
! /*
!  * allocset - allocate a set of characters for []
!  */
! static cset *
! allocset(struct parse *p)
! {
!         cset *cs, *ncs;
! 
!         ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof (*ncs));
!         if (ncs == NULL) {
!                 SETERROR(REG_ESPACE);
!                 return (NULL);
!         }
!         p->g->sets = ncs;
!         cs = &p->g->sets[p->g->ncsets++];
!         (void) memset(cs, 0, sizeof (*cs));
! 
!         return (cs);
! }
! 
! /*
!  * freeset - free a now-unused set
!  */
! static void
! freeset(struct parse *p, cset *cs)
! {
!         cset *top = &p->g->sets[p->g->ncsets];
! 
!         free(cs->wides);
!         free(cs->ranges);
!         free(cs->types);
!         (void) memset(cs, 0, sizeof (*cs));
!         if (cs == top-1)        /* recover only the easy case */
!                 p->g->ncsets--;
! }
! 
! /*
!  * singleton - Determine whether a set contains only one character,
!  * returning it if so, otherwise returning OUT.
!  */
! static wint_t
! singleton(cset *cs)
! {
!         wint_t i, s, n;
! 
!         for (i = n = 0; i < NC; i++)
!                 if (CHIN(cs, i)) {
!                         n++;
!                         s = i;
!                 }
!         if (n == 1)
!                 return (s);
!         if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
!             cs->icase == 0)
!                 return (cs->wides[0]);
!         /* Don't bother handling the other cases. */
!         return (OUT);
! }
! 
! /*
!  * CHadd - add character to character set.
!  */
! static void
! CHadd(struct parse *p, cset *cs, wint_t ch)
! {
!         wint_t nch, *newwides;
!         assert(ch >= 0);
!         if (ch < NC)
!                 cs->bmp[ch >> 3] |= 1 << (ch & 7);
!         else {
!                 newwides = realloc(cs->wides, (cs->nwides + 1) *
!                     sizeof (*cs->wides));
!                 if (newwides == NULL) {
!                         SETERROR(REG_ESPACE);
!                         return;
!                 }
!                 cs->wides = newwides;
!                 cs->wides[cs->nwides++] = ch;
!         }
!         if (cs->icase) {
!                 if ((nch = towlower(ch)) < NC)
!                         cs->bmp[nch >> 3] |= 1 << (nch & 7);
!                 if ((nch = towupper(ch)) < NC)
!                         cs->bmp[nch >> 3] |= 1 << (nch & 7);
!         }
! }
! 
! /*
!  * CHaddrange - add all characters in the range [min,max] to a character set.
!  */
! static void
! CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
! {
!         crange *newranges;
! 
!         for (; min < NC && min <= max; min++)
!                 CHadd(p, cs, min);
!         if (min >= max)
!                 return;
!         newranges = realloc(cs->ranges, (cs->nranges + 1) *
!             sizeof (*cs->ranges));
!         if (newranges == NULL) {
!                 SETERROR(REG_ESPACE);
!                 return;
!         }
!         cs->ranges = newranges;
!         cs->ranges[cs->nranges].min = min;
!         cs->ranges[cs->nranges].max = max;
!         cs->nranges++;
! }
! 
! /*
!  * CHaddtype - add all characters of a certain type to a character set.
!  */
! static void
! CHaddtype(struct parse *p, cset *cs, wctype_t wct)
! {
!         wint_t i;
!         wctype_t *newtypes;
! 
!         for (i = 0; i < NC; i++)
!                 if (iswctype(i, wct))
!                         CHadd(p, cs, i);
!         newtypes = realloc(cs->types, (cs->ntypes + 1) *
!             sizeof (*cs->types));
!         if (newtypes == NULL) {
!                 SETERROR(REG_ESPACE);
!                 return;
!         }
!         cs->types = newtypes;
!         cs->types[cs->ntypes++] = wct;
! }
! 
! /*
!  * dupl - emit a duplicate of a bunch of sops
!  */
! static sopno                    /* start of duplicate */
! dupl(struct parse *p,
!     sopno start,                /* from here */
!     sopno finish)               /* to this less one */
! {
!         sopno ret = HERE();
!         sopno len = finish - start;
! 
!         assert(finish >= start);
!         if (len == 0)
!                 return (ret);
!         if (!enlarge(p, p->ssize + len)) /* this many unexpected additions */
!                 return (ret);
!         assert(p->ssize >= p->slen + len);
!         (void) memcpy((char *)(p->strip + p->slen),
!             (char *)(p->strip + start), (size_t)len*sizeof (sop));
!         p->slen += len;
!         return (ret);
! }
! 
! /*
!  * doemit - emit a strip operator
!  *
!  * It might seem better to implement this as a macro with a function as
!  * hard-case backup, but it's just too big and messy unless there are
!  * some changes to the data structures.  Maybe later.
!  */
! static void
! doemit(struct parse *p, sop op, size_t opnd)
! {
!         /* avoid making error situations worse */
!         if (p->error != 0)
!                 return;
! 
!         /* deal with oversize operands ("can't happen", more or less) */
!         assert(opnd < 1<<OPSHIFT);
! 
!         /* deal with undersized strip */
!         if (p->slen >= p->ssize)
!                 if (!enlarge(p, (p->ssize+1) / 2 * 3))  /* +50% */
!                         return;
! 
!         /* finally, it's all reduced to the easy case */
!         p->strip[p->slen++] = SOP(op, opnd);
! }
! 
! /*
!  * doinsert - insert a sop into the strip
!  */
! static void
! doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
! {
!         sopno sn;
!         sop s;
!         int i;
! 
!         /* avoid making error situations worse */
!         if (p->error != 0)
!                 return;
! 
!         sn = HERE();
!         EMIT(op, opnd);         /* do checks, ensure space */
!         assert(HERE() == sn+1);
!         s = p->strip[sn];
! 
!         /* adjust paren pointers */
!         assert(pos > 0);
!         for (i = 1; i < NPAREN; i++) {
!                 if (p->pbegin[i] >= pos) {
!                         p->pbegin[i]++;
!                 }
!                 if (p->pend[i] >= pos) {
!                         p->pend[i]++;
!                 }
!         }
! 
!         (void) memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
!             (HERE()-pos-1)*sizeof (sop));
!         p->strip[pos] = s;
! }
! 
! /*
!  * dofwd - complete a forward reference
!  */
! static void
! dofwd(struct parse *p, sopno pos, sop value)
! {
!         /* avoid making error situations worse */
!         if (p->error != 0)
!                 return;
! 
!         assert(value < 1<<OPSHIFT);
!         p->strip[pos] = OP(p->strip[pos]) | value;
! }
! 
! /*
!  * enlarge - enlarge the strip
!  */
! static int
! enlarge(struct parse *p, sopno size)
! {
!         sop *sp;
! 
!         if (p->ssize >= size)
!                 return (1);
! 
!         sp = (sop *)realloc(p->strip, size*sizeof (sop));
!         if (sp == NULL) {
!                 SETERROR(REG_ESPACE);
!                 return (0);
!         }
!         p->strip = sp;
!         p->ssize = size;
!         return (1);
! }
! 
! /*
!  * stripsnug - compact the strip
!  */
! static void
! stripsnug(struct parse *p, struct re_guts *g)
! {
!         g->nstates = p->slen;
!         g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof (sop));
!         if (g->strip == NULL) {
!                 SETERROR(REG_ESPACE);
!                 g->strip = p->strip;
!         }
! }
! 
! /*
!  * findmust - fill in must and mlen with longest mandatory literal string
!  *
!  * This algorithm could do fancy things like analyzing the operands of |
!  * for common subsequences.  Someday.  This code is simple and finds most
!  * of the interesting cases.
!  *
!  * Note that must and mlen got initialized during setup.
!  */
! static void
! findmust(struct parse *p, struct re_guts *g)
! {
!         sop *scan;
!         sop *start = NULL;
!         sop *newstart = NULL;
!         sopno newlen;
!         sop s;
!         char *cp;
!         int offset;
!         char buf[MB_LEN_MAX];
!         size_t clen;
!         mbstate_t mbs;
!         locale_t loc = uselocale(NULL);
! 
!         /* avoid making error situations worse */
!         if (p->error != 0)
!                 return;
! 
!         /*
!          * It's not generally safe to do a ``char'' substring search on
!          * multibyte character strings, but it's safe for at least
!          * UTF-8 (see RFC 3629).
!          */
!         if (MB_CUR_MAX > 1 &&
!             strcmp(loc->runelocale->__encoding, "UTF-8") != 0)
!                 return;
! 
!         /* find the longest OCHAR sequence in strip */
!         newlen = 0;
!         offset = 0;
!         g->moffset = 0;
!         scan = g->strip + 1;
!         do {
!                 s = *scan++;
!                 switch (OP(s)) {
!                 case OCHAR:             /* sequence member */
!                         if (newlen == 0) {              /* new sequence */
!                                 (void) memset(&mbs, 0, sizeof (mbs));
!                                 newstart = scan - 1;
!                         }
!                         clen = wcrtomb(buf, OPND(s), &mbs);
!                         if (clen == (size_t)-1)
!                                 goto toohard;
!                         newlen += clen;
!                         break;
!                 case OPLUS_:            /* things that don't break one */
!                 case OLPAREN:
!                 case ORPAREN:
!                         break;
!                 case OQUEST_:           /* things that must be skipped */
!                 case OCH_:
!                         offset = altoffset(scan, offset);
!                         scan--;
!                         do {
!                                 scan += OPND(s);
!                                 s = *scan;
!                                 /* assert() interferes w debug printouts */
!                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
!                                     OP(s) != OOR2) {
!                                         g->iflags |= BAD;
!                                         return;
!                                 }
!                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
!                         /* FALLTHROUGH */
!                 case OBOW:              /* things that break a sequence */
!                 case OEOW:
!                 case OBOL:
!                 case OEOL:
!                 case O_QUEST:
!                 case O_CH:
!                 case OEND:
!                         if (newlen > g->mlen) {         /* ends one */
!                                 start = newstart;
!                                 g->mlen = newlen;
!                                 if (offset > -1) {
!                                         g->moffset += offset;
!                                         offset = newlen;
!                                 } else
!                                         g->moffset = offset;
!                         } else {
!                                 if (offset > -1)
!                                         offset += newlen;
!                         }
!                         newlen = 0;
!                         break;
!                 case OANY:
!                         if (newlen > g->mlen) {         /* ends one */
!                                 start = newstart;
!                                 g->mlen = newlen;
!                                 if (offset > -1) {
!                                         g->moffset += offset;
!                                         offset = newlen;
!                                 } else
!                                         g->moffset = offset;
!                         } else {
!                                 if (offset > -1)
!                                         offset += newlen;
!                         }
!                         if (offset > -1)
!                                 offset++;
!                         newlen = 0;
!                         break;
!                 case OANYOF:            /* may or may not invalidate offset */
!                         /* First, everything as OANY */
!                         if (newlen > g->mlen) {         /* ends one */
!                                 start = newstart;
!                                 g->mlen = newlen;
!                                 if (offset > -1) {
!                                         g->moffset += offset;
!                                         offset = newlen;
!                                 } else
!                                         g->moffset = offset;
!                         } else {
!                                 if (offset > -1)
!                                         offset += newlen;
!                         }
!                         if (offset > -1)
!                                 offset++;
!                         newlen = 0;
!                         break;
!                 toohard:
!                 default:
!                         /*
!                          * Anything here makes it impossible or too hard
!                          * to calculate the offset -- so we give up;
!                          * save the last known good offset, in case the
!                          * must sequence doesn't occur later.
!                          */
!                         if (newlen > g->mlen) {         /* ends one */
!                                 start = newstart;
!                                 g->mlen = newlen;
!                                 if (offset > -1)
!                                         g->moffset += offset;
!                                 else
!                                         g->moffset = offset;
!                         }
!                         offset = -1;
!                         newlen = 0;
!                         break;
!                 }
!         } while (OP(s) != OEND);
! 
!         if (g->mlen == 0) {             /* there isn't one */
!                 g->moffset = -1;
!                 return;
!         }
! 
!         /* turn it into a character string */
!         g->must = malloc((size_t)g->mlen + 1);
!         if (g->must == NULL) {          /* argh; just forget it */
!                 g->mlen = 0;
!                 g->moffset = -1;
!                 return;
!         }
!         cp = g->must;
!         scan = start;
!         (void) memset(&mbs, 0, sizeof (mbs));
!         while (cp < g->must + g->mlen) {
!                 while (OP(s = *scan++) != OCHAR)
!                         continue;
!                 clen = wcrtomb(cp, OPND(s), &mbs);
!                 assert(clen != (size_t)-1);
!                 cp += clen;
!         }
!         assert(cp == g->must + g->mlen);
!         *cp++ = '\0';           /* just on general principles */
! }
! 
! /*
!  * altoffset - choose biggest offset among multiple choices
!  *
!  * Compute, recursively if necessary, the largest offset among multiple
!  * re paths.
!  */
! static int
! altoffset(sop *scan, int offset)
! {
!         int largest;
!         int try;
!         sop s;
! 
!         /* If we gave up already on offsets, return */
!         if (offset == -1)
!                 return (-1);
! 
!         largest = 0;
!         try = 0;
!         s = *scan++;
!         while (OP(s) != O_QUEST && OP(s) != O_CH) {
!                 switch (OP(s)) {
!                 case OOR1:
!                         if (try > largest)
!                                 largest = try;
!                         try = 0;
!                         break;
!                 case OQUEST_:
!                 case OCH_:
!                         try = altoffset(scan, try);
!                         if (try == -1)
!                                 return (-1);
!                         scan--;
!                         do {
!                                 scan += OPND(s);
!                                 s = *scan;
!                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
!                                     OP(s) != OOR2)
!                                         return (-1);
!                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
!                         /*
!                          * We must skip to the next position, or we'll
!                          * leave altoffset() too early.
!                          */
!                         scan++;
!                         break;
!                 case OANYOF:
!                 case OCHAR:
!                 case OANY:
!                         try++;
!                         /*FALLTHRU*/
!                 case OBOW:
!                 case OEOW:
!                 case OLPAREN:
!                 case ORPAREN:
!                 case OOR2:
!                         break;
!                 default:
!                         try = -1;
!                         break;
!                 }
!                 if (try == -1)
!                         return (-1);
!                 s = *scan++;
!         }
! 
!         if (try > largest)
!                 largest = try;
! 
!         return (largest+offset);
! }
! 
! /*
!  * computejumps - compute char jumps for BM scan
!  *
!  * This algorithm assumes g->must exists and is has size greater than
!  * zero. It's based on the algorithm found on Computer Algorithms by
!  * Sara Baase.
!  *
!  * A char jump is the number of characters one needs to jump based on
!  * the value of the character from the text that was mismatched.
!  */
! static void
! computejumps(struct parse *p, struct re_guts *g)
! {
!         int ch;
!         int mindex;
! 
!         /* Avoid making errors worse */
!         if (p->error != 0)
!                 return;
! 
!         g->charjump = (int *)malloc((NC + 1) * sizeof (int));
!         if (g->charjump == NULL)        /* Not a fatal error */
!                 return;
!         /* Adjust for signed chars, if necessary */
!         g->charjump = &g->charjump[-(CHAR_MIN)];
! 
!         /*
!          * If the character does not exist in the pattern, the jump
!          * is equal to the number of characters in the pattern.
!          */
!         for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
!                 g->charjump[ch] = g->mlen;
! 
!         /*
!          * If the character does exist, compute the jump that would
!          * take us to the last character in the pattern equal to it
!          * (notice that we match right to left, so that last character
!          * is the first one that would be matched).
!          */
!         for (mindex = 0; mindex < g->mlen; mindex++)
!                 g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
! }
! 
! /*
!  * computematchjumps - compute match jumps for BM scan
!  *
!  * This algorithm assumes g->must exists and is has size greater than
!  * zero. It's based on the algorithm found on Computer Algorithms by
!  * Sara Baase.
!  *
!  * A match jump is the number of characters one needs to advance based
!  * on the already-matched suffix.
!  * Notice that all values here are minus (g->mlen-1), because of the way
!  * the search algorithm works.
!  */
! static void
! computematchjumps(struct parse *p, struct re_guts *g)
! {
!         int mindex;             /* General "must" iterator */
!         int suffix;             /* Keeps track of matching suffix */
!         int ssuffix;            /* Keeps track of suffixes' suffix */
!         int *pmatches;
!                                 /*
!                                  * pmatches[k] points to the next i
!                                  * such that i+1...mlen is a substring
!                                  * of k+1...k+mlen-i-1
!                                  */
! 
!         /* Avoid making errors worse */
!         if (p->error != 0)
!                 return;
! 
!         pmatches = (int *)malloc(g->mlen * sizeof (unsigned int));
!         if (pmatches == NULL) {
!                 g->matchjump = NULL;
!                 return;
!         }
! 
!         g->matchjump = (int *)malloc(g->mlen * sizeof (unsigned int));
!         if (g->matchjump == NULL) {     /* Not a fatal error */
!                 free(pmatches);
!                 return;
!         }
! 
!         /* Set maximum possible jump for each character in the pattern */
!         for (mindex = 0; mindex < g->mlen; mindex++)
!                 g->matchjump[mindex] = 2*g->mlen - mindex - 1;
! 
!         /* Compute pmatches[] */
!         for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
!             mindex--, suffix--) {
!                 pmatches[mindex] = suffix;
! 
!                 /*
!                  * If a mismatch is found, interrupting the substring,
!                  * compute the matchjump for that position. If no
!                  * mismatch is found, then a text substring mismatched
!                  * against the suffix will also mismatch against the
!                  * substring.
!                  */
!                 while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) {
!                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
!                             g->mlen - mindex - 1);
!                         suffix = pmatches[suffix];
!                 }
!         }
! 
!         /*
!          * Compute the matchjump up to the last substring found to jump
!          * to the beginning of the largest must pattern prefix matching
!          * it's own suffix.
!          */
!         for (mindex = 0; mindex <= suffix; mindex++)
!                 g->matchjump[mindex] = MIN(g->matchjump[mindex],
!                     g->mlen + suffix - mindex);
! 
!         ssuffix = pmatches[suffix];
!         while (suffix < g->mlen) {
!                 while (suffix <= ssuffix && suffix < g->mlen) {
!                         g->matchjump[suffix] = MIN(g->matchjump[suffix],
!                             g->mlen + ssuffix - suffix);
!                         suffix++;
!                 }
!                 if (suffix < g->mlen)
!                         ssuffix = pmatches[ssuffix];
!         }
! 
!         free(pmatches);
! }
! 
! /*
!  * pluscount - count + nesting
!  */
! static sopno                    /* nesting depth */
! pluscount(struct parse *p, struct re_guts *g)
! {
!         sop *scan;
!         sop s;
!         sopno plusnest = 0;
!         sopno maxnest = 0;
! 
!         if (p->error != 0)
!                 return (0);     /* there may not be an OEND */
! 
!         scan = g->strip + 1;
!         do {
!                 s = *scan++;
!                 switch (OP(s)) {
!                 case OPLUS_:
!                         plusnest++;
!                         break;
!                 case O_PLUS:
!                         if (plusnest > maxnest)
!                                 maxnest = plusnest;
!                         plusnest--;
!                         break;
!                 }
!         } while (OP(s) != OEND);
!         if (plusnest != 0)
!                 g->iflags |= BAD;
!         return (maxnest);
  }
--- 1,141 ----
  /*
!  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
!  * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
+  *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
+  *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in the
   *    documentation and/or other materials provided with the distribution.
   *
!  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
!  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
!  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
!  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
!  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
!  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
!  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
!  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
!  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
!  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
!  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */
  
! /*
!   tre_regcomp.c - TRE POSIX compatible regex compilation functions.
! */
! 
  #include <string.h>
! #include <errno.h>
  #include <stdlib.h>
  
! #include "tre.h"
! #include "tre-internal.h"
  
! int
! tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
!     locale_t loc)
  {
!   int ret;
!   tre_char_t *wregex;
!   size_t wlen;
  
!   wregex = malloc(sizeof(tre_char_t) * (n + 1));
!   if (wregex == NULL)
!     return REG_ESPACE;
  
!   /* If the current locale uses the standard single byte encoding of
!      characters, we don't do a multibyte string conversion.  If we did,
!      many applications which use the default locale would break since
!      the default "C" locale uses the 7-bit ASCII character set, and
!      all characters with the eighth bit set would be considered invalid. */
!   if (TRE_MB_CUR_MAX_L(loc) == 1)
!     {
!       unsigned int i;
!       const unsigned char *str = (const unsigned char *)regex;
!       tre_char_t *wstr = wregex;
  
!       for (i = 0; i < n; i++)
!         *(wstr++) = *(str++);
!       wlen = n;
      }
    else
!     {
!       size_t consumed;
!       tre_char_t *wcptr = wregex;
!       mbstate_t state;
!       memset(&state, '\0', sizeof(state));
!       while (n > 0)
!         {
!           consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
  
!           switch (consumed)
!             {
!             case 0:
!               if (*regex == '\0')
!                 consumed = 1;
                else
!                 {
!                   free(wregex);
!                   return REG_BADPAT;
                  }
                break;
!             case (size_t)-1:
!             case (size_t)-2:
!               DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
!               free(wregex);
!               return REG_ILLSEQ;
              }
!           regex += consumed;
!           n -= consumed;
!           wcptr++;
          }
!       wlen = wcptr - wregex;
      }
  
!   wregex[wlen] = L'\0';
!   ret = tre_compile(preg, wregex, wlen, cflags, loc);
!   free(wregex);
  
!   return ret;
  }
  
! int
! tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
  {
!   return tre_regncomp_l(preg, regex, n, cflags, uselocale((locale_t)0));
  }
  
! int
! tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
  {
    size_t len;
  
!   if (cflags & REG_PEND)
!     {
!       if ((const char *)(preg->re_endp) < regex)
!         return REG_INVARG;
!       len = (const char *)(preg->re_endp) - regex;
      }
    else
!     len = strlen(regex);
!   return tre_regncomp_l(preg, regex, len, cflags, loc);
  }
  
! int
! regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD regex,
!     int cflags)
  {
!   return tre_regcomp_l(preg, regex, cflags, uselocale((locale_t)0));
  }
  
! void
! regfree(regex_t *preg)
  {
!   tre_free(preg);
  }