ilregex Cdiff usr/src/lib/libc/port/regex/regcomp.c

Print this page

8993 sync regcomp(3C) with upstream


*** 39,50 ****
  #include <sys/types.h>
  #include <stdio.h>
  #include <string.h>
  #include <ctype.h>
  #include <limits.h>
- #include <stdlib.h>
  #include <regex.h>
  #include <wchar.h>
  #include <wctype.h>
  
  #include "../locale/runetype.h"
  #include "../locale/collate.h"
--- 39,51 ----
  #include <sys/types.h>
  #include <stdio.h>
  #include <string.h>
  #include <ctype.h>
  #include <limits.h>
  #include <regex.h>
+ #include <stdlib.h>
+ #include <stdbool.h>
  #include <wchar.h>
  #include <wctype.h>
  
  #include "../locale/runetype.h"
  #include "../locale/collate.h"
*** 54,63 ****
--- 55,82 ----
  
  #include "cname.h"
  #include "../locale/mblocal.h"
  
  /*
+  * Branching context, used to keep track of branch state for all of the branch-
+  * aware functions. In addition to keeping track of branch positions for the
+  * p_branch_* functions, we use this to simplify some clumsiness in BREs for
+  * detection of whether ^ is acting as an anchor or being used erroneously and
+  * also for whether we're in a sub-expression or not.
+  */
+ struct branchc {
+         sopno start;
+         sopno back;
+         sopno fwd;
+ 
+         int nbranch;
+         int nchain;
+         bool outer;
+         bool terminate;
+ };
+ 
+ /*
   * parse structure, passed up and down to avoid global variables and
   * other clumsinesses
   */
  struct parse {
          const char *next;       /* next character in RE */
*** 69,91 ****
          int ncsalloc;           /* number of csets allocated */
          struct re_guts *g;
  #define NPAREN  10              /* we need to remember () 1-9 for back refs */
          sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
          sopno pend[NPAREN];     /* -> ) ([0] unused) */
  };
  
  /* ========= begin header generated by ./mkh ========= */
  #ifdef __cplusplus
  extern "C" {
  #endif
  
  /* === regcomp.c === */
! static void p_ere(struct parse *p, int stop);
! static void p_ere_exp(struct parse *p);
  static void p_str(struct parse *p);
! static void p_bre(struct parse *p, int end1, int end2);
! static int p_simp_re(struct parse *p, int starordinary);
  static int p_count(struct parse *p);
  static void p_bracket(struct parse *p);
  static void p_b_term(struct parse *p, cset *cs);
  static void p_b_cclass(struct parse *p, cset *cs);
  static void p_b_eclass(struct parse *p, cset *cs);
--- 88,121 ----
          int ncsalloc;           /* number of csets allocated */
          struct re_guts *g;
  #define NPAREN  10              /* we need to remember () 1-9 for back refs */
          sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
          sopno pend[NPAREN];     /* -> ) ([0] unused) */
+         bool allowbranch;       /* can this expression branch? */
+         bool bre;               /* convenience; is this a BRE? */
+         bool (*parse_expr)(struct parse *, struct branchc *);
+         void (*pre_parse)(struct parse *, struct branchc *);
+         void (*post_parse)(struct parse *, struct branchc *);
  };
  
  /* ========= begin header generated by ./mkh ========= */
  #ifdef __cplusplus
  extern "C" {
  #endif
  
  /* === regcomp.c === */
! static bool p_ere_exp(struct parse *p, struct branchc *bc);
  static void p_str(struct parse *p);
! static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
! static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
! static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
! static bool p_branch_empty(struct parse *p, struct branchc *bc);
! static bool p_branch_do(struct parse *p, struct branchc *bc);
! static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
! static void p_bre_post_parse(struct parse *p, struct branchc *bc);
! static void p_re(struct parse *p, int end1, int end2);
! static bool p_simp_re(struct parse *p, struct branchc *bc);
  static int p_count(struct parse *p);
  static void p_bracket(struct parse *p);
  static void p_b_term(struct parse *p, cset *cs);
  static void p_b_cclass(struct parse *p, cset *cs);
  static void p_b_eclass(struct parse *p, cset *cs);
*** 131,140 ****
--- 161,171 ----
  #define PEEK2() (*(p->next+1))
  #define MORE()  (p->next < p->end)
  #define MORE2() (p->next+1 < p->end)
  #define SEE(c)  (MORE() && PEEK() == (c))
  #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
+ #define SEESPEC(a)      (p->bre ? SEETWO('\\', a) : SEE(a))
  #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
  #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
  #define NEXT()  (p->next++)
  #define NEXT2() (p->next += 2)
  #define NEXTn(n)        (p->next += (n))
*** 227,236 ****
--- 258,280 ----
          p->ncsalloc = 0;
          for (i = 0; i < NPAREN; i++) {
                  p->pbegin[i] = 0;
                  p->pend[i] = 0;
          }
+         if (cflags & REG_EXTENDED) {
+                 p->allowbranch = true;
+                 p->bre = false;
+                 p->parse_expr = p_ere_exp;
+                 p->pre_parse = NULL;
+                 p->post_parse = NULL;
+         } else {
+                 p->allowbranch = false;
+                 p->bre = true;
+                 p->parse_expr = p_simp_re;
+                 p->pre_parse = p_bre_pre_parse;
+                 p->post_parse = p_bre_post_parse;
+         }
          g->sets = NULL;
          g->ncsets = 0;
          g->cflags = cflags;
          g->iflags = 0;
          g->nbol = 0;
*** 244,259 ****
          g->backrefs = 0;
  
          /* do it */
          EMIT(OEND, 0);
          g->firststate = THERE();
!         if (cflags&REG_EXTENDED)
!                 p_ere(p, OUT);
!         else if (cflags&REG_NOSPEC)
                  p_str(p);
          else
!                 p_bre(p, OUT, OUT);
          EMIT(OEND, 0);
          g->laststate = THERE();
  
          /* tidy up loose ends and fill things in */
          stripsnug(p, g);
--- 288,301 ----
          g->backrefs = 0;
  
          /* do it */
          EMIT(OEND, 0);
          g->firststate = THERE();
!         if (cflags & REG_NOSPEC)
                  p_str(p);
          else
!                 p_re(p, OUT, OUT);
          EMIT(OEND, 0);
          g->laststate = THERE();
  
          /* tidy up loose ends and fill things in */
          stripsnug(p, g);
*** 286,353 ****
                  regfree(preg);
          return (p->error);
  }
  
  /*
!  * p_ere - ERE parser top level, concatenation and alternation
   */
! static void
! p_ere(struct parse *p,
!     int stop)           /* character this ERE should end at */
  {
          char c;
-         sopno prevback;
-         sopno prevfwd;
-         sopno conc;
-         int first = 1;          /* is this the first alternative? */
- 
-         for (;;) {
-                 /* do a bunch of concatenated expressions */
-                 conc = HERE();
-                 while (MORE() && (c = PEEK()) != '|' && c != stop)
-                         p_ere_exp(p);
-                 /* require nonempty */
-                 (void) REQUIRE(HERE() != conc, REG_BADPAT);
- 
-                 if (!EAT('|'))
-                         break;          /* NOTE BREAK OUT */
- 
-                 if (first) {
-                         INSERT(OCH_, conc);     /* offset is wrong */
-                         prevfwd = conc;
-                         prevback = conc;
-                         first = 0;
-                 }
-                 ASTERN(OOR1, prevback);
-                 prevback = THERE();
-                 AHEAD(prevfwd);                 /* fix previous offset */
-                 prevfwd = HERE();
-                 EMIT(OOR2, 0);                  /* offset is very wrong */
-         }
- 
-         if (!first) {           /* tail-end fixups */
-                 AHEAD(prevfwd);
-                 ASTERN(O_CH, prevback);
-         }
- 
-         assert(!MORE() || SEE(stop));
- }
- 
- /*
-  * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
-  */
- static void
- p_ere_exp(struct parse *p)
- {
-         char c;
          wint_t wc;
          sopno pos;
          int count;
          int count2;
          sopno subno;
          int wascaret = 0;
  
          assert(MORE());         /* caller should have ensured this */
          c = GETNEXT();
  
          pos = HERE();
          switch (c) {
--- 328,352 ----
                  regfree(preg);
          return (p->error);
  }
  
  /*
!  * Parse one subERE, an atom possibly followed by a repetition op,
!  * return whether we should terminate or not.
   */
! static bool
! p_ere_exp(struct parse *p, struct branchc *bc)
  {
          char c;
          wint_t wc;
          sopno pos;
          int count;
          int count2;
          sopno subno;
          int wascaret = 0;
  
+         (void) bc;
          assert(MORE());         /* caller should have ensured this */
          c = GETNEXT();
  
          pos = HERE();
          switch (c) {
*** 357,367 ****
                  subno = p->g->nsub;
                  if (subno < NPAREN)
                          p->pbegin[subno] = HERE();
                  EMIT(OLPAREN, subno);
                  if (!SEE(')'))
!                         p_ere(p, ')');
                  if (subno < NPAREN) {
                          p->pend[subno] = HERE();
                          assert(p->pend[subno] != 0);
                  }
                  EMIT(ORPAREN, subno);
--- 356,366 ----
                  subno = p->g->nsub;
                  if (subno < NPAREN)
                          p->pbegin[subno] = HERE();
                  EMIT(OLPAREN, subno);
                  if (!SEE(')'))
!                         p_re(p, ')', IGN);
                  if (subno < NPAREN) {
                          p->pend[subno] = HERE();
                          assert(p->pend[subno] != 0);
                  }
                  EMIT(ORPAREN, subno);
*** 423,445 ****
                          break;
                  }
                  break;
          default:
                  if (p->error != 0)
!                         return;
                  p->next--;
                  wc = WGETNEXT();
                  ordinary(p, wc);
                  break;
          }
  
          if (!MORE())
!                 return;
          c = PEEK();
          /* we call { a repetition if followed by a digit */
          if (!(c == '*' || c == '+' || c == '?' || c == '{'))
!                 return;         /* no repetition, we're done */
          else if (c == '{')
                  (void) REQUIRE(MORE2() && \
                      (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
          NEXT();
  
--- 422,444 ----
                          break;
                  }
                  break;
          default:
                  if (p->error != 0)
!                         return (false);
                  p->next--;
                  wc = WGETNEXT();
                  ordinary(p, wc);
                  break;
          }
  
          if (!MORE())
!                 return (false);
          c = PEEK();
          /* we call { a repetition if followed by a digit */
          if (!(c == '*' || c == '+' || c == '?' || c == '{'))
!                 return (false);         /* no repetition, we're done */
          else if (c == '{')
                  (void) REQUIRE(MORE2() && \
                      (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
          NEXT();
  
*** 484,499 ****
                  }
                  break;
          }
  
          if (!MORE())
!                 return;
          c = PEEK();
          if (!(c == '*' || c == '+' || c == '?' ||
              (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
!                 return;
          SETERROR(REG_BADRPT);
  }
  
  /*
   * p_str - string (no metacharacters) "parser"
   */
--- 483,499 ----
                  }
                  break;
          }
  
          if (!MORE())
!                 return (false);
          c = PEEK();
          if (!(c == '*' || c == '+' || c == '?' ||
              (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
!                 return (false);
          SETERROR(REG_BADRPT);
+         return (false);
  }
  
  /*
   * p_str - string (no metacharacters) "parser"
   */
*** 504,554 ****
          while (MORE())
                  ordinary(p, WGETNEXT());
  }
  
  /*
!  * p_bre - BRE parser top level, anchoring and concatenation
!  * Giving end1 as OUT essentially eliminates the end1/end2 check.
!  *
!  * This implementation is a bit of a kludge, in that a trailing $ is first
!  * taken as an ordinary character and then revised to be an anchor.
!  * The amount of lookahead needed to avoid this kludge is excessive.
   */
  static void
! p_bre(struct parse *p,
!     int end1,           /* first terminating character */
!     int end2)           /* second terminating character */
  {
!         sopno start = HERE();
!         int first = 1;                  /* first subexpression? */
!         int wasdollar = 0;
  
          if (EAT('^')) {
                  EMIT(OBOL, 0);
                  p->g->iflags |= USEBOL;
                  p->g->nbol++;
          }
!         while (MORE() && !SEETWO(end1, end2)) {
!                 wasdollar = p_simp_re(p, first);
!                 first = 0;
!         }
!         if (wasdollar) {        /* oops, that was a trailing anchor */
                  DROP(1);
                  EMIT(OEOL, 0);
                  p->g->iflags |= USEEOL;
                  p->g->neol++;
          }
  
!         (void) REQUIRE(HERE() != start, REG_BADPAT);    /* require nonempty */
  }
  
  /*
   * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
   */
! static int                      /* was the simple RE an unbackslashed $? */
! p_simp_re(struct parse *p,
!     int starordinary)   /* is a leading * an ordinary character? */
  {
          int c;
          int count;
          int count2;
          sopno pos;
--- 504,688 ----
          while (MORE())
                  ordinary(p, WGETNEXT());
  }
  
  /*
!  * Eat consecutive branch delimiters for the kind of expression that we are
!  * parsing, return the number of delimiters that we ate.
   */
+ static int
+ p_branch_eat_delim(struct parse *p, struct branchc *bc)
+ {
+         int nskip;
+ 
+         (void) bc;
+         nskip = 0;
+         while (EAT('|'))
+                 ++nskip;
+         return (nskip);
+ }
+ 
+ /*
+  * Insert necessary branch book-keeping operations. This emits a
+  * bogus 'next' offset, since we still have more to parse
+  */
  static void
! p_branch_ins_offset(struct parse *p, struct branchc *bc)
  {
!         if (bc->nbranch == 0) {
!                 INSERT(OCH_, bc->start);        /* offset is wrong */
!                 bc->fwd = bc->start;
!                 bc->back = bc->start;
!         }
  
+         ASTERN(OOR1, bc->back);
+         bc->back = THERE();
+         AHEAD(bc->fwd);                 /* fix previous offset */
+         bc->fwd = HERE();
+         EMIT(OOR2, 0);                  /* offset is very wrong */
+         ++bc->nbranch;
+ }
+ 
+ /*
+  * Fix the offset of the tail branch, if we actually had any branches.
+  * This is to correct the bogus placeholder offset that we use.
+  */
+ static void
+ p_branch_fix_tail(struct parse *p, struct branchc *bc)
+ {
+         /* Fix bogus offset at the tail if we actually have branches */
+         if (bc->nbranch > 0) {
+                 AHEAD(bc->fwd);
+                 ASTERN(O_CH, bc->back);
+         }
+ }
+ 
+ /*
+  * Signal to the parser that an empty branch has been encountered; this will,
+  * in the future, be used to allow for more permissive behavior with empty
+  * branches. The return value should indicate whether parsing may continue
+  * or not.
+  */
+ static bool
+ p_branch_empty(struct parse *p, struct branchc *bc)
+ {
+         (void) bc;
+         SETERROR(REG_BADPAT);
+         return (false);
+ }
+ 
+ /*
+  * Take care of any branching requirements. This includes inserting the
+  * appropriate branching instructions as well as eating all of the branch
+  * delimiters until we either run out of pattern or need to parse more pattern.
+  */
+ static bool
+ p_branch_do(struct parse *p, struct branchc *bc)
+ {
+         int ate = 0;
+ 
+         ate = p_branch_eat_delim(p, bc);
+         if (ate == 0)
+                 return (false);
+         else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
+                 /*
+                  * Halt parsing only if we have an empty branch and
+                  * p_branch_empty indicates that we must not continue.
+                  * In the future, this will not  necessarily be an error.
+                  */
+                 return (false);
+         p_branch_ins_offset(p, bc);
+ 
+         return (true);
+ }
+ 
+ static void
+ p_bre_pre_parse(struct parse *p, struct branchc *bc)
+ {
+         (void) bc;
+         /*
+          * Does not move cleanly into expression parser because of
+          * ordinary interpration of * at the beginning position of
+          * an expression.
+          */
          if (EAT('^')) {
                  EMIT(OBOL, 0);
                  p->g->iflags |= USEBOL;
                  p->g->nbol++;
          }
! }
! 
! static void
! p_bre_post_parse(struct parse *p, struct branchc *bc)
! {
!         /* Expression is terminating due to EOL token */
!         if (bc->terminate) {
                  DROP(1);
                  EMIT(OEOL, 0);
                  p->g->iflags |= USEEOL;
                  p->g->neol++;
          }
+ }
  
! /*
!  * Top level parser, concatenation and BRE anchoring.
!  * Giving end1 as OUT essentially eliminates the end1/end2 check.
!  *
!  * This implementation is a bit of a kludge, in that a trailing $ is first
!  * taken as an ordinary character and then revised to be an anchor.
!  * The amount of lookahead needed to avoid this kludge is excessive.
!  */
! static void
! p_re(struct parse *p,
!     int end1,   /* first terminating character */
!     int end2)   /* second terminating character; ignored for EREs */
! {
!         struct branchc bc;
! 
!         bc.nbranch = 0;
!         if (end1 == OUT && end2 == OUT)
!                 bc.outer = true;
!         else
!                 bc.outer = false;
! #define SEEEND()        (!p->bre ? SEE(end1) : SEETWO(end1, end2))
!         for (;;) {
!                 bc.start = HERE();
!                 bc.nchain = 0;
!                 bc.terminate = false;
!                 if (p->pre_parse != NULL)
!                         p->pre_parse(p, &bc);
!                 while (MORE() && (!p->allowbranch || !SEESPEC('|')) &&
!                     !SEEEND()) {
!                         bc.terminate = p->parse_expr(p, &bc);
!                         ++bc.nchain;
!                 }
!                 if (p->post_parse != NULL)
!                         p->post_parse(p, &bc);
!                 (void) REQUIRE(HERE() != bc.start, REG_BADPAT);
!                 if (!p->allowbranch)
!                         break;
!                 /*
!                  * p_branch_do's return value indicates whether we should
!                  * continue parsing or not. This is both for correctness and
!                  * a slight optimization, because it will check if we've
!                  * encountered an empty branch or the end of the string
!                  * immediately following a branch delimiter.
!                  */
!                 if (!p_branch_do(p, &bc))
!                         break;
!         }
! #undef SEE_END
!         if (p->allowbranch)
!                 p_branch_fix_tail(p, &bc);
!         assert(!MORE() || SEE(end1));
  }
  
  /*
   * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
   */
! static bool                     /* was the simple RE an unbackslashed $? */
! p_simp_re(struct parse *p, struct branchc *bc)
  {
          int c;
          int count;
          int count2;
          sopno pos;
*** 590,600 ****
                  if (subno < NPAREN)
                          p->pbegin[subno] = HERE();
                  EMIT(OLPAREN, subno);
                  /* the MORE here is an error heuristic */
                  if (MORE() && !SEETWO('\\', ')'))
!                         p_bre(p, '\\', ')');
                  if (subno < NPAREN) {
                          p->pend[subno] = HERE();
                          assert(p->pend[subno] != 0);
                  }
                  EMIT(ORPAREN, subno);
--- 724,734 ----
                  if (subno < NPAREN)
                          p->pbegin[subno] = HERE();
                  EMIT(OLPAREN, subno);
                  /* the MORE here is an error heuristic */
                  if (MORE() && !SEETWO('\\', ')'))
!                         p_re(p, '\\', ')');
                  if (subno < NPAREN) {
                          p->pend[subno] = HERE();
                          assert(p->pend[subno] != 0);
                  }
                  EMIT(ORPAREN, subno);
*** 625,639 ****
                  } else
                          SETERROR(REG_ESUBREG);
                  p->g->backrefs = 1;
                  break;
          case '*':
!                 (void) REQUIRE(starordinary, REG_BADRPT);
                  /* FALLTHROUGH */
          default:
                  if (p->error != 0)
!                         return (0);     /* Definitely not $... */
                  p->next--;
                  wc = WGETNEXT();
                  ordinary(p, wc);
                  break;
          }
--- 759,778 ----
                  } else
                          SETERROR(REG_ESUBREG);
                  p->g->backrefs = 1;
                  break;
          case '*':
!                 /*
!                  * Ordinary if used as the first character beyond BOL anchor of
!                  * a (sub-)expression, counts as a bad repetition operator if it
!                  * appears otherwise.
!                  */
!                 (void) REQUIRE(bc->nchain == 0, REG_BADRPT);
                  /* FALLTHROUGH */
          default:
                  if (p->error != 0)
!                         return (false); /* Definitely not $... */
                  p->next--;
                  wc = WGETNEXT();
                  ordinary(p, wc);
                  break;
          }
*** 660,672 ****
                                  NEXT();
                          (void) REQUIRE(MORE(), REG_EBRACE);
                          SETERROR(REG_BADBR);
                  }
          } else if (c == '$')    /* $ (but not \$) ends it */
!                 return (1);
  
!         return (0);
  }
  
  /*
   * p_count - parse a repetition count
   */
--- 799,811 ----
                                  NEXT();
                          (void) REQUIRE(MORE(), REG_EBRACE);
                          SETERROR(REG_BADBR);
                  }
          } else if (c == '$')    /* $ (but not \$) ends it */
!                 return (true);
  
!         return (false);
  }
  
  /*
   * p_count - parse a repetition count
   */
*** 891,901 ****
                  SETERROR(REG_EBRACK);
                  return (0);
          }
          len = p->next - sp;
          for (cp = cnames; cp->name != NULL; cp++)
!                 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
                          return (cp->code);      /* known name */
          (void) memset(&mbs, 0, sizeof (mbs));
          if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
                  return (wc);                    /* single character */
          else if (clen == (size_t)-1 || clen == (size_t)-2)
--- 1030,1040 ----
                  SETERROR(REG_EBRACK);
                  return (0);
          }
          len = p->next - sp;
          for (cp = cnames; cp->name != NULL; cp++)
!                 if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
                          return (cp->code);      /* known name */
          (void) memset(&mbs, 0, sizeof (mbs));
          if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
                  return (wc);                    /* single character */
          else if (clen == (size_t)-1 || clen == (size_t)-2)
*** 1434,1458 ****
                          scan--;
                          do {
                                  scan += OPND(s);
                                  s = *scan;
                                  /* assert() interferes w debug printouts */
!                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
!                                     OP(s) != OOR2) {
                                          g->iflags |= BAD;
                                          return;
                                  }
!                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
                          /* FALLTHROUGH */
                  case OBOW:              /* things that break a sequence */
                  case OEOW:
                  case OBOL:
                  case OEOL:
                  case O_QUEST:
                  case O_CH:
                  case OEND:
!                         if (newlen > g->mlen) {         /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
--- 1573,1597 ----
                          scan--;
                          do {
                                  scan += OPND(s);
                                  s = *scan;
                                  /* assert() interferes w debug printouts */
!                                 if (OP(s) != (sop)O_QUEST &&
!                                     OP(s) != (sop)O_CH && OP(s) != (sop)OOR2) {
                                          g->iflags |= BAD;
                                          return;
                                  }
!                         } while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
                          /* FALLTHROUGH */
                  case OBOW:              /* things that break a sequence */
                  case OEOW:
                  case OBOL:
                  case OEOL:
                  case O_QUEST:
                  case O_CH:
                  case OEND:
!                         if (newlen > (sopno)g->mlen) {          /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
*** 1463,1473 ****
                                          offset += newlen;
                          }
                          newlen = 0;
                          break;
                  case OANY:
!                         if (newlen > g->mlen) {         /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
--- 1602,1612 ----
                                          offset += newlen;
                          }
                          newlen = 0;
                          break;
                  case OANY:
!                         if (newlen > (sopno)g->mlen) {          /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
*** 1481,1491 ****
                                  offset++;
                          newlen = 0;
                          break;
                  case OANYOF:            /* may or may not invalidate offset */
                          /* First, everything as OANY */
!                         if (newlen > g->mlen) {         /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
--- 1620,1630 ----
                                  offset++;
                          newlen = 0;
                          break;
                  case OANYOF:            /* may or may not invalidate offset */
                          /* First, everything as OANY */
!                         if (newlen > (sopno)g->mlen) {          /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1) {
                                          g->moffset += offset;
                                          offset = newlen;
*** 1505,1515 ****
                           * Anything here makes it impossible or too hard
                           * to calculate the offset -- so we give up;
                           * save the last known good offset, in case the
                           * must sequence doesn't occur later.
                           */
!                         if (newlen > g->mlen) {         /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1)
                                          g->moffset += offset;
                                  else
--- 1644,1654 ----
                           * Anything here makes it impossible or too hard
                           * to calculate the offset -- so we give up;
                           * save the last known good offset, in case the
                           * must sequence doesn't occur later.
                           */
!                         if (newlen > (sopno)g->mlen) {          /* ends one */
                                  start = newstart;
                                  g->mlen = newlen;
                                  if (offset > -1)
                                          g->moffset += offset;
                                  else
*** 1565,1575 ****
                  return (-1);
  
          largest = 0;
          try = 0;
          s = *scan++;
!         while (OP(s) != O_QUEST && OP(s) != O_CH) {
                  switch (OP(s)) {
                  case OOR1:
                          if (try > largest)
                                  largest = try;
                          try = 0;
--- 1704,1714 ----
                  return (-1);
  
          largest = 0;
          try = 0;
          s = *scan++;
!         while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH) {
                  switch (OP(s)) {
                  case OOR1:
                          if (try > largest)
                                  largest = try;
                          try = 0;
*** 1581,1594 ****
                                  return (-1);
                          scan--;
                          do {
                                  scan += OPND(s);
                                  s = *scan;
!                                 if (OP(s) != O_QUEST && OP(s) != O_CH &&
!                                     OP(s) != OOR2)
                                          return (-1);
!                         } while (OP(s) != O_QUEST && OP(s) != O_CH);
                          /*
                           * We must skip to the next position, or we'll
                           * leave altoffset() too early.
                           */
                          scan++;
--- 1720,1733 ----
                                  return (-1);
                          scan--;
                          do {
                                  scan += OPND(s);
                                  s = *scan;
!                                 if (OP(s) != (sop)O_QUEST &&
!                                     OP(s) != (sop)O_CH && OP(s) != (sop)OOR2)
                                          return (-1);
!                         } while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
                          /*
                           * We must skip to the next position, or we'll
                           * leave altoffset() too early.
                           */
                          scan++;