Print this page
8993 sync regcomp(3C) with upstream

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libc/port/regex/regcomp.c
          +++ new/usr/src/lib/libc/port/regex/regcomp.c
↓ open down ↓ 33 lines elided ↑ open up ↑
  34   34   * SUCH DAMAGE.
  35   35   */
  36   36  
  37   37  #include "lint.h"
  38   38  #include "file64.h"
  39   39  #include <sys/types.h>
  40   40  #include <stdio.h>
  41   41  #include <string.h>
  42   42  #include <ctype.h>
  43   43  #include <limits.h>
  44      -#include <stdlib.h>
  45   44  #include <regex.h>
       45 +#include <stdlib.h>
       46 +#include <stdbool.h>
  46   47  #include <wchar.h>
  47   48  #include <wctype.h>
  48   49  
  49   50  #include "../locale/runetype.h"
  50   51  #include "../locale/collate.h"
  51   52  
  52   53  #include "utils.h"
  53   54  #include "regex2.h"
  54   55  
  55   56  #include "cname.h"
  56   57  #include "../locale/mblocal.h"
  57   58  
  58   59  /*
       60 + * Branching context, used to keep track of branch state for all of the branch-
       61 + * aware functions. In addition to keeping track of branch positions for the
       62 + * p_branch_* functions, we use this to simplify some clumsiness in BREs for
       63 + * detection of whether ^ is acting as an anchor or being used erroneously and
       64 + * also for whether we're in a sub-expression or not.
       65 + */
       66 +struct branchc {
       67 +        sopno start;
       68 +        sopno back;
       69 +        sopno fwd;
       70 +
       71 +        int nbranch;
       72 +        int nchain;
       73 +        bool outer;
       74 +        bool terminate;
       75 +};
       76 +
       77 +/*
  59   78   * parse structure, passed up and down to avoid global variables and
  60   79   * other clumsinesses
  61   80   */
  62   81  struct parse {
  63   82          const char *next;       /* next character in RE */
  64   83          const char *end;        /* end of string (-> NUL normally) */
  65   84          int error;              /* has an error been seen? */
  66   85          sop *strip;             /* malloced strip */
  67   86          sopno ssize;            /* malloced strip size (allocated) */
  68   87          sopno slen;             /* malloced strip length (used) */
  69   88          int ncsalloc;           /* number of csets allocated */
  70   89          struct re_guts *g;
  71   90  #define NPAREN  10              /* we need to remember () 1-9 for back refs */
  72   91          sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
  73   92          sopno pend[NPAREN];     /* -> ) ([0] unused) */
       93 +        bool allowbranch;       /* can this expression branch? */
       94 +        bool bre;               /* convenience; is this a BRE? */
       95 +        bool (*parse_expr)(struct parse *, struct branchc *);
       96 +        void (*pre_parse)(struct parse *, struct branchc *);
       97 +        void (*post_parse)(struct parse *, struct branchc *);
  74   98  };
  75   99  
  76  100  /* ========= begin header generated by ./mkh ========= */
  77  101  #ifdef __cplusplus
  78  102  extern "C" {
  79  103  #endif
  80  104  
  81  105  /* === regcomp.c === */
  82      -static void p_ere(struct parse *p, int stop);
  83      -static void p_ere_exp(struct parse *p);
      106 +static bool p_ere_exp(struct parse *p, struct branchc *bc);
  84  107  static void p_str(struct parse *p);
  85      -static void p_bre(struct parse *p, int end1, int end2);
  86      -static int p_simp_re(struct parse *p, int starordinary);
      108 +static int p_branch_eat_delim(struct parse *p, struct branchc *bc);
      109 +static void p_branch_ins_offset(struct parse *p, struct branchc *bc);
      110 +static void p_branch_fix_tail(struct parse *p, struct branchc *bc);
      111 +static bool p_branch_empty(struct parse *p, struct branchc *bc);
      112 +static bool p_branch_do(struct parse *p, struct branchc *bc);
      113 +static void p_bre_pre_parse(struct parse *p, struct branchc *bc);
      114 +static void p_bre_post_parse(struct parse *p, struct branchc *bc);
      115 +static void p_re(struct parse *p, int end1, int end2);
      116 +static bool p_simp_re(struct parse *p, struct branchc *bc);
  87  117  static int p_count(struct parse *p);
  88  118  static void p_bracket(struct parse *p);
  89  119  static void p_b_term(struct parse *p, cset *cs);
  90  120  static void p_b_cclass(struct parse *p, cset *cs);
  91  121  static void p_b_eclass(struct parse *p, cset *cs);
  92  122  static wint_t p_b_symbol(struct parse *p);
  93  123  static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
  94  124  static wint_t othercase(wint_t ch);
  95  125  static void bothcases(struct parse *p, wint_t ch);
  96  126  static void ordinary(struct parse *p, wint_t ch);
↓ open down ↓ 29 lines elided ↑ open up ↑
 126  156  /*
 127  157   * macros for use with parse structure
 128  158   * BEWARE:  these know that the parse structure is named `p' !!!
 129  159   */
 130  160  #define PEEK()  (*p->next)
 131  161  #define PEEK2() (*(p->next+1))
 132  162  #define MORE()  (p->next < p->end)
 133  163  #define MORE2() (p->next+1 < p->end)
 134  164  #define SEE(c)  (MORE() && PEEK() == (c))
 135  165  #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
      166 +#define SEESPEC(a)      (p->bre ? SEETWO('\\', a) : SEE(a))
 136  167  #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
 137  168  #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 138  169  #define NEXT()  (p->next++)
 139  170  #define NEXT2() (p->next += 2)
 140  171  #define NEXTn(n)        (p->next += (n))
 141  172  #define GETNEXT()       (*p->next++)
 142  173  #define WGETNEXT()      wgetnext(p)
 143  174  #define SETERROR(e)     ((void)seterr(p, (e)))
 144  175  #define REQUIRE(co, e)  ((co) || seterr(p, e))
 145  176  #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
↓ open down ↓ 76 lines elided ↑ open up ↑
 222  253          /* set things up */
 223  254          p->g = g;
 224  255          p->next = pattern;      /* convenience; we do not modify it */
 225  256          p->end = p->next + len;
 226  257          p->error = 0;
 227  258          p->ncsalloc = 0;
 228  259          for (i = 0; i < NPAREN; i++) {
 229  260                  p->pbegin[i] = 0;
 230  261                  p->pend[i] = 0;
 231  262          }
      263 +        if (cflags & REG_EXTENDED) {
      264 +                p->allowbranch = true;
      265 +                p->bre = false;
      266 +                p->parse_expr = p_ere_exp;
      267 +                p->pre_parse = NULL;
      268 +                p->post_parse = NULL;
      269 +        } else {
      270 +                p->allowbranch = false;
      271 +                p->bre = true;
      272 +                p->parse_expr = p_simp_re;
      273 +                p->pre_parse = p_bre_pre_parse;
      274 +                p->post_parse = p_bre_post_parse;
      275 +        }
 232  276          g->sets = NULL;
 233  277          g->ncsets = 0;
 234  278          g->cflags = cflags;
 235  279          g->iflags = 0;
 236  280          g->nbol = 0;
 237  281          g->neol = 0;
 238  282          g->must = NULL;
 239  283          g->moffset = -1;
 240  284          g->charjump = NULL;
 241  285          g->matchjump = NULL;
 242  286          g->mlen = 0;
 243  287          g->nsub = 0;
 244  288          g->backrefs = 0;
 245  289  
 246  290          /* do it */
 247  291          EMIT(OEND, 0);
 248  292          g->firststate = THERE();
 249      -        if (cflags&REG_EXTENDED)
 250      -                p_ere(p, OUT);
 251      -        else if (cflags&REG_NOSPEC)
      293 +        if (cflags & REG_NOSPEC)
 252  294                  p_str(p);
 253  295          else
 254      -                p_bre(p, OUT, OUT);
      296 +                p_re(p, OUT, OUT);
 255  297          EMIT(OEND, 0);
 256  298          g->laststate = THERE();
 257  299  
 258  300          /* tidy up loose ends and fill things in */
 259  301          stripsnug(p, g);
 260  302          findmust(p, g);
 261  303          /*
 262  304           * only use Boyer-Moore algorithm if the pattern is bigger
 263  305           * than three characters
 264  306           */
↓ open down ↓ 16 lines elided ↑ open up ↑
 281  323                  SETERROR(REG_EFATAL);
 282  324  #endif
 283  325  
 284  326          /* win or lose, we're done */
 285  327          if (p->error != 0)      /* lose */
 286  328                  regfree(preg);
 287  329          return (p->error);
 288  330  }
 289  331  
 290  332  /*
 291      - * p_ere - ERE parser top level, concatenation and alternation
      333 + * Parse one subERE, an atom possibly followed by a repetition op,
      334 + * return whether we should terminate or not.
 292  335   */
 293      -static void
 294      -p_ere(struct parse *p,
 295      -    int stop)           /* character this ERE should end at */
      336 +static bool
      337 +p_ere_exp(struct parse *p, struct branchc *bc)
 296  338  {
 297  339          char c;
 298      -        sopno prevback;
 299      -        sopno prevfwd;
 300      -        sopno conc;
 301      -        int first = 1;          /* is this the first alternative? */
 302      -
 303      -        for (;;) {
 304      -                /* do a bunch of concatenated expressions */
 305      -                conc = HERE();
 306      -                while (MORE() && (c = PEEK()) != '|' && c != stop)
 307      -                        p_ere_exp(p);
 308      -                /* require nonempty */
 309      -                (void) REQUIRE(HERE() != conc, REG_BADPAT);
 310      -
 311      -                if (!EAT('|'))
 312      -                        break;          /* NOTE BREAK OUT */
 313      -
 314      -                if (first) {
 315      -                        INSERT(OCH_, conc);     /* offset is wrong */
 316      -                        prevfwd = conc;
 317      -                        prevback = conc;
 318      -                        first = 0;
 319      -                }
 320      -                ASTERN(OOR1, prevback);
 321      -                prevback = THERE();
 322      -                AHEAD(prevfwd);                 /* fix previous offset */
 323      -                prevfwd = HERE();
 324      -                EMIT(OOR2, 0);                  /* offset is very wrong */
 325      -        }
 326      -
 327      -        if (!first) {           /* tail-end fixups */
 328      -                AHEAD(prevfwd);
 329      -                ASTERN(O_CH, prevback);
 330      -        }
 331      -
 332      -        assert(!MORE() || SEE(stop));
 333      -}
 334      -
 335      -/*
 336      - * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
 337      - */
 338      -static void
 339      -p_ere_exp(struct parse *p)
 340      -{
 341      -        char c;
 342  340          wint_t wc;
 343  341          sopno pos;
 344  342          int count;
 345  343          int count2;
 346  344          sopno subno;
 347  345          int wascaret = 0;
 348  346  
      347 +        (void) bc;
 349  348          assert(MORE());         /* caller should have ensured this */
 350  349          c = GETNEXT();
 351  350  
 352  351          pos = HERE();
 353  352          switch (c) {
 354  353          case '(':
 355  354                  (void) REQUIRE(MORE(), REG_EPAREN);
 356  355                  p->g->nsub++;
 357  356                  subno = p->g->nsub;
 358  357                  if (subno < NPAREN)
 359  358                          p->pbegin[subno] = HERE();
 360  359                  EMIT(OLPAREN, subno);
 361  360                  if (!SEE(')'))
 362      -                        p_ere(p, ')');
      361 +                        p_re(p, ')', IGN);
 363  362                  if (subno < NPAREN) {
 364  363                          p->pend[subno] = HERE();
 365  364                          assert(p->pend[subno] != 0);
 366  365                  }
 367  366                  EMIT(ORPAREN, subno);
 368  367                  (void) MUSTEAT(')', REG_EPAREN);
 369  368                  break;
 370  369  #ifndef POSIX_MISTAKE
 371  370          case ')':               /* happens only if no current unmatched ( */
 372  371                  /*
↓ open down ↓ 45 lines elided ↑ open up ↑
 418  417                  case '>':
 419  418                          EMIT(OEOW, 0);
 420  419                          break;
 421  420                  default:
 422  421                          ordinary(p, wc);
 423  422                          break;
 424  423                  }
 425  424                  break;
 426  425          default:
 427  426                  if (p->error != 0)
 428      -                        return;
      427 +                        return (false);
 429  428                  p->next--;
 430  429                  wc = WGETNEXT();
 431  430                  ordinary(p, wc);
 432  431                  break;
 433  432          }
 434  433  
 435  434          if (!MORE())
 436      -                return;
      435 +                return (false);
 437  436          c = PEEK();
 438  437          /* we call { a repetition if followed by a digit */
 439  438          if (!(c == '*' || c == '+' || c == '?' || c == '{'))
 440      -                return;         /* no repetition, we're done */
      439 +                return (false);         /* no repetition, we're done */
 441  440          else if (c == '{')
 442  441                  (void) REQUIRE(MORE2() && \
 443  442                      (isdigit((uch)PEEK2()) || PEEK2() == ','), REG_BADRPT);
 444  443          NEXT();
 445  444  
 446  445          (void) REQUIRE(!wascaret, REG_BADRPT);
 447  446          switch (c) {
 448  447          case '*':       /* implemented as +? */
 449  448                  /* this case does not require the (y|) trick, noKLUDGE */
 450  449                  INSERT(OPLUS_, pos);
↓ open down ↓ 28 lines elided ↑ open up ↑
 479  478                  if (!EAT('}')) {        /* error heuristics */
 480  479                          while (MORE() && PEEK() != '}')
 481  480                                  NEXT();
 482  481                          (void) REQUIRE(MORE(), REG_EBRACE);
 483  482                          SETERROR(REG_BADBR);
 484  483                  }
 485  484                  break;
 486  485          }
 487  486  
 488  487          if (!MORE())
 489      -                return;
      488 +                return (false);
 490  489          c = PEEK();
 491  490          if (!(c == '*' || c == '+' || c == '?' ||
 492  491              (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
 493      -                return;
      492 +                return (false);
 494  493          SETERROR(REG_BADRPT);
      494 +        return (false);
 495  495  }
 496  496  
 497  497  /*
 498  498   * p_str - string (no metacharacters) "parser"
 499  499   */
 500  500  static void
 501  501  p_str(struct parse *p)
 502  502  {
 503  503          (void) REQUIRE(MORE(), REG_BADPAT);
 504  504          while (MORE())
 505  505                  ordinary(p, WGETNEXT());
 506  506  }
 507  507  
 508  508  /*
 509      - * p_bre - BRE parser top level, anchoring and concatenation
 510      - * Giving end1 as OUT essentially eliminates the end1/end2 check.
 511      - *
 512      - * This implementation is a bit of a kludge, in that a trailing $ is first
 513      - * taken as an ordinary character and then revised to be an anchor.
 514      - * The amount of lookahead needed to avoid this kludge is excessive.
      509 + * Eat consecutive branch delimiters for the kind of expression that we are
      510 + * parsing, return the number of delimiters that we ate.
 515  511   */
      512 +static int
      513 +p_branch_eat_delim(struct parse *p, struct branchc *bc)
      514 +{
      515 +        int nskip;
      516 +
      517 +        (void) bc;
      518 +        nskip = 0;
      519 +        while (EAT('|'))
      520 +                ++nskip;
      521 +        return (nskip);
      522 +}
      523 +
      524 +/*
      525 + * Insert necessary branch book-keeping operations. This emits a
      526 + * bogus 'next' offset, since we still have more to parse.
      527 + */
 516  528  static void
 517      -p_bre(struct parse *p,
 518      -    int end1,           /* first terminating character */
 519      -    int end2)           /* second terminating character */
      529 +p_branch_ins_offset(struct parse *p, struct branchc *bc)
 520  530  {
 521      -        sopno start = HERE();
 522      -        int first = 1;                  /* first subexpression? */
 523      -        int wasdollar = 0;
      531 +        if (bc->nbranch == 0) {
      532 +                INSERT(OCH_, bc->start);        /* offset is wrong */
      533 +                bc->fwd = bc->start;
      534 +                bc->back = bc->start;
      535 +        }
 524  536  
      537 +        ASTERN(OOR1, bc->back);
      538 +        bc->back = THERE();
      539 +        AHEAD(bc->fwd);                 /* fix previous offset */
      540 +        bc->fwd = HERE();
      541 +        EMIT(OOR2, 0);                  /* offset is very wrong */
      542 +        ++bc->nbranch;
      543 +}
      544 +
      545 +/*
      546 + * Fix the offset of the tail branch, if we actually had any branches.
      547 + * This is to correct the bogus placeholder offset that we use.
      548 + */
      549 +static void
      550 +p_branch_fix_tail(struct parse *p, struct branchc *bc)
      551 +{
      552 +        /* Fix bogus offset at the tail if we actually have branches */
      553 +        if (bc->nbranch > 0) {
      554 +                AHEAD(bc->fwd);
      555 +                ASTERN(O_CH, bc->back);
      556 +        }
      557 +}
      558 +
      559 +/*
      560 + * Signal to the parser that an empty branch has been encountered; this will,
      561 + * in the future, be used to allow for more permissive behavior with empty
      562 + * branches. The return value should indicate whether parsing may continue
      563 + * or not.
      564 + */
      565 +static bool
      566 +p_branch_empty(struct parse *p, struct branchc *bc)
      567 +{
      568 +        (void) bc;
      569 +        SETERROR(REG_BADPAT);
      570 +        return (false);
      571 +}
      572 +
      573 +/*
      574 + * Take care of any branching requirements. This includes inserting the
      575 + * appropriate branching instructions as well as eating all of the branch
      576 + * delimiters until we either run out of pattern or need to parse more pattern.
      577 + */
      578 +static bool
      579 +p_branch_do(struct parse *p, struct branchc *bc)
      580 +{
      581 +        int ate = 0;
      582 +
      583 +        ate = p_branch_eat_delim(p, bc);
      584 +        if (ate == 0)
      585 +                return (false);
      586 +        else if ((ate > 1 || (bc->outer && !MORE())) && !p_branch_empty(p, bc))
      587 +                /*
      588 +                 * Halt parsing only if we have an empty branch and
      589 +                 * p_branch_empty indicates that we must not continue.
      590 +                 * In the future, this will not necessarily be an error.
      591 +                 */
      592 +                return (false);
      593 +        p_branch_ins_offset(p, bc);
      594 +
      595 +        return (true);
      596 +}
      597 +
      598 +static void
      599 +p_bre_pre_parse(struct parse *p, struct branchc *bc)
      600 +{
      601 +        (void) bc;
      602 +        /*
      603 +         * Does not move cleanly into expression parser because of
      604 +         * ordinary interpretation of * at the beginning position of
      605 +         * an expression.
      606 +         */
 525  607          if (EAT('^')) {
 526  608                  EMIT(OBOL, 0);
 527  609                  p->g->iflags |= USEBOL;
 528  610                  p->g->nbol++;
 529  611          }
 530      -        while (MORE() && !SEETWO(end1, end2)) {
 531      -                wasdollar = p_simp_re(p, first);
 532      -                first = 0;
 533      -        }
 534      -        if (wasdollar) {        /* oops, that was a trailing anchor */
      612 +}
      613 +
      614 +static void
      615 +p_bre_post_parse(struct parse *p, struct branchc *bc)
      616 +{
      617 +        /* Expression is terminating due to EOL token */
      618 +        if (bc->terminate) {
 535  619                  DROP(1);
 536  620                  EMIT(OEOL, 0);
 537  621                  p->g->iflags |= USEEOL;
 538  622                  p->g->neol++;
 539  623          }
      624 +}
 540  625  
 541      -        (void) REQUIRE(HERE() != start, REG_BADPAT);    /* require nonempty */
      626 +/*
      627 + * Top level parser, concatenation and BRE anchoring.
      628 + * Giving end1 as OUT essentially eliminates the end1/end2 check.
      629 + *
      630 + * This implementation is a bit of a kludge, in that a trailing $ is first
      631 + * taken as an ordinary character and then revised to be an anchor.
      632 + * The amount of lookahead needed to avoid this kludge is excessive.
      633 + */
      634 +static void
      635 +p_re(struct parse *p,
      636 +    int end1,   /* first terminating character */
      637 +    int end2)   /* second terminating character; ignored for EREs */
      638 +{
      639 +        struct branchc bc;
      640 +
      641 +        bc.nbranch = 0;
      642 +        if (end1 == OUT && end2 == OUT)
      643 +                bc.outer = true;
      644 +        else
      645 +                bc.outer = false;
      646 +#define SEEEND()        (!p->bre ? SEE(end1) : SEETWO(end1, end2))
      647 +        for (;;) {
      648 +                bc.start = HERE();
      649 +                bc.nchain = 0;
      650 +                bc.terminate = false;
      651 +                if (p->pre_parse != NULL)
      652 +                        p->pre_parse(p, &bc);
      653 +                while (MORE() && (!p->allowbranch || !SEESPEC('|')) &&
      654 +                    !SEEEND()) {
      655 +                        bc.terminate = p->parse_expr(p, &bc);
      656 +                        ++bc.nchain;
      657 +                }
      658 +                if (p->post_parse != NULL)
      659 +                        p->post_parse(p, &bc);
      660 +                (void) REQUIRE(HERE() != bc.start, REG_BADPAT);
      661 +                if (!p->allowbranch)
      662 +                        break;
      663 +                /*
      664 +                 * p_branch_do's return value indicates whether we should
      665 +                 * continue parsing or not. This is both for correctness and
      666 +                 * a slight optimization, because it will check if we've
      667 +                 * encountered an empty branch or the end of the string
      668 +                 * immediately following a branch delimiter.
      669 +                 */
      670 +                if (!p_branch_do(p, &bc))
      671 +                        break;
      672 +        }
      673 +#undef SEE_END
      674 +        if (p->allowbranch)
      675 +                p_branch_fix_tail(p, &bc);
      676 +        assert(!MORE() || SEE(end1));
 542  677  }
 543  678  
 544  679  /*
 545  680   * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 546  681   */
 547      -static int                      /* was the simple RE an unbackslashed $? */
 548      -p_simp_re(struct parse *p,
 549      -    int starordinary)   /* is a leading * an ordinary character? */
      682 +static bool                     /* was the simple RE an unbackslashed $? */
      683 +p_simp_re(struct parse *p, struct branchc *bc)
 550  684  {
 551  685          int c;
 552  686          int count;
 553  687          int count2;
 554  688          sopno pos;
 555  689          int i;
 556  690          wint_t wc;
 557  691          sopno subno;
 558  692  #define BACKSL  (1<<CHAR_BIT)
 559  693  
↓ open down ↓ 25 lines elided ↑ open up ↑
 585  719                  SETERROR(REG_BADRPT);
 586  720                  break;
 587  721          case BACKSL|'(':
 588  722                  p->g->nsub++;
 589  723                  subno = p->g->nsub;
 590  724                  if (subno < NPAREN)
 591  725                          p->pbegin[subno] = HERE();
 592  726                  EMIT(OLPAREN, subno);
 593  727                  /* the MORE here is an error heuristic */
 594  728                  if (MORE() && !SEETWO('\\', ')'))
 595      -                        p_bre(p, '\\', ')');
      729 +                        p_re(p, '\\', ')');
 596  730                  if (subno < NPAREN) {
 597  731                          p->pend[subno] = HERE();
 598  732                          assert(p->pend[subno] != 0);
 599  733                  }
 600  734                  EMIT(ORPAREN, subno);
 601  735                  (void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
 602  736                  break;
 603  737          case BACKSL|')':        /* should not get here -- must be user */
 604  738                  SETERROR(REG_EPAREN);
 605  739                  break;
↓ open down ↓ 14 lines elided ↑ open up ↑
 620  754                          assert(p->pbegin[i] != 0);
 621  755                          assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
 622  756                          assert(OP(p->strip[p->pend[i]]) == ORPAREN);
 623  757                          (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
 624  758                          EMIT(O_BACK, i);
 625  759                  } else
 626  760                          SETERROR(REG_ESUBREG);
 627  761                  p->g->backrefs = 1;
 628  762                  break;
 629  763          case '*':
 630      -                (void) REQUIRE(starordinary, REG_BADRPT);
      764 +                /*
      765 +                 * Ordinary if used as the first character beyond BOL anchor of
      766 +                 * a (sub-)expression, counts as a bad repetition operator if it
      767 +                 * appears otherwise.
      768 +                 */
      769 +                (void) REQUIRE(bc->nchain == 0, REG_BADRPT);
 631  770                  /* FALLTHROUGH */
 632  771          default:
 633  772                  if (p->error != 0)
 634      -                        return (0);     /* Definitely not $... */
      773 +                        return (false); /* Definitely not $... */
 635  774                  p->next--;
 636  775                  wc = WGETNEXT();
 637  776                  ordinary(p, wc);
 638  777                  break;
 639  778          }
 640  779  
 641  780          if (EAT('*')) {         /* implemented as +? */
 642  781                  /* this case does not require the (y|) trick, noKLUDGE */
 643  782                  INSERT(OPLUS_, pos);
 644  783                  ASTERN(O_PLUS, pos);
↓ open down ↓ 10 lines elided ↑ open up ↑
 655  794                  } else          /* just a single number */
 656  795                          count2 = count;
 657  796                  repeat(p, pos, count, count2);
 658  797                  if (!EATTWO('\\', '}')) {       /* error heuristics */
 659  798                          while (MORE() && !SEETWO('\\', '}'))
 660  799                                  NEXT();
 661  800                          (void) REQUIRE(MORE(), REG_EBRACE);
 662  801                          SETERROR(REG_BADBR);
 663  802                  }
 664  803          } else if (c == '$')    /* $ (but not \$) ends it */
 665      -                return (1);
      804 +                return (true);
 666  805  
 667      -        return (0);
      806 +        return (false);
 668  807  }
 669  808  
 670  809  /*
 671  810   * p_count - parse a repetition count
 672  811   */
 673  812  static int                      /* the value */
 674  813  p_count(struct parse *p)
 675  814  {
 676  815          int count = 0;
 677  816          int ndigits = 0;
↓ open down ↓ 208 lines elided ↑ open up ↑
 886 1025          size_t clen, len;
 887 1026  
 888 1027          while (MORE() && !SEETWO(endc, ']'))
 889 1028                  NEXT();
 890 1029          if (!MORE()) {
 891 1030                  SETERROR(REG_EBRACK);
 892 1031                  return (0);
 893 1032          }
 894 1033          len = p->next - sp;
 895 1034          for (cp = cnames; cp->name != NULL; cp++)
 896      -                if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
     1035 +                if (strncmp(cp->name, sp, len) == 0 && strlen(cp->name) == len)
 897 1036                          return (cp->code);      /* known name */
 898 1037          (void) memset(&mbs, 0, sizeof (mbs));
 899 1038          if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
 900 1039                  return (wc);                    /* single character */
 901 1040          else if (clen == (size_t)-1 || clen == (size_t)-2)
 902 1041                  SETERROR(REG_ECHAR);
 903 1042          else
 904 1043                  SETERROR(REG_ECOLLATE);         /* neither */
 905 1044          return (0);
 906 1045  }
↓ open down ↓ 522 lines elided ↑ open up ↑
1429 1568                  case ORPAREN:
1430 1569                          break;
1431 1570                  case OQUEST_:           /* things that must be skipped */
1432 1571                  case OCH_:
1433 1572                          offset = altoffset(scan, offset);
1434 1573                          scan--;
1435 1574                          do {
1436 1575                                  scan += OPND(s);
1437 1576                                  s = *scan;
1438 1577                                  /* assert() interferes w debug printouts */
1439      -                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1440      -                                    OP(s) != OOR2) {
     1578 +                                if (OP(s) != (sop)O_QUEST &&
     1579 +                                    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2) {
1441 1580                                          g->iflags |= BAD;
1442 1581                                          return;
1443 1582                                  }
1444      -                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
     1583 +                        } while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
1445 1584                          /* FALLTHROUGH */
1446 1585                  case OBOW:              /* things that break a sequence */
1447 1586                  case OEOW:
1448 1587                  case OBOL:
1449 1588                  case OEOL:
1450 1589                  case O_QUEST:
1451 1590                  case O_CH:
1452 1591                  case OEND:
1453      -                        if (newlen > g->mlen) {         /* ends one */
     1592 +                        if (newlen > (sopno)g->mlen) {          /* ends one */
1454 1593                                  start = newstart;
1455 1594                                  g->mlen = newlen;
1456 1595                                  if (offset > -1) {
1457 1596                                          g->moffset += offset;
1458 1597                                          offset = newlen;
1459 1598                                  } else
1460 1599                                          g->moffset = offset;
1461 1600                          } else {
1462 1601                                  if (offset > -1)
1463 1602                                          offset += newlen;
1464 1603                          }
1465 1604                          newlen = 0;
1466 1605                          break;
1467 1606                  case OANY:
1468      -                        if (newlen > g->mlen) {         /* ends one */
     1607 +                        if (newlen > (sopno)g->mlen) {          /* ends one */
1469 1608                                  start = newstart;
1470 1609                                  g->mlen = newlen;
1471 1610                                  if (offset > -1) {
1472 1611                                          g->moffset += offset;
1473 1612                                          offset = newlen;
1474 1613                                  } else
1475 1614                                          g->moffset = offset;
1476 1615                          } else {
1477 1616                                  if (offset > -1)
1478 1617                                          offset += newlen;
1479 1618                          }
1480 1619                          if (offset > -1)
1481 1620                                  offset++;
1482 1621                          newlen = 0;
1483 1622                          break;
1484 1623                  case OANYOF:            /* may or may not invalidate offset */
1485 1624                          /* First, everything as OANY */
1486      -                        if (newlen > g->mlen) {         /* ends one */
     1625 +                        if (newlen > (sopno)g->mlen) {          /* ends one */
1487 1626                                  start = newstart;
1488 1627                                  g->mlen = newlen;
1489 1628                                  if (offset > -1) {
1490 1629                                          g->moffset += offset;
1491 1630                                          offset = newlen;
1492 1631                                  } else
1493 1632                                          g->moffset = offset;
1494 1633                          } else {
1495 1634                                  if (offset > -1)
1496 1635                                          offset += newlen;
↓ open down ↓ 3 lines elided ↑ open up ↑
1500 1639                          newlen = 0;
1501 1640                          break;
1502 1641                  toohard:
1503 1642                  default:
1504 1643                          /*
1505 1644                           * Anything here makes it impossible or too hard
1506 1645                           * to calculate the offset -- so we give up;
1507 1646                           * save the last known good offset, in case the
1508 1647                           * must sequence doesn't occur later.
1509 1648                           */
1510      -                        if (newlen > g->mlen) {         /* ends one */
     1649 +                        if (newlen > (sopno)g->mlen) {          /* ends one */
1511 1650                                  start = newstart;
1512 1651                                  g->mlen = newlen;
1513 1652                                  if (offset > -1)
1514 1653                                          g->moffset += offset;
1515 1654                                  else
1516 1655                                          g->moffset = offset;
1517 1656                          }
1518 1657                          offset = -1;
1519 1658                          newlen = 0;
1520 1659                          break;
↓ open down ↓ 39 lines elided ↑ open up ↑
1560 1699          int try;
1561 1700          sop s;
1562 1701  
1563 1702          /* If we gave up already on offsets, return */
1564 1703          if (offset == -1)
1565 1704                  return (-1);
1566 1705  
1567 1706          largest = 0;
1568 1707          try = 0;
1569 1708          s = *scan++;
1570      -        while (OP(s) != O_QUEST && OP(s) != O_CH) {
     1709 +        while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH) {
1571 1710                  switch (OP(s)) {
1572 1711                  case OOR1:
1573 1712                          if (try > largest)
1574 1713                                  largest = try;
1575 1714                          try = 0;
1576 1715                          break;
1577 1716                  case OQUEST_:
1578 1717                  case OCH_:
1579 1718                          try = altoffset(scan, try);
1580 1719                          if (try == -1)
1581 1720                                  return (-1);
1582 1721                          scan--;
1583 1722                          do {
1584 1723                                  scan += OPND(s);
1585 1724                                  s = *scan;
1586      -                                if (OP(s) != O_QUEST && OP(s) != O_CH &&
1587      -                                    OP(s) != OOR2)
     1725 +                                if (OP(s) != (sop)O_QUEST &&
     1726 +                                    OP(s) != (sop)O_CH && OP(s) != (sop)OOR2)
1588 1727                                          return (-1);
1589      -                        } while (OP(s) != O_QUEST && OP(s) != O_CH);
     1728 +                        } while (OP(s) != (sop)O_QUEST && OP(s) != (sop)O_CH);
1590 1729                          /*
1591 1730                           * We must skip to the next position, or we'll
1592 1731                           * leave altoffset() too early.
1593 1732                           */
1594 1733                          scan++;
1595 1734                          break;
1596 1735                  case OANYOF:
1597 1736                  case OCHAR:
1598 1737                  case OANY:
1599 1738                          try++;
↓ open down ↓ 184 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX