illumos-gate Wdiff usr/src/lib/libc/port/locale/regcomp.c

Print this page

2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libc/port/locale/regcomp.c
          +++ new/usr/src/lib/libc/port/locale/regcomp.c
   1    1  /*
        2 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   2    3   * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3    4   * Copyright 2012 Milan Jurik. All rights reserved.
   4    5   * Copyright (c) 1992, 1993, 1994 Henry Spencer.
   5    6   * Copyright (c) 1992, 1993, 1994
   6    7   *      The Regents of the University of California.  All rights reserved.
   7    8   *
   8    9   * This code is derived from software contributed to Berkeley by
   9   10   * Henry Spencer.
  10   11   *
  11   12   * Redistribution and use in source and binary forms, with or without

  12   13   * modification, are permitted provided that the following conditions
  13   14   * are met:
  14   15   * 1. Redistributions of source code must retain the above copyright
  15   16   *    notice, this list of conditions and the following disclaimer.
  16   17   * 2. Redistributions in binary form must reproduce the above copyright
  17   18   *    notice, this list of conditions and the following disclaimer in the
  18   19   *    documentation and/or other materials provided with the distribution.
  19   20   * 4. Neither the name of the University nor the names of its contributors
  20   21   *    may be used to endorse or promote products derived from this software
  21   22   *    without specific prior written permission.
  22   23   *
  23   24   * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  24   25   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25   26   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26   27   * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27   28   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28   29   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  29   30   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  30   31   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31   32   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  32   33   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  33   34   * SUCH DAMAGE.
  34   35   */
  35   36  
  36   37  #include "lint.h"
  37   38  #include "file64.h"
  38   39  #include <sys/types.h>
  39   40  #include <stdio.h>
  40   41  #include <string.h>
  41   42  #include <ctype.h>
  42   43  #include <limits.h>
  43   44  #include <stdlib.h>
  44   45  #include <regex.h>
  45   46  #include <wchar.h>
  46   47  #include <wctype.h>
  47   48  
  48   49  #include "runetype.h"
  49   50  #include "collate.h"
  50   51  
  51   52  #include "utils.h"
  52   53  #include "regex2.h"
  53   54  
  54   55  #include "cname.h"
  55   56  #include "mblocal.h"
  56   57  
  57   58  /*
  58   59   * parse structure, passed up and down to avoid global variables and
  59   60   * other clumsinesses
  60   61   */
  61   62  struct parse {
  62   63          char *next;             /* next character in RE */
  63   64          char *end;              /* end of string (-> NUL normally) */
  64   65          int error;              /* has an error been seen? */
  65   66          sop *strip;             /* malloced strip */
  66   67          sopno ssize;            /* malloced strip size (allocated) */
  67   68          sopno slen;             /* malloced strip length (used) */
  68   69          int ncsalloc;           /* number of csets allocated */
  69   70          struct re_guts *g;
  70   71  #define NPAREN  10              /* we need to remember () 1-9 for back refs */
  71   72          sopno pbegin[NPAREN];   /* -> ( ([0] unused) */
  72   73          sopno pend[NPAREN];     /* -> ) ([0] unused) */
  73   74  };
  74   75  
  75   76  /* ========= begin header generated by ./mkh ========= */
  76   77  #ifdef __cplusplus
  77   78  extern "C" {
  78   79  #endif
  79   80  
  80   81  /* === regcomp.c === */
  81   82  static void p_ere(struct parse *p, wint_t stop);
  82   83  static void p_ere_exp(struct parse *p);
  83   84  static void p_str(struct parse *p);
  84   85  static void p_bre(struct parse *p, wint_t end1, wint_t end2);
  85   86  static int p_simp_re(struct parse *p, int starordinary);
  86   87  static int p_count(struct parse *p);
  87   88  static void p_bracket(struct parse *p);
  88   89  static void p_b_term(struct parse *p, cset *cs);
  89   90  static void p_b_cclass(struct parse *p, cset *cs);
  90   91  static void p_b_eclass(struct parse *p, cset *cs);
  91   92  static wint_t p_b_symbol(struct parse *p);
  92   93  static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
  93   94  static wint_t othercase(wint_t ch);
  94   95  static void bothcases(struct parse *p, wint_t ch);
  95   96  static void ordinary(struct parse *p, wint_t ch);
  96   97  static void nonnewline(struct parse *p);
  97   98  static void repeat(struct parse *p, sopno start, int from, int to);
  98   99  static int seterr(struct parse *p, int e);
  99  100  static cset *allocset(struct parse *p);
 100  101  static void freeset(struct parse *p, cset *cs);
 101  102  static void CHadd(struct parse *p, cset *cs, wint_t ch);
 102  103  static void CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max);
 103  104  static void CHaddtype(struct parse *p, cset *cs, wctype_t wct);
 104  105  static wint_t singleton(cset *cs);
 105  106  static sopno dupl(struct parse *p, sopno start, sopno finish);
 106  107  static void doemit(struct parse *p, sop op, size_t opnd);
 107  108  static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
 108  109  static void dofwd(struct parse *p, sopno pos, sop value);
 109  110  static void enlarge(struct parse *p, sopno size);
 110  111  static void stripsnug(struct parse *p, struct re_guts *g);
 111  112  static void findmust(struct parse *p, struct re_guts *g);
 112  113  static int altoffset(sop *scan, int offset);
 113  114  static void computejumps(struct parse *p, struct re_guts *g);
 114  115  static void computematchjumps(struct parse *p, struct re_guts *g);
 115  116  static sopno pluscount(struct parse *p, struct re_guts *g);
 116  117  static wint_t wgetnext(struct parse *p);
 117  118  
 118  119  #ifdef __cplusplus
 119  120  }
 120  121  #endif
 121  122  /* ========= end header generated by ./mkh ========= */
 122  123  
 123  124  static char nuls[10];           /* place to point scanner in event of error */
 124  125  
 125  126  /*
 126  127   * macros for use with parse structure
 127  128   * BEWARE:  these know that the parse structure is named `p' !!!
 128  129   */
 129  130  #define PEEK()  (*p->next)
 130  131  #define PEEK2() (*(p->next+1))
 131  132  #define MORE()  (p->next < p->end)
 132  133  #define MORE2() (p->next+1 < p->end)
 133  134  #define SEE(c)  (MORE() && PEEK() == (c))
 134  135  #define SEETWO(a, b)    (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
 135  136  #define EAT(c)  ((SEE(c)) ? (NEXT(), 1) : 0)
 136  137  #define EATTWO(a, b)    ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
 137  138  #define NEXT()  (p->next++)
 138  139  #define NEXT2() (p->next += 2)
 139  140  #define NEXTn(n)        (p->next += (n))
 140  141  #define GETNEXT()       (*p->next++)
 141  142  #define WGETNEXT()      wgetnext(p)
 142  143  #define SETERROR(e)     ((void)seterr(p, (e)))
 143  144  #define REQUIRE(co, e)  ((co) || seterr(p, e))
 144  145  #define MUSTSEE(c, e)   (REQUIRE(MORE() && PEEK() == (c), e))
 145  146  #define MUSTEAT(c, e)   (REQUIRE(MORE() && GETNEXT() == (c), e))
 146  147  #define MUSTNOTSEE(c, e)        (REQUIRE(!MORE() || PEEK() != (c), e))
 147  148  #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
 148  149  #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
 149  150  #define AHEAD(pos)              dofwd(p, pos, HERE()-(pos))
 150  151  #define ASTERN(sop, pos)        EMIT(sop, HERE()-pos)
 151  152  #define HERE()          (p->slen)
 152  153  #define THERE()         (p->slen - 1)
 153  154  #define THERETHERE()    (p->slen - 2)
 154  155  #define DROP(n) (p->slen -= (n))
 155  156  
 156  157  #ifndef NDEBUG
 157  158  static int never = 0;           /* for use in asserts; shuts lint up */
 158  159  #else
 159  160  #define never   0               /* some <assert.h>s have bugs too */
 160  161  #endif
 161  162  
 162  163  /*
 163  164   * regcomp - interface for parser and compilation
 164  165   */
 165  166  int                             /* 0 success, otherwise REG_something */
 166  167  regcomp(regex_t *_RESTRICT_KYWD preg,
 167  168          const char *_RESTRICT_KYWD pattern,
 168  169          int cflags)
 169  170  {
 170  171          struct parse pa;
 171  172          struct re_guts *g;
 172  173          struct parse *p = &pa;
 173  174          int i;
 174  175          size_t len;
 175  176  #ifdef REDEBUG
 176  177  #define GOODFLAGS(f)    (f)
 177  178  #else
 178  179  #define GOODFLAGS(f)    ((f)&~REG_DUMP)
 179  180  #endif
 180  181  
 181  182          /* We had REG_INVARG, but we don't have that on Solaris. */
 182  183          cflags = GOODFLAGS(cflags);
 183  184          if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
 184  185                  return (REG_EFATAL);
 185  186  
 186  187          if (cflags&REG_PEND) {
 187  188                  if (preg->re_endp < pattern)
 188  189                          return (REG_EFATAL);
 189  190                  len = preg->re_endp - pattern;
 190  191          } else
 191  192                  len = strlen((char *)pattern);
 192  193  
 193  194          /* do the mallocs early so failure handling is easy */
 194  195          g = (struct re_guts *)malloc(sizeof (struct re_guts));
 195  196          if (g == NULL)
 196  197                  return (REG_ESPACE);
 197  198          p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
 198  199          p->strip = (sop *)malloc(p->ssize * sizeof (sop));
 199  200          p->slen = 0;
 200  201          if (p->strip == NULL) {
 201  202                  free((char *)g);
 202  203                  return (REG_ESPACE);
 203  204          }
 204  205  
 205  206          /* set things up */
 206  207          p->g = g;
 207  208          p->next = (char *)pattern;      /* convenience; we do not modify it */
 208  209          p->end = p->next + len;
 209  210          p->error = 0;
 210  211          p->ncsalloc = 0;
 211  212          for (i = 0; i < NPAREN; i++) {
 212  213                  p->pbegin[i] = 0;
 213  214                  p->pend[i] = 0;
 214  215          }
 215  216          g->sets = NULL;
 216  217          g->ncsets = 0;
 217  218          g->cflags = cflags;
 218  219          g->iflags = 0;
 219  220          g->nbol = 0;
 220  221          g->neol = 0;
 221  222          g->must = NULL;
 222  223          g->moffset = -1;
 223  224          g->charjump = NULL;
 224  225          g->matchjump = NULL;
 225  226          g->mlen = 0;
 226  227          g->nsub = 0;
 227  228          g->backrefs = 0;
 228  229  
 229  230          /* do it */
 230  231          EMIT(OEND, 0);
 231  232          g->firststate = THERE();
 232  233          if (cflags&REG_EXTENDED)
 233  234                  p_ere(p, OUT);
 234  235          else if (cflags&REG_NOSPEC)
 235  236                  p_str(p);
 236  237          else
 237  238                  p_bre(p, OUT, OUT);
 238  239          EMIT(OEND, 0);
 239  240          g->laststate = THERE();
 240  241  
 241  242          /* tidy up loose ends and fill things in */
 242  243          stripsnug(p, g);
 243  244          findmust(p, g);
 244  245          /*
 245  246           * only use Boyer-Moore algorithm if the pattern is bigger
 246  247           * than three characters
 247  248           */
 248  249          if (g->mlen > 3) {
 249  250                  computejumps(p, g);
 250  251                  computematchjumps(p, g);
 251  252                  if (g->matchjump == NULL && g->charjump != NULL) {
 252  253                          free(g->charjump);
 253  254                          g->charjump = NULL;
 254  255                  }
 255  256          }
 256  257          g->nplus = pluscount(p, g);
 257  258          g->magic = MAGIC2;
 258  259          preg->re_nsub = g->nsub;
 259  260          preg->re_g = g;
 260  261          preg->re_magic = MAGIC1;
 261  262  #ifndef REDEBUG
 262  263          /* not debugging, so can't rely on the assert() in regexec() */
 263  264          if (g->iflags&BAD)
 264  265                  SETERROR(REG_EFATAL);
 265  266  #endif
 266  267  
 267  268          /* win or lose, we're done */
 268  269          if (p->error != 0)      /* lose */
 269  270                  regfree(preg);
 270  271          return (p->error);
 271  272  }
 272  273  
 273  274  /*
 274  275   * p_ere - ERE parser top level, concatenation and alternation
 275  276   */
 276  277  static void
 277  278  p_ere(struct parse *p,
 278  279      wint_t stop)                /* character this ERE should end at */
 279  280  {
 280  281          char c;
 281  282          sopno prevback;
 282  283          sopno prevfwd;
 283  284          sopno conc;
 284  285          int first = 1;          /* is this the first alternative? */
 285  286  
 286  287          for (;;) {
 287  288                  /* do a bunch of concatenated expressions */
 288  289                  conc = HERE();
 289  290                  while (MORE() && (c = PEEK()) != '|' && c != stop)
 290  291                          p_ere_exp(p);
 291  292                  /* require nonempty */
 292  293                  (void) REQUIRE(HERE() != conc, REG_BADPAT);
 293  294  
 294  295                  if (!EAT('|'))
 295  296                          break;          /* NOTE BREAK OUT */
 296  297  
 297  298                  if (first) {
 298  299                          INSERT(OCH_, conc);     /* offset is wrong */
 299  300                          prevfwd = conc;
 300  301                          prevback = conc;
 301  302                          first = 0;
 302  303                  }
 303  304                  ASTERN(OOR1, prevback);
 304  305                  prevback = THERE();
 305  306                  AHEAD(prevfwd);                 /* fix previous offset */
 306  307                  prevfwd = HERE();
 307  308                  EMIT(OOR2, 0);                  /* offset is very wrong */
 308  309          }
 309  310  
 310  311          if (!first) {           /* tail-end fixups */
 311  312                  AHEAD(prevfwd);
 312  313                  ASTERN(O_CH, prevback);
 313  314          }
 314  315  
 315  316          assert(!MORE() || SEE(stop));
 316  317  }
 317  318  
 318  319  /*
 319  320   * p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
 320  321   */
 321  322  static void
 322  323  p_ere_exp(struct parse *p)
 323  324  {
 324  325          char c;
 325  326          wint_t wc;
 326  327          sopno pos;
 327  328          int count;
 328  329          int count2;
 329  330          sopno subno;
 330  331          int wascaret = 0;
 331  332  
 332  333          assert(MORE());         /* caller should have ensured this */
 333  334          c = GETNEXT();
 334  335  
 335  336          pos = HERE();
 336  337          switch (c) {
 337  338          case '(':
 338  339                  (void) REQUIRE(MORE(), REG_EPAREN);
 339  340                  p->g->nsub++;
 340  341                  subno = p->g->nsub;
 341  342                  if (subno < NPAREN)
 342  343                          p->pbegin[subno] = HERE();
 343  344                  EMIT(OLPAREN, subno);
 344  345                  if (!SEE(')'))
 345  346                          p_ere(p, ')');
 346  347                  if (subno < NPAREN) {
 347  348                          p->pend[subno] = HERE();
 348  349                          assert(p->pend[subno] != 0);
 349  350                  }
 350  351                  EMIT(ORPAREN, subno);
 351  352                  (void) MUSTEAT(')', REG_EPAREN);
 352  353                  break;
 353  354  #ifndef POSIX_MISTAKE
 354  355          case ')':               /* happens only if no current unmatched ( */
 355  356                  /*
 356  357                   * You may ask, why the ifndef?  Because I didn't notice
 357  358                   * this until slightly too late for 1003.2, and none of the
 358  359                   * other 1003.2 regular-expression reviewers noticed it at
 359  360                   * all.  So an unmatched ) is legal POSIX, at least until
 360  361                   * we can get it fixed.
 361  362                   */
 362  363                  SETERROR(REG_EPAREN);
 363  364                  break;
 364  365  #endif
 365  366          case '^':
 366  367                  EMIT(OBOL, 0);
 367  368                  p->g->iflags |= USEBOL;
 368  369                  p->g->nbol++;
 369  370                  wascaret = 1;
 370  371                  break;
 371  372          case '$':
 372  373                  EMIT(OEOL, 0);
 373  374                  p->g->iflags |= USEEOL;
 374  375                  p->g->neol++;
 375  376                  break;
 376  377          case '|':
 377  378                  SETERROR(REG_BADPAT);
 378  379                  break;
 379  380          case '*':
 380  381          case '+':
 381  382          case '?':
 382  383                  SETERROR(REG_BADRPT);
 383  384                  break;
 384  385          case '.':
 385  386                  if (p->g->cflags&REG_NEWLINE)
 386  387                          nonnewline(p);
 387  388                  else
 388  389                          EMIT(OANY, 0);
 389  390                  break;
 390  391          case '[':
 391  392                  p_bracket(p);
 392  393                  break;
 393  394          case '\\':
 394  395                  (void) REQUIRE(MORE(), REG_EESCAPE);
 395  396                  wc = WGETNEXT();
 396  397                  switch (wc) {
 397  398                  case '<':
 398  399                          EMIT(OBOW, 0);
 399  400                          break;
 400  401                  case '>':
 401  402                          EMIT(OEOW, 0);
 402  403                          break;
 403  404                  default:
 404  405                          ordinary(p, wc);
 405  406                          break;
 406  407                  }
 407  408                  break;
 408  409          case '{':               /* okay as ordinary except if digit follows */
 409  410                  (void) REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
 410  411                  /* FALLTHROUGH */
 411  412          default:
 412  413                  p->next--;
 413  414                  wc = WGETNEXT();
 414  415                  ordinary(p, wc);
 415  416                  break;
 416  417          }
 417  418  
 418  419          if (!MORE())
 419  420                  return;
 420  421          c = PEEK();
 421  422          /* we call { a repetition if followed by a digit */
 422  423          if (!(c == '*' || c == '+' || c == '?' ||
 423  424              (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
 424  425                  return;         /* no repetition, we're done */
 425  426          NEXT();
 426  427  
 427  428          (void) REQUIRE(!wascaret, REG_BADRPT);
 428  429          switch (c) {
 429  430          case '*':       /* implemented as +? */
 430  431                  /* this case does not require the (y|) trick, noKLUDGE */
 431  432                  INSERT(OPLUS_, pos);
 432  433                  ASTERN(O_PLUS, pos);
 433  434                  INSERT(OQUEST_, pos);
 434  435                  ASTERN(O_QUEST, pos);
 435  436                  break;
 436  437          case '+':
 437  438                  INSERT(OPLUS_, pos);
 438  439                  ASTERN(O_PLUS, pos);
 439  440                  break;
 440  441          case '?':
 441  442                  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
 442  443                  INSERT(OCH_, pos);              /* offset slightly wrong */
 443  444                  ASTERN(OOR1, pos);              /* this one's right */
 444  445                  AHEAD(pos);                     /* fix the OCH_ */
 445  446                  EMIT(OOR2, 0);                  /* offset very wrong... */
 446  447                  AHEAD(THERE());                 /* ...so fix it */
 447  448                  ASTERN(O_CH, THERETHERE());
 448  449                  break;
 449  450          case '{':
 450  451                  count = p_count(p);
 451  452                  if (EAT(',')) {
 452  453                          if (isdigit((uch)PEEK())) {
 453  454                                  count2 = p_count(p);
 454  455                                  (void) REQUIRE(count <= count2, REG_BADBR);
 455  456                          } else          /* single number with comma */
 456  457                                  count2 = INFINITY;
 457  458                  } else          /* just a single number */
 458  459                          count2 = count;
 459  460                  repeat(p, pos, count, count2);
 460  461                  if (!EAT('}')) {        /* error heuristics */
 461  462                          while (MORE() && PEEK() != '}')
 462  463                                  NEXT();
 463  464                          (void) REQUIRE(MORE(), REG_EBRACE);
 464  465                          SETERROR(REG_BADBR);
 465  466                  }
 466  467                  break;
 467  468          }
 468  469  
 469  470          if (!MORE())
 470  471                  return;
 471  472          c = PEEK();
 472  473          if (!(c == '*' || c == '+' || c == '?' ||
 473  474              (c == '{' && MORE2() && isdigit((uch)PEEK2()))))
 474  475                  return;
 475  476          SETERROR(REG_BADRPT);
 476  477  }
 477  478  
 478  479  /*
 479  480   * p_str - string (no metacharacters) "parser"
 480  481   */
 481  482  static void
 482  483  p_str(struct parse *p)
 483  484  {
 484  485          (void) REQUIRE(MORE(), REG_BADPAT);
 485  486          while (MORE())
 486  487                  ordinary(p, WGETNEXT());
 487  488  }
 488  489  
 489  490  /*
 490  491   * p_bre - BRE parser top level, anchoring and concatenation
 491  492   * Giving end1 as OUT essentially eliminates the end1/end2 check.
 492  493   *
 493  494   * This implementation is a bit of a kludge, in that a trailing $ is first
 494  495   * taken as an ordinary character and then revised to be an anchor.
 495  496   * The amount of lookahead needed to avoid this kludge is excessive.
 496  497   */
 497  498  static void
 498  499  p_bre(struct parse *p,
 499  500      wint_t end1,                /* first terminating character */
 500  501      wint_t end2)                /* second terminating character */
 501  502  {
 502  503          sopno start = HERE();
 503  504          int first = 1;                  /* first subexpression? */
 504  505          int wasdollar = 0;
 505  506  
 506  507          if (EAT('^')) {
 507  508                  EMIT(OBOL, 0);
 508  509                  p->g->iflags |= USEBOL;
 509  510                  p->g->nbol++;
 510  511          }
 511  512          while (MORE() && !SEETWO(end1, end2)) {
 512  513                  wasdollar = p_simp_re(p, first);
 513  514                  first = 0;
 514  515          }
 515  516          if (wasdollar) {        /* oops, that was a trailing anchor */
 516  517                  DROP(1);
 517  518                  EMIT(OEOL, 0);
 518  519                  p->g->iflags |= USEEOL;
 519  520                  p->g->neol++;
 520  521          }
 521  522  
 522  523          (void) REQUIRE(HERE() != start, REG_BADPAT);    /* require nonempty */
 523  524  }
 524  525  
 525  526  /*
 526  527   * p_simp_re - parse a simple RE, an atom possibly followed by a repetition
 527  528   */
 528  529  static int                      /* was the simple RE an unbackslashed $? */
 529  530  p_simp_re(struct parse *p,
 530  531          int starordinary)       /* is a leading * an ordinary character? */
 531  532  {
 532  533          int c;
 533  534          int count;
 534  535          int count2;
 535  536          sopno pos;
 536  537          int i;
 537  538          wint_t wc;
 538  539          sopno subno;
 539  540  #define BACKSL  (1<<CHAR_BIT)
 540  541  
 541  542          pos = HERE();           /* repetion op, if any, covers from here */
 542  543  
 543  544          assert(MORE());         /* caller should have ensured this */
 544  545          c = GETNEXT();
 545  546          if (c == '\\') {
 546  547                  (void) REQUIRE(MORE(), REG_EESCAPE);
 547  548                  c = BACKSL | GETNEXT();
 548  549          }
 549  550          switch (c) {
 550  551          case '.':
 551  552                  if (p->g->cflags&REG_NEWLINE)
 552  553                          nonnewline(p);
 553  554                  else
 554  555                          EMIT(OANY, 0);
 555  556                  break;
 556  557          case '[':
 557  558                  p_bracket(p);
 558  559                  break;
 559  560          case BACKSL|'<':
 560  561                  EMIT(OBOW, 0);
 561  562                  break;
 562  563          case BACKSL|'>':
 563  564                  EMIT(OEOW, 0);
 564  565                  break;
 565  566          case BACKSL|'{':
 566  567                  SETERROR(REG_BADRPT);
 567  568                  break;
 568  569          case BACKSL|'(':
 569  570                  p->g->nsub++;
 570  571                  subno = p->g->nsub;
 571  572                  if (subno < NPAREN)
 572  573                          p->pbegin[subno] = HERE();
 573  574                  EMIT(OLPAREN, subno);
 574  575                  /* the MORE here is an error heuristic */
 575  576                  if (MORE() && !SEETWO('\\', ')'))
 576  577                          p_bre(p, '\\', ')');
 577  578                  if (subno < NPAREN) {
 578  579                          p->pend[subno] = HERE();
 579  580                          assert(p->pend[subno] != 0);
 580  581                  }
 581  582                  EMIT(ORPAREN, subno);
 582  583                  (void) REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
 583  584                  break;
 584  585          case BACKSL|')':        /* should not get here -- must be user */
 585  586          case BACKSL|'}':
 586  587                  SETERROR(REG_EPAREN);
 587  588                  break;
 588  589          case BACKSL|'1':
 589  590          case BACKSL|'2':
 590  591          case BACKSL|'3':
 591  592          case BACKSL|'4':
 592  593          case BACKSL|'5':
 593  594          case BACKSL|'6':
 594  595          case BACKSL|'7':
 595  596          case BACKSL|'8':
 596  597          case BACKSL|'9':
 597  598                  i = (c&~BACKSL) - '0';
 598  599                  assert(i < NPAREN);
 599  600                  if (p->pend[i] != 0) {
 600  601                          assert(i <= p->g->nsub);
 601  602                          EMIT(OBACK_, i);
 602  603                          assert(p->pbegin[i] != 0);
 603  604                          assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
 604  605                          assert(OP(p->strip[p->pend[i]]) == ORPAREN);
 605  606                          (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
 606  607                          EMIT(O_BACK, i);
 607  608                  } else
 608  609                          SETERROR(REG_ESUBREG);
 609  610                  p->g->backrefs = 1;
 610  611                  break;
 611  612          case '*':
 612  613                  (void) REQUIRE(starordinary, REG_BADRPT);
 613  614                  /* FALLTHROUGH */
 614  615          default:
 615  616                  p->next--;
 616  617                  wc = WGETNEXT();
 617  618                  ordinary(p, wc);
 618  619                  break;
 619  620          }
 620  621  
 621  622          if (EAT('*')) {         /* implemented as +? */
 622  623                  /* this case does not require the (y|) trick, noKLUDGE */
 623  624                  INSERT(OPLUS_, pos);
 624  625                  ASTERN(O_PLUS, pos);
 625  626                  INSERT(OQUEST_, pos);
 626  627                  ASTERN(O_QUEST, pos);
 627  628          } else if (EATTWO('\\', '{')) {
 628  629                  count = p_count(p);
 629  630                  if (EAT(',')) {
 630  631                          if (MORE() && isdigit((uch)PEEK())) {
 631  632                                  count2 = p_count(p);
 632  633                                  (void) REQUIRE(count <= count2, REG_BADBR);
 633  634                          } else          /* single number with comma */
 634  635                                  count2 = INFINITY;
 635  636                  } else          /* just a single number */
 636  637                          count2 = count;
 637  638                  repeat(p, pos, count, count2);
 638  639                  if (!EATTWO('\\', '}')) {       /* error heuristics */
 639  640                          while (MORE() && !SEETWO('\\', '}'))
 640  641                                  NEXT();
 641  642                          (void) REQUIRE(MORE(), REG_EBRACE);
 642  643                          SETERROR(REG_BADBR);
 643  644                  }
 644  645          } else if (c == '$')    /* $ (but not \$) ends it */
 645  646                  return (1);
 646  647  
 647  648          return (0);
 648  649  }
 649  650  
 650  651  /*
 651  652   * p_count - parse a repetition count
 652  653   */
 653  654  static int                      /* the value */
 654  655  p_count(struct parse *p)
 655  656  {
 656  657          int count = 0;
 657  658          int ndigits = 0;
 658  659  
 659  660          while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
 660  661                  count = count*10 + (GETNEXT() - '0');
 661  662                  ndigits++;
 662  663          }
 663  664  
 664  665          (void) REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
 665  666          return (count);
 666  667  }
 667  668  
 668  669  /*
 669  670   * p_bracket - parse a bracketed character list
 670  671   */
 671  672  static void
 672  673  p_bracket(struct parse *p)
 673  674  {
 674  675          cset *cs;
 675  676          wint_t ch;
 676  677  
 677  678          /* Dept of Truly Sickening Special-Case Kludges */
 678  679          if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
 679  680                  EMIT(OBOW, 0);
 680  681                  NEXTn(6);
 681  682                  return;
 682  683          }
 683  684          if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
 684  685                  EMIT(OEOW, 0);
 685  686                  NEXTn(6);
 686  687                  return;
 687  688          }
 688  689  
 689  690          if ((cs = allocset(p)) == NULL)
 690  691                  return;
 691  692  
 692  693          if (p->g->cflags&REG_ICASE)
 693  694                  cs->icase = 1;
 694  695          if (EAT('^'))
 695  696                  cs->invert = 1;
 696  697          if (EAT(']'))
 697  698                  CHadd(p, cs, ']');
 698  699          else if (EAT('-'))
 699  700                  CHadd(p, cs, '-');
 700  701          while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
 701  702                  p_b_term(p, cs);
 702  703          if (EAT('-'))
 703  704                  CHadd(p, cs, '-');
 704  705          (void) MUSTEAT(']', REG_EBRACK);
 705  706  
 706  707          if (p->error != 0)      /* don't mess things up further */
 707  708                  return;
 708  709  
 709  710          if (cs->invert && p->g->cflags&REG_NEWLINE)
 710  711                  cs->bmp['\n' >> 3] |= 1 << ('\n' & 7);
 711  712  
 712  713          if ((ch = singleton(cs)) != OUT) {      /* optimize singleton sets */
 713  714                  ordinary(p, ch);
 714  715                  freeset(p, cs);
 715  716          } else
 716  717                  EMIT(OANYOF, (int)(cs - p->g->sets));
 717  718  }

↓ open down ↓

706 lines elided

↑ open up ↑

 718  719  
 719  720  /*
 720  721   * p_b_term - parse one term of a bracketed character list
 721  722   */
 722  723  static void
 723  724  p_b_term(struct parse *p, cset *cs)
 724  725  {
 725  726          char c;
 726  727          wint_t start, finish;
 727  728          wint_t i;
      729 +        locale_t loc = uselocale(NULL);
 728  730  
 729  731          /* classify what we've got */
 730  732          switch ((MORE()) ? PEEK() : '\0') {
 731  733          case '[':
 732  734                  c = (MORE2()) ? PEEK2() : '\0';
 733  735                  break;
 734  736          case '-':
 735  737                  SETERROR(REG_ERANGE);
 736  738                  return;                 /* NOTE RETURN */
 737  739          default:

 738  740                  c = '\0';
 739  741                  break;
 740  742          }
 741  743  
 742  744          switch (c) {
 743  745          case ':':               /* character class */
 744  746                  NEXT2();
 745  747                  (void) REQUIRE(MORE(), REG_EBRACK);
 746  748                  c = PEEK();
 747  749                  (void) REQUIRE(c != '-' && c != ']', REG_ECTYPE);
 748  750                  p_b_cclass(p, cs);
 749  751                  (void) REQUIRE(MORE(), REG_EBRACK);
 750  752                  (void) REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
 751  753                  break;
 752  754          case '=':               /* equivalence class */
 753  755                  NEXT2();
 754  756                  (void) REQUIRE(MORE(), REG_EBRACK);
 755  757                  c = PEEK();
 756  758                  (void) REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
 757  759                  p_b_eclass(p, cs);
 758  760                  (void) REQUIRE(MORE(), REG_EBRACK);
 759  761                  (void) REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
 760  762                  break;
 761  763          default:                /* symbol, ordinary character, or range */
 762  764                  start = p_b_symbol(p);
 763  765                  if (SEE('-') && MORE2() && PEEK2() != ']') {
 764  766                          /* range */

↓ open down ↓

27 lines elided

↑ open up ↑

 765  767                          NEXT();
 766  768                          if (EAT('-'))
 767  769                                  finish = '-';
 768  770                          else
 769  771                                  finish = p_b_symbol(p);
 770  772                  } else
 771  773                          finish = start;
 772  774                  if (start == finish)
 773  775                          CHadd(p, cs, start);
 774  776                  else {
 775      -                        if (_collate_load_error) {
      777 +                        if (loc->collate->lc_is_posix) {
 776  778                                  (void) REQUIRE((uch)start <= (uch)finish,
 777  779                                      REG_ERANGE);
 778  780                                  CHaddrange(p, cs, start, finish);
 779  781                          } else {
 780  782                                  (void) REQUIRE(_collate_range_cmp(start,
 781      -                                    finish) <= 0, REG_ERANGE);
      783 +                                    finish, loc) <= 0, REG_ERANGE);
 782  784                                  for (i = 0; i <= UCHAR_MAX; i++) {
 783      -                                        if (_collate_range_cmp(start, i) <= 0 &&
 784      -                                            _collate_range_cmp(i, finish) <= 0)
      785 +                                        if (_collate_range_cmp(start, i, loc)
      786 +                                            <= 0 &&
      787 +                                            _collate_range_cmp(i, finish, loc)
      788 +                                            <= 0)
 785  789                                                  CHadd(p, cs, i);
 786  790                                  }
 787  791                          }
 788  792                  }
 789  793                  break;
 790  794          }
 791  795  }
 792  796  
 793  797  /*
 794  798   * p_b_cclass - parse a character-class name and deal with it

 795  799   */
 796  800  static void
 797  801  p_b_cclass(struct parse *p, cset *cs)
 798  802  {
 799  803          char *sp = p->next;
 800  804          size_t len;
 801  805          wctype_t wct;
 802  806          char clname[16];
 803  807  
 804  808          while (MORE() && isalpha((uch)PEEK()))
 805  809                  NEXT();
 806  810          len = p->next - sp;
 807  811          if (len >= sizeof (clname) - 1) {
 808  812                  SETERROR(REG_ECTYPE);
 809  813                  return;
 810  814          }
 811  815          (void) memcpy(clname, sp, len);
 812  816          clname[len] = '\0';
 813  817          if ((wct = wctype(clname)) == 0) {
 814  818                  SETERROR(REG_ECTYPE);
 815  819                  return;
 816  820          }
 817  821          CHaddtype(p, cs, wct);
 818  822  }
 819  823  
 820  824  /*
 821  825   * p_b_eclass - parse an equivalence-class name and deal with it
 822  826   *
 823  827   * This implementation is incomplete. xxx
 824  828   */
 825  829  static void
 826  830  p_b_eclass(struct parse *p, cset *cs)
 827  831  {
 828  832          wint_t c;
 829  833  
 830  834          c = p_b_coll_elem(p, '=');
 831  835          CHadd(p, cs, c);
 832  836  }
 833  837  
 834  838  /*
 835  839   * p_b_symbol - parse a character or [..]ed multicharacter collating symbol
 836  840   */
 837  841  static wint_t                   /* value of symbol */
 838  842  p_b_symbol(struct parse *p)
 839  843  {
 840  844          wint_t value;
 841  845  
 842  846          (void) REQUIRE(MORE(), REG_EBRACK);
 843  847          if (!EATTWO('[', '.'))
 844  848                  return (WGETNEXT());
 845  849  
 846  850          /* collating symbol */
 847  851          value = p_b_coll_elem(p, '.');
 848  852          (void) REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
 849  853          return (value);
 850  854  }
 851  855  
 852  856  /*
 853  857   * p_b_coll_elem - parse a collating-element name and look it up
 854  858   */
 855  859  static wint_t                   /* value of collating element */
 856  860  p_b_coll_elem(struct parse *p,
 857  861          wint_t endc)            /* name ended by endc,']' */
 858  862  {
 859  863          char *sp = p->next;
 860  864          struct cname *cp;
 861  865          int len;
 862  866          mbstate_t mbs;
 863  867          wchar_t wc;
 864  868          size_t clen;
 865  869  
 866  870          while (MORE() && !SEETWO(endc, ']'))
 867  871                  NEXT();
 868  872          if (!MORE()) {
 869  873                  SETERROR(REG_EBRACK);
 870  874                  return (0);
 871  875          }
 872  876          len = p->next - sp;
 873  877          for (cp = cnames; cp->name != NULL; cp++)
 874  878                  if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
 875  879                          return (cp->code);      /* known name */
 876  880          (void) memset(&mbs, 0, sizeof (mbs));
 877  881          if ((clen = mbrtowc(&wc, sp, len, &mbs)) == len)
 878  882                  return (wc);                    /* single character */
 879  883          else if (clen == (size_t)-1 || clen == (size_t)-2)
 880  884                  SETERROR(REG_ECHAR);
 881  885          else
 882  886                  SETERROR(REG_ECOLLATE);         /* neither */
 883  887          return (0);
 884  888  }
 885  889  
 886  890  /*
 887  891   * othercase - return the case counterpart of an alphabetic
 888  892   */
 889  893  static wint_t                   /* if no counterpart, return ch */
 890  894  othercase(wint_t ch)
 891  895  {
 892  896          assert(iswalpha(ch));
 893  897          if (iswupper(ch))
 894  898                  return (towlower(ch));
 895  899          else if (iswlower(ch))
 896  900                  return (towupper(ch));
 897  901          else                    /* peculiar, but could happen */
 898  902                  return (ch);
 899  903  }
 900  904  
 901  905  /*
 902  906   * bothcases - emit a dualcase version of a two-case character
 903  907   *
 904  908   * Boy, is this implementation ever a kludge...
 905  909   */
 906  910  static void
 907  911  bothcases(struct parse *p, wint_t ch)
 908  912  {
 909  913          char *oldnext = p->next;
 910  914          char *oldend = p->end;
 911  915          char bracket[3 + MB_LEN_MAX];
 912  916          size_t n;
 913  917          mbstate_t mbs;
 914  918  
 915  919          assert(othercase(ch) != ch);    /* p_bracket() would recurse */
 916  920          p->next = bracket;
 917  921          (void) memset(&mbs, 0, sizeof (mbs));
 918  922          n = wcrtomb(bracket, ch, &mbs);
 919  923          assert(n != (size_t)-1);
 920  924          bracket[n] = ']';
 921  925          bracket[n + 1] = '\0';
 922  926          p->end = bracket+n+1;
 923  927          p_bracket(p);
 924  928          assert(p->next == p->end);
 925  929          p->next = oldnext;
 926  930          p->end = oldend;
 927  931  }
 928  932  
 929  933  /*
 930  934   * ordinary - emit an ordinary character
 931  935   */
 932  936  static void
 933  937  ordinary(struct parse *p, wint_t ch)
 934  938  {
 935  939          cset *cs;
 936  940  
 937  941          if ((p->g->cflags&REG_ICASE) && iswalpha(ch) && othercase(ch) != ch)
 938  942                  bothcases(p, ch);
 939  943          else if ((ch & OPDMASK) == ch)
 940  944                  EMIT(OCHAR, ch);
 941  945          else {
 942  946                  /*
 943  947                   * Kludge: character is too big to fit into an OCHAR operand.
 944  948                   * Emit a singleton set.
 945  949                   */
 946  950                  if ((cs = allocset(p)) == NULL)
 947  951                          return;
 948  952                  CHadd(p, cs, ch);
 949  953                  EMIT(OANYOF, (int)(cs - p->g->sets));
 950  954          }
 951  955  }
 952  956  
 953  957  /*
 954  958   * nonnewline - emit REG_NEWLINE version of OANY
 955  959   *
 956  960   * Boy, is this implementation ever a kludge...
 957  961   */
 958  962  static void
 959  963  nonnewline(struct parse *p)
 960  964  {
 961  965          char *oldnext = p->next;
 962  966          char *oldend = p->end;
 963  967          char bracket[4];
 964  968  
 965  969          p->next = bracket;
 966  970          p->end = bracket+3;
 967  971          bracket[0] = '^';
 968  972          bracket[1] = '\n';
 969  973          bracket[2] = ']';
 970  974          bracket[3] = '\0';
 971  975          p_bracket(p);
 972  976          assert(p->next == bracket+3);
 973  977          p->next = oldnext;
 974  978          p->end = oldend;
 975  979  }
 976  980  
 977  981  /*
 978  982   * repeat - generate code for a bounded repetition, recursively if needed
 979  983   */
 980  984  static void
 981  985  repeat(struct parse *p,
 982  986      sopno start,                /* operand from here to end of strip */
 983  987      int from,                   /* repeated from this number */
 984  988      int to)                     /* to this number of times (maybe INFINITY) */
 985  989  {
 986  990          sopno finish = HERE();
 987  991  #define N       2
 988  992  #define INF     3
 989  993  #define REP(f, t)       ((f)*8 + (t))
 990  994  #define MAP(n)  (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
 991  995          sopno copy;
 992  996  
 993  997          if (p->error != 0)      /* head off possible runaway recursion */
 994  998                  return;
 995  999  
 996 1000          assert(from <= to);
 997 1001  
 998 1002          switch (REP(MAP(from), MAP(to))) {
 999 1003          case REP(0, 0):                 /* must be user doing this */
1000 1004                  DROP(finish-start);     /* drop the operand */
1001 1005                  break;
1002 1006          case REP(0, 1):                 /* as x{1,1}? */
1003 1007          case REP(0, N):                 /* as x{1,n}? */
1004 1008          case REP(0, INF):               /* as x{1,}? */
1005 1009                  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1006 1010                  INSERT(OCH_, start);            /* offset is wrong... */
1007 1011                  repeat(p, start+1, 1, to);
1008 1012                  ASTERN(OOR1, start);
1009 1013                  AHEAD(start);                   /* ... fix it */
1010 1014                  EMIT(OOR2, 0);
1011 1015                  AHEAD(THERE());
1012 1016                  ASTERN(O_CH, THERETHERE());
1013 1017                  break;
1014 1018          case REP(1, 1):                 /* trivial case */
1015 1019                  /* done */
1016 1020                  break;
1017 1021          case REP(1, N):                 /* as x?x{1,n-1} */
1018 1022                  /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
1019 1023                  INSERT(OCH_, start);
1020 1024                  ASTERN(OOR1, start);
1021 1025                  AHEAD(start);
1022 1026                  EMIT(OOR2, 0);                  /* offset very wrong... */
1023 1027                  AHEAD(THERE());                 /* ...so fix it */
1024 1028                  ASTERN(O_CH, THERETHERE());
1025 1029                  copy = dupl(p, start+1, finish+1);
1026 1030                  assert(copy == finish+4);
1027 1031                  repeat(p, copy, 1, to-1);
1028 1032                  break;
1029 1033          case REP(1, INF):               /* as x+ */
1030 1034                  INSERT(OPLUS_, start);
1031 1035                  ASTERN(O_PLUS, start);
1032 1036                  break;
1033 1037          case REP(N, N):                 /* as xx{m-1,n-1} */
1034 1038                  copy = dupl(p, start, finish);
1035 1039                  repeat(p, copy, from-1, to-1);
1036 1040                  break;
1037 1041          case REP(N, INF):               /* as xx{n-1,INF} */
1038 1042                  copy = dupl(p, start, finish);
1039 1043                  repeat(p, copy, from-1, to);
1040 1044                  break;
1041 1045          default:                        /* "can't happen" */
1042 1046                  SETERROR(REG_EFATAL);   /* just in case */
1043 1047                  break;
1044 1048          }
1045 1049  }
1046 1050  
1047 1051  /*
1048 1052   * wgetnext - helper function for WGETNEXT() macro. Gets the next wide
1049 1053   * character from the parse struct, signals a REG_ILLSEQ error if the
1050 1054   * character can't be converted. Returns the number of bytes consumed.
1051 1055   */
1052 1056  static wint_t
1053 1057  wgetnext(struct parse *p)
1054 1058  {
1055 1059          mbstate_t mbs;
1056 1060          wchar_t wc;
1057 1061          size_t n;
1058 1062  
1059 1063          (void) memset(&mbs, 0, sizeof (mbs));
1060 1064          n = mbrtowc(&wc, p->next, p->end - p->next, &mbs);
1061 1065          if (n == (size_t)-1 || n == (size_t)-2) {
1062 1066                  SETERROR(REG_ECHAR);
1063 1067                  return (0);
1064 1068          }
1065 1069          if (n == 0)
1066 1070                  n = 1;
1067 1071          p->next += n;
1068 1072          return (wc);
1069 1073  }
1070 1074  
1071 1075  /*
1072 1076   * seterr - set an error condition
1073 1077   */
1074 1078  static int                      /* useless but makes type checking happy */
1075 1079  seterr(struct parse *p, int e)
1076 1080  {
1077 1081          if (p->error == 0)      /* keep earliest error condition */
1078 1082                  p->error = e;
1079 1083          p->next = nuls;         /* try to bring things to a halt */
1080 1084          p->end = nuls;
1081 1085          return (0);             /* make the return value well-defined */
1082 1086  }
1083 1087  
1084 1088  /*
1085 1089   * allocset - allocate a set of characters for []
1086 1090   */
1087 1091  static cset *
1088 1092  allocset(struct parse *p)
1089 1093  {
1090 1094          cset *cs, *ncs;
1091 1095  
1092 1096          ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof (*ncs));
1093 1097          if (ncs == NULL) {
1094 1098                  SETERROR(REG_ESPACE);
1095 1099                  return (NULL);
1096 1100          }
1097 1101          p->g->sets = ncs;
1098 1102          cs = &p->g->sets[p->g->ncsets++];
1099 1103          (void) memset(cs, 0, sizeof (*cs));
1100 1104  
1101 1105          return (cs);
1102 1106  }
1103 1107  
1104 1108  /*
1105 1109   * freeset - free a now-unused set
1106 1110   */
1107 1111  static void
1108 1112  freeset(struct parse *p, cset *cs)
1109 1113  {
1110 1114          cset *top = &p->g->sets[p->g->ncsets];
1111 1115  
1112 1116          free(cs->wides);
1113 1117          free(cs->ranges);
1114 1118          free(cs->types);
1115 1119          (void) memset(cs, 0, sizeof (*cs));
1116 1120          if (cs == top-1)        /* recover only the easy case */
1117 1121                  p->g->ncsets--;
1118 1122  }
1119 1123  
1120 1124  /*
1121 1125   * singleton - Determine whether a set contains only one character,
1122 1126   * returning it if so, otherwise returning OUT.
1123 1127   */
1124 1128  static wint_t
1125 1129  singleton(cset *cs)
1126 1130  {
1127 1131          wint_t i, s, n;
1128 1132  
1129 1133          for (i = n = 0; i < NC; i++)
1130 1134                  if (CHIN(cs, i)) {
1131 1135                          n++;
1132 1136                          s = i;
1133 1137                  }
1134 1138          if (n == 1)
1135 1139                  return (s);
1136 1140          if (cs->nwides == 1 && cs->nranges == 0 && cs->ntypes == 0 &&
1137 1141              cs->icase == 0)
1138 1142                  return (cs->wides[0]);
1139 1143          /* Don't bother handling the other cases. */
1140 1144          return (OUT);
1141 1145  }
1142 1146  
1143 1147  /*
1144 1148   * CHadd - add character to character set.
1145 1149   */
1146 1150  static void
1147 1151  CHadd(struct parse *p, cset *cs, wint_t ch)
1148 1152  {
1149 1153          wint_t nch, *newwides;
1150 1154          assert(ch >= 0);
1151 1155          if (ch < NC)
1152 1156                  cs->bmp[ch >> 3] |= 1 << (ch & 7);
1153 1157          else {
1154 1158                  newwides = realloc(cs->wides, (cs->nwides + 1) *
1155 1159                      sizeof (*cs->wides));
1156 1160                  if (newwides == NULL) {
1157 1161                          SETERROR(REG_ESPACE);
1158 1162                          return;
1159 1163                  }
1160 1164                  cs->wides = newwides;
1161 1165                  cs->wides[cs->nwides++] = ch;
1162 1166          }
1163 1167          if (cs->icase) {
1164 1168                  if ((nch = towlower(ch)) < NC)
1165 1169                          cs->bmp[nch >> 3] |= 1 << (nch & 7);
1166 1170                  if ((nch = towupper(ch)) < NC)
1167 1171                          cs->bmp[nch >> 3] |= 1 << (nch & 7);
1168 1172          }
1169 1173  }
1170 1174  
1171 1175  /*
1172 1176   * CHaddrange - add all characters in the range [min,max] to a character set.
1173 1177   */
1174 1178  static void
1175 1179  CHaddrange(struct parse *p, cset *cs, wint_t min, wint_t max)
1176 1180  {
1177 1181          crange *newranges;
1178 1182  
1179 1183          for (; min < NC && min <= max; min++)
1180 1184                  CHadd(p, cs, min);
1181 1185          if (min >= max)
1182 1186                  return;
1183 1187          newranges = realloc(cs->ranges, (cs->nranges + 1) *
1184 1188              sizeof (*cs->ranges));
1185 1189          if (newranges == NULL) {
1186 1190                  SETERROR(REG_ESPACE);
1187 1191                  return;
1188 1192          }
1189 1193          cs->ranges = newranges;
1190 1194          cs->ranges[cs->nranges].min = min;
1191 1195          cs->ranges[cs->nranges].min = max;
1192 1196          cs->nranges++;
1193 1197  }
1194 1198  
1195 1199  /*
1196 1200   * CHaddtype - add all characters of a certain type to a character set.
1197 1201   */
1198 1202  static void
1199 1203  CHaddtype(struct parse *p, cset *cs, wctype_t wct)
1200 1204  {
1201 1205          wint_t i;
1202 1206          wctype_t *newtypes;
1203 1207  
1204 1208          for (i = 0; i < NC; i++)
1205 1209                  if (iswctype(i, wct))
1206 1210                          CHadd(p, cs, i);
1207 1211          newtypes = realloc(cs->types, (cs->ntypes + 1) *
1208 1212              sizeof (*cs->types));
1209 1213          if (newtypes == NULL) {
1210 1214                  SETERROR(REG_ESPACE);
1211 1215                  return;
1212 1216          }
1213 1217          cs->types = newtypes;
1214 1218          cs->types[cs->ntypes++] = wct;
1215 1219  }
1216 1220  
1217 1221  /*
1218 1222   * dupl - emit a duplicate of a bunch of sops
1219 1223   */
1220 1224  static sopno                    /* start of duplicate */
1221 1225  dupl(struct parse *p,
1222 1226          sopno start,            /* from here */
1223 1227          sopno finish)           /* to this less one */
1224 1228  {
1225 1229          sopno ret = HERE();
1226 1230          sopno len = finish - start;
1227 1231  
1228 1232          assert(finish >= start);
1229 1233          if (len == 0)
1230 1234                  return (ret);
1231 1235          enlarge(p, p->ssize + len);     /* this many unexpected additions */
1232 1236          assert(p->ssize >= p->slen + len);
1233 1237          (void) memcpy((char *)(p->strip + p->slen),
1234 1238              (char *)(p->strip + start), (size_t)len*sizeof (sop));
1235 1239          p->slen += len;
1236 1240          return (ret);
1237 1241  }
1238 1242  
1239 1243  /*
1240 1244   * doemit - emit a strip operator
1241 1245   *
1242 1246   * It might seem better to implement this as a macro with a function as
1243 1247   * hard-case backup, but it's just too big and messy unless there are
1244 1248   * some changes to the data structures.  Maybe later.
1245 1249   */
1246 1250  static void
1247 1251  doemit(struct parse *p, sop op, size_t opnd)
1248 1252  {
1249 1253          /* avoid making error situations worse */
1250 1254          if (p->error != 0)
1251 1255                  return;
1252 1256  
1253 1257          /* deal with oversize operands ("can't happen", more or less) */
1254 1258          assert(opnd < 1<<OPSHIFT);
1255 1259  
1256 1260          /* deal with undersized strip */
1257 1261          if (p->slen >= p->ssize)
1258 1262                  enlarge(p, (p->ssize+1) / 2 * 3);       /* +50% */
1259 1263          assert(p->slen < p->ssize);
1260 1264  
1261 1265          /* finally, it's all reduced to the easy case */
1262 1266          p->strip[p->slen++] = SOP(op, opnd);
1263 1267  }
1264 1268  
1265 1269  /*
1266 1270   * doinsert - insert a sop into the strip
1267 1271   */
1268 1272  static void
1269 1273  doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
1270 1274  {
1271 1275          sopno sn;
1272 1276          sop s;
1273 1277          int i;
1274 1278  
1275 1279          /* avoid making error situations worse */
1276 1280          if (p->error != 0)
1277 1281                  return;
1278 1282  
1279 1283          sn = HERE();
1280 1284          EMIT(op, opnd);         /* do checks, ensure space */
1281 1285          assert(HERE() == sn+1);
1282 1286          s = p->strip[sn];
1283 1287  
1284 1288          /* adjust paren pointers */
1285 1289          assert(pos > 0);
1286 1290          for (i = 1; i < NPAREN; i++) {
1287 1291                  if (p->pbegin[i] >= pos) {
1288 1292                          p->pbegin[i]++;
1289 1293                  }
1290 1294                  if (p->pend[i] >= pos) {
1291 1295                          p->pend[i]++;
1292 1296                  }
1293 1297          }
1294 1298  
1295 1299          (void) memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1296 1300              (HERE()-pos-1)*sizeof (sop));
1297 1301          p->strip[pos] = s;
1298 1302  }
1299 1303  
1300 1304  /*
1301 1305   * dofwd - complete a forward reference
1302 1306   */
1303 1307  static void
1304 1308  dofwd(struct parse *p, sopno pos, sop value)
1305 1309  {
1306 1310          /* avoid making error situations worse */
1307 1311          if (p->error != 0)
1308 1312                  return;
1309 1313  
1310 1314          assert(value < 1<<OPSHIFT);
1311 1315          p->strip[pos] = OP(p->strip[pos]) | value;
1312 1316  }
1313 1317  
1314 1318  /*
1315 1319   * enlarge - enlarge the strip
1316 1320   */
1317 1321  static void
1318 1322  enlarge(struct parse *p, sopno size)
1319 1323  {
1320 1324          sop *sp;
1321 1325  
1322 1326          if (p->ssize >= size)
1323 1327                  return;
1324 1328  
1325 1329          sp = (sop *)realloc(p->strip, size*sizeof (sop));
1326 1330          if (sp == NULL) {
1327 1331                  SETERROR(REG_ESPACE);
1328 1332                  return;
1329 1333          }
1330 1334          p->strip = sp;
1331 1335          p->ssize = size;
1332 1336  }
1333 1337  
1334 1338  /*
1335 1339   * stripsnug - compact the strip
1336 1340   */
1337 1341  static void
1338 1342  stripsnug(struct parse *p, struct re_guts *g)
1339 1343  {
1340 1344          g->nstates = p->slen;
1341 1345          g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof (sop));
1342 1346          if (g->strip == NULL) {
1343 1347                  SETERROR(REG_ESPACE);
1344 1348                  g->strip = p->strip;
1345 1349          }
1346 1350  }
1347 1351  
1348 1352  /*
1349 1353   * findmust - fill in must and mlen with longest mandatory literal string
1350 1354   *
1351 1355   * This algorithm could do fancy things like analyzing the operands of |
1352 1356   * for common subsequences.  Someday.  This code is simple and finds most
1353 1357   * of the interesting cases.
1354 1358   *
1355 1359   * Note that must and mlen got initialized during setup.
1356 1360   */
1357 1361  static void
1358 1362  findmust(struct parse *p, struct re_guts *g)
1359 1363  {

↓ open down ↓

565 lines elided

↑ open up ↑

1360 1364          sop *scan;
1361 1365          sop *start;
1362 1366          sop *newstart;
1363 1367          sopno newlen;
1364 1368          sop s;
1365 1369          char *cp;
1366 1370          int offset;
1367 1371          char buf[MB_LEN_MAX];
1368 1372          size_t clen;
1369 1373          mbstate_t mbs;
     1374 +        locale_t loc = uselocale(NULL);
1370 1375  
1371 1376          /* avoid making error situations worse */
1372 1377          if (p->error != 0)
1373 1378                  return;
1374 1379  
1375 1380          /*
1376 1381           * It's not generally safe to do a ``char'' substring search on
1377 1382           * multibyte character strings, but it's safe for at least
1378 1383           * UTF-8 (see RFC 3629).
1379 1384           */
1380 1385          if (MB_CUR_MAX > 1 &&
1381      -            strcmp(_CurrentRuneLocale->__encoding, "UTF-8") != 0)
     1386 +            strcmp(loc->runelocale->__encoding, "UTF-8") != 0)
1382 1387                  return;
1383 1388  
1384 1389          /* find the longest OCHAR sequence in strip */
1385 1390          newlen = 0;
1386 1391          offset = 0;
1387 1392          g->moffset = 0;
1388 1393          scan = g->strip + 1;
1389 1394          do {
1390 1395                  s = *scan++;
1391 1396                  switch (OP(s)) {

1392 1397                  case OCHAR:             /* sequence member */
1393 1398                          if (newlen == 0) {              /* new sequence */
1394 1399                                  (void) memset(&mbs, 0, sizeof (mbs));
1395 1400                                  newstart = scan - 1;
1396 1401                          }
1397 1402                          clen = wcrtomb(buf, OPND(s), &mbs);
1398 1403                          if (clen == (size_t)-1)
1399 1404                                  goto toohard;
1400 1405                          newlen += clen;
1401 1406                          break;
1402 1407                  case OPLUS_:            /* things that don't break one */
1403 1408                  case OLPAREN:
1404 1409                  case ORPAREN:
1405 1410                          break;
1406 1411                  case OQUEST_:           /* things that must be skipped */
1407 1412                  case OCH_:
1408 1413                          offset = altoffset(scan, offset);
1409 1414                          scan--;
1410 1415                          do {
1411 1416                                  scan += OPND(s);
1412 1417                                  s = *scan;
1413 1418                                  /* assert() interferes w debug printouts */
1414 1419                                  if (OP(s) != O_QUEST && OP(s) != O_CH &&
1415 1420                                      OP(s) != OOR2) {
1416 1421                                          g->iflags |= BAD;
1417 1422                                          return;
1418 1423                                  }
1419 1424                          } while (OP(s) != O_QUEST && OP(s) != O_CH);
1420 1425                          /* FALLTHROUGH */
1421 1426                  case OBOW:              /* things that break a sequence */
1422 1427                  case OEOW:
1423 1428                  case OBOL:
1424 1429                  case OEOL:
1425 1430                  case O_QUEST:
1426 1431                  case O_CH:
1427 1432                  case OEND:
1428 1433                          if (newlen > g->mlen) {         /* ends one */
1429 1434                                  start = newstart;
1430 1435                                  g->mlen = newlen;
1431 1436                                  if (offset > -1) {
1432 1437                                          g->moffset += offset;
1433 1438                                          offset = newlen;
1434 1439                                  } else
1435 1440                                          g->moffset = offset;
1436 1441                          } else {
1437 1442                                  if (offset > -1)
1438 1443                                          offset += newlen;
1439 1444                          }
1440 1445                          newlen = 0;
1441 1446                          break;
1442 1447                  case OANY:
1443 1448                          if (newlen > g->mlen) {         /* ends one */
1444 1449                                  start = newstart;
1445 1450                                  g->mlen = newlen;
1446 1451                                  if (offset > -1) {
1447 1452                                          g->moffset += offset;
1448 1453                                          offset = newlen;
1449 1454                                  } else
1450 1455                                          g->moffset = offset;
1451 1456                          } else {
1452 1457                                  if (offset > -1)
1453 1458                                          offset += newlen;
1454 1459                          }
1455 1460                          if (offset > -1)
1456 1461                                  offset++;
1457 1462                          newlen = 0;
1458 1463                          break;
1459 1464                  case OANYOF:            /* may or may not invalidate offset */
1460 1465                          /* First, everything as OANY */
1461 1466                          if (newlen > g->mlen) {         /* ends one */
1462 1467                                  start = newstart;
1463 1468                                  g->mlen = newlen;
1464 1469                                  if (offset > -1) {
1465 1470                                          g->moffset += offset;
1466 1471                                          offset = newlen;
1467 1472                                  } else
1468 1473                                          g->moffset = offset;
1469 1474                          } else {
1470 1475                                  if (offset > -1)
1471 1476                                          offset += newlen;
1472 1477                          }
1473 1478                          if (offset > -1)
1474 1479                                  offset++;
1475 1480                          newlen = 0;
1476 1481                          break;
1477 1482                  toohard:
1478 1483                  default:
1479 1484                          /*
1480 1485                           * Anything here makes it impossible or too hard
1481 1486                           * to calculate the offset -- so we give up;
1482 1487                           * save the last known good offset, in case the
1483 1488                           * must sequence doesn't occur later.
1484 1489                           */
1485 1490                          if (newlen > g->mlen) {         /* ends one */
1486 1491                                  start = newstart;
1487 1492                                  g->mlen = newlen;
1488 1493                                  if (offset > -1)
1489 1494                                          g->moffset += offset;
1490 1495                                  else
1491 1496                                          g->moffset = offset;
1492 1497                          }
1493 1498                          offset = -1;
1494 1499                          newlen = 0;
1495 1500                          break;
1496 1501                  }
1497 1502          } while (OP(s) != OEND);
1498 1503  
1499 1504          if (g->mlen == 0) {             /* there isn't one */
1500 1505                  g->moffset = -1;
1501 1506                  return;
1502 1507          }
1503 1508  
1504 1509          /* turn it into a character string */
1505 1510          g->must = malloc((size_t)g->mlen + 1);
1506 1511          if (g->must == NULL) {          /* argh; just forget it */
1507 1512                  g->mlen = 0;
1508 1513                  g->moffset = -1;
1509 1514                  return;
1510 1515          }
1511 1516          cp = g->must;
1512 1517          scan = start;
1513 1518          (void) memset(&mbs, 0, sizeof (mbs));
1514 1519          while (cp < g->must + g->mlen) {
1515 1520                  while (OP(s = *scan++) != OCHAR)
1516 1521                          continue;
1517 1522                  clen = wcrtomb(cp, OPND(s), &mbs);
1518 1523                  assert(clen != (size_t)-1);
1519 1524                  cp += clen;
1520 1525          }
1521 1526          assert(cp == g->must + g->mlen);
1522 1527          *cp++ = '\0';           /* just on general principles */
1523 1528  }
1524 1529  
1525 1530  /*
1526 1531   * altoffset - choose biggest offset among multiple choices
1527 1532   *
1528 1533   * Compute, recursively if necessary, the largest offset among multiple
1529 1534   * re paths.
1530 1535   */
1531 1536  static int
1532 1537  altoffset(sop *scan, int offset)
1533 1538  {
1534 1539          int largest;
1535 1540          int try;
1536 1541          sop s;
1537 1542  
1538 1543          /* If we gave up already on offsets, return */
1539 1544          if (offset == -1)
1540 1545                  return (-1);
1541 1546  
1542 1547          largest = 0;
1543 1548          try = 0;
1544 1549          s = *scan++;
1545 1550          while (OP(s) != O_QUEST && OP(s) != O_CH) {
1546 1551                  switch (OP(s)) {
1547 1552                  case OOR1:
1548 1553                          if (try > largest)
1549 1554                                  largest = try;
1550 1555                          try = 0;
1551 1556                          break;
1552 1557                  case OQUEST_:
1553 1558                  case OCH_:
1554 1559                          try = altoffset(scan, try);
1555 1560                          if (try == -1)
1556 1561                                  return (-1);
1557 1562                          scan--;
1558 1563                          do {
1559 1564                                  scan += OPND(s);
1560 1565                                  s = *scan;
1561 1566                                  if (OP(s) != O_QUEST && OP(s) != O_CH &&
1562 1567                                      OP(s) != OOR2)
1563 1568                                          return (-1);
1564 1569                          } while (OP(s) != O_QUEST && OP(s) != O_CH);
1565 1570                          /*
1566 1571                           * We must skip to the next position, or we'll
1567 1572                           * leave altoffset() too early.
1568 1573                           */
1569 1574                          scan++;
1570 1575                          break;
1571 1576                  case OANYOF:
1572 1577                  case OCHAR:
1573 1578                  case OANY:
1574 1579                          try++;
1575 1580                          /*FALLTHRU*/
1576 1581                  case OBOW:
1577 1582                  case OEOW:
1578 1583                  case OLPAREN:
1579 1584                  case ORPAREN:
1580 1585                  case OOR2:
1581 1586                          break;
1582 1587                  default:
1583 1588                          try = -1;
1584 1589                          break;
1585 1590                  }
1586 1591                  if (try == -1)
1587 1592                          return (-1);
1588 1593                  s = *scan++;
1589 1594          }
1590 1595  
1591 1596          if (try > largest)
1592 1597                  largest = try;
1593 1598  
1594 1599          return (largest+offset);
1595 1600  }
1596 1601  
1597 1602  /*
1598 1603   * computejumps - compute char jumps for BM scan
1599 1604   *
1600 1605   * This algorithm assumes g->must exists and is has size greater than
1601 1606   * zero. It's based on the algorithm found on Computer Algorithms by
1602 1607   * Sara Baase.
1603 1608   *
1604 1609   * A char jump is the number of characters one needs to jump based on
1605 1610   * the value of the character from the text that was mismatched.
1606 1611   */
1607 1612  static void
1608 1613  computejumps(struct parse *p, struct re_guts *g)
1609 1614  {
1610 1615          int ch;
1611 1616          int mindex;
1612 1617  
1613 1618          /* Avoid making errors worse */
1614 1619          if (p->error != 0)
1615 1620                  return;
1616 1621  
1617 1622          g->charjump = (int *)malloc((NC + 1) * sizeof (int));
1618 1623          if (g->charjump == NULL)        /* Not a fatal error */
1619 1624                  return;
1620 1625          /* Adjust for signed chars, if necessary */
1621 1626          g->charjump = &g->charjump[-(CHAR_MIN)];
1622 1627  
1623 1628          /*
1624 1629           * If the character does not exist in the pattern, the jump
1625 1630           * is equal to the number of characters in the pattern.
1626 1631           */
1627 1632          for (ch = CHAR_MIN; ch < (CHAR_MAX + 1); ch++)
1628 1633                  g->charjump[ch] = g->mlen;
1629 1634  
1630 1635          /*
1631 1636           * If the character does exist, compute the jump that would
1632 1637           * take us to the last character in the pattern equal to it
1633 1638           * (notice that we match right to left, so that last character
1634 1639           * is the first one that would be matched).
1635 1640           */
1636 1641          for (mindex = 0; mindex < g->mlen; mindex++)
1637 1642                  g->charjump[(int)g->must[mindex]] = g->mlen - mindex - 1;
1638 1643  }
1639 1644  
1640 1645  /*
1641 1646   * computematchjumps - compute match jumps for BM scan
1642 1647   *
1643 1648   * This algorithm assumes g->must exists and is has size greater than
1644 1649   * zero. It's based on the algorithm found on Computer Algorithms by
1645 1650   * Sara Baase.
1646 1651   *
1647 1652   * A match jump is the number of characters one needs to advance based
1648 1653   * on the already-matched suffix.
1649 1654   * Notice that all values here are minus (g->mlen-1), because of the way
1650 1655   * the search algorithm works.
1651 1656   */
1652 1657  static void
1653 1658  computematchjumps(struct parse *p, struct re_guts *g)
1654 1659  {
1655 1660          int mindex;             /* General "must" iterator */
1656 1661          int suffix;             /* Keeps track of matching suffix */
1657 1662          int ssuffix;            /* Keeps track of suffixes' suffix */
1658 1663          int *pmatches;
1659 1664                                  /*
1660 1665                                   * pmatches[k] points to the next i
1661 1666                                   * such that i+1...mlen is a substring
1662 1667                                   * of k+1...k+mlen-i-1
1663 1668                                   */
1664 1669  
1665 1670          /* Avoid making errors worse */
1666 1671          if (p->error != 0)
1667 1672                  return;
1668 1673  
1669 1674          pmatches = (int *)malloc(g->mlen * sizeof (unsigned int));
1670 1675          if (pmatches == NULL) {
1671 1676                  g->matchjump = NULL;
1672 1677                  return;
1673 1678          }
1674 1679  
1675 1680          g->matchjump = (int *)malloc(g->mlen * sizeof (unsigned int));
1676 1681          if (g->matchjump == NULL)       /* Not a fatal error */
1677 1682                  return;
1678 1683  
1679 1684          /* Set maximum possible jump for each character in the pattern */
1680 1685          for (mindex = 0; mindex < g->mlen; mindex++)
1681 1686                  g->matchjump[mindex] = 2*g->mlen - mindex - 1;
1682 1687  
1683 1688          /* Compute pmatches[] */
1684 1689          for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
1685 1690              mindex--, suffix--) {
1686 1691                  pmatches[mindex] = suffix;
1687 1692  
1688 1693                  /*
1689 1694                   * If a mismatch is found, interrupting the substring,
1690 1695                   * compute the matchjump for that position. If no
1691 1696                   * mismatch is found, then a text substring mismatched
1692 1697                   * against the suffix will also mismatch against the
1693 1698                   * substring.
1694 1699                   */
1695 1700                  while (suffix < g->mlen && g->must[mindex] != g->must[suffix]) {
1696 1701                          g->matchjump[suffix] = MIN(g->matchjump[suffix],
1697 1702                              g->mlen - mindex - 1);
1698 1703                          suffix = pmatches[suffix];
1699 1704                  }
1700 1705          }
1701 1706  
1702 1707          /*
1703 1708           * Compute the matchjump up to the last substring found to jump
1704 1709           * to the beginning of the largest must pattern prefix matching
1705 1710           * it's own suffix.
1706 1711           */
1707 1712          for (mindex = 0; mindex <= suffix; mindex++)
1708 1713                  g->matchjump[mindex] = MIN(g->matchjump[mindex],
1709 1714                      g->mlen + suffix - mindex);
1710 1715  
1711 1716          ssuffix = pmatches[suffix];
1712 1717          while (suffix < g->mlen) {
1713 1718                  while (suffix <= ssuffix && suffix < g->mlen) {
1714 1719                          g->matchjump[suffix] = MIN(g->matchjump[suffix],
1715 1720                              g->mlen + ssuffix - suffix);
1716 1721                          suffix++;
1717 1722                  }
1718 1723                  if (suffix < g->mlen)
1719 1724                          ssuffix = pmatches[ssuffix];
1720 1725          }
1721 1726  
1722 1727          free(pmatches);
1723 1728  }
1724 1729  
1725 1730  /*
1726 1731   * pluscount - count + nesting
1727 1732   */
1728 1733  static sopno                    /* nesting depth */
1729 1734  pluscount(struct parse *p, struct re_guts *g)
1730 1735  {
1731 1736          sop *scan;
1732 1737          sop s;
1733 1738          sopno plusnest = 0;
1734 1739          sopno maxnest = 0;
1735 1740  
1736 1741          if (p->error != 0)
1737 1742                  return (0);     /* there may not be an OEND */
1738 1743  
1739 1744          scan = g->strip + 1;
1740 1745          do {
1741 1746                  s = *scan++;
1742 1747                  switch (OP(s)) {
1743 1748                  case OPLUS_:
1744 1749                          plusnest++;
1745 1750                          break;
1746 1751                  case O_PLUS:
1747 1752                          if (plusnest > maxnest)
1748 1753                                  maxnest = plusnest;
1749 1754                          plusnest--;
1750 1755                          break;
1751 1756                  }
1752 1757          } while (OP(s) != OEND);
1753 1758          if (plusnest != 0)
1754 1759                  g->iflags |= BAD;
1755 1760          return (maxnest);
1756 1761  }

↓ open down ↓

365 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX