1 /*
   2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  *
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
  17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28 
  29 /*
  30   tre-compile.c - TRE regex compiler
  31 */
  32 
  33 /*
  34   TODO:
  35    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
  36      function calls.
  37 */
  38 
  39 #include <stdio.h>
  40 #include <assert.h>
  41 #include <string.h>
  42 #include <limits.h>
  43 
  44 #include "tre-internal.h"
  45 #include "tre-mem.h"
  46 #include "tre-stack.h"
  47 #include "tre-ast.h"
  48 #include "tre-parse.h"
  49 #include "tre-compile.h"
  50 #include "tre.h"
  51 
  52 /*
  53   Algorithms to setup tags so that submatch addressing can be done.
  54 */
  55 
  56 #ifdef TRE_DEBUG
  57 static const char *tag_dir_str[] = {
  58   "minimize",
  59   "maximize",
  60   "left-maximize"
  61 };
  62 
  63 static const char _indent[] = "  ";
  64 
  65 static void
  66 print_indent(int indent)
  67 {
  68   while (indent-- > 0)
  69     DPRINT((_indent));
  70 }
  71 
  72 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
  73                                    int num_tags);
  74 static void
  75 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
  76                             int num_tags)
  77 {
  78   tre_last_matched_pre_t *u = branch->last_matched;
  79   int n_last_matched = 0;
  80 
  81   while (u)
  82     {
  83       n_last_matched++;
  84       u = u->next;
  85     }
  86 
  87   print_indent(indent);
  88   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
  89           branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
  90   print_indent(indent);
  91   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
  92           n_last_matched));
  93   if (branch->n_last_matched != n_last_matched)
  94     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
  95   if (branch->cmp_tag > 0)
  96     {
  97       int i;
  98       const char *sep = " tags=";
  99       print_indent(indent);
 100       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 101       for (i = 0; i < num_tags; i++)
 102         if (bit_test(branch->tags, i))
 103           {
 104             DPRINT(("%s%d", sep, i));
 105             sep = ",";
 106           }
 107       DPRINT(("\n"));
 108     }
 109 
 110   u = branch->last_matched;
 111   indent++;
 112   while (u)
 113     {
 114       print_last_matched_pre(u, indent, num_tags);
 115       u = u->next;
 116     }
 117 }
 118 
 119 static void
 120 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
 121 {
 122   tre_last_matched_branch_pre_t *b = lm->branches;
 123   int n_branches = 0;
 124 
 125   while (b)
 126     {
 127       n_branches++;
 128       b = b->next;
 129     }
 130 
 131   print_indent(indent);
 132   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
 133           lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
 134   print_indent(indent);
 135   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
 136           lm->n_branches, n_branches));
 137   if (lm->n_branches != n_branches)
 138     DPRINT(("*** mismatch between n and branches ***\n"));
 139 
 140   b = lm->branches;
 141   indent++;
 142   while (b)
 143     {
 144       print_last_match_branch_pre(b, indent, num_tags);
 145       b = b->next;
 146     }
 147 }
 148 
 149 static void print_last_matched(tre_last_matched_t *lm, int indent);
 150 static void
 151 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
 152 {
 153   tre_last_matched_t *u;
 154   int i;
 155 
 156   print_indent(indent);
 157   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
 158   if (branch->cmp_tag > 0)
 159     {
 160       print_indent(indent);
 161       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 162       if (branch->n_tags > 0)
 163         {
 164           const char *sep = " tags=";
 165           for (i = 0; i < branch->n_tags; i++)
 166             {
 167               DPRINT(("%s%d", sep, branch->tags[i]));
 168               sep = ",";
 169             }
 170         }
 171       DPRINT(("\n"));
 172     }
 173 
 174   u = branch->last_matched;
 175   indent++;
 176   for (i = branch->n_last_matched; i > 0; i--, u++)
 177     print_last_matched(u, indent);
 178 }
 179 
 180 static void
 181 print_last_matched(tre_last_matched_t *lm, int indent)
 182 {
 183   int i;
 184   tre_last_matched_branch_t *b;
 185 
 186   print_indent(indent);
 187   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
 188           lm->start_tag));
 189 
 190   b = lm->branches;
 191   indent++;
 192   for (i = lm->n_branches; i > 0; i--, b++)
 193     print_last_match_branch(b, indent);
 194 }
 195 #endif /* TRE_DEBUG */
 196 
 197 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
 198    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
 199    create an unset tre_last_matched_branch_pre_t. */
 200 static reg_errcode_t
 201 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
 202                    int tag_id, int num_tags)
 203 {
 204   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
 205   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
 206 
 207   if (db)
 208     {
 209       if (sb)
 210         {
 211           bitstr_t *l = db->tags;
 212           bitstr_t *r = sb->tags;
 213           int i = bitstr_size(num_tags);
 214 
 215           while(i-- > 0)
 216             *l++ |= *r++;
 217           /* db and sb are the info from two parallel sub-trees, so the tags
 218              must be mutually exclusive, and we can just add their numbers */
 219           db->n_tags += sb->n_tags;
 220           db->tot_tags += sb->tot_tags;
 221           if (db->last_matched)
 222             {
 223               if (sb->last_matched)
 224                 {
 225                   tre_last_matched_pre_t *u = db->last_matched;
 226 
 227                   while(u->next)
 228                     u = u->next;
 229                   u->next = sb->last_matched;
 230                   db->n_last_matched += sb->n_last_matched;
 231                   db->tot_branches += sb->tot_branches;
 232                   db->tot_last_matched += sb->tot_last_matched;
 233                 }
 234             }
 235             else if (sb->last_matched)
 236               {
 237                 db->last_matched = sb->last_matched;
 238                 db->n_last_matched = sb->n_last_matched;
 239                 db->tot_branches = sb->tot_branches;
 240                 db->tot_last_matched = sb->tot_last_matched;
 241               }
 242         }
 243     }
 244   else
 245     db = sb;
 246 
 247   if (tag_id != 0)
 248     {
 249       if (!db)
 250         {
 251           db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
 252                               + bitstr_size(num_tags));
 253           if (db == NULL)
 254             return REG_ESPACE;
 255           db->tot_branches = 1;
 256         }
 257       if (tag_id > 0)
 258         {
 259           /* tag_id is a new tag, and shouldn't exist in db's tags,
 260              so we can always increment n_tags */
 261           bit_set(db->tags, tag_id);
 262           db->n_tags++;
 263           db->tot_tags++;
 264         }
 265     }
 266   dst->last_matched_branch = db;
 267   return REG_OK;
 268 }
 269 
 270 /* Inserts a catenation node to the root of the tree given in `node'.
 271    As the left child a new tag with number `tag_id' to `node' is added,
 272    and the right child is the old root. */
 273 static reg_errcode_t
 274 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 275 {
 276   tre_catenation_t *c;
 277 
 278   DPRINT(("add_tag_left: tag %d\n", tag_id));
 279 
 280   c = tre_mem_alloc(mem, sizeof(*c));
 281   if (c == NULL)
 282     return REG_ESPACE;
 283   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
 284   if (c->left == NULL)
 285     return REG_ESPACE;
 286   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 287   if (c->right == NULL)
 288     return REG_ESPACE;
 289 
 290   c->right->obj = node->obj;
 291   c->right->type = node->type;
 292   c->right->last_matched_branch = node->last_matched_branch;
 293   c->right->nullable = -1;
 294   c->right->submatch_id = -1;
 295   node->obj = c;
 296   node->type = CATENATION;
 297   node->original = c->right;
 298   return REG_OK;
 299 }
 300 
 301 /* Inserts a catenation node to the root of the tree given in `node'.
 302    As the right child a new tag with number `tag_id' to `node' is added,
 303    and the left child is the old root. */
 304 static reg_errcode_t
 305 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 306 {
 307   tre_catenation_t *c;
 308 
 309   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
 310 
 311   c = tre_mem_alloc(mem, sizeof(*c));
 312   if (c == NULL)
 313     return REG_ESPACE;
 314   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
 315   if (c->right == NULL)
 316     return REG_ESPACE;
 317   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 318   if (c->left == NULL)
 319     return REG_ESPACE;
 320 
 321   c->left->obj = node->obj;
 322   c->left->type = node->type;
 323   c->left->last_matched_branch = node->last_matched_branch;
 324   c->left->nullable = -1;
 325   c->left->submatch_id = -1;
 326   node->obj = c;
 327   node->type = CATENATION;
 328   node->original = c->left;
 329   return REG_OK;
 330 }
 331 
 332 typedef enum {
 333   ADDTAGS_RECURSE,
 334   ADDTAGS_RECURSE_NOT_TOP_UNION,
 335   ADDTAGS_AFTER_ITERATION,
 336   ADDTAGS_AFTER_UNION_LEFT,
 337   ADDTAGS_AFTER_UNION_RIGHT,
 338   ADDTAGS_AFTER_CAT_LEFT,
 339   ADDTAGS_AFTER_CAT_RIGHT,
 340   ADDTAGS_SET_SUBMATCH_END,
 341   ADDTAGS_UNION_RECURSE,
 342   ADDTAGS_UNION_RIGHT_RECURSE,
 343   ADDTAGS_AFTER_UNION_TOP,
 344 } tre_addtags_symbol_t;
 345 
 346 enum {
 347   COPY_LAST_MATCHED_BRANCH,
 348   COPY_LAST_MATCHED_BRANCH_NEXT,
 349   COPY_LAST_MATCHED,
 350   COPY_LAST_MATCHED_NEXT,
 351 };
 352 
 353 #define REGSET_UNSET            ((unsigned)-1)
 354 
 355 /* Go through `regset' and set submatch data for submatches that are
 356    using this tag. */
 357 static void
 358 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
 359 {
 360   int i;
 361 
 362   for (i = 0; regset[i] != REGSET_UNSET; i++)
 363     {
 364       int id = regset[i] / 2;
 365       int start = !(regset[i] % 2);
 366       if (id >= SUBMATCH_ID_INVISIBLE_START)
 367         continue;
 368       DPRINT(("  Using tag %d for %s offset of "
 369               "submatch %d\n", tag,
 370               start ? "start" : "end", id));
 371       if (start)
 372         tnfa->submatch_data[id].so_tag = tag;
 373       else
 374         tnfa->submatch_data[id].eo_tag = tag;
 375     }
 376   regset[0] = -1;
 377 }
 378 
 379 #define REGSET_HAS_STARTS       0x1
 380 #define REGSET_HAS_ENDS         0x2
 381 
 382 /* Adds tags to appropriate locations in the parse tree in `tree', so that
 383    subexpressions marked for submatch addressing can be traced. */
 384 static reg_errcode_t
 385 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
 386              tre_tnfa_t *tnfa)
 387 {
 388   reg_errcode_t status = REG_OK;
 389   tre_addtags_symbol_t symbol;
 390   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
 391   int bottom = tre_stack_num_objects(stack);
 392   /* True for first pass (counting number of needed tags) */
 393   int first_pass = (mem == NULL || tnfa == NULL);
 394   unsigned *regset, *orig_regset;
 395   int regset_contains = 0;
 396   int num_tags = 0; /* Total number of tags. */
 397   int num_minimals = 0;  /* Number of special minimal tags. */
 398   int tag = 0;      /* The tag that is to be added next. */
 399   int next_tag = 1; /* Next tag to use after this one. */
 400   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
 401   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
 402                              * the first is the tag to reorder, the second
 403                              * is the tag after which the first is reordered */
 404   int *rtp;                 /* Pointer used to fill in reorder_tags and
 405                              * tag_order */
 406   int *to_reorder;          /* Transform array converting sequential order to
 407                              * that specified by reorder_tags */
 408   int id;
 409 
 410   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
 411   if (!first_pass)
 412     {
 413       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
 414       tnfa->end_tag = 0;
 415       tnfa->minimal_tags[0] = -1;
 416     }
 417 
 418   regset = malloc(sizeof(*regset) * ((tnfa->num_submatches
 419                    + tnfa->num_submatches_invisible + 1) * 2));
 420   if (regset == NULL)
 421     {
 422       status = REG_ESPACE;
 423       goto error_regset;
 424     }
 425   regset[0] = REGSET_UNSET;
 426   orig_regset = regset;
 427 
 428   if (!first_pass)
 429     {
 430       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
 431        * to_reorder in one batch (assuming all are the same type) */
 432       rtp = reorder_tags = malloc(sizeof(*reorder_tags) *
 433                                    ((2 * tnfa->num_reorder_tags + 1) +
 434                                    tnfa->num_tags));
 435       if (reorder_tags == NULL)
 436         {
 437           status = REG_ESPACE;
 438           goto error_reorder_tags;
 439         }
 440       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
 441     }
 442 
 443   STACK_PUSH(stack, voidptr, node);
 444   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
 445 
 446   while (tre_stack_num_objects(stack) > bottom)
 447     {
 448       if (status != REG_OK)
 449         break;
 450 
 451       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 452       switch (symbol)
 453         {
 454           int top_union;
 455 
 456         case ADDTAGS_SET_SUBMATCH_END:
 457           {
 458             int i;
 459 
 460             id = tre_stack_pop_int(stack);
 461             node = tre_stack_pop_voidptr(stack);
 462             /* Add end of this submatch to regset. */
 463             for (i = 0; regset[i] != REGSET_UNSET; i++);
 464             regset[i] = id * 2 + 1;
 465             regset[i + 1] = -1;
 466             regset_contains |= REGSET_HAS_ENDS;
 467 
 468             /* Always put a tag after a minimal iterator. */
 469             if (minimal_tag >= 0)
 470               {
 471                 if (first_pass)
 472                   {
 473                     node->num_tags++;
 474                     DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
 475                             node->num_tags));
 476                   }
 477                 else
 478                   {
 479                     int i;
 480                     status = tre_merge_branches(mem, node, NULL, tag,
 481                                                 tnfa->num_tags);
 482                     if (status != REG_OK)
 483                       break;
 484                     status = tre_add_tag_right(mem, node, tag);
 485                     if (status != REG_OK)
 486                       break;
 487                     tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
 488                     DPRINT(("Setting t%d direction to %s\n", tag,
 489                             tag_dir_str[tnfa->tag_directions[tag]]));
 490                     DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
 491                     for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
 492                     tnfa->minimal_tags[i] = tag;
 493                     tnfa->minimal_tags[i + 1] = minimal_tag;
 494                     tnfa->minimal_tags[i + 2] = -1;
 495 
 496                     DPRINT(("  Minimal end: t%d reordered to "
 497                             "after t%d\n", tag, minimal_tag));
 498                     /* Append to tag_order, move "tag" after
 499                      * "minimal_tag" */
 500                     *rtp++ = tag;
 501                     *rtp++ = minimal_tag;
 502 
 503                     num_minimals++;
 504                     tre_purge_regset(regset, tnfa, tag);
 505                   }
 506 
 507                 minimal_tag = -1;
 508                 DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
 509                 regset[0] = REGSET_UNSET;
 510                 regset_contains = 0;
 511                 tag = next_tag;
 512                 num_tags++;
 513                 next_tag++;
 514               }
 515             break;
 516           }
 517 
 518         case ADDTAGS_RECURSE_NOT_TOP_UNION:
 519           /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
 520            * indicating that if a union is being processed, it is not the
 521            * top-most of a series */
 522           top_union = 0;
 523           goto do_addtags_recurse;
 524 
 525         case ADDTAGS_RECURSE:
 526           /* Setting top_union to 1 means that if a union is begin processed,
 527            * it is the top-most of a series, and should recurse through the
 528            * series to set the left_tag and right_tag values */
 529           top_union = 1;
 530 
 531 do_addtags_recurse:
 532           node = tre_stack_pop_voidptr(stack);
 533 
 534           id = node->submatch_id;
 535           if (id >= 0)
 536             {
 537               int i;
 538 
 539               /* Add start of this submatch to regset. */
 540               for (i = 0; regset[i] != REGSET_UNSET; i++);
 541               regset[i] = id * 2;
 542               regset[i + 1] = -1;
 543               regset_contains |= REGSET_HAS_STARTS;
 544 
 545               /* Add end of this submatch to regset after processing this
 546                  node. */
 547               STACK_PUSH(stack, voidptr, node);
 548               STACK_PUSHX(stack, int, id);
 549               STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
 550             }
 551 
 552           switch (node->type)
 553             {
 554             case LITERAL:
 555               {
 556                 tre_literal_t *lit = node->obj;
 557 
 558                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
 559                   {
 560                     DPRINT(("Literal %d-%d\n",
 561                             (int)lit->code_min, (int)lit->code_max));
 562                     if (regset_contains)
 563                       {
 564                         /* Regset is not empty, so add a tag before the
 565                            literal or backref. */
 566                         if (first_pass)
 567                           {
 568                             DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
 569                             node->num_tags = 1;
 570                           }
 571                         else
 572                           {
 573                             status = tre_merge_branches(mem, node, NULL, tag,
 574                                                         tnfa->num_tags);
 575                             if (status != REG_OK)
 576                               break;
 577                             status = tre_add_tag_left(mem, node, tag);
 578                             if (status != REG_OK)
 579                               break;
 580                             if (regset_contains == REGSET_HAS_STARTS)
 581                               tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
 582                             else
 583                               tnfa->tag_directions[tag] = direction;
 584                             DPRINT(("Setting t%d direction to %s\n", tag,
 585                                     tag_dir_str[tnfa->tag_directions[tag]]));
 586                             tre_purge_regset(regset, tnfa, tag);
 587 
 588                             if (IS_BACKREF(lit))
 589                               {
 590                                 int b = lit->code_max;
 591                                 int t = tnfa->submatch_data[b].so_tag;
 592                                 /* Fail if the referenced submatch hasn't been
 593                                  * completed yet */
 594                                 if (tnfa->submatch_data[b].eo_tag < 0)
 595                                   {
 596                                     status = REG_ESUBREG;
 597                                     break;
 598                                   }
 599                                 if (t < tag)
 600                                   {
 601                                     DPRINT(("  Backref %d start: "
 602                                             "t%d reordered to before t%d\n",
 603                                             b, tag, t));
 604                                     if(t > 0)
 605                                       t--;
 606                                     /* Append to tag_order, move "tag" after
 607                                      * "t" */
 608                                     *rtp++ = tag;
 609                                     *rtp++ = t;
 610                                   }
 611 #if TRE_DEBUG
 612                                 else
 613                                   DPRINT(("  Backref %d start: "
 614                                           "(t%d already before t%d)\n",
 615                                             b, tag, t));
 616 #endif /* TRE_DEBUG */
 617                               }
 618                           }
 619 
 620                         DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
 621                                 tag));
 622                         regset[0] = REGSET_UNSET;
 623                         regset_contains = 0;
 624                         tag = next_tag;
 625                         num_tags++;
 626                         next_tag++;
 627                       }
 628                   }
 629                 else
 630                   {
 631                     assert(!IS_TAG(lit));
 632                   }
 633                 break;
 634               }
 635             case CATENATION:
 636               {
 637                 tre_catenation_t *cat = node->obj;
 638                 tre_ast_node_t *left = cat->left;
 639                 tre_ast_node_t *right = cat->right;
 640                 int reserved_tag = -1;
 641                 DPRINT(("Catenation, next_tag = %d\n", next_tag));
 642 
 643                 /* After processing right child. */
 644                 STACK_PUSHX(stack, voidptr, node);
 645                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
 646 
 647                 /* Process right child. */
 648                 STACK_PUSHX(stack, voidptr, right);
 649                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 650 
 651                 /* After processing left child. */
 652                 STACK_PUSHX(stack, int, next_tag + left->num_tags);
 653                 DPRINT(("  Pushing %d for after left\n",
 654                         next_tag + left->num_tags));
 655                 if (left->num_tags > 0 && right->num_tags > 0)
 656                   {
 657                     /* Reserve the next tag to the right child. */
 658                     DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
 659                             "Reserving next_tag %d to right child\n",
 660                             next_tag));
 661                     reserved_tag = next_tag;
 662                     next_tag++;
 663                   }
 664                 STACK_PUSHX(stack, int, reserved_tag);
 665                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
 666 
 667                 /* Process left child. */
 668                 STACK_PUSHX(stack, voidptr, left);
 669                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 670 
 671                 }
 672               break;
 673             case ITERATION:
 674               {
 675                 tre_iteration_t *iter = node->obj;
 676                 DPRINT(("Iteration\n"));
 677 
 678                 if (first_pass)
 679                   STACK_PUSHX(stack, int, regset_contains != 0);
 680                 STACK_PUSHX(stack, int, tag);
 681                 STACK_PUSHX(stack, voidptr, node);
 682                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
 683 
 684                 STACK_PUSHX(stack, voidptr, iter->arg);
 685                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 686 
 687                 /* Regset is not empty, so add a tag here (this always happens
 688                    because iterators always get submatch id, even if in the
 689                    invisible range) */
 690                 if (regset_contains)
 691                   {
 692                     if (!first_pass)
 693                       {
 694                         status = tre_merge_branches(mem, node, NULL, tag,
 695                                                     tnfa->num_tags);
 696                         if (status != REG_OK)
 697                           break;
 698                         status = tre_add_tag_left(mem, node, tag);
 699                         if (status != REG_OK)
 700                           break;
 701                         if (regset_contains == REGSET_HAS_STARTS && tag != 0)
 702                           tnfa->tag_directions[tag] = iter->minimal ?
 703                                                       TRE_TAG_MINIMIZE :
 704                                                       TRE_TAG_LEFT_MAXIMIZE;
 705                         else
 706                           tnfa->tag_directions[tag] = direction;
 707                         DPRINT(("Setting t%d direction to %s\n", tag,
 708                                 tag_dir_str[tnfa->tag_directions[tag]]));
 709                         tre_purge_regset(regset, tnfa, tag);
 710                       }
 711 
 712                     DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
 713                             tag));
 714                     regset[0] = REGSET_UNSET;
 715                     regset_contains = 0;
 716                     tag = next_tag;
 717                     num_tags++;
 718                     next_tag++;
 719                   }
 720                 direction = TRE_TAG_LEFT_MAXIMIZE;
 721                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 722               }
 723               break;
 724             case UNION:
 725               {
 726                 tre_union_t *uni;
 727                 tre_ast_node_t *left;
 728                 tre_ast_node_t *right;
 729                 int front_tag = -1;
 730 
 731                 DPRINT(("Union\n"));
 732 
 733                 if (regset_contains)
 734                   {
 735                     DPRINT(("  UNION num_tags++ tag=%d\n", tag));
 736                     front_tag = tag;
 737                     tag = next_tag;
 738                     num_tags++;
 739                     next_tag++;
 740                   }
 741 
 742                 /* For the top union, walk the tree of consecutive unions,
 743                  * setting the left_tag and right_tag values in increasing
 744                  * order (left to right priority) */
 745                 if (top_union &&
 746                     (node->num_submatches -
 747                     (node->submatch_id >= 0 &&
 748                     node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
 749                   {
 750                     tre_ast_node_t *n;
 751                     int last = tre_stack_num_objects(stack);
 752 
 753                     STACK_PUSH(stack, voidptr, node);
 754                     STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
 755 
 756                     while (tre_stack_num_objects(stack) > last)
 757                       {
 758                         symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 759                         switch (symbol)
 760                           {
 761                           case ADDTAGS_UNION_RECURSE:
 762                             n = tre_stack_pop_voidptr(stack);
 763                             uni = n->obj;
 764                             left = uni->left;
 765 
 766                             /* Since the top union has num_submatches > 0,
 767                              * we set all the consecutive union's
 768                              * make_branches to 1 to force the generation
 769                              * of end tags for each union branch. */
 770                             n->make_branches = 1;
 771 
 772                             STACK_PUSH(stack, voidptr, n);
 773                             STACK_PUSH(stack, int,
 774                                        ADDTAGS_UNION_RIGHT_RECURSE);
 775 
 776                             if (left->type == UNION)
 777                               {
 778                                 STACK_PUSH(stack, voidptr, left);
 779                                 STACK_PUSH(stack, int,
 780                                            ADDTAGS_UNION_RECURSE);
 781                               }
 782                             else
 783                               {
 784                                 DPRINT(("  ADDTAGS_UNION_RECURSE "
 785                                         "num_tags++ tag=%d\n", tag));
 786                                 uni->left_tag = tag;
 787                                 tag = next_tag;
 788                                 num_tags++;
 789                                 next_tag++;
 790                               }
 791                             break;
 792 
 793                           case ADDTAGS_UNION_RIGHT_RECURSE:
 794                             n = tre_stack_pop_voidptr(stack);
 795                             uni = n->obj;
 796                             right = uni->right;
 797 
 798                             if (right->type == UNION)
 799                               {
 800                                 STACK_PUSH(stack, voidptr, right);
 801                                 STACK_PUSH(stack, int,
 802                                            ADDTAGS_UNION_RECURSE);
 803                               }
 804                             else
 805                               {
 806                                 DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
 807                                         "num_tags++ tag=%d\n", tag));
 808                                 uni->right_tag = tag;
 809                                 tag = next_tag;
 810                                 num_tags++;
 811                                 next_tag++;
 812                               }
 813 
 814                             break;
 815 
 816                           default:
 817                             assert(0);
 818                             break;
 819 
 820                           } /* end switch(symbol) */
 821                       } /* end while(tre_stack_num_objects(stack) > last */
 822                     if (!first_pass)
 823                       {
 824                         STACK_PUSHX(stack, int, front_tag);
 825                         STACK_PUSHX(stack, voidptr, node);
 826                         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
 827                       }
 828                   } /* end if (top_union && ...) */
 829 
 830                 uni = node->obj;
 831                 left = uni->left;
 832                 right = uni->right;
 833 
 834                 /* After processing right child. */
 835                 STACK_PUSHX(stack, voidptr, regset);
 836                 STACK_PUSHX(stack, int, regset_contains != 0);
 837                 STACK_PUSHX(stack, voidptr, node);
 838                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
 839 
 840                 /* Process right child. */
 841                 STACK_PUSHX(stack, voidptr, right);
 842                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 843 
 844                 /* After processing left child. */
 845                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
 846 
 847                 /* Process left child. */
 848                 STACK_PUSHX(stack, voidptr, left);
 849                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 850 
 851                 /* Regset is not empty, so add a tag here. */
 852                 if (regset_contains)
 853                   {
 854                     if (!first_pass)
 855                       {
 856                         status = tre_merge_branches(mem, node, NULL, front_tag,
 857                                                     tnfa->num_tags);
 858                         if (status != REG_OK)
 859                           break;
 860                         status = tre_add_tag_left(mem, node, front_tag);
 861                         if (status != REG_OK)
 862                           break;
 863                         if (regset_contains == REGSET_HAS_STARTS)
 864                           tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
 865                         else
 866                           tnfa->tag_directions[front_tag] = direction;
 867                         DPRINT(("Setting t%d direction to %s\n", front_tag,
 868                                 tag_dir_str[tnfa->tag_directions[front_tag]]));
 869                         tre_purge_regset(regset, tnfa, front_tag);
 870                       }
 871 
 872                     regset[0] = REGSET_UNSET;
 873                     regset_contains = 0;
 874                   }
 875 
 876                 break;
 877               }
 878             } /* end switch (node->type) */
 879 
 880           break; /* end case: ADDTAGS_RECURSE */
 881 
 882         case ADDTAGS_AFTER_ITERATION:
 883           {
 884             tre_iteration_t *iter;
 885             tre_ast_node_t *orig;
 886             int enter_tag;
 887 
 888             node = tre_stack_pop_voidptr(stack);
 889             orig = node->original ? node->original : node;
 890             iter = (tre_iteration_t *)orig->obj;
 891             enter_tag = tre_stack_pop_int(stack);
 892             if (iter->minimal)
 893               minimal_tag = enter_tag;
 894 
 895             DPRINT(("After iteration\n"));
 896             if (first_pass)
 897               {
 898                 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
 899                 DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
 900                         node->num_tags));
 901               }
 902             else
 903               {
 904                 /* node->last_matched_branch will have the start tag (the tag
 905                    just *before* the iteration).  iter->arg->last_matched_branch
 906                    will have the tag(s) inside the iteration, the ones that
 907                    may need to be reset if the iteration doesn't match.  So
 908                    before we merge iter->arg into node, we need to set up
 909                    a new tre_last_matched_t and tre_last_matched_branch_t,
 910                    using any of the inside tags as cmp_tag (we choose the first
 911                    tag found by bit_ffs).  If there are no inside tags, we
 912                    don't bother creating the extra structures. */
 913                 tre_last_matched_branch_pre_t *b =
 914                                                 iter->arg->last_matched_branch;
 915 
 916                 if (b && b->n_tags > 0)
 917                   {
 918                     tre_last_matched_pre_t *u;
 919 
 920                     bit_ffs(b->tags, num_tags, &b->cmp_tag);
 921                     DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
 922                             "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
 923 
 924                     u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
 925                                        sizeof(tre_last_matched_branch_pre_t)
 926                                        + bitstr_size(tnfa->num_tags));
 927                     if (!u)
 928                       {
 929                         status = REG_ESPACE;
 930                         break;
 931                       }
 932                     u->branches = b;
 933                     u->n_branches = 1;
 934                     u->start_tag = b->cmp_tag;
 935                     u->tot_branches = b->tot_branches;
 936                     u->tot_last_matched = 1 + b->tot_last_matched;
 937                     u->tot_tags = b->tot_tags;
 938 
 939                     b = (tre_last_matched_branch_pre_t *)(u + 1);
 940                     b->last_matched = u;
 941                     b->n_last_matched = 1;
 942                     b->tot_branches = 1 + u->tot_branches;
 943                     b->tot_last_matched = u->tot_last_matched;
 944                     b->tot_tags = u->tot_tags;
 945 
 946                     iter->arg->last_matched_branch = b;
 947                   }
 948                 status = tre_merge_branches(mem, node, iter->arg, 0,
 949                                             tnfa->num_tags);
 950                 if (status != REG_OK)
 951                   break;
 952 
 953                 if (iter->minimal)
 954                   {
 955                     /* Add a union with a left EMPTY literal and the right
 956                        being iter->arg.  This should force the tags inside
 957                        the minimal iteration to prefer being unset */
 958                     if (iter->min == 0 && iter->max <= 1)
 959                       {
 960                         tre_ast_node_t *u, *e;
 961 
 962                         e = tre_ast_new_literal(mem, EMPTY, -1, -1);
 963                         if (e == NULL)
 964                           {
 965                             status = REG_ESPACE;
 966                             break;
 967                           }
 968                         u = tre_ast_new_union(mem, e, iter->arg);
 969                         if (u == NULL)
 970                           {
 971                             status = REG_ESPACE;
 972                             break;
 973                           }
 974                         iter->arg = u;
 975                       }
 976 
 977                     direction = TRE_TAG_MINIMIZE;
 978                   }
 979                 else
 980                   direction = TRE_TAG_MAXIMIZE;
 981                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 982               }
 983             break;
 984           }
 985 
 986         case ADDTAGS_AFTER_CAT_LEFT:
 987           {
 988             int new_tag = tre_stack_pop_int(stack);
 989             next_tag = tre_stack_pop_int(stack);
 990             DPRINT(("After cat left, tag = %d, next_tag = %d\n",
 991                     tag, next_tag));
 992             if (new_tag >= 0)
 993               {
 994                 DPRINT(("  Setting tag to %d\n", new_tag));
 995                 tag = new_tag;
 996               }
 997             break;
 998           }
 999 
1000         case ADDTAGS_AFTER_CAT_RIGHT:
1001           {
1002             tre_catenation_t *cat;
1003 
1004             DPRINT(("After cat right\n"));
1005             node = tre_stack_pop_voidptr(stack);
1006             cat = node->obj;
1007             if (first_pass)
1008               {
1009                 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1010                 DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1011                         node->num_tags));
1012               }
1013             else
1014               {
1015                 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1016                                             tnfa->num_tags);
1017                 if (status != REG_OK)
1018                   break;
1019                 status = tre_merge_branches(mem, node, cat->left, 0,
1020                                             tnfa->num_tags);
1021               }
1022             break;
1023           }
1024 
1025         case ADDTAGS_AFTER_UNION_LEFT:
1026           DPRINT(("After union left\n"));
1027           /* Lift the bottom of the `regset' array so that when processing
1028              the right operand the items currently in the array are
1029              invisible.  The original bottom was saved at ADDTAGS_UNION and
1030              will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1031           while (*regset != REGSET_UNSET)
1032             regset++;
1033           regset_contains = 0;
1034           break;
1035 
1036         case ADDTAGS_AFTER_UNION_RIGHT:
1037           {
1038             int added_tags;
1039             tre_ast_node_t *orig;
1040             tre_union_t *uni;
1041             /* Note: node may not be a UNION, but a CATENATION with a left
1042              * tag.  So that is why we pass the original node->obj on the
1043              * stack, to get the union's true values. */
1044 
1045             DPRINT(("After union right\n"));
1046             node = tre_stack_pop_voidptr(stack);
1047             orig = node->original ? node->original : node;
1048             uni = (tre_union_t *)orig->obj;
1049             added_tags = tre_stack_pop_int(stack);
1050             if (first_pass)
1051               {
1052                 node->num_tags = uni->left->num_tags + uni->right->num_tags
1053                                  + added_tags;
1054                 if (uni->left_tag > 0)
1055                   node->num_tags++;
1056                 if (uni->right_tag > 0)
1057                   node->num_tags++;
1058                 DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1059                         node->num_tags));
1060               }
1061             regset = tre_stack_pop_voidptr(stack);
1062 
1063             /* Add tags after both children, the left child gets a smaller
1064                tag than the right child.  This guarantees that we prefer
1065                the left child over the right child. */
1066             /* XXX - This is not always necessary (if the children have
1067                tags which must be seen for every match of that child). */
1068             if (!first_pass && node->make_branches)
1069               {
1070                 tre_last_matched_branch_pre_t *lb =
1071                   uni->left->last_matched_branch;
1072                 tre_last_matched_branch_pre_t *rb =
1073                   uni->right->last_matched_branch;
1074                 tre_last_matched_pre_t *lu =
1075                   uni->left->last_matched_in_progress;
1076                 tre_last_matched_pre_t *ru =
1077                   uni->right->last_matched_in_progress;
1078                 tre_last_matched_pre_t *u;
1079                 /* We don't need to call tre_merge_branches because these
1080                  * tags don't participate in submatch ranges, so don't need
1081                  * to be recorded.  But we do set the cmp_tag entry of the
1082                  * tre_last_matched_branch_pre_t, so we might call
1083                  * tre_merge_branches if we need to create an empty
1084                  * tre_last_matched_branch_pre_t. */
1085                 if (uni->left_tag > 0)
1086                   {
1087                     DPRINT(("Setting t%d direction to maximize\n",
1088                             uni->left_tag));
1089                     status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1090                     if (status != REG_OK)
1091                       break;
1092                     tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1093                     if (!lb)
1094                       {
1095                         status = tre_merge_branches(mem, uni->left, NULL, -1,
1096                                                     tnfa->num_tags);
1097                         if (status != REG_OK)
1098                           break;
1099                         lb = uni->left->last_matched_branch;
1100                       }
1101                     lb->cmp_tag = uni->left_tag;
1102                   }
1103                 if (uni->right_tag > 0)
1104                   {
1105                     DPRINT(("Setting t%d direction to maximize\n",
1106                             uni->right_tag));
1107                     status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1108                     if (status != REG_OK)
1109                       break;
1110                     tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1111                     if (!rb)
1112                       {
1113                         status = tre_merge_branches(mem, uni->right, NULL, -1,
1114                                                     tnfa->num_tags);
1115                         if (status != REG_OK)
1116                           break;
1117                         rb = uni->right->last_matched_branch;
1118                       }
1119                     rb->cmp_tag = uni->right_tag;
1120                   }
1121                 /* Now merge the tre_last_matched_branch_pre_t into a
1122                    tre_last_matched_pre_t */
1123                 if (lu == NULL)
1124                   {
1125                     if (ru == NULL)
1126                       {
1127                         /* Create a new tre_last_matched_pre_t */
1128                         u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1129                         if (!u)
1130                           {
1131                             status = REG_ESPACE;
1132                             break;
1133                           }
1134                         u->tot_last_matched = 1;
1135 
1136                         if (lb)
1137                           {
1138                             u->branches = lb;
1139                             u->n_branches = 1;
1140                             u->tot_branches += lb->tot_branches;
1141                             u->tot_last_matched += lb->tot_last_matched;
1142                             u->tot_tags += lb->tot_tags;
1143                             if (rb)
1144                               {
1145                                 lb->next = rb;
1146                                 u->n_branches++;
1147                                 u->tot_branches += rb->tot_branches;
1148                                 u->tot_last_matched += rb->tot_last_matched;
1149                                 u->tot_tags += rb->tot_tags;
1150                               }
1151                           }
1152                         else if (rb)
1153                           {
1154                             u->branches = rb;
1155                             u->n_branches = 1;
1156                             u->tot_branches += rb->tot_branches;
1157                             u->tot_last_matched += rb->tot_last_matched;
1158                             u->tot_tags += rb->tot_tags;
1159                           }
1160                       }
1161                     else
1162                       {
1163                         /* Use ru, and add lb */
1164                         u = ru;
1165                         if (lb)
1166                           {
1167                             lb->next = u->branches;
1168                             u->branches = lb;
1169                             u->n_branches++;
1170                             u->tot_branches += lb->tot_branches;
1171                             u->tot_last_matched += lb->tot_last_matched;
1172                             u->tot_tags += lb->tot_tags;
1173                           }
1174                       }
1175                   }
1176                 else if (ru == NULL)
1177                   {
1178                     /* Use lu, and add rb */
1179                     u = lu;
1180                     if (rb)
1181                       {
1182                         rb->next = u->branches;
1183                         u->branches = rb;
1184                         u->n_branches++;
1185                         u->tot_branches += rb->tot_branches;
1186                         u->tot_last_matched += rb->tot_last_matched;
1187                         u->tot_tags += rb->tot_tags;
1188                       }
1189                   }
1190                 else
1191                   {
1192                     /* Merge lu and ru into lu */
1193                     if (lu->branches)
1194                       {
1195                         if (ru->branches)
1196                           {
1197                             tre_last_matched_branch_pre_t *b = lu->branches;
1198                             while (b->next) b = b->next;
1199                             b->next = ru->branches;
1200                             lu->n_branches += ru->n_branches;
1201                           }
1202                       }
1203                     else if (ru->branches)
1204                       {
1205                         lu->branches = ru->branches;
1206                         lu->n_branches = ru->n_branches;
1207                       }
1208                     lu->tot_branches += ru->tot_branches;
1209                     lu->tot_last_matched += ru->tot_last_matched - 1;
1210                     lu->tot_tags += ru->tot_tags;
1211                     u = lu;
1212                   }
1213                 node->last_matched_in_progress = u;
1214               }
1215             direction = TRE_TAG_MAXIMIZE;
1216             break;
1217           }
1218 
1219         case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1220           {
1221             tre_last_matched_branch_pre_t *b;
1222             tre_last_matched_pre_t *u;
1223             int start_tag;
1224 
1225             DPRINT(("After union top\n"));
1226             node = tre_stack_pop_voidptr(stack);
1227             start_tag = tre_stack_pop_int(stack);
1228             b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1229                                + bitstr_size(tnfa->num_tags));
1230             if (!b)
1231               {
1232                 status = REG_ESPACE;
1233                 break;
1234               }
1235 
1236             u = node->last_matched_in_progress;
1237             u->start_tag = start_tag;
1238             b->tot_branches = 1 + u->tot_branches;
1239             b->tot_last_matched = u->tot_last_matched;
1240             b->tot_tags = u->tot_tags;
1241             b->last_matched = u;
1242             b->n_last_matched = 1;
1243             node->last_matched_branch = b;
1244             node->last_matched_in_progress = NULL;
1245             break;
1246           }
1247 
1248         default:
1249           assert(0);
1250           break;
1251 
1252         } /* end switch(symbol) */
1253     } /* end while(tre_stack_num_objects(stack) > bottom) */
1254 
1255   if (status != REG_OK)
1256     {
1257       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1258       goto error_post_compile;
1259     }
1260 
1261   if (!first_pass)
1262     {
1263       int i;
1264       if (num_tags != tnfa->num_tags)
1265         {
1266           DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1267                   tnfa->num_tags));
1268           status = REG_BADPAT;
1269           goto error_post_compile;
1270         }
1271 
1272       tre_purge_regset(regset, tnfa, tag);
1273       DPRINT(("Setting t%d to %s\n", num_tags,
1274               tag_dir_str[direction]));
1275       tnfa->tag_directions[num_tags] = direction;
1276 
1277       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1278         {
1279           DPRINT(("Processed %d reorder tags instead of %d\n",
1280                   (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1281           status = REG_BADPAT;
1282           goto error_post_compile;
1283         }
1284     *rtp = -1;
1285 #if TRE_DEBUG
1286       if (reorder_tags[0] >= 0)
1287         {
1288           DPRINT(("reorder_tags:\n"));
1289           for (rtp = reorder_tags; *rtp >= 0;)
1290             {
1291               DPRINT(("%d after ", *rtp++));
1292               DPRINT(("%d\n", *rtp++));
1293             }
1294         }
1295         else
1296           DPRINT(("No reorder_tags\n"));
1297 #endif /* TRE_DEBUG */
1298 
1299       /* Initialize to_reorder */
1300       for (i = 0; i < num_tags; i++)
1301         to_reorder[i] = i;
1302       /* Use to_seq_order to convert reorder_tags values, and use those to
1303        * reorder to_reorder */
1304       for (rtp = reorder_tags; *rtp >= 0;)
1305         {
1306           int j, high, low;
1307           int ti = *rtp++;
1308 
1309           /* Skip reordering the final tag */
1310           if (ti >= num_tags)
1311             {
1312               DPRINT(("Skipping reorder of %d\n", ti));
1313               rtp++;
1314               continue;
1315             }
1316           /* The number of the tag to reorder */
1317           high = to_reorder[ti];
1318           /* Reorder after this tag */
1319           low = to_reorder[*rtp++];
1320 
1321           DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1322           if (low > high)
1323             {
1324               DPRINT(("Tag %d already before %d\n", high, low));
1325               continue;
1326             }
1327           for (j = 0; j < num_tags; j++)
1328             if (to_reorder[j] > low && to_reorder[j] < high)
1329               to_reorder[j]++;
1330           to_reorder[ti] = low + 1;
1331 #ifdef TRE_DEBUG
1332           DPRINT(("to_reorder=("));
1333           for (j = 0; j < num_tags; j++)
1334             {
1335               DPRINT(("%d", to_reorder[j]));
1336               if (j < num_tags - 1)
1337                 DPRINT((","));
1338             }
1339           DPRINT((")\n"));
1340 #endif /* TRE_DEBUG */
1341         }
1342       /* Determine if reordering in really necessary */
1343       {
1344         int need_reorder = 0;
1345         for (i = 0; i < num_tags; i++)
1346           if(to_reorder[i] != i)
1347             {
1348               need_reorder = 1;
1349               break;
1350             }
1351         /* If need_reorder is not set, free reorder_tags, and set to NULL,
1352          * indicating no reordering is needed */
1353         if (!need_reorder)
1354           {
1355             DPRINT(("Don't need to reorder\n"));
1356             free(reorder_tags);
1357             reorder_tags = NULL;
1358           }
1359       }
1360     }
1361 
1362   if (reorder_tags)
1363     {
1364       int i;
1365       tre_tag_direction_t *new_tag_directions;
1366 #if TRE_DEBUG
1367       DPRINT(("to_reorder:"));
1368       for (i = 0; i < num_tags; i++)
1369         DPRINT((" %d->%d", i, to_reorder[i]));
1370       DPRINT(("\n"));
1371 #endif /* TRE_DEBUG */
1372 
1373       DPRINT(("Reordering submatch_data\n"));
1374       for (i = 0; i < tnfa->num_submatches; i++)
1375         {
1376 #if TRE_DEBUG
1377           int so = tnfa->submatch_data[i].so_tag;
1378           int eo = tnfa->submatch_data[i].eo_tag;
1379 #endif /* TRE_DEBUG */
1380           tnfa->submatch_data[i].so_tag =
1381             to_reorder[tnfa->submatch_data[i].so_tag];
1382           tnfa->submatch_data[i].eo_tag =
1383             tnfa->submatch_data[i].eo_tag < num_tags ?
1384             to_reorder[tnfa->submatch_data[i].eo_tag] :
1385             tnfa->submatch_data[i].eo_tag;
1386           DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1387                   tnfa->submatch_data[i].so_tag,
1388                   tnfa->submatch_data[i].eo_tag));
1389         }
1390 
1391       DPRINT(("Reordering tag_directions\n"));
1392       /* We only allocate num_tags directions and reorder them.  The
1393        * num_tags-th direction (end tag) is left unchanged. */
1394       new_tag_directions = malloc(sizeof(*new_tag_directions) * num_tags);
1395       if (new_tag_directions == NULL)
1396         {
1397           status = REG_ESPACE;
1398           goto error_post_compile;
1399         }
1400       for (i = 0; i < num_tags; i++)
1401         {
1402           new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1403         }
1404 #if TRE_DEBUG
1405       for (i = 0; i < num_tags; i++)
1406         {
1407           DPRINT(("t%d %s->%s\n", i,
1408                   tag_dir_str[tnfa->tag_directions[i]],
1409                   tag_dir_str[new_tag_directions[i]]));
1410         }
1411         DPRINT(("t%d %s->%s\n", num_tags,
1412                 tag_dir_str[tnfa->tag_directions[num_tags]],
1413                 tag_dir_str[tnfa->tag_directions[num_tags]]));
1414 #endif /* TRE_DEBUG */
1415       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1416       free(new_tag_directions);
1417 
1418       DPRINT(("Reordering minimal_tags\n"));
1419       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1420         tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1421                                 to_reorder[tnfa->minimal_tags[i]] :
1422                                 tnfa->minimal_tags[i];
1423 
1424       DPRINT(("Reordering AST tags\n"));
1425       STACK_PUSH(stack, voidptr, tree);
1426       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1427         {
1428           node = tre_stack_pop_voidptr(stack);
1429 
1430           switch (node->type)
1431             {
1432             case LITERAL:
1433               {
1434                 tre_literal_t *lit = (tre_literal_t *)node->obj;
1435                 if (IS_TAG(lit))
1436                   lit->code_max = to_reorder[lit->code_max];
1437                 break;
1438               }
1439 
1440             case UNION:
1441               {
1442                 tre_union_t *uni = (tre_union_t *)node->obj;
1443                 STACK_PUSHX(stack, voidptr, uni->right);
1444                 STACK_PUSHX(stack, voidptr, uni->left);
1445                 break;
1446               }
1447 
1448             case CATENATION:
1449               {
1450                 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1451                 STACK_PUSHX(stack, voidptr, cat->right);
1452                 STACK_PUSHX(stack, voidptr, cat->left);
1453                 break;
1454               }
1455 
1456             case ITERATION:
1457               {
1458                 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1459                 STACK_PUSHX(stack, voidptr, iter->arg);
1460                 break;
1461               }
1462 
1463             default:
1464               assert(0);
1465               break;
1466             }
1467         }
1468         if (status != REG_OK)
1469           {
1470             DPRINT(("Error while reordering tags\n"));
1471             goto error_post_compile;
1472           }
1473     }
1474 
1475   if (!first_pass)
1476     {
1477       if (tree->last_matched_branch)
1478         {
1479           tre_last_matched_branch_t *buf, *b, *bb;
1480           tre_last_matched_branch_pre_t *bp;
1481           tre_last_matched_t *u, *uu;
1482           tre_last_matched_pre_t *up;
1483           int *t;
1484           int i;
1485 #ifdef TRE_DEBUG
1486           tre_last_matched_branch_t *_b;
1487           tre_last_matched_t *_u;
1488           int *_t;
1489 
1490           DPRINT(("last_match_branch_pre:\n"));
1491           print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1492 #endif /* TRE_DEBUG */
1493           buf = (tre_last_matched_branch_t *)calloc(1,
1494                                      tree->last_matched_branch->tot_branches
1495                                      * sizeof(tre_last_matched_branch_t) +
1496                                      tree->last_matched_branch->tot_last_matched
1497                                      * sizeof(tre_last_matched_t) +
1498                                      tree->last_matched_branch->tot_tags *
1499                                      sizeof(int));
1500           if (!buf)
1501             {
1502               status = REG_ESPACE;
1503               goto error_post_compile;
1504             }
1505 
1506           b = buf;
1507           u = (tre_last_matched_t *)(b +
1508               tree->last_matched_branch->tot_branches);
1509           t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1510 #ifdef TRE_DEBUG
1511           _b = b;
1512           _u = u;
1513           _t = t;
1514 #endif /* TRE_DEBUG */
1515           DPRINT(("Copying info_pre to info\n"));
1516           STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1517           STACK_PUSH(stack, int, 1);
1518           STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1519 
1520           while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1521             {
1522               switch (tre_stack_pop_int(stack))
1523                 {
1524                 case COPY_LAST_MATCHED_BRANCH:
1525                   i = tre_stack_pop_int(stack);
1526                   /* The tre_last_matched_branch_pre_t * is still on the
1527                      stack */
1528                   STACK_PUSHX(stack, voidptr, b);
1529                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1530                   b += i;
1531                   break;
1532 
1533                 case COPY_LAST_MATCHED_BRANCH_NEXT:
1534                   bb = tre_stack_pop_voidptr(stack);
1535                   bp = tre_stack_pop_voidptr(stack);
1536                   bb->n_last_matched = bp->n_last_matched;
1537                   bb->cmp_tag = bp->cmp_tag;
1538                   if (bp->n_tags > 0)
1539                     {
1540                       int n;
1541                       n = bb->n_tags = bp->n_tags;
1542                       bb->tags = t;
1543                       for (i = 0; i < num_tags; i++)
1544                         if (bit_test(bp->tags, i))
1545                           {
1546                             *t++ = i;
1547                             if (--n <= 0)
1548                               break;
1549                           }
1550                     }
1551                   if (bp->next)
1552                     {
1553                       STACK_PUSHX(stack, voidptr, bp->next);
1554                       STACK_PUSHX(stack, voidptr, bb + 1);
1555                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1556                     }
1557                   if (bp->n_last_matched > 0)
1558                     {
1559                       bb->last_matched = u;
1560                       STACK_PUSHX(stack, voidptr, bp->last_matched);
1561                       STACK_PUSHX(stack, int, bp->n_last_matched);
1562                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1563                     }
1564                   break;
1565 
1566                 case COPY_LAST_MATCHED:
1567                   i = tre_stack_pop_int(stack);
1568                   /* The tre_last_matched_pre_t * is still on the stack */
1569                   STACK_PUSHX(stack, voidptr, u);
1570                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1571                   u += i;
1572                   break;
1573 
1574                 case COPY_LAST_MATCHED_NEXT:
1575                   uu = tre_stack_pop_voidptr(stack);
1576                   up = tre_stack_pop_voidptr(stack);
1577                   uu->n_branches = up->n_branches;
1578                   uu->branches = b;
1579                   uu->start_tag = up->start_tag;
1580                   if (up->next)
1581                     {
1582                       STACK_PUSHX(stack, voidptr, up->next);
1583                       STACK_PUSHX(stack, voidptr, uu + 1);
1584                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1585                     }
1586                   STACK_PUSHX(stack, voidptr, up->branches);
1587                   STACK_PUSHX(stack, int, up->n_branches);
1588                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1589                   break;
1590                 }
1591             }
1592           if (status != REG_OK)
1593             goto error_post_compile;
1594 #ifdef TRE_DEBUG
1595           DPRINT(("last_matched_branch:\n"));
1596           print_last_match_branch(buf, 0);
1597           if (b != _b + tree->last_matched_branch->tot_branches)
1598             DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1599                     b, _b + tree->last_matched_branch->tot_branches));
1600           if (u != _u + tree->last_matched_branch->tot_last_matched)
1601             DPRINT(("u/%p != _u + "
1602                     "tree->last_matched_branch->tot_last_matched/%p\n",
1603                     u, _u + tree->last_matched_branch->tot_last_matched));
1604           if (t != _t + tree->last_matched_branch->tot_tags)
1605             DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1606                     t, _t + tree->last_matched_branch->tot_tags));
1607 #endif /* TRE_DEBUG */
1608           tnfa->last_matched_branch = buf;
1609         }
1610 #ifdef TRE_DEBUG
1611       else
1612         DPRINT(("No last_match_branch_pre\n"));
1613 #endif /* TRE_DEBUG */
1614     }
1615 
1616   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1617           first_pass? "First pass" : "Second pass", num_tags));
1618 #ifdef TRE_DEBUG
1619   tre_ast_print(tree);
1620 #endif /* TRE_DEBUG */
1621   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1622           num_tags));
1623   assert(tree->num_tags == num_tags);
1624   tnfa->end_tag = num_tags;
1625   tnfa->num_tags = num_tags;
1626   tnfa->num_minimals = num_minimals;
1627 error_post_compile:
1628   free(reorder_tags);
1629 error_reorder_tags:
1630   free(orig_regset);
1631 error_regset:
1632   return status;
1633 }
1634 
1635 /*
1636   AST to TNFA compilation routines.
1637 */
1638 
1639 typedef enum {
1640   COPY_RECURSE,
1641   COPY_SET_RESULT_PTR
1642 } tre_copyast_symbol_t;
1643 
1644 /* Flags for tre_copy_ast(). */
1645 #define COPY_REMOVE_TAGS         1
1646 #define COPY_MAXIMIZE_FIRST_TAG  2
1647 
1648 static reg_errcode_t
1649 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1650              int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1651              tre_ast_node_t **copy, int *max_pos)
1652 {
1653   reg_errcode_t status = REG_OK;
1654   int bottom = tre_stack_num_objects(stack);
1655   int num_copied = 0;
1656   int first_tag = 1;
1657   tre_ast_node_t **result = copy;
1658   tre_copyast_symbol_t symbol;
1659 
1660   STACK_PUSH(stack, voidptr, ast);
1661   STACK_PUSH(stack, int, COPY_RECURSE);
1662 
1663   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1664     {
1665       tre_ast_node_t *node;
1666       if (status != REG_OK)
1667         break;
1668 
1669       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1670       switch (symbol)
1671         {
1672         case COPY_SET_RESULT_PTR:
1673           result = tre_stack_pop_voidptr(stack);
1674           break;
1675         case COPY_RECURSE:
1676           node = tre_stack_pop_voidptr(stack);
1677           switch (node->type)
1678             {
1679             case LITERAL:
1680               {
1681                 tre_literal_t *lit = node->obj;
1682                 int pos = lit->position;
1683                 int min = lit->code_min;
1684                 int max = lit->code_max;
1685                 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1686                                                   lit->u.bracket_match_list :
1687                                                   NULL;
1688                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1689                   {
1690                     /* XXX - e.g. [ab] has only one position but two
1691                        nodes, so we are creating holes in the state space
1692                        here.  Not fatal, just wastes memory. */
1693                     pos += *pos_add;
1694                     num_copied++;
1695                   }
1696                 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1697                   {
1698                     /* Change this tag to empty. */
1699                     min = EMPTY;
1700                     max = pos = -1;
1701                   }
1702                 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1703                          && first_tag)
1704                   {
1705                     /* Maximize the first tag. */
1706                     if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1707                       tag_directions[max] = TRE_TAG_MAXIMIZE;
1708                     first_tag = 0;
1709                   }
1710                 *result = tre_ast_new_literal(mem, min, max, pos);
1711                 if (*result == NULL)
1712                   status = REG_ESPACE;
1713 
1714                 if (pos > *max_pos)
1715                   *max_pos = pos;
1716 
1717                 if (!IS_SPECIAL(lit))
1718                   ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1719                       = list;
1720                 break;
1721               }
1722             case UNION:
1723               {
1724                 tre_union_t *uni = node->obj;
1725                 tre_union_t *tmp;
1726                 *result = tre_ast_new_union(mem, uni->left, uni->right);
1727                 if (*result == NULL)
1728                   {
1729                     status = REG_ESPACE;
1730                     break;
1731                   }
1732                 tmp = (*result)->obj;
1733                 result = &tmp->left;
1734                 STACK_PUSHX(stack, voidptr, uni->right);
1735                 STACK_PUSHX(stack, int, COPY_RECURSE);
1736                 STACK_PUSHX(stack, voidptr, &tmp->right);
1737                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1738                 STACK_PUSHX(stack, voidptr, uni->left);
1739                 STACK_PUSHX(stack, int, COPY_RECURSE);
1740                 break;
1741               }
1742             case CATENATION:
1743               {
1744                 tre_catenation_t *cat = node->obj;
1745                 tre_catenation_t *tmp;
1746                 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1747                 if (*result == NULL)
1748                   {
1749                     status = REG_ESPACE;
1750                     break;
1751                   }
1752                 tmp = (*result)->obj;
1753                 tmp->left = NULL;
1754                 tmp->right = NULL;
1755                 result = &tmp->left;
1756 
1757                 STACK_PUSHX(stack, voidptr, cat->right);
1758                 STACK_PUSHX(stack, int, COPY_RECURSE);
1759                 STACK_PUSHX(stack, voidptr, &tmp->right);
1760                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1761                 STACK_PUSHX(stack, voidptr, cat->left);
1762                 STACK_PUSHX(stack, int, COPY_RECURSE);
1763                 break;
1764               }
1765             case ITERATION:
1766               {
1767                 tre_iteration_t *iter = node->obj;
1768                 STACK_PUSHX(stack, voidptr, iter->arg);
1769                 STACK_PUSHX(stack, int, COPY_RECURSE);
1770                 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1771                                            iter->max, iter->minimal);
1772                 if (*result == NULL)
1773                   {
1774                     status = REG_ESPACE;
1775                     break;
1776                   }
1777                 iter = (*result)->obj;
1778                 result = &iter->arg;
1779                 break;
1780               }
1781             default:
1782               assert(0);
1783               break;
1784             }
1785           break;
1786         }
1787     }
1788   *pos_add += num_copied;
1789   return status;
1790 }
1791 
1792 typedef enum {
1793   EXPAND_RECURSE,
1794   EXPAND_AFTER_ITER
1795 } tre_expand_ast_symbol_t;
1796 
1797 /* Expands each iteration node that has a finite nonzero minimum or maximum
1798    iteration count to a catenated sequence of copies of the node. */
1799 static reg_errcode_t
1800 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1801                int *position, tre_tag_direction_t *tag_directions,
1802                int *max_depth)
1803 {
1804   reg_errcode_t status = REG_OK;
1805   int bottom = tre_stack_num_objects(stack);
1806   int pos_add = 0;
1807   int pos_add_total = 0;
1808   int max_pos = 0;
1809   int iter_depth = 0;
1810 
1811   STACK_PUSHR(stack, voidptr, ast);
1812   STACK_PUSHR(stack, int, EXPAND_RECURSE);
1813   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1814     {
1815       tre_ast_node_t *node;
1816       tre_expand_ast_symbol_t symbol;
1817 
1818       if (status != REG_OK)
1819         break;
1820 
1821       DPRINT(("pos_add %d\n", pos_add));
1822 
1823       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1824       node = tre_stack_pop_voidptr(stack);
1825       switch (symbol)
1826         {
1827         case EXPAND_RECURSE:
1828           switch (node->type)
1829             {
1830             case LITERAL:
1831               {
1832                 tre_literal_t *lit= node->obj;
1833                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1834                   {
1835                     lit->position += pos_add;
1836                     if (lit->position > max_pos)
1837                       max_pos = lit->position;
1838                   }
1839                 break;
1840               }
1841             case UNION:
1842               {
1843                 tre_union_t *uni = node->obj;
1844                 STACK_PUSHX(stack, voidptr, uni->right);
1845                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1846                 STACK_PUSHX(stack, voidptr, uni->left);
1847                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1848                 break;
1849               }
1850             case CATENATION:
1851               {
1852                 tre_catenation_t *cat = node->obj;
1853                 STACK_PUSHX(stack, voidptr, cat->right);
1854                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1855                 STACK_PUSHX(stack, voidptr, cat->left);
1856                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1857                 break;
1858               }
1859             case ITERATION:
1860               {
1861                 tre_iteration_t *iter = node->obj;
1862                 STACK_PUSHX(stack, int, pos_add);
1863                 STACK_PUSHX(stack, voidptr, node);
1864                 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1865                 STACK_PUSHX(stack, voidptr, iter->arg);
1866                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1867                 /* If we are going to expand this node at EXPAND_AFTER_ITER
1868                    then don't increase the `pos' fields of the nodes now, it
1869                    will get done when expanding. */
1870                 if (iter->min > 1 || iter->max > 1)
1871                   pos_add = 0;
1872                 iter_depth++;
1873                 DPRINT(("iter\n"));
1874                 break;
1875               }
1876             default:
1877               assert(0);
1878               break;
1879             }
1880           break;
1881         case EXPAND_AFTER_ITER:
1882           {
1883             tre_iteration_t *iter = node->obj;
1884             int pos_add_last;
1885             pos_add = tre_stack_pop_int(stack);
1886             pos_add_last = pos_add;
1887             /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1888                immediate replace the whole iteration with EMPTY.  This
1889                unfortunately drops any submatches, and messes up setting the
1890                pmatch values (we can get tags of -1, and tag values in the
1891                billions).  So we left it there and replace with EMPTY here. */
1892             if (iter->min == 0 && iter->max == 0)
1893               {
1894                 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1895                 if (empty == NULL)
1896                   return REG_ESPACE;
1897                 node->obj = empty->obj;
1898                 node->type = empty->type;
1899               }
1900             else if (iter->min > 1 || iter->max > 1)
1901               {
1902                 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1903                 int j;
1904                 int pos_add_save = pos_add;
1905 
1906                 /* Create a catenated sequence of copies of the node. */
1907                 for (j = 0; j < iter->min; j++)
1908                   {
1909                     tre_ast_node_t *copy;
1910                     /* Remove tags from all but the last copy. */
1911                     int flags = ((j + 1 < iter->min)
1912                                  ? COPY_REMOVE_TAGS
1913                                  : COPY_MAXIMIZE_FIRST_TAG);
1914                     DPRINT(("  pos_add %d\n", pos_add));
1915                     pos_add_save = pos_add;
1916                     status = tre_copy_ast(mem, stack, iter->arg, flags,
1917                                           &pos_add, tag_directions, &copy,
1918                                           &max_pos);
1919                     if (status != REG_OK)
1920                       return status;
1921                     if (seq1 != NULL)
1922                       seq1 = tre_ast_new_catenation(mem, seq1, copy);
1923                     else
1924                       seq1 = copy;
1925                     if (seq1 == NULL)
1926                       return REG_ESPACE;
1927                   }
1928 
1929                 if (iter->max == -1)
1930                   {
1931                     /* No upper limit. */
1932                     pos_add_save = pos_add;
1933                     status = tre_copy_ast(mem, stack, iter->arg, 0,
1934                                           &pos_add, NULL, &seq2, &max_pos);
1935                     if (status != REG_OK)
1936                       return status;
1937                     seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1938                     if (seq2 == NULL)
1939                       return REG_ESPACE;
1940                   }
1941                 else
1942                   {
1943                     for (j = iter->min; j < iter->max; j++)
1944                       {
1945                         tre_ast_node_t *tmp, *copy;
1946                         pos_add_save = pos_add;
1947                         status = tre_copy_ast(mem, stack, iter->arg, 0,
1948                                               &pos_add, NULL, &copy, &max_pos);
1949                         if (status != REG_OK)
1950                           return status;
1951                         if (seq2 != NULL)
1952                           seq2 = tre_ast_new_catenation(mem, copy, seq2);
1953                         else
1954                           seq2 = copy;
1955                         if (seq2 == NULL)
1956                           return REG_ESPACE;
1957                         tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1958                         if (tmp == NULL)
1959                           return REG_ESPACE;
1960                         seq2 = tre_ast_new_union(mem, tmp, seq2);
1961                         if (seq2 == NULL)
1962                           return REG_ESPACE;
1963                       }
1964                   }
1965 
1966                 pos_add = pos_add_save;
1967                 if (seq1 == NULL)
1968                   seq1 = seq2;
1969                 else if (seq2 != NULL)
1970                   seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1971                 if (seq1 == NULL)
1972                   return REG_ESPACE;
1973                 node->obj = seq1->obj;
1974                 node->type = seq1->type;
1975               }
1976 
1977             iter_depth--;
1978             pos_add_total += pos_add - pos_add_last;
1979             if (iter_depth == 0)
1980               pos_add = pos_add_total;
1981 
1982             break;
1983           }
1984         default:
1985           assert(0);
1986           break;
1987         }
1988     }
1989 
1990   *position += pos_add_total;
1991 
1992   /* `max_pos' should never be larger than `*position' if the above
1993      code works, but just an extra safeguard let's make sure
1994      `*position' is set large enough so enough memory will be
1995      allocated for the transition table. */
1996   if (max_pos > *position)
1997     *position = max_pos;
1998 
1999 #ifdef TRE_DEBUG
2000   DPRINT(("Expanded AST:\n"));
2001   tre_ast_print(ast);
2002   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2003 #endif
2004 
2005   return status;
2006 }
2007 
2008 static tre_pos_and_tags_t *
2009 tre_set_empty(tre_mem_t mem)
2010 {
2011   tre_pos_and_tags_t *new_set;
2012 
2013   new_set = tre_mem_calloc(mem, sizeof(*new_set));
2014   if (new_set == NULL)
2015     return NULL;
2016 
2017   new_set[0].position = -1;
2018   new_set[0].code_min = -1;
2019   new_set[0].code_max = -1;
2020 
2021   return new_set;
2022 }
2023 
2024 static tre_pos_and_tags_t *
2025 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2026             tre_bracket_match_list_t *bracket_match_list, int backref)
2027 {
2028   tre_pos_and_tags_t *new_set;
2029 
2030   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2031   if (new_set == NULL)
2032     return NULL;
2033 
2034   new_set[0].position = position;
2035   new_set[0].code_min = code_min;
2036   new_set[0].code_max = code_max;
2037   new_set[0].bracket_match_list = bracket_match_list;
2038   new_set[0].backref = backref;
2039   new_set[1].position = -1;
2040   new_set[1].code_min = -1;
2041   new_set[1].code_max = -1;
2042 
2043   return new_set;
2044 }
2045 
2046 static tre_pos_and_tags_t *
2047 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2048               int *tags, int assertions, int *params)
2049 {
2050   int s1, s2, i, j;
2051   tre_pos_and_tags_t *new_set;
2052   int *new_tags;
2053   int num_tags;
2054 
2055   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2056   for (s1 = 0; set1[s1].position >= 0; s1++);
2057   for (s2 = 0; set2[s2].position >= 0; s2++);
2058   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2059   if (!new_set )
2060     return NULL;
2061 
2062   for (s1 = 0; set1[s1].position >= 0; s1++)
2063     {
2064       new_set[s1].position = set1[s1].position;
2065       new_set[s1].code_min = set1[s1].code_min;
2066       new_set[s1].code_max = set1[s1].code_max;
2067       new_set[s1].assertions = set1[s1].assertions | assertions;
2068       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2069       new_set[s1].backref = set1[s1].backref;
2070       if (set1[s1].tags == NULL && tags == NULL)
2071         new_set[s1].tags = NULL;
2072       else
2073         {
2074           for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2075           new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2076                                          * (i + num_tags + 1)));
2077           if (new_tags == NULL)
2078             return NULL;
2079           for (j = 0; j < i; j++)
2080             new_tags[j] = set1[s1].tags[j];
2081           for (i = 0; i < num_tags; i++)
2082             new_tags[j + i] = tags[i];
2083           new_tags[j + i] = -1;
2084           new_set[s1].tags = new_tags;
2085         }
2086       if (set1[s1].params)
2087         new_set[s1].params = set1[s1].params;
2088       if (params)
2089         {
2090           if (!new_set[s1].params)
2091             new_set[s1].params = params;
2092           else
2093             {
2094               new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2095                                                  TRE_PARAM_LAST);
2096               if (!new_set[s1].params)
2097                 return NULL;
2098               for (i = 0; i < TRE_PARAM_LAST; i++)
2099                 if (params[i] != TRE_PARAM_UNSET)
2100                   new_set[s1].params[i] = params[i];
2101             }
2102         }
2103     }
2104 
2105   for (s2 = 0; set2[s2].position >= 0; s2++)
2106     {
2107       new_set[s1 + s2].position = set2[s2].position;
2108       new_set[s1 + s2].code_min = set2[s2].code_min;
2109       new_set[s1 + s2].code_max = set2[s2].code_max;
2110       /* XXX - why not | assertions here as well? */
2111       new_set[s1 + s2].assertions = set2[s2].assertions;
2112       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2113       new_set[s1 + s2].backref = set2[s2].backref;
2114       if (set2[s2].tags == NULL)
2115         new_set[s1 + s2].tags = NULL;
2116       else
2117         {
2118           for (i = 0; set2[s2].tags[i] >= 0; i++);
2119           new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2120           if (new_tags == NULL)
2121             return NULL;
2122           for (j = 0; j < i; j++)
2123             new_tags[j] = set2[s2].tags[j];
2124           new_tags[j] = -1;
2125           new_set[s1 + s2].tags = new_tags;
2126         }
2127       if (set2[s2].params)
2128         new_set[s1 + s2].params = set2[s2].params;
2129       if (params)
2130         {
2131           if (!new_set[s1 + s2].params)
2132             new_set[s1 + s2].params = params;
2133           else
2134             {
2135               new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2136                                                       TRE_PARAM_LAST);
2137               if (!new_set[s1 + s2].params)
2138                 return NULL;
2139               for (i = 0; i < TRE_PARAM_LAST; i++)
2140                 if (params[i] != TRE_PARAM_UNSET)
2141                   new_set[s1 + s2].params[i] = params[i];
2142             }
2143         }
2144     }
2145   new_set[s1 + s2].position = -1;
2146   return new_set;
2147 }
2148 
2149 /* Finds the empty path through `node' which is the one that should be
2150    taken according to POSIX.2 rules, and adds the tags on that path to
2151    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2152    set to the number of tags seen on the path. */
2153 static reg_errcode_t
2154 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2155                 int *assertions, int *params, int *num_tags_seen,
2156                 int *params_seen)
2157 {
2158   tre_literal_t *lit;
2159   tre_union_t *uni;
2160   tre_catenation_t *cat;
2161   tre_iteration_t *iter;
2162   int i;
2163   int bottom = tre_stack_num_objects(stack);
2164   reg_errcode_t status = REG_OK;
2165   if (num_tags_seen)
2166     *num_tags_seen = 0;
2167   if (params_seen)
2168     *params_seen = 0;
2169 
2170   status = tre_stack_push_voidptr(stack, node);
2171 
2172   /* Walk through the tree recursively. */
2173   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2174     {
2175       node = tre_stack_pop_voidptr(stack);
2176 
2177       switch (node->type)
2178         {
2179         case LITERAL:
2180           lit = (tre_literal_t *)node->obj;
2181           switch (lit->code_min)
2182             {
2183             case TAG:
2184               if (lit->code_max >= 0)
2185                 {
2186                   if (tags != NULL)
2187                     {
2188                       /* Add the tag to `tags'. */
2189                       for (i = 0; tags[i] >= 0; i++)
2190                         if (tags[i] == lit->code_max)
2191                           break;
2192                       if (tags[i] < 0)
2193                         {
2194                           tags[i] = lit->code_max;
2195                           tags[i + 1] = -1;
2196                         }
2197                     }
2198                   if (num_tags_seen)
2199                     (*num_tags_seen)++;
2200                 }
2201               break;
2202             case ASSERTION:
2203               assert(lit->code_max >= 1
2204                      || lit->code_max <= ASSERT_LAST);
2205               if (assertions != NULL)
2206                 *assertions |= lit->code_max;
2207               break;
2208             case PARAMETER:
2209               if (params != NULL)
2210                 for (i = 0; i < TRE_PARAM_LAST; i++)
2211                   params[i] = lit->u.params[i];
2212               if (params_seen != NULL)
2213                 *params_seen = 1;
2214               break;
2215             case EMPTY:
2216               break;
2217             default:
2218               assert(0);
2219               break;
2220             }
2221           break;
2222 
2223         case UNION:
2224           /* Subexpressions starting earlier take priority over ones
2225              starting later, so we prefer the left subexpression over the
2226              right subexpression. */
2227           uni = (tre_union_t *)node->obj;
2228           if (uni->left->nullable)
2229             STACK_PUSHX(stack, voidptr, uni->left)
2230           else if (uni->right->nullable)
2231             STACK_PUSHX(stack, voidptr, uni->right)
2232           else
2233             assert(0);
2234           break;
2235 
2236         case CATENATION:
2237           /* The path must go through both children. */
2238           cat = (tre_catenation_t *)node->obj;
2239           assert(cat->left->nullable);
2240           assert(cat->right->nullable);
2241           STACK_PUSHX(stack, voidptr, cat->left);
2242           STACK_PUSHX(stack, voidptr, cat->right);
2243           break;
2244 
2245         case ITERATION:
2246           /* A match with an empty string is preferred over no match at
2247              all, so we go through the argument if possible. */
2248           iter = (tre_iteration_t *)node->obj;
2249           if (iter->arg->nullable)
2250             STACK_PUSHX(stack, voidptr, iter->arg);
2251           break;
2252 
2253         default:
2254           assert(0);
2255           break;
2256         }
2257     }
2258 
2259   return status;
2260 }
2261 
2262 typedef enum {
2263   NFL_RECURSE,
2264   NFL_POST_UNION,
2265   NFL_POST_CATENATION,
2266   NFL_POST_ITERATION
2267 } tre_nfl_stack_symbol_t;
2268 
2269 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2270    the nodes of the AST `tree'. */
2271 static reg_errcode_t
2272 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2273 {
2274   int bottom = tre_stack_num_objects(stack);
2275 
2276   STACK_PUSHR(stack, voidptr, tree);
2277   STACK_PUSHR(stack, int, NFL_RECURSE);
2278 
2279   while (tre_stack_num_objects(stack) > bottom)
2280     {
2281       tre_nfl_stack_symbol_t symbol;
2282       tre_ast_node_t *node;
2283 
2284       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2285       node = tre_stack_pop_voidptr(stack);
2286       switch (symbol)
2287         {
2288         case NFL_RECURSE:
2289           switch (node->type)
2290             {
2291             case LITERAL:
2292               {
2293                 tre_literal_t *lit = (tre_literal_t *)node->obj;
2294                 if (IS_BACKREF(lit))
2295                   {
2296                     /* Back references: nullable = false, firstpos = {i},
2297                        lastpos = {i}. */
2298                     node->nullable = 0;
2299                     node->firstpos = tre_set_one(mem, lit->position, 0,
2300                                              TRE_CHAR_MAX, NULL, -1);
2301                     if (!node->firstpos)
2302                       return REG_ESPACE;
2303                     node->lastpos = tre_set_one(mem, lit->position, 0,
2304                                                 TRE_CHAR_MAX, NULL,
2305                                                 (int)lit->code_max);
2306                     if (!node->lastpos)
2307                       return REG_ESPACE;
2308                   }
2309                 else if (lit->code_min < 0)
2310                   {
2311                     /* Tags, empty strings, params, and zero width assertions:
2312                        nullable = true, firstpos = {}, and lastpos = {}. */
2313                     node->nullable = 1;
2314                     node->firstpos = tre_set_empty(mem);
2315                     if (!node->firstpos)
2316                       return REG_ESPACE;
2317                     node->lastpos = tre_set_empty(mem);
2318                     if (!node->lastpos)
2319                       return REG_ESPACE;
2320                   }
2321                 else
2322                   {
2323                     /* Literal at position i: nullable = false, firstpos = {i},
2324                        lastpos = {i}. */
2325                     node->nullable = 0;
2326                     node->firstpos =
2327                       tre_set_one(mem, lit->position, (int)lit->code_min,
2328                                   (int)lit->code_max, NULL, -1);
2329                     if (!node->firstpos)
2330                       return REG_ESPACE;
2331                     node->lastpos = tre_set_one(mem, lit->position,
2332                                                 (int)lit->code_min,
2333                                                 (int)lit->code_max,
2334                                                 lit->u.bracket_match_list,
2335                                                 -1);
2336                     if (!node->lastpos)
2337                       return REG_ESPACE;
2338                   }
2339                 break;
2340               }
2341 
2342             case UNION:
2343               /* Compute the attributes for the two subtrees, and after that
2344                  for this node. */
2345               STACK_PUSHR(stack, voidptr, node);
2346               STACK_PUSHR(stack, int, NFL_POST_UNION);
2347               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2348               STACK_PUSHR(stack, int, NFL_RECURSE);
2349               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2350               STACK_PUSHR(stack, int, NFL_RECURSE);
2351               break;
2352 
2353             case CATENATION:
2354               /* Compute the attributes for the two subtrees, and after that
2355                  for this node. */
2356               STACK_PUSHR(stack, voidptr, node);
2357               STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2358               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2359               STACK_PUSHR(stack, int, NFL_RECURSE);
2360               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2361               STACK_PUSHR(stack, int, NFL_RECURSE);
2362               break;
2363 
2364             case ITERATION:
2365               /* Compute the attributes for the subtree, and after that for
2366                  this node. */
2367               STACK_PUSHR(stack, voidptr, node);
2368               STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2369               STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2370               STACK_PUSHR(stack, int, NFL_RECURSE);
2371               break;
2372             }
2373           break; /* end case: NFL_RECURSE */
2374 
2375         case NFL_POST_UNION:
2376           {
2377             tre_union_t *uni = (tre_union_t *)node->obj;
2378             node->nullable = uni->left->nullable || uni->right->nullable;
2379             node->firstpos = tre_set_union(mem, uni->left->firstpos,
2380                                            uni->right->firstpos, NULL, 0, NULL);
2381             if (!node->firstpos)
2382               return REG_ESPACE;
2383             node->lastpos = tre_set_union(mem, uni->left->lastpos,
2384                                           uni->right->lastpos, NULL, 0, NULL);
2385             if (!node->lastpos)
2386               return REG_ESPACE;
2387             break;
2388           }
2389 
2390         case NFL_POST_ITERATION:
2391           {
2392             int num_tags, *tags, assertions, params_seen;
2393             int *params;
2394             reg_errcode_t status;
2395             tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2396 
2397             /* From Ville Laurikari's original 2001 Master's thesis, the
2398                firstpos(n) and lastpos(n) of an iteration is just the
2399                corresponding values of the iteration's argument.  Unfortunately,
2400                this isn't sufficient for the following BRE:
2401 
2402                    \(a*\)*b\(\1\)    matched against    ab
2403 
2404                The backreference wants to force the first subexpression to
2405                be the empty string, but there is no transition for this.  So
2406                we need to modify the lastpos(n) of an iteration to be the
2407                equivalent of that of catentation.  Using the same notation as
2408                in the thesis, lastpos(n) is redefined as:
2409 
2410                    if nullable(c1) then
2411                        lastpos(c1) U
2412                        addtags(lastpos(c1),
2413                                emptymatch(c1))
2414                    else
2415                        lastpos(c1)
2416 
2417                where c1 is the argument node.  firstpos(n) remains the same. */
2418 
2419             /* Compute lastpos. */
2420             if (iter->min == 0 || iter->arg->nullable)
2421               {
2422                 node->nullable = 1;
2423                 if (iter->arg->nullable)
2424                   {
2425                     /* The arg matches the empty string.  Make a first pass
2426                        with tre_match_empty() to get the number of tags and
2427                        parameters. */
2428                     status = tre_match_empty(stack, iter->arg,
2429                                              NULL, NULL, NULL, &num_tags,
2430                                              &params_seen);
2431                     if (status != REG_OK)
2432                       return status;
2433                     /* Allocate arrays for the tags and parameters. */
2434                     tags = malloc(sizeof(int) * (num_tags + 1));
2435                     if (!tags)
2436                       return REG_ESPACE;
2437                     tags[0] = -1;
2438                     assertions = 0;
2439                     params = NULL;
2440                     if (params_seen)
2441                       {
2442                         params = tre_mem_alloc(mem, sizeof(*params)
2443                                                * TRE_PARAM_LAST);
2444                         if (!params)
2445                           {
2446                             free(tags);
2447                             return REG_ESPACE;
2448                           }
2449                       }
2450                     /* Second pass with tre_mach_empty() to get the list of
2451                        tags and parameters. */
2452                     status = tre_match_empty(stack, iter->arg, tags,
2453                                              &assertions, params, NULL, NULL);
2454                     if (status != REG_OK)
2455                       {
2456                         free(tags);
2457                         return status;
2458                       }
2459                     node->lastpos =
2460                       tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2461                                     tags, assertions, params);
2462                     free(tags);
2463                     if (!node->lastpos)
2464                       return REG_ESPACE;
2465                   }
2466                 else
2467                   node->lastpos = iter->arg->lastpos;
2468               }
2469             else
2470               {
2471                 node->nullable = 0;
2472                 node->lastpos = iter->arg->lastpos;
2473               }
2474             node->firstpos = iter->arg->firstpos;
2475             break;
2476           }
2477 
2478         case NFL_POST_CATENATION:
2479           {
2480             int num_tags, *tags, assertions, params_seen;
2481             int *params;
2482             reg_errcode_t status;
2483             tre_catenation_t *cat = node->obj;
2484             node->nullable = cat->left->nullable && cat->right->nullable;
2485 
2486             /* Compute firstpos. */
2487             if (cat->left->nullable)
2488               {
2489                 /* The left side matches the empty string.  Make a first pass
2490                    with tre_match_empty() to get the number of tags and
2491                    parameters. */
2492                 status = tre_match_empty(stack, cat->left,
2493                                          NULL, NULL, NULL, &num_tags,
2494                                          &params_seen);
2495                 if (status != REG_OK)
2496                   return status;
2497                 /* Allocate arrays for the tags and parameters. */
2498                 tags = malloc(sizeof(*tags) * (num_tags + 1));
2499                 if (!tags)
2500                   return REG_ESPACE;
2501                 tags[0] = -1;
2502                 assertions = 0;
2503                 params = NULL;
2504                 if (params_seen)
2505                   {
2506                     params = tre_mem_alloc(mem, sizeof(*params)
2507                                            * TRE_PARAM_LAST);
2508                     if (!params)
2509                       {
2510                         free(tags);
2511                         return REG_ESPACE;
2512                       }
2513                   }
2514                 /* Second pass with tre_mach_empty() to get the list of
2515                    tags and parameters. */
2516                 status = tre_match_empty(stack, cat->left, tags,
2517                                          &assertions, params, NULL, NULL);
2518                 if (status != REG_OK)
2519                   {
2520                     free(tags);
2521                     return status;
2522                   }
2523                 node->firstpos =
2524                   tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2525                                 tags, assertions, params);
2526                 free(tags);
2527                 if (!node->firstpos)
2528                   return REG_ESPACE;
2529               }
2530             else
2531               {
2532                 node->firstpos = cat->left->firstpos;
2533               }
2534 
2535             /* Compute lastpos. */
2536             if (cat->right->nullable)
2537               {
2538                 /* The right side matches the empty string.  Make a first pass
2539                    with tre_match_empty() to get the number of tags and
2540                    parameters. */
2541                 status = tre_match_empty(stack, cat->right,
2542                                          NULL, NULL, NULL, &num_tags,
2543                                          &params_seen);
2544                 if (status != REG_OK)
2545                   return status;
2546                 /* Allocate arrays for the tags and parameters. */
2547                 tags = malloc(sizeof(int) * (num_tags + 1));
2548                 if (!tags)
2549                   return REG_ESPACE;
2550                 tags[0] = -1;
2551                 assertions = 0;
2552                 params = NULL;
2553                 if (params_seen)
2554                   {
2555                     params = tre_mem_alloc(mem, sizeof(*params)
2556                                            * TRE_PARAM_LAST);
2557                     if (!params)
2558                       {
2559                         free(tags);
2560                         return REG_ESPACE;
2561                       }
2562                   }
2563                 /* Second pass with tre_mach_empty() to get the list of
2564                    tags and parameters. */
2565                 status = tre_match_empty(stack, cat->right, tags,
2566                                          &assertions, params, NULL, NULL);
2567                 if (status != REG_OK)
2568                   {
2569                     free(tags);
2570                     return status;
2571                   }
2572                 node->lastpos =
2573                   tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2574                                 tags, assertions, params);
2575                 free(tags);
2576                 if (!node->lastpos)
2577                   return REG_ESPACE;
2578               }
2579             else
2580               {
2581                 node->lastpos = cat->right->lastpos;
2582               }
2583             break;
2584           }
2585 
2586         default:
2587           assert(0);
2588           break;
2589         }
2590     }
2591 
2592   return REG_OK;
2593 }
2594 
2595 /* Adds a transition from each position in `p1' to each position in `p2'. */
2596 static reg_errcode_t
2597 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2598                tre_tnfa_transition_t *transitions,
2599                int *counts, int *offs)
2600 {
2601   tre_pos_and_tags_t *orig_p2 = p2;
2602   tre_tnfa_transition_t *trans;
2603   int i, j, k, l, dup, prev_p2_pos;
2604 
2605   if (transitions != NULL)
2606     while (p1->position >= 0)
2607       {
2608         p2 = orig_p2;
2609         prev_p2_pos = -1;
2610         while (p2->position >= 0)
2611           {
2612             /* Optimization: if this position was already handled, skip it. */
2613             if (p2->position == prev_p2_pos)
2614               {
2615                 p2++;
2616                 continue;
2617               }
2618             prev_p2_pos = p2->position;
2619             /* Set `trans' to point to the next unused transition from
2620                position `p1->position'. */
2621             trans = transitions + offs[p1->position];
2622             while (trans->state != NULL)
2623               {
2624 #if 0
2625                 /* If we find a previous transition from `p1->position' to
2626                    `p2->position', it is overwritten.  This can happen only
2627                    if there are nested loops in the regexp, like in "((a)*)*".
2628                    In POSIX.2 repetition using the outer loop is always
2629                    preferred over using the inner loop.  Therefore the
2630                    transition for the inner loop is useless and can be thrown
2631                    away. */
2632                 /* XXX - The same position is used for all nodes in a bracket
2633                    expression, so this optimization cannot be used (it will
2634                    break bracket expressions) unless I figure out a way to
2635                    detect it here. */
2636                 if (trans->state_id == p2->position)
2637                   {
2638                     DPRINT(("*"));
2639                     break;
2640                   }
2641 #endif
2642                 trans++;
2643               }
2644 
2645             if (trans->state == NULL)
2646               (trans + 1)->state = NULL;
2647             /* Use the character ranges, assertions, etc. from `p1' for
2648                the transition from `p1' to `p2'. */
2649             trans->code_min = p1->code_min;
2650             trans->code_max = p1->code_max;
2651             trans->state = transitions + offs[p2->position];
2652             trans->state_id = p2->position;
2653             trans->assertions = p1->assertions | p2->assertions
2654               | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2655             if (p1->backref >= 0)
2656               {
2657                 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2658                 assert(p2->backref < 0);
2659                 trans->u.backref = p1->backref;
2660                 trans->assertions |= ASSERT_BACKREF;
2661               }
2662             if (p1->bracket_match_list != NULL)
2663               {
2664                 trans->u.bracket_match_list =
2665                   malloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2666                 if (trans->u.bracket_match_list == NULL)
2667                   return REG_ESPACE;
2668                 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2669                        SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2670               }
2671 
2672             /* Find out how many tags this transition has. */
2673             i = 0;
2674             if (p1->tags != NULL)
2675               while(p1->tags[i] >= 0)
2676                 i++;
2677             j = 0;
2678             if (p2->tags != NULL)
2679               while(p2->tags[j] >= 0)
2680                 j++;
2681 
2682             /* If we are overwriting a transition, free the old tag array. */
2683             if (trans->tags != NULL)
2684               free(trans->tags);
2685             trans->tags = NULL;
2686 
2687             /* If there were any tags, allocate an array and fill it. */
2688             if (i + j > 0)
2689               {
2690                 trans->tags = malloc(sizeof(*trans->tags) * (i + j + 1));
2691                 if (!trans->tags)
2692                   return REG_ESPACE;
2693                 i = 0;
2694                 if (p1->tags != NULL)
2695                   while(p1->tags[i] >= 0)
2696                     {
2697                       trans->tags[i] = p1->tags[i];
2698                       i++;
2699                     }
2700                 l = i;
2701                 j = 0;
2702                 if (p2->tags != NULL)
2703                   while (p2->tags[j] >= 0)
2704                     {
2705                       /* Don't add duplicates. */
2706                       dup = 0;
2707                       for (k = 0; k < i; k++)
2708                         if (trans->tags[k] == p2->tags[j])
2709                           {
2710                             dup = 1;
2711                             break;
2712                           }
2713                       if (!dup)
2714                         trans->tags[l++] = p2->tags[j];
2715                       j++;
2716                     }
2717                 trans->tags[l] = -1;
2718               }
2719 
2720             /* Set the parameter array.  If both `p2' and `p1' have same
2721                parameters, the values in `p2' override those in `p1'. */
2722             if (p1->params || p2->params)
2723               {
2724                 if (!trans->params)
2725                   trans->params = malloc(sizeof(*trans->params)
2726                                           * TRE_PARAM_LAST);
2727                 if (!trans->params)
2728                   return REG_ESPACE;
2729                 for (i = 0; i < TRE_PARAM_LAST; i++)
2730                   {
2731                     trans->params[i] = TRE_PARAM_UNSET;
2732                     if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2733                       trans->params[i] = p1->params[i];
2734                     if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2735                       trans->params[i] = p2->params[i];
2736                   }
2737               }
2738             else
2739               {
2740                 if (trans->params)
2741                   free(trans->params);
2742                 trans->params = NULL;
2743               }
2744 
2745 #ifdef TRE_DEBUG
2746             {
2747               int *tags;
2748 
2749               DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
2750                       p1->code_min));
2751               if (p1->code_max != p1->code_min)
2752                 DPRINT(("-%3d", p1->code_max));
2753               tags = trans->tags;
2754               if (tags)
2755                 {
2756                   DPRINT((", tags ["));
2757                   while (*tags >= 0)
2758                     {
2759                       DPRINT(("%d", *tags));
2760                       tags++;
2761                       if (*tags >= 0)
2762                         DPRINT((","));
2763                     }
2764                   DPRINT(("]"));
2765                 }
2766               if (trans->assertions)
2767                 DPRINT((", assert %d", trans->assertions));
2768               if (trans->assertions & ASSERT_BACKREF)
2769                 DPRINT((", backref %d", trans->u.backref));
2770               else if (trans->assertions & ASSERT_BRACKET_MATCH)
2771                 DPRINT((", bracket_match_list %p",
2772                         trans->u.bracket_match_list));
2773               if (trans->params)
2774                 {
2775                   DPRINT((", "));
2776                   tre_print_params(trans->params);
2777                 }
2778               DPRINT(("\n"));
2779             }
2780 #endif /* TRE_DEBUG */
2781             p2++;
2782           }
2783         p1++;
2784       }
2785   else
2786     /* Compute a maximum limit for the number of transitions leaving
2787        from each state. */
2788     while (p1->position >= 0)
2789       {
2790         p2 = orig_p2;
2791         while (p2->position >= 0)
2792           {
2793             counts[p1->position]++;
2794             p2++;
2795           }
2796         p1++;
2797       }
2798   return REG_OK;
2799 }
2800 
2801 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
2802    labelled with one character range (there are no transitions on empty
2803    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2804    the regexp. */
2805 static reg_errcode_t
2806 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2807                 int *counts, int *offs)
2808 {
2809   tre_union_t *uni;
2810   tre_catenation_t *cat;
2811   tre_iteration_t *iter;
2812   reg_errcode_t errcode = REG_OK;
2813 
2814   /* XXX - recurse using a stack!. */
2815   switch (node->type)
2816     {
2817     case LITERAL:
2818       break;
2819     case UNION:
2820       uni = (tre_union_t *)node->obj;
2821       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2822       if (errcode != REG_OK)
2823         return errcode;
2824       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2825       break;
2826 
2827     case CATENATION:
2828       cat = (tre_catenation_t *)node->obj;
2829       /* Add a transition from each position in cat->left->lastpos
2830          to each position in cat->right->firstpos. */
2831       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2832                                transitions, counts, offs);
2833       if (errcode != REG_OK)
2834         return errcode;
2835       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2836       if (errcode != REG_OK)
2837         return errcode;
2838       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2839       break;
2840 
2841     case ITERATION:
2842       iter = (tre_iteration_t *)node->obj;
2843       assert(iter->max == -1 || iter->max == 1);
2844 
2845       if (iter->max == -1)
2846         {
2847           assert(iter->min == 0 || iter->min == 1);
2848           /* Add a transition from each last position in the iterated
2849              expression to each first position. */
2850           errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2851                                    transitions, counts, offs);
2852           if (errcode != REG_OK)
2853             return errcode;
2854         }
2855       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2856       break;
2857     }
2858   return errcode;
2859 }
2860 
2861 #define ERROR_EXIT(err)           \
2862   do                              \
2863     {                             \
2864       errcode = err;              \
2865       if (/*CONSTCOND*/1)         \
2866         goto error_exit;          \
2867     }                             \
2868  while (/*CONSTCOND*/0)
2869 
2870 int
2871 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2872             locale_t loc)
2873 {
2874   tre_stack_t *stack;
2875   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2876   tre_pos_and_tags_t *p;
2877   int *counts = NULL, *offs = NULL;
2878   int i, add = 0;
2879   tre_tnfa_transition_t *transitions, *initial;
2880   tre_tnfa_t *tnfa = NULL;
2881   tre_submatch_data_t *submatch_data = NULL;
2882   tre_tag_direction_t *tag_directions = NULL;
2883   reg_errcode_t errcode;
2884   tre_mem_t mem;
2885 
2886   /* Parse context. */
2887   tre_parse_ctx_t parse_ctx;
2888 
2889   /* Allocate a stack used throughout the compilation process for various
2890      purposes. */
2891   stack = tre_stack_new(512, 10240, 128);
2892   if (!stack)
2893     return REG_ESPACE;
2894   /* Allocate a fast memory allocator. */
2895   mem = tre_mem_new();
2896   if (!mem)
2897     {
2898       tre_stack_destroy(stack);
2899       return REG_ESPACE;
2900     }
2901 
2902   /* Parse the regexp. */
2903   memset(&parse_ctx, 0, sizeof(parse_ctx));
2904   parse_ctx.mem = mem;
2905   parse_ctx.stack = stack;
2906   parse_ctx.re = regex;
2907   parse_ctx.len = n;
2908   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2909      are also set */
2910   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2911     cflags &= ~REG_UNGREEDY;
2912   parse_ctx.cflags = cflags;
2913   parse_ctx.max_backref = -1;
2914   parse_ctx.loc = loc;
2915   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2916 
2917   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2918   errcode = tre_parse(&parse_ctx);
2919   if (errcode != REG_OK)
2920     ERROR_EXIT(errcode);
2921   preg->re_nsub = parse_ctx.submatch_id - 1;
2922   tree = parse_ctx.result;
2923 
2924   /* Back references and approximate matching cannot currently be used
2925      in the same regexp. */
2926   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
2927     ERROR_EXIT(REG_BADPAT);
2928 
2929 #ifdef TRE_DEBUG
2930   tre_ast_print(tree);
2931 #endif /* TRE_DEBUG */
2932 
2933   /* Referring to nonexistent subexpressions is illegal. */
2934   if (parse_ctx.max_backref > (int)preg->re_nsub)
2935     ERROR_EXIT(REG_ESUBREG);
2936 
2937   /* Allocate the TNFA struct. */
2938   tnfa = calloc(1, sizeof(tre_tnfa_t));
2939   if (tnfa == NULL)
2940     ERROR_EXIT(REG_ESPACE);
2941   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2942   tnfa->have_approx = parse_ctx.have_approx;
2943   tnfa->num_submatches = parse_ctx.submatch_id;
2944   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
2945                                    - SUBMATCH_ID_INVISIBLE_START;
2946   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
2947   tnfa->loc = duplocale(parse_ctx.loc);
2948   if (tnfa->loc == (locale_t)0)
2949     ERROR_EXIT(REG_ESPACE);
2950 
2951   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
2952      regexp does not have back references, this can be skipped. */
2953   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
2954     {
2955       DPRINT(("tre_compile: setting up tags\n"));
2956 
2957       /* Figure out how many tags we will need. */
2958       errcode = tre_add_tags(NULL, stack, tree, tnfa);
2959       if (errcode != REG_OK)
2960         ERROR_EXIT(errcode);
2961 #ifdef TRE_DEBUG
2962       tre_ast_print(tree);
2963 #endif /* TRE_DEBUG */
2964 
2965       if (tnfa->num_tags > 0)
2966         {
2967           tag_directions = malloc(sizeof(*tag_directions)
2968                                    * (tnfa->num_tags + 1));
2969           if (tag_directions == NULL)
2970             ERROR_EXIT(REG_ESPACE);
2971           tnfa->tag_directions = tag_directions;
2972           memset(tag_directions, -1,
2973                  sizeof(*tag_directions) * (tnfa->num_tags + 1));
2974         }
2975       tnfa->minimal_tags = calloc((unsigned)tnfa->num_tags * 2 + 3,
2976                                    sizeof(tnfa->minimal_tags));
2977       if (tnfa->minimal_tags == NULL)
2978         ERROR_EXIT(REG_ESPACE);
2979 
2980       submatch_data = calloc((unsigned)parse_ctx.submatch_id,
2981                               sizeof(*submatch_data));
2982       if (submatch_data == NULL)
2983         ERROR_EXIT(REG_ESPACE);
2984       /* Set the eo_tag value to -1 to indicate that that corresponding
2985        * submatch has not be completed yet */
2986       for (i = 0; i < parse_ctx.submatch_id; i++)
2987         {
2988           submatch_data[i].eo_tag = -1;
2989         }
2990       tnfa->submatch_data = submatch_data;
2991 
2992       errcode = tre_add_tags(mem, stack, tree, tnfa);
2993       if (errcode != REG_OK)
2994         ERROR_EXIT(errcode);
2995 
2996 #ifdef TRE_DEBUG
2997       for (i = 0; i < parse_ctx.submatch_id; i++)
2998         DPRINT(("pmatch[%d] = {t%d, t%d}\n",
2999                 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3000       for (i = 0; i <= tnfa->num_tags; i++)
3001         DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3002 #endif /* TRE_DEBUG */
3003     }
3004 
3005   /* Expand iteration nodes. */
3006   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3007                            tag_directions, &tnfa->params_depth);
3008   if (errcode != REG_OK)
3009     ERROR_EXIT(errcode);
3010 
3011   /* Add a dummy node for the final state.
3012      XXX - For certain patterns this dummy node can be optimized away,
3013            for example "a*" or "ab*".   Figure out a simple way to detect
3014            this possibility. */
3015   tmp_ast_l = tree;
3016   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3017   if (tmp_ast_r == NULL)
3018     ERROR_EXIT(REG_ESPACE);
3019 
3020   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3021   if (tree == NULL)
3022     ERROR_EXIT(REG_ESPACE);
3023 
3024 #ifdef TRE_DEBUG
3025   tre_ast_print(tree);
3026   DPRINT(("Number of states: %d\n", parse_ctx.position));
3027   if (submatch_data)
3028     for (i = 0; i < parse_ctx.submatch_id; i++)
3029       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3030               i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3031   if (tag_directions)
3032     for (i = 0; i <= tnfa->num_tags; i++)
3033       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3034 #endif /* TRE_DEBUG */
3035 
3036   errcode = tre_compute_nfl(mem, stack, tree);
3037   if (errcode != REG_OK)
3038     ERROR_EXIT(errcode);
3039 
3040   counts = malloc(sizeof(int) * parse_ctx.position);
3041   if (counts == NULL)
3042     ERROR_EXIT(REG_ESPACE);
3043 
3044   offs = malloc(sizeof(int) * parse_ctx.position);
3045   if (offs == NULL)
3046     ERROR_EXIT(REG_ESPACE);
3047 
3048   for (i = 0; i < parse_ctx.position; i++)
3049     counts[i] = 0;
3050   tre_ast_to_tnfa(tree, NULL, counts, NULL);
3051 
3052   add = 0;
3053   for (i = 0; i < parse_ctx.position; i++)
3054     {
3055       offs[i] = add;
3056       add += counts[i] + 1;
3057       counts[i] = 0;
3058     }
3059   transitions = calloc((unsigned)add + 1, sizeof(*transitions));
3060   if (transitions == NULL)
3061     ERROR_EXIT(REG_ESPACE);
3062   tnfa->transitions = transitions;
3063   tnfa->num_transitions = add;
3064 
3065   DPRINT(("Converting to TNFA:\n"));
3066   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3067   if (errcode != REG_OK)
3068     ERROR_EXIT(errcode);
3069 
3070   /* Set first_char only if there is only one character that can be the
3071      first character of a match */
3072   tnfa->first_char = -1;
3073   if (!tmp_ast_l->nullable)
3074     {
3075       int scanning = 1;
3076       for (p = tree->firstpos; scanning && p->position >= 0; p++)
3077         {
3078           tre_tnfa_transition_t *j = transitions + offs[p->position];
3079           while (j->state != NULL)
3080             {
3081               if (j->code_min <= j->code_max)
3082                 {
3083                   if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3084                     {
3085                       tnfa->first_char = -1;
3086                       scanning = 0;
3087                       break;
3088                     }
3089                   tnfa->first_char = j->code_min;
3090                 }
3091               j++;
3092             }
3093         }
3094 #ifdef TRE_DEBUG
3095       if (tnfa->first_char >= 0)
3096         DPRINT(("first char must be %d\n", tnfa->first_char));
3097 #endif /* TRE_DEBUG */
3098     }
3099 
3100   p = tree->firstpos;
3101   i = 0;
3102   while (p->position >= 0)
3103     {
3104       i++;
3105 
3106 #ifdef TRE_DEBUG
3107       {
3108         int *tags;
3109         DPRINT(("initial: %d", p->position));
3110         tags = p->tags;
3111         if (tags != NULL)
3112           {
3113             if (*tags >= 0)
3114               DPRINT(("/"));
3115             while (*tags >= 0)
3116               {
3117                 DPRINT(("%d", *tags));
3118                 tags++;
3119                 if (*tags >= 0)
3120                   DPRINT((","));
3121               }
3122           }
3123         DPRINT((", assert %d", p->assertions));
3124         if (p->params)
3125           {
3126             DPRINT((", "));
3127             tre_print_params(p->params);
3128           }
3129         DPRINT(("\n"));
3130       }
3131 #endif /* TRE_DEBUG */
3132 
3133       p++;
3134     }
3135 
3136   initial = calloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3137   if (initial == NULL)
3138     ERROR_EXIT(REG_ESPACE);
3139   tnfa->initial = initial;
3140 
3141   i = 0;
3142   for (p = tree->firstpos; p->position >= 0; p++)
3143     {
3144       initial[i].state = transitions + offs[p->position];
3145       initial[i].state_id = p->position;
3146       initial[i].tags = NULL;
3147       /* Copy the arrays p->tags, and p->params, they are allocated
3148          from a tre_mem object. */
3149       if (p->tags)
3150         {
3151           int j;
3152           for (j = 0; p->tags[j] >= 0; j++);
3153           initial[i].tags = malloc(sizeof(*p->tags) * (j + 1));
3154           if (!initial[i].tags)
3155             ERROR_EXIT(REG_ESPACE);
3156           memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3157         }
3158       initial[i].params = NULL;
3159       if (p->params)
3160         {
3161           initial[i].params = malloc(sizeof(*p->params) * TRE_PARAM_LAST);
3162           if (!initial[i].params)
3163             ERROR_EXIT(REG_ESPACE);
3164           memcpy(initial[i].params, p->params,
3165                  sizeof(*p->params) * TRE_PARAM_LAST);
3166         }
3167       initial[i].assertions = p->assertions;
3168       i++;
3169     }
3170   initial[i].state = NULL;
3171 
3172   tnfa->num_transitions = add;
3173   tnfa->final = transitions + offs[tree->lastpos[0].position];
3174   tnfa->num_states = parse_ctx.position;
3175   tnfa->cflags = cflags;
3176 
3177   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3178           (void *)tnfa->final));
3179 
3180   tre_mem_destroy(mem);
3181   tre_stack_destroy(stack);
3182   free(counts);
3183   free(offs);
3184 
3185   preg->re_magic = RE_MAGIC;
3186   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3187   return REG_OK;
3188 
3189  error_exit:
3190   /* Free everything that was allocated and return the error code. */
3191   tre_mem_destroy(mem);
3192   if (stack != NULL)
3193     tre_stack_destroy(stack);
3194   if (counts != NULL)
3195     free(counts);
3196   if (offs != NULL)
3197     free(offs);
3198 
3199   /* Set tnfa into preg, so that calling tre_free() will free the contents
3200      of tnfa.  NULL out the loc field since we never retained the locale. */
3201   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3202   if(tnfa)
3203     {
3204       if (tnfa->loc != (locale_t)0)
3205         freelocale(tnfa->loc);
3206       tnfa->loc = NULL;
3207     }
3208 
3209   tre_free(preg);
3210   return errcode;
3211 }
3212 
3213 void
3214 tre_free(regex_t *preg)
3215 {
3216   tre_tnfa_t *tnfa;
3217   unsigned int i;
3218   tre_tnfa_transition_t *trans;
3219 
3220   preg->re_magic = 0;
3221   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3222   if (!tnfa)
3223     return;
3224   preg->TRE_REGEX_T_FIELD = NULL;
3225 
3226   for (i = 0; i < tnfa->num_transitions; i++)
3227     if (tnfa->transitions[i].state)
3228       {
3229         if (tnfa->transitions[i].tags)
3230           free(tnfa->transitions[i].tags);
3231         if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3232           free(tnfa->transitions[i].u.bracket_match_list);
3233         if (tnfa->transitions[i].params)
3234           free(tnfa->transitions[i].params);
3235       }
3236   if (tnfa->transitions)
3237     free(tnfa->transitions);
3238 
3239   if (tnfa->initial)
3240     {
3241       for (trans = tnfa->initial; trans->state; trans++)
3242         {
3243           if (trans->tags)
3244             free(trans->tags);
3245           if (trans->params)
3246             free(trans->params);
3247         }
3248       free(tnfa->initial);
3249     }
3250 
3251   if (tnfa->submatch_data)
3252     {
3253       free(tnfa->submatch_data);
3254     }
3255 
3256   if (tnfa->tag_directions)
3257     free(tnfa->tag_directions);
3258   if (tnfa->minimal_tags)
3259     free(tnfa->minimal_tags);
3260 
3261   if (tnfa->loc != (locale_t)0)
3262     freelocale(tnfa->loc);
3263 
3264   if (tnfa->last_matched_branch)
3265     free(tnfa->last_matched_branch);
3266 
3267   free(tnfa);
3268 }