1 /*
   2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  *
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
  17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28 
  29 /*
  30   tre-ast.c - Abstract syntax tree (AST) routines
  31 */
  32 
  33 #include <assert.h>
  34 
  35 #include "tre-ast.h"
  36 #include "tre-mem.h"
  37 
  38 tre_ast_node_t *
  39 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
  40 {
  41   tre_ast_node_t *node;
  42 
  43   node = tre_mem_calloc(mem, sizeof(*node));
  44   if (!node)
  45     return NULL;
  46   node->obj = tre_mem_calloc(mem, size);
  47   if (!node->obj)
  48     return NULL;
  49   node->type = type;
  50   node->nullable = -1;
  51   node->submatch_id = -1;
  52 
  53   return node;
  54 }
  55 
  56 tre_ast_node_t *
  57 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
  58 {
  59   tre_ast_node_t *node;
  60   tre_literal_t *lit;
  61 
  62   node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
  63   if (!node)
  64     return NULL;
  65   lit = node->obj;
  66   lit->code_min = code_min;
  67   lit->code_max = code_max;
  68   lit->position = position;
  69 
  70   return node;
  71 }
  72 
  73 tre_ast_node_t *
  74 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
  75                  int minimal)
  76 {
  77   tre_ast_node_t *node;
  78   tre_iteration_t *iter;
  79 
  80   node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
  81   if (!node)
  82     return NULL;
  83   iter = node->obj;
  84   iter->arg = arg;
  85   iter->min = min;
  86   iter->max = max;
  87   iter->minimal = minimal;
  88   node->num_submatches = arg->num_submatches;
  89 
  90   return node;
  91 }
  92 
  93 tre_ast_node_t *
  94 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
  95 {
  96   tre_ast_node_t *node;
  97 
  98   node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
  99   if (node == NULL)
 100     return NULL;
 101   ((tre_union_t *)node->obj)->left = left;
 102   ((tre_union_t *)node->obj)->right = right;
 103   node->num_submatches = left->num_submatches + right->num_submatches;
 104 
 105   return node;
 106 }
 107 
 108 tre_ast_node_t *
 109 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
 110                        tre_ast_node_t *right)
 111 {
 112   tre_ast_node_t *node;
 113 
 114   node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
 115   if (node == NULL)
 116     return NULL;
 117   ((tre_catenation_t *)node->obj)->left = left;
 118   ((tre_catenation_t *)node->obj)->right = right;
 119   node->num_submatches = left->num_submatches + right->num_submatches;
 120 
 121   return node;
 122 }
 123 
 124 #ifdef TRE_DEBUG
 125 
 126 static void
 127 tre_findent(FILE *stream, int i)
 128 {
 129   while (i-- > 0)
 130     fputc(' ', stream);
 131 }
 132 
 133 void
 134 tre_print_params(int *params)
 135 {
 136   int i;
 137   if (params)
 138     {
 139       DPRINT(("params ["));
 140       for (i = 0; i < TRE_PARAM_LAST; i++)
 141         {
 142           if (params[i] == TRE_PARAM_UNSET)
 143             DPRINT(("unset"));
 144           else if (params[i] == TRE_PARAM_DEFAULT)
 145             DPRINT(("default"));
 146           else
 147             DPRINT(("%d", params[i]));
 148           if (i < TRE_PARAM_LAST - 1)
 149             DPRINT((", "));
 150         }
 151       DPRINT(("]"));
 152     }
 153 }
 154 
 155 static void
 156 tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
 157 {
 158   int code_min, code_max, pos;
 159   int num_tags = ast->num_tags;
 160   tre_literal_t *lit;
 161   tre_iteration_t *iter;
 162 
 163   tre_findent(stream, indent);
 164   switch (ast->type)
 165     {
 166     case LITERAL:
 167       lit = ast->obj;
 168       code_min = lit->code_min;
 169       code_max = lit->code_max;
 170       pos = lit->position;
 171       if (IS_EMPTY(lit))
 172         {
 173           fprintf(stream, "literal empty\n");
 174         }
 175       else if (IS_ASSERTION(lit))
 176         {
 177           int i;
 178           char *assertions[] = { "bol", "eol", "bracket",
 179                                  "bow", "eow", "wb", "!wb", "backref" };
 180           if (code_max >= ASSERT_LAST << 1)
 181             assert(0);
 182           fprintf(stream, "assertions: ");
 183           for (i = 0; (1 << i) <= ASSERT_LAST; i++)
 184             if (code_max & (1 << i))
 185               fprintf(stream, "%s ", assertions[i]);
 186           fprintf(stream, "\n");
 187         }
 188       else if (IS_TAG(lit))
 189         {
 190           fprintf(stream, "tag %d\n", code_max);
 191         }
 192       else if (IS_BACKREF(lit))
 193         {
 194           fprintf(stream, "backref %d, pos %d\n", code_max, pos);
 195         }
 196       else if (IS_PARAMETER(lit))
 197         {
 198           tre_print_params(lit->u.params);
 199           fprintf(stream, "\n");
 200         }
 201       else
 202         {
 203           fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
 204                   "%d tags\n", code_min, code_max, code_min, code_max, pos,
 205                   ast->submatch_id, num_tags);
 206         }
 207       break;
 208     case ITERATION:
 209       iter = ast->obj;
 210       fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
 211               iter->min, iter->max, ast->submatch_id, num_tags,
 212               iter->minimal ? "minimal" : "greedy");
 213       tre_do_print(stream, iter->arg, indent + 2);
 214       break;
 215     case UNION:
 216       fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
 217       tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
 218       tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
 219       break;
 220     case CATENATION:
 221       fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
 222               num_tags);
 223       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
 224       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
 225       break;
 226     default:
 227       assert(0);
 228       break;
 229     }
 230 }
 231 
 232 static void
 233 tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
 234 {
 235   tre_do_print(stream, ast, 0);
 236 }
 237 
 238 void
 239 tre_ast_print(tre_ast_node_t *tree)
 240 {
 241   printf("AST:\n");
 242   tre_ast_fprint(stdout, tree);
 243 }
 244 
 245 #endif /* TRE_DEBUG */