1 /* 2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 tre-ast.h - Abstract syntax tree (AST) definitions 31 */ 32 33 #ifndef _TRE_AST_H 34 #define _TRE_AST_H 35 36 #include <limits.h> 37 38 #include "tre-mem.h" 39 #include "tre-internal.h" 40 #include "tre-compile.h" 41 42 /* The different AST node types. */ 43 typedef enum { 44 LITERAL, 45 CATENATION, 46 ITERATION, 47 UNION 48 } tre_ast_type_t; 49 50 /* Special subtypes of TRE_LITERAL. */ 51 #define EMPTY -1 /* Empty leaf (denotes empty string). */ 52 #define ASSERTION -2 /* Assertion leaf. */ 53 #define TAG -3 /* Tag leaf. */ 54 #define BACKREF -4 /* Back reference leaf. */ 55 #define PARAMETER -5 /* Parameter. */ 56 57 #define IS_SPECIAL(x) ((x)->code_min < 0) 58 #define IS_EMPTY(x) ((x)->code_min == EMPTY) 59 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) 60 #define IS_TAG(x) ((x)->code_min == TAG) 61 #define IS_BACKREF(x) ((x)->code_min == BACKREF) 62 #define IS_PARAMETER(x) ((x)->code_min == PARAMETER) 63 64 #define SUBMATCH_ID_INVISIBLE_START (INT_MAX / 2 + 1) 65 66 /* A generic AST node. All AST nodes consist of this node on the top 67 level with `obj' pointing to the actual content. */ 68 typedef struct _tre_ast_node { 69 void *obj; /* Pointer to actual node. */ 70 tre_last_matched_branch_pre_t *last_matched_branch; 71 tre_last_matched_pre_t *last_matched_in_progress; 72 tre_pos_and_tags_t *firstpos; 73 tre_pos_and_tags_t *lastpos; 74 /* The original pointer is used to point to the original node, when a 75 * CATENATION and tag are added. If NULL, this is node is the original */ 76 struct _tre_ast_node *original; 77 tre_ast_type_t type; /* Type of the node. */ 78 int submatch_id; 79 int num_submatches; 80 int num_tags; 81 short nullable; 82 short make_branches; 83 } tre_ast_node_t; 84 85 /* A "literal" node. These are created for assertions, back references, 86 tags, matching parameter settings, and all expressions that match one 87 character. */ 88 typedef struct { 89 tre_cint_t code_min; 90 tre_cint_t code_max; 91 int position; 92 union { 93 tre_bracket_match_list_t *bracket_match_list; 94 int *params; 95 } u; 96 } tre_literal_t; 97 98 /* A "catenation" node. These are created when two regexps are concatenated. 99 If there are more than one subexpressions in sequence, the `left' part 100 holds all but the last, and `right' part holds the last subexpression 101 (catenation is left associative). */ 102 typedef struct { 103 tre_ast_node_t *left; 104 tre_ast_node_t *right; 105 } tre_catenation_t; 106 107 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" 108 operators. */ 109 typedef struct { 110 /* Subexpression to match. */ 111 tre_ast_node_t *arg; 112 /* Minimum number of consecutive matches. */ 113 int min; 114 /* Maximum number of consecutive matches. */ 115 int max; 116 /* If 0, match as many characters as possible, if 1 match as few as 117 possible. Note that this does not always mean the same thing as 118 matching as many/few repetitions as possible. */ 119 unsigned int minimal:1; 120 /* Approximate matching parameters (or NULL). */ 121 int *params; 122 } tre_iteration_t; 123 124 /* An "union" node. These are created for the "|" operator. */ 125 typedef struct { 126 tre_ast_node_t *left; 127 tre_ast_node_t *right; 128 /* The left end right end tags if non-zero */ 129 int left_tag; 130 int right_tag; 131 } tre_union_t; 132 133 tre_ast_node_t * 134 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); 135 136 tre_ast_node_t * 137 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); 138 139 tre_ast_node_t * 140 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, 141 int minimal); 142 143 tre_ast_node_t * 144 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); 145 146 tre_ast_node_t * 147 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, 148 tre_ast_node_t *right); 149 150 #ifdef TRE_DEBUG 151 void 152 tre_ast_print(tre_ast_node_t *tree); 153 154 /* XXX - rethink AST printing API */ 155 void 156 tre_print_params(int *params); 157 #endif /* TRE_DEBUG */ 158 159 #endif /* _TRE_AST_H */