1 /*
2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 tre-ast.h - Abstract syntax tree (AST) definitions
31 */
32
33 #ifndef _TRE_AST_H
34 #define _TRE_AST_H
35
36 #include <limits.h>
37
38 #include "tre-mem.h"
39 #include "tre-internal.h"
40 #include "tre-compile.h"
41
42 /* The different AST node types. */
43 typedef enum {
44 LITERAL,
45 CATENATION,
46 ITERATION,
47 UNION
48 } tre_ast_type_t;
49
50 /* Special subtypes of TRE_LITERAL. */
51 #define EMPTY -1 /* Empty leaf (denotes empty string). */
52 #define ASSERTION -2 /* Assertion leaf. */
53 #define TAG -3 /* Tag leaf. */
54 #define BACKREF -4 /* Back reference leaf. */
55 #define PARAMETER -5 /* Parameter. */
56
57 #define IS_SPECIAL(x) ((x)->code_min < 0)
58 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
59 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
60 #define IS_TAG(x) ((x)->code_min == TAG)
61 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
62 #define IS_PARAMETER(x) ((x)->code_min == PARAMETER)
63
64 #define SUBMATCH_ID_INVISIBLE_START (INT_MAX / 2 + 1)
65
66 /* A generic AST node. All AST nodes consist of this node on the top
67 level with `obj' pointing to the actual content. */
68 typedef struct _tre_ast_node {
69 void *obj; /* Pointer to actual node. */
70 tre_last_matched_branch_pre_t *last_matched_branch;
71 tre_last_matched_pre_t *last_matched_in_progress;
72 tre_pos_and_tags_t *firstpos;
73 tre_pos_and_tags_t *lastpos;
74 /* The original pointer is used to point to the original node, when a
75 * CATENATION and tag are added. If NULL, this is node is the original */
76 struct _tre_ast_node *original;
77 tre_ast_type_t type; /* Type of the node. */
78 int submatch_id;
79 int num_submatches;
80 int num_tags;
81 short nullable;
82 short make_branches;
83 } tre_ast_node_t;
84
85 /* A "literal" node. These are created for assertions, back references,
86 tags, matching parameter settings, and all expressions that match one
87 character. */
88 typedef struct {
89 tre_cint_t code_min;
90 tre_cint_t code_max;
91 int position;
92 union {
93 tre_bracket_match_list_t *bracket_match_list;
94 int *params;
95 } u;
96 } tre_literal_t;
97
98 /* A "catenation" node. These are created when two regexps are concatenated.
99 If there are more than one subexpressions in sequence, the `left' part
100 holds all but the last, and `right' part holds the last subexpression
101 (catenation is left associative). */
102 typedef struct {
103 tre_ast_node_t *left;
104 tre_ast_node_t *right;
105 } tre_catenation_t;
106
107 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
108 operators. */
109 typedef struct {
110 /* Subexpression to match. */
111 tre_ast_node_t *arg;
112 /* Minimum number of consecutive matches. */
113 int min;
114 /* Maximum number of consecutive matches. */
115 int max;
116 /* If 0, match as many characters as possible, if 1 match as few as
117 possible. Note that this does not always mean the same thing as
118 matching as many/few repetitions as possible. */
119 unsigned int minimal:1;
120 /* Approximate matching parameters (or NULL). */
121 int *params;
122 } tre_iteration_t;
123
124 /* An "union" node. These are created for the "|" operator. */
125 typedef struct {
126 tre_ast_node_t *left;
127 tre_ast_node_t *right;
128 /* The left end right end tags if non-zero */
129 int left_tag;
130 int right_tag;
131 } tre_union_t;
132
133 tre_ast_node_t *
134 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size);
135
136 tre_ast_node_t *
137 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position);
138
139 tre_ast_node_t *
140 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
141 int minimal);
142
143 tre_ast_node_t *
144 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right);
145
146 tre_ast_node_t *
147 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
148 tre_ast_node_t *right);
149
150 #ifdef TRE_DEBUG
151 void
152 tre_ast_print(tre_ast_node_t *tree);
153
154 /* XXX - rethink AST printing API */
155 void
156 tre_print_params(int *params);
157 #endif /* TRE_DEBUG */
158
159 #endif /* _TRE_AST_H */