1 /*
2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 tre-parse.h - Regexp parser definitions
31 */
32
33 #ifndef _TRE_PARSE_H
34 #define _TRE_PARSE_H
35
36 #include "../locale/collate.h"
37
38 /* Parse context: the state handed to tre_parse() through its `ctx` argument. */
39 typedef struct {
40 /* Memory allocator. The AST is allocated using this. */
41 tre_mem_t mem;
42 /* Stack used for keeping track of regexp syntax. */
43 tre_stack_t *stack;
44 /* The parse result, as a pointer to an AST node. */
45 tre_ast_node_t *result;
46 /* The regexp to parse. Its length is stored in `len`, declared after `loc`. */
47 const tre_char_t *re;
48 /* The first character of the entire regexp. */
49 const tre_char_t *re_start;
50 /* The first character after the end of the regexp. */
51 const tre_char_t *re_end;
52 /* The current locale. */
53 locale_t loc;
54 int len; /* Length of the regexp `re`. NOTE(review): presumably counted in tre_char_t units -- confirm. */
55 /* Current submatch ID. */
56 int submatch_id;
57 /* Current invisible submatch ID. */
58 int submatch_id_invisible;
59 /* Current position (number of literal). */
60 int position;
61 /* The highest back reference or -1 if none seen so far. */
62 int max_backref;
63 /* Number of tags that need reordering. */
64 int num_reorder_tags;
65 /* This flag is set if the regexp uses approximate matching. */
66 int have_approx;
67 /* Compilation flags. */
68 int cflags;
69 /* If this flag is set the top-level submatch is not captured. */
70 int nofirstsub;
71 /* The currently set approximate matching parameters (TRE_PARAM_LAST slots). */
72 int params[TRE_PARAM_LAST];
73 } tre_parse_ctx_t;
74
75 /* Parses a wide character regexp pattern into a syntax tree. This parser handles both
76 syntaxes (BRE and ERE), including the TRE extensions. `ctx` is the parse context; the return value is a reg_errcode_t error code. NOTE(review): the tree is presumably left in ctx->result -- confirm. */
77 reg_errcode_t
78 tre_parse(tre_parse_ctx_t *ctx);
79
80 #endif /* _TRE_PARSE_H */