1 /*
   2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  *
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
  17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28 
  29 /*
  30   tre-parse.h - Regexp parser definitions
  31 */
  32 
  33 #ifndef _TRE_PARSE_H
  34 #define _TRE_PARSE_H
  35 
  36 #include "../locale/collate.h"
  37 
  38 /* Parse context: parser input, bookkeeping state and result; passed to tre_parse(). */
  39 typedef struct {
  40   /* Memory allocator.  The AST is allocated using this. */
  41   tre_mem_t mem;
  42   /* Stack used for keeping track of regexp syntax. */
  43   tre_stack_t *stack;
  44   /* The parse result. */
  45   tre_ast_node_t *result;
  46   /* The regexp to parse (its length is kept in `len` below). */
  47   const tre_char_t *re;
  48   /* The first character of the entire regexp. */
  49   const tre_char_t *re_start;
  50   /* The first character after the end of the regexp. */
  51   const tre_char_t *re_end;
  52   /* The current locale. */
  53   locale_t loc;
  54   int len;  /* Length of the regexp `re`. */
  55   /* Current submatch ID. */
  56   int submatch_id;
  57   /* Current invisible submatch ID. */
  58   int submatch_id_invisible;
  59   /* Current position (number of literal). */
  60   int position;
  61   /* The highest back reference or -1 if none seen so far. */
  62   int max_backref;
  63   /* Number of tags that need reordering. */
  64   int num_reorder_tags;
  65   /* This flag is set if the regexp uses approximate matching. */
  66   int have_approx;
  67   /* Compilation flags. */
  68   int cflags;
  69   /* If this flag is set the top-level submatch is not captured. */
  70   int nofirstsub;
  71   /* The currently set approximate matching parameters (TRE_PARAM_LAST slots). */
  72   int params[TRE_PARAM_LAST];
  73 } tre_parse_ctx_t;
  74 
  75 /* Parses a wide character regexp pattern into a syntax tree.  This parser
  76    handles both syntaxes (BRE and ERE), including the TRE extensions.
        `ctx` is the parse context declared above.  NOTE(review): presumably the
        resulting tree is left in ctx->result and the return value reports
        success or a parse error -- confirm against the implementation. */
  77 reg_errcode_t
  78 tre_parse(tre_parse_ctx_t *ctx);
  79 
  80 #endif  /* _TRE_PARSE_H */