Print this page
9083 replace regex implementation with tre


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 
  23 /*
  24  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  25  *
  26  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 /*
  31  * Copyright 1989, 1994 by Mortice Kern Systems Inc.
  32  * All rights reserved.
  33  */
  34 
  35 /*
  36  * Copyright 2017 Nexenta Systems, Inc.

  37  */
  38 
  39 #ifndef _REGEX_H
  40 #define _REGEX_H
  41 
  42 #include <sys/feature_tests.h>
  43 #include <sys/types.h>
  44 
  45 #ifdef  __cplusplus
  46 extern "C" {
  47 #endif
  48 
  49 

  50 /*
  51  * wchar_t is a built-in type in standard C++ and as such is not
  52  * defined here when using standard C++. However, the GNU compiler
  53  * fixincludes utility nonetheless creates its own version of this
  54  * header for use by gcc and g++. In that version it adds a redundant
  55  * guard for __cplusplus. To avoid the creation of a gcc/g++ specific
  56  * header we need to include the following magic comment:
  57  *
  58  * we must use the C++ compiler's type
  59  *
  60  * The above comment should not be removed or changed until GNU
  61  * gcc/fixinc/inclhack.def is updated to bypass this header.
  62  */
  63 #if !defined(__cplusplus) || (__cplusplus < 199711L && !defined(__GNUG__))
  64 #ifndef _WCHAR_T
  65 #define _WCHAR_T
  66 #if defined(_LP64)
  67 typedef int     wchar_t;
  68 #else
  69 typedef long    wchar_t;
  70 #endif
  71 #endif  /* !_WCHAR_T */
  72 #endif  /* !defined(__cplusplus) ... */

  73 
  74 typedef ssize_t regoff_t;
  75 
  76 /* regcomp flags */
  77 #define REG_BASIC       0x00000
  78 #define REG_EXTENDED    0x00001         /* Use Extended Regular Expressions */
  79 #define REG_NOSUB       0x00002         /* Don't set subexpression */
  80 #define REG_ICASE       0x00004         /* Ignore case in match */
  81 #define REG_NEWLINE     0x00008         /* Newline-sensitive matching */
  82 #define REG_DELIM       0x00010         /* legacy, no effect */
  83 #define REG_DEBUG       0x00020         /* legacy, no effect */
  84 #define REG_ANCHOR      0x00040         /* legacy, no effect */
  85 #define REG_WORDS       0x00080         /* legacy, no effect */
  86 #define REG_EGREP       0x01000         /* legacy, no effect */
  87 #define REG_DUMP        0x02000         /* internal */
  88 #define REG_PEND        0x04000         /* NULs are ordinary characters */
  89 #define REG_NOSPEC      0x08000         /* no special characters */

  90 
  91 /* internal flags */
  92 #define REG_MUST        0x00100         /* legacy, no effect */
  93 
  94 /* regexec flags */
  95 #define REG_NOTBOL      0x00200         /* string is not BOL */
  96 #define REG_NOTEOL      0x00400         /* string has no EOL */
  97 #define REG_NOOPT       0x00800         /* legacy, no effect */
  98 #define REG_STARTEND    0x10000         /* string bounded by pmatch[0] */
  99 #define REG_TRACE       0x20000         /* tracing of execution */
 100 #define REG_LARGE       0x40000         /* force large representation */
 101 #define REG_BACKR       0x80000         /* force use of backref code */
 102 
 103 /* regcomp and regexec return codes */
 104 #define REG_OK          0               /* success (non-standard) */
 105 #define REG_NOMATCH     1               /* regexec failed to match */
 106 #define REG_ECOLLATE    2               /* invalid collation element ref. */
 107 #define REG_EESCAPE     3               /* trailing \ in pattern */
 108 #define REG_ENEWLINE    4               /* \n found before end of pattern */
 109 #define REG_ENSUB       5               /* more than 9 \( \) pairs (OBS) */
 110 #define REG_ESUBREG     6               /* number in \[0-9] invalid */
 111 #define REG_EBRACK      7               /* [ ] imbalance */
 112 #define REG_EPAREN      8               /* ( ) imbalance */
 113 #define REG_EBRACE      9               /* \{ \} imbalance */
 114 #define REG_ERANGE      10              /* invalid endpoint in range */
 115 #define REG_ESPACE      11              /* no memory for compiled pattern */
 116 #define REG_BADRPT      12              /* invalid repetition */
 117 #define REG_ECTYPE      13              /* invalid char-class type */
 118 #define REG_BADPAT      14              /* syntax error */
 119 #define REG_BADBR       15              /* \{ \} contents bad */
 120 #define REG_EFATAL      16              /* internal error, not POSIX.2 */
 121 #define REG_ECHAR       17              /* invalid multibyte character */
 122 #define REG_STACK       18              /* backtrack stack overflow */



 123 #define REG_ENOSYS      19              /* function not supported (XPG4) */
 124 #define REG__LAST       20              /* first unused code */
 125 #define REG_EBOL        21              /* ^ anchor and not BOL */
 126 #define REG_EEOL        22              /* $ anchor and not EOL */
 127 #define REG_ATOI        255             /* convert name to number (!) */
 128 #define REG_ITOA        256             /* convert number to name (!) */

 129 
 130 #define _REG_BACKREF_MAX 9              /* Max # of subexp. backreference */
 131 





 132 typedef struct {                /* regcomp() data saved for regexec() */
 133         size_t  re_nsub;        /* # of subexpressions in RE pattern */



 134 
 135         /*
 136          * Internal use only.  Note that any changes to this structure
 137          * have to preserve sizing, as it is baked into applications.
 138          */
 139         struct re_guts *re_g;
 140         int re_magic;
 141         const char *re_endp;
 142 
 143         /* here for compat */
 144         size_t  re_len;         /* # wchar_t chars in compiled pattern */
 145         struct _regex_ext_t *re_sc;     /* for binary compatibility */
 146 } regex_t;
 147 
 148 /* subexpression positions */
 149 typedef struct {
 150         const char      *rm_sp, *rm_ep; /* Start pointer, end pointer */
 151         regoff_t        rm_so, rm_eo;   /* Start offset, end offset */
 152         int             rm_ss, rm_es;   /* Used internally */
 153 } regmatch_t;
 154 
 155 
 156 /*
 157  * IEEE Std 1003.2 ("POSIX.2") regular expressions API.
 158  */
 159 
 160 extern int regcomp(regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, int);
 161 extern int regexec(const regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD,
 162     size_t, regmatch_t *_RESTRICT_KYWD, int);
 163 extern size_t regerror(int, const regex_t *_RESTRICT_KYWD, char *_RESTRICT_KYWD,
 164     size_t);
 165 extern void regfree(regex_t *);
 166 
 167 #ifdef  __cplusplus
 168 }
 169 #endif
 170 
 171 #endif  /* _REGEX_H */


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License, Version 1.0 only
   6  * (the "License").  You may not use this file except in compliance
   7  * with the License.
   8  *
   9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  10  * or http://www.opensolaris.org/os/licensing.
  11  * See the License for the specific language governing permissions
  12  * and limitations under the License.
  13  *
  14  * When distributing Covered Code, include this CDDL HEADER in each
  15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  16  * If applicable, add the following below this CDDL HEADER, with the
  17  * fields enclosed by brackets "[]" replaced with your own identifying
  18  * information: Portions Copyright [yyyy] [name of copyright owner]
  19  *
  20  * CDDL HEADER END
  21  */
  22 
  23 /*


  24  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  25  * Use is subject to license terms.
  26  */
  27 
  28 /*
  29  * Copyright 1989, 1994 by Mortice Kern Systems Inc.
  30  * All rights reserved.
  31  */
  32 
  33 /*
  34  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  35  * Copyright 2018 Nexenta Systems, Inc.
  36  */
  37 
  38 #ifndef _REGEX_H
  39 #define _REGEX_H
  40 
  41 #include <sys/feature_tests.h>
  42 #include <sys/types.h>
  43 
  44 #ifdef  __cplusplus
  45 extern "C" {
  46 #endif
  47 
  48 /* XXX: is the wchar_t definition below (now disabled) still needed? */
  49 #if 0
  50 /*
  51  * wchar_t is a built-in type in standard C++ and as such is not
  52  * defined here when using standard C++. However, the GNU compiler
  53  * fixincludes utility nonetheless creates its own version of this
  54  * header for use by gcc and g++. In that version it adds a redundant
  55  * guard for __cplusplus. To avoid the creation of a gcc/g++ specific
  56  * header we need to include the following magic comment:
  57  *
  58  * we must use the C++ compiler's type
  59  *
  60  * The above comment should not be removed or changed until GNU
  61  * gcc/fixinc/inclhack.def is updated to bypass this header.
  62  */
  63 #if !defined(__cplusplus) || (__cplusplus < 199711L && !defined(__GNUG__))
  64 #ifndef _WCHAR_T
  65 #define _WCHAR_T
  66 #if defined(_LP64)
  67 typedef int     wchar_t;
  68 #else
  69 typedef long    wchar_t;
  70 #endif
  71 #endif  /* !_WCHAR_T */
  72 #endif  /* !defined(__cplusplus) ... */
  73 #endif
  74 
  75 typedef ssize_t regoff_t;
  76 
  77 /* regcomp flags */
  78 #define REG_BASIC       0x00000
  79 #define REG_EXTENDED    0x00001         /* use EREs (POSIX) */
  80 #define REG_NOSUB       0x00002         /* don't set subexpression (POSIX) */
  81 #define REG_ICASE       0x00004         /* ignore case in match (POSIX) */
  82 #define REG_NEWLINE     0x00008         /* newline-sensitive match (POSIX) */
  83 /* was  REG_DELIM       0x00010 */
  84 /* was  REG_DEBUG       0x00020 */
  85 /* was  REG_ANCHOR      0x00040 */
  86 /* was  REG_WORDS       0x00080 */
  87 /* was  REG_EGREP       0x01000 */
  88 /* was  REG_DUMP        0x02000 */
  89 #define REG_PEND        0x04000         /* NULs are ordinary characters */
  90 #define REG_NOSPEC      0x08000         /* no special characters */
  91 #define REG_LITERAL     0x08000         /* alias for REG_NOSPEC */
  92 



  93 /* regexec flags */
  94 /* was  REG_MUST        0x00100 */
  95 #define REG_NOTBOL      0x00200         /* string is not BOL (POSIX) */
  96 #define REG_NOTEOL      0x00400         /* string has no EOL (POSIX) */
  97 /* was  REG_NOOPT       0x00800 */
  98 #define REG_STARTEND    0x10000         /* string bounded by pmatch[0] (BSD) */


  99 
 100 /* regcomp and regexec return codes */
 101 #define REG_OK          0               /* success (non-standard) */
 102 #define REG_NOMATCH     1               /* regexec failed to match (POSIX) */
 103 #define REG_ECOLLATE    2               /* invalid coll. element ref. (POSIX) */
 104 #define REG_EESCAPE     3               /* trailing \ in pattern (POSIX) */
 105 /* was  REG_ENEWLINE    4 */
 106 /* was  REG_ENSUB       5 */
 107 #define REG_ESUBREG     6               /* number in \[0-9] invalid (POSIX) */
 108 #define REG_EBRACK      7               /* [ ] imbalance (POSIX) */
 109 #define REG_EPAREN      8               /* ( ) imbalance (POSIX) */
 110 #define REG_EBRACE      9               /* \{ \} imbalance (POSIX) */
 111 #define REG_ERANGE      10              /* invalid endpoint in range (POSIX) */
 112 #define REG_ESPACE      11              /* out of memory (POSIX) */
 113 #define REG_BADRPT      12              /* invalid repetition (POSIX) */
 114 #define REG_ECTYPE      13              /* invalid char-class type (POSIX) */
 115 #define REG_BADPAT      14              /* syntax error (POSIX) */
 116 #define REG_BADBR       15              /* \{ \} contents bad (POSIX) */
 117 /* was  REG_EFATAL      16 */
 118 #define REG_ECHAR       17              /* invalid multibyte character */
 119 #define REG_ILLSEQ      17              /* alias for REG_ECHAR (BSD) */
 120 /* was  REG_STACK       18 */
 121 /* REG_ENOSYS was removed in XPG7; kept for strict pre-XPG7 builds only */
 122 #if defined(_STRICT_SYMBOLS) && !defined(_XPG7)
 123 #define REG_ENOSYS      19              /* function not supported (XPG4) */
 124 #endif  /* _STRICT_SYMBOLS && !_XPG7 */
 125 /* was  REG__LAST       20 */
 126 /* was  REG_EBOL        21 */
 127 /* was  REG_EEOL        22 */
 128 #define REG_EMPTY       23              /* empty (sub)expression */
 129 #define REG_INVARG      24              /* invalid argument */
 130 
 131 #define _REG_BACKREF_MAX 9              /* Max # of subexp. backreference */
 132 
 133 /*
 134  * The size of this structure is baked into applications and must not
 135  * change: 24 bytes for 32-bit libc, 48 bytes for 64-bit libc (where
 136  * alignment padding follows re_magic).
 137  */
 138 typedef struct {                /* regcomp() data saved for regexec() */
 139         size_t  re_nsub;        /* # of subexpressions in RE pattern */
 140         void *value;            /* internal use only */
 141         int re_magic;
 142         const void *re_endp;
 143 
 144         /*
 145          * These are here for binary compatibility (see the note about
 146          * sizing above).
 147          */
 148         size_t  __pad1;         /* was re_len */
 149         void    *__pad2;        /* was re_sc */





 150 } regex_t;
 151 
 152 /* subexpression positions */
 153 typedef struct {
 154         const char      *rm_sp, *rm_ep; /* Start pointer, end pointer */
 155         regoff_t        rm_so, rm_eo;   /* Start offset, end offset */
 156         int             rm_ss, rm_es;   /* Used internally */
 157 } regmatch_t;
 158 

 159 /*
 160  * IEEE Std 1003.2 ("POSIX.2") regular expressions API.
 161  */
 162 
 163 extern int regcomp(regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, int);
 164 extern int regexec(const regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD,
 165     size_t, regmatch_t *_RESTRICT_KYWD, int);
 166 extern size_t regerror(int, const regex_t *_RESTRICT_KYWD, char *_RESTRICT_KYWD,
 167     size_t);
 168 extern void regfree(regex_t *);
 169 
 170 #ifdef  __cplusplus
 171 }
 172 #endif
 173 
 174 #endif  /* _REGEX_H */