Print this page
9083 replace regex implementation with tre

@@ -19,12 +19,10 @@
  *
  * CDDL HEADER END
  */
 
 /*
- * Copyright 2014 Garrett D'Amore <garrett@damore.org>
- *
  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 /*

@@ -31,11 +29,12 @@
  * Copyright 1989, 1994 by Mortice Kern Systems Inc.
  * All rights reserved.
  */
 
 /*
- * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2018 Nexenta Systems, Inc.
  */
 
 #ifndef _REGEX_H
 #define _REGEX_H
 

@@ -44,11 +43,12 @@
 
 #ifdef  __cplusplus
 extern "C" {
 #endif
 
-
+/* XXX: is this wchar_t definition still needed? Disabled for now. */
+#if 0
 /*
  * wchar_t is a built-in type in standard C++ and as such is not
  * defined here when using standard C++. However, the GNU compiler
  * fixincludes utility nonetheless creates its own version of this
  * header for use by gcc and g++. In that version it adds a redundant

@@ -68,93 +68,96 @@
 #else
 typedef long    wchar_t;
 #endif
 #endif  /* !_WCHAR_T */
 #endif  /* !defined(__cplusplus) ... */
+#endif
 
 typedef ssize_t regoff_t;
 
 /* regcomp flags */
 #define REG_BASIC       0x00000
-#define REG_EXTENDED    0x00001         /* Use Extended Regular Expressions */
-#define REG_NOSUB       0x00002         /* Don't set subexpression */
-#define REG_ICASE       0x00004         /* Ignore case in match */
-#define REG_NEWLINE     0x00008         /* Treat \n as regular character */
-#define REG_DELIM       0x00010         /* legacy, no effect */
-#define REG_DEBUG       0x00020         /* legacy, no effect */
-#define REG_ANCHOR      0x00040         /* legacy, no effect */
-#define REG_WORDS       0x00080         /* legacy, no effect */
-#define REG_EGREP       0x01000         /* legacy, no effect */
-#define REG_DUMP        0x02000         /* internal */
+#define REG_EXTENDED    0x00001         /* use EREs (POSIX) */
+#define REG_NOSUB       0x00002         /* don't set subexpression (POSIX) */
+#define REG_ICASE       0x00004         /* ignore case in match (POSIX) */
+#define REG_NEWLINE     0x00008         /* \n is a line separator (POSIX) */
+/* was  REG_DELIM       0x00010 */
+/* was  REG_DEBUG       0x00020 */
+/* was  REG_ANCHOR      0x00040 */
+/* was  REG_WORDS       0x00080 */
+/* was  REG_EGREP       0x01000 */
+/* was  REG_DUMP        0x02000 */
 #define REG_PEND        0x04000         /* NULs are ordinary characters */
 #define REG_NOSPEC      0x08000         /* no special characters */
+#define REG_LITERAL     0x08000         /* same flag as REG_NOSPEC */
 
-/* internal flags */
-#define REG_MUST        0x00100         /* legacy, no effect */
-
 /* regexec flags */
-#define REG_NOTBOL      0x00200         /* string is not BOL */
-#define REG_NOTEOL      0x00400         /* string has no EOL */
-#define REG_NOOPT       0x00800         /* legacy, no effect */
-#define REG_STARTEND    0x10000         /* match whole pattern */
-#define REG_TRACE       0x20000         /* tracing of execution */
-#define REG_LARGE       0x40000         /* force large representation */
-#define REG_BACKR       0x80000         /* force use of backref code */
+/* was  REG_MUST        0x00100 */
+#define REG_NOTBOL      0x00200         /* string is not BOL (POSIX) */
+#define REG_NOTEOL      0x00400         /* string has no EOL (POSIX) */
+/* was  REG_NOOPT       0x00800 */
+#define REG_STARTEND    0x10000         /* string bounded by pmatch[0] (BSD) */
 
 /* regcomp and regexec return codes */
 #define REG_OK          0               /* success (non-standard) */
-#define REG_NOMATCH     1               /* regexec failed to match */
-#define REG_ECOLLATE    2               /* invalid collation element ref. */
-#define REG_EESCAPE     3               /* trailing \ in pattern */
-#define REG_ENEWLINE    4               /* \n found before end of pattern */
-#define REG_ENSUB       5               /* more than 9 \( \) pairs (OBS) */
-#define REG_ESUBREG     6               /* number in \[0-9] invalid */
-#define REG_EBRACK      7               /* [ ] imbalance */
-#define REG_EPAREN      8               /* ( ) imbalance */
-#define REG_EBRACE      9               /* \{ \} imbalance */
-#define REG_ERANGE      10              /* invalid endpoint in range */
-#define REG_ESPACE      11              /* no memory for compiled pattern */
-#define REG_BADRPT      12              /* invalid repetition */
-#define REG_ECTYPE      13              /* invalid char-class type */
-#define REG_BADPAT      14              /* syntax error */
-#define REG_BADBR       15              /* \{ \} contents bad */
-#define REG_EFATAL      16              /* internal error, not POSIX.2 */
+#define REG_NOMATCH     1               /* regexec failed to match (POSIX) */
+#define REG_ECOLLATE    2               /* invalid coll. element ref. (POSIX) */
+#define REG_EESCAPE     3               /* trailing \ in pattern (POSIX) */
+/* was  REG_ENEWLINE    4 */
+/* was  REG_ENSUB       5 */
+#define REG_ESUBREG     6               /* number in \[0-9] invalid (POSIX) */
+#define REG_EBRACK      7               /* [ ] imbalance (POSIX) */
+#define REG_EPAREN      8               /* ( ) imbalance (POSIX) */
+#define REG_EBRACE      9               /* \{ \} imbalance (POSIX) */
+#define REG_ERANGE      10              /* invalid endpoint in range (POSIX) */
+#define REG_ESPACE      11              /* out of memory (POSIX) */
+#define REG_BADRPT      12              /* invalid repetition (POSIX) */
+#define REG_ECTYPE      13              /* invalid char-class type (POSIX) */
+#define REG_BADPAT      14              /* syntax error (POSIX) */
+#define REG_BADBR       15              /* \{ \} contents bad (POSIX) */
+/* was  REG_EFATAL      16 */
 #define REG_ECHAR       17              /* invalid multibyte character */
-#define REG_STACK       18              /* backtrack stack overflow */
+#define REG_ILLSEQ      17              /* invalid multibyte character (BSD) */
+/* was  REG_STACK       18 */
+/* REG_ENOSYS was removed in XPG7; XXX only strict pre-XPG7 builds see it */
+#if defined(_STRICT_SYMBOLS) && !defined(_XPG7)
 #define REG_ENOSYS      19              /* function not supported (XPG4) */
-#define REG__LAST       20              /* first unused code */
-#define REG_EBOL        21              /* ^ anchor and not BOL */
-#define REG_EEOL        22              /* $ anchor and not EOL */
-#define REG_ATOI        255             /* convert name to number (!) */
-#define REG_ITOA        256             /* convert number to name (!) */
+#endif
+/* was  REG__LAST       20 */
+/* was  REG_EBOL        21 */
+/* was  REG_EEOL        22 */
+#define REG_EMPTY       23              /* empty (sub)expression */
+#define REG_INVARG      24              /* invalid argument */
 
 #define _REG_BACKREF_MAX 9              /* Max # of subexp. backreference */
 
+/*
+ * Note that any changes to this structure have to preserve sizing,
+ * as it is baked into applications.  Size needs to stay 24/48 bytes
+ * for 32/64 bit libc, respectively.
+ */
 typedef struct {                /* regcomp() data saved for regexec() */
         size_t  re_nsub;        /* # of subexpressions in RE pattern */
+        void *value;            /* internal use only */
+        int re_magic;           /* internal use only */
+        const void *re_endp;    /* internal use only */
 
         /*
-         * Internal use only.  Note that any changes to this structure
-         * have to preserve sizing, as it is baked into applications.
+         * These are here for binary compatibility (see the note about
+         * sizing above).
          */
-        struct re_guts *re_g;
-        int re_magic;
-        const char *re_endp;
-
-        /* here for compat */
-        size_t  re_len;         /* # wchar_t chars in compiled pattern */
-        struct _regex_ext_t *re_sc;     /* for binary compatibility */
+        size_t  __pad1;         /* was re_len */
+        void    *__pad2;        /* was re_sc */
 } regex_t;
 
 /* subexpression positions */
 typedef struct {
         const char      *rm_sp, *rm_ep; /* Start pointer, end pointer */
         regoff_t        rm_so, rm_eo;   /* Start offset, end offset */
         int             rm_ss, rm_es;   /* Used internally */
 } regmatch_t;
 
-
 /*
  * IEEE Std 1003.2 ("POSIX.2") regular expressions API.
  */
 
 extern int regcomp(regex_t *_RESTRICT_KYWD, const char *_RESTRICT_KYWD, int);