1 /*
2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 tre_regcomp.c - TRE POSIX compatible regex compilation functions.
31 */
32
33 #include <string.h>
34 #include <errno.h>
35 #include <stdlib.h>
36
37 #include "tre.h"
38 #include "tre-internal.h"
39
40 int
41 tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
42 locale_t loc)
43 {
44 int ret;
45 tre_char_t *wregex;
46 size_t wlen;
47
48 wregex = malloc(sizeof(tre_char_t) * (n + 1));
49 if (wregex == NULL)
50 return REG_ESPACE;
51
52 /* If the current locale uses the standard single byte encoding of
53 characters, we don't do a multibyte string conversion. If we did,
54 many applications which use the default locale would break since
55 the default "C" locale uses the 7-bit ASCII character set, and
56 all characters with the eighth bit set would be considered invalid. */
57 if (TRE_MB_CUR_MAX_L(loc) == 1)
58 {
59 unsigned int i;
60 const unsigned char *str = (const unsigned char *)regex;
61 tre_char_t *wstr = wregex;
62
63 for (i = 0; i < n; i++)
64 *(wstr++) = *(str++);
65 wlen = n;
66 }
67 else
68 {
69 size_t consumed;
70 tre_char_t *wcptr = wregex;
71 mbstate_t state;
72 memset(&state, '\0', sizeof(state));
73 while (n > 0)
74 {
75 consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
76
77 switch (consumed)
78 {
79 case 0:
80 if (*regex == '\0')
81 consumed = 1;
82 else
83 {
84 free(wregex);
85 return REG_BADPAT;
86 }
87 break;
88 case (size_t)-1:
89 case (size_t)-2:
90 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
91 free(wregex);
92 return REG_ILLSEQ;
93 }
94 regex += consumed;
95 n -= consumed;
96 wcptr++;
97 }
98 wlen = wcptr - wregex;
99 }
100
101 wregex[wlen] = L'\0';
102 ret = tre_compile(preg, wregex, wlen, cflags, loc);
103 free(wregex);
104
105 return ret;
106 }
107
108 int
109 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
110 {
111 return tre_regncomp_l(preg, regex, n, cflags, uselocale((locale_t)0));
112 }
113
114 int
115 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
116 {
117 size_t len;
118
119 if (cflags & REG_PEND)
120 {
121 if ((const char *)(preg->re_endp) < regex)
122 return REG_INVARG;
123 len = (const char *)(preg->re_endp) - regex;
124 }
125 else
126 len = strlen(regex);
127 return tre_regncomp_l(preg, regex, len, cflags, loc);
128 }
129
130 int
131 regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD regex,
132 int cflags)
133 {
134 return tre_regcomp_l(preg, regex, cflags, uselocale((locale_t)0));
135 }
136
137 void
138 regfree(regex_t *preg)
139 {
140 tre_free(preg);
141 }