1 /* 2 * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 tre_regcomp.c - TRE POSIX compatible regex compilation functions. 31 */ 32 33 #include <string.h> 34 #include <errno.h> 35 #include <stdlib.h> 36 37 #include "tre.h" 38 #include "tre-internal.h" 39 40 int 41 tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags, 42 locale_t loc) 43 { 44 int ret; 45 tre_char_t *wregex; 46 size_t wlen; 47 48 wregex = malloc(sizeof(tre_char_t) * (n + 1)); 49 if (wregex == NULL) 50 return REG_ESPACE; 51 52 /* If the current locale uses the standard single byte encoding of 53 characters, we don't do a multibyte string conversion. If we did, 54 many applications which use the default locale would break since 55 the default "C" locale uses the 7-bit ASCII character set, and 56 all characters with the eighth bit set would be considered invalid. */ 57 if (TRE_MB_CUR_MAX_L(loc) == 1) 58 { 59 unsigned int i; 60 const unsigned char *str = (const unsigned char *)regex; 61 tre_char_t *wstr = wregex; 62 63 for (i = 0; i < n; i++) 64 *(wstr++) = *(str++); 65 wlen = n; 66 } 67 else 68 { 69 size_t consumed; 70 tre_char_t *wcptr = wregex; 71 mbstate_t state; 72 memset(&state, '\0', sizeof(state)); 73 while (n > 0) 74 { 75 consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc); 76 77 switch (consumed) 78 { 79 case 0: 80 if (*regex == '\0') 81 consumed = 1; 82 else 83 { 84 free(wregex); 85 return REG_BADPAT; 86 } 87 break; 88 case (size_t)-1: 89 case (size_t)-2: 90 DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno))); 91 free(wregex); 92 return REG_ILLSEQ; 93 } 94 regex += consumed; 95 n -= consumed; 96 wcptr++; 97 } 98 wlen = wcptr - wregex; 99 } 100 101 wregex[wlen] = L'\0'; 102 ret = tre_compile(preg, wregex, wlen, cflags, loc); 103 free(wregex); 104 105 return ret; 106 } 107 108 int 109 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags) 110 { 111 return tre_regncomp_l(preg, regex, n, cflags, uselocale((locale_t)0)); 112 } 113 114 int 115 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc) 116 { 117 size_t len; 118 119 if (cflags & REG_PEND) 120 { 121 if ((const char *)(preg->re_endp) < regex) 122 return REG_INVARG; 123 len = (const char *)(preg->re_endp) - regex; 124 } 125 else 126 len = strlen(regex); 127 return tre_regncomp_l(preg, regex, len, cflags, loc); 128 } 129 130 int 131 regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD regex, 132 int cflags) 133 { 134 return tre_regcomp_l(preg, regex, cflags, uselocale((locale_t)0)); 135 } 136 137 void 138 regfree(regex_t *preg) 139 { 140 tre_free(preg); 141 }