1 /*
   2  * Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  *
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  *
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
  17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
  20  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27  */
  28 
  29 /*
  30   tre_regcomp.c - TRE POSIX compatible regex compilation functions.
  31 */
  32 
  33 #include <string.h>
  34 #include <errno.h>
  35 #include <stdlib.h>
  36 
  37 #include "tre.h"
  38 #include "tre-internal.h"
  39 
  40 int
  41 tre_regncomp_l(regex_t *preg, const char *regex, size_t n, int cflags,
  42     locale_t loc)
  43 {
  44   int ret;
  45   tre_char_t *wregex;
  46   size_t wlen;
  47 
  48   wregex = malloc(sizeof(tre_char_t) * (n + 1));
  49   if (wregex == NULL)
  50     return REG_ESPACE;
  51 
  52   /* If the current locale uses the standard single byte encoding of
  53      characters, we don't do a multibyte string conversion.  If we did,
  54      many applications which use the default locale would break since
  55      the default "C" locale uses the 7-bit ASCII character set, and
  56      all characters with the eighth bit set would be considered invalid. */
  57   if (TRE_MB_CUR_MAX_L(loc) == 1)
  58     {
  59       unsigned int i;
  60       const unsigned char *str = (const unsigned char *)regex;
  61       tre_char_t *wstr = wregex;
  62 
  63       for (i = 0; i < n; i++)
  64         *(wstr++) = *(str++);
  65       wlen = n;
  66     }
  67   else
  68     {
  69       size_t consumed;
  70       tre_char_t *wcptr = wregex;
  71       mbstate_t state;
  72       memset(&state, '\0', sizeof(state));
  73       while (n > 0)
  74         {
  75           consumed = tre_mbrtowc_l(wcptr, regex, n, &state, loc);
  76 
  77           switch (consumed)
  78             {
  79             case 0:
  80               if (*regex == '\0')
  81                 consumed = 1;
  82               else
  83                 {
  84                   free(wregex);
  85                   return REG_BADPAT;
  86                 }
  87               break;
  88             case (size_t)-1:
  89             case (size_t)-2:
  90               DPRINT(("mbrtowc: error %d: %s.\n", errno, strerror(errno)));
  91               free(wregex);
  92               return REG_ILLSEQ;
  93             }
  94           regex += consumed;
  95           n -= consumed;
  96           wcptr++;
  97         }
  98       wlen = wcptr - wregex;
  99     }
 100 
 101   wregex[wlen] = L'\0';
 102   ret = tre_compile(preg, wregex, wlen, cflags, loc);
 103   free(wregex);
 104 
 105   return ret;
 106 }
 107 
 108 int
 109 tre_regncomp(regex_t *preg, const char *regex, size_t n, int cflags)
 110 {
 111   return tre_regncomp_l(preg, regex, n, cflags, uselocale((locale_t)0));
 112 }
 113 
 114 int
 115 tre_regcomp_l(regex_t *preg, const char *regex, int cflags, locale_t loc)
 116 {
 117   size_t len;
 118 
 119   if (cflags & REG_PEND)
 120     {
 121       if ((const char *)(preg->re_endp) < regex)
 122         return REG_INVARG;
 123       len = (const char *)(preg->re_endp) - regex;
 124     }
 125   else
 126     len = strlen(regex);
 127   return tre_regncomp_l(preg, regex, len, cflags, loc);
 128 }
 129 
 130 int
 131 regcomp(regex_t *_RESTRICT_KYWD preg, const char *_RESTRICT_KYWD regex,
 132     int cflags)
 133 {
 134   return tre_regcomp_l(preg, regex, cflags, uselocale((locale_t)0));
 135 }
 136 
 137 void
 138 regfree(regex_t *preg)
 139 {
 140   tre_free(preg);
 141 }