1 /*
   2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * This code is derived from software contributed to Berkeley by
   7  * Paul Borman at Krystal Technologies.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  */
  33 
  34 #include "lint.h"
  35 #include <ctype.h>
  36 #include <wchar.h>
  37 #include "runetype.h"
  38 #include "mblocal.h"
  39 #include "_ctype.h"
  40 
  41 _RuneLocale _DefaultRuneLocale = {
  42         _RUNE_MAGIC_1,
  43         "NONE",
  44         {
  45                 /* 00 */
  46                 _CTYPE_C,
  47                 _CTYPE_C,
  48                 _CTYPE_C,
  49                 _CTYPE_C,
  50                 _CTYPE_C,
  51                 _CTYPE_C,
  52                 _CTYPE_C,
  53                 _CTYPE_C,
  54                 /* 08 */
  55                 _CTYPE_C,
  56                 _CTYPE_C|_CTYPE_S|_CTYPE_B,
  57                 _CTYPE_C|_CTYPE_S,
  58                 _CTYPE_C|_CTYPE_S,
  59                 _CTYPE_C|_CTYPE_S,
  60                 _CTYPE_C|_CTYPE_S,
  61                 _CTYPE_C,
  62                 _CTYPE_C,
  63                 /* 10 */
  64                 _CTYPE_C,
  65                 _CTYPE_C,
  66                 _CTYPE_C,
  67                 _CTYPE_C,
  68                 _CTYPE_C,
  69                 _CTYPE_C,
  70                 _CTYPE_C,
  71                 _CTYPE_C,
  72                 /* 18 */
  73                 _CTYPE_C,
  74                 _CTYPE_C,
  75                 _CTYPE_C,
  76                 _CTYPE_C,
  77                 _CTYPE_C,
  78                 _CTYPE_C,
  79                 _CTYPE_C,
  80                 _CTYPE_C,
  81                 /* 20 */
  82                 _CTYPE_S|_CTYPE_B|_CTYPE_R,
  83                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  84                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  85                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  86                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  87                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  88                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  89                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  90                 /* 28 */
  91                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  92                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  93                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  94                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  95                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  96                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  97                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  98                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
  99                 /* 30 */
 100                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 101                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 102                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 103                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 104                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 105                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 106                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 107                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 108                 /* 38 */
 109                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 110                 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X,
 111                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 112                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 113                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 114                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 115                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 116                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 117                 /* 40 */
 118                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 119                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 120                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 121                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 122                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 123                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 124                 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 125                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 126                 /* 48 */
 127                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 128                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 129                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 130                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 131                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 132                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 133                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 134                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 135                 /* 50 */
 136                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 137                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 138                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 139                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 140                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 141                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 142                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 143                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 144                 /* 58 */
 145                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 146                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 147                 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 148                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 149                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 150                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 151                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 152                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 153                 /* 60 */
 154                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 155                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 156                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 157                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 158                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 159                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 160                 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 161                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 162                 /* 68 */
 163                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 164                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 165                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 166                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 167                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 168                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 169                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 170                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 171                 /* 70 */
 172                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 173                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 174                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 175                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 176                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 177                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 178                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 179                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 180                 /* 78 */
 181                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 182                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 183                 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A,
 184                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 185                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 186                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 187                 _CTYPE_P|_CTYPE_R|_CTYPE_G,
 188                 _CTYPE_C,
 189         },
 190 
 191         /* BEGIN CSTYLED */
 192         {
 193         0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07,
 194         0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f,
 195         0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17,
 196         0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f,
 197         0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27,
 198         0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f,
 199         0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37,
 200         0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f,
 201         0x40,   'a',    'b',    'c',    'd',    'e',    'f',    'g',
 202         'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
 203         'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
 204         'x',    'y',    'z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f,
 205         0x60,   'a',    'b',    'c',    'd',    'e',    'f',    'g',
 206         'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
 207         'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
 208         'x',    'y',    'z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f,
 209         0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87,
 210         0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f,
 211         0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97,
 212         0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f,
 213         0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7,
 214         0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf,
 215         0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7,
 216         0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf,
 217         0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7,
 218         0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf,
 219         0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7,
 220         0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf,
 221         0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7,
 222         0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef,
 223         0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7,
 224         0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff,
 225         },
 226         {
 227         0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07,
 228         0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f,
 229         0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17,
 230         0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f,
 231         0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27,
 232         0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f,
 233         0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37,
 234         0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f,
 235         0x40,   'A',    'B',    'C',    'D',    'E',    'F',    'G',
 236         'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
 237         'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
 238         'X',    'Y',    'Z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f,
 239         0x60,   'A',    'B',    'C',    'D',    'E',    'F',    'G',
 240         'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
 241         'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
 242         'X',    'Y',    'Z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f,
 243         0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87,
 244         0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f,
 245         0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97,
 246         0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f,
 247         0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7,
 248         0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf,
 249         0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7,
 250         0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf,
 251         0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7,
 252         0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf,
 253         0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7,
 254         0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf,
 255         0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7,
 256         0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef,
 257         0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7,
 258         0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff,
 259         },
 260         /* END CSTYLED */
 261 };
 262 
 263 _RuneLocale *_CurrentRuneLocale = &_DefaultRuneLocale;
 264 
 265 /* Taken from former _ctype.c */
 266 unsigned int *__ctype_mask = _DefaultRuneLocale.__runetype;
 267 
 268 int *__trans_lower = _DefaultRuneLocale.__maplower;
 269 int *__trans_upper = _DefaultRuneLocale.__mapupper;
 270 
 271 /*
 272  * Used in various string routines to conditionalize versions optimized for
 273  * the ASCII case
 274  */
 275 int charset_is_ascii = 1;