1 /*
   2  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   3  * Copyright 2017 Nexenta Systems, Inc.
   4  * Copyright 2019 Joyent, Inc.
   5  * Copyright (c) 1993
   6  *      The Regents of the University of California.  All rights reserved.
   7  *
   8  * This code is derived from software contributed to Berkeley by
   9  * Paul Borman at Krystal Technologies.
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 4. Neither the name of the University nor the names of its contributors
  20  *    may be used to endorse or promote products derived from this software
  21  *    without specific prior written permission.
  22  *
  23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  33  * SUCH DAMAGE.
  34  */
  35 
  36 #include "lint.h"
  37 #include <ctype.h>
  38 #include <wchar.h>
  39 #include "runetype.h"
  40 #include "mblocal.h"
  41 #include "_ctype.h"
  42 
  43 #define _DEFRUNETYPE \
  44         /* 00 */ \
  45         _CTYPE_C, \
  46         _CTYPE_C, \
  47         _CTYPE_C, \
  48         _CTYPE_C, \
  49         _CTYPE_C, \
  50         _CTYPE_C, \
  51         _CTYPE_C, \
  52         _CTYPE_C, \
  53         /* 08 */ \
  54         _CTYPE_C, \
  55         _CTYPE_C|_CTYPE_S|_CTYPE_B, \
  56         _CTYPE_C|_CTYPE_S, \
  57         _CTYPE_C|_CTYPE_S, \
  58         _CTYPE_C|_CTYPE_S, \
  59         _CTYPE_C|_CTYPE_S, \
  60         _CTYPE_C, \
  61         _CTYPE_C, \
  62         /* 10 */ \
  63         _CTYPE_C, \
  64         _CTYPE_C, \
  65         _CTYPE_C, \
  66         _CTYPE_C, \
  67         _CTYPE_C, \
  68         _CTYPE_C, \
  69         _CTYPE_C, \
  70         _CTYPE_C, \
  71         /* 18 */ \
  72         _CTYPE_C, \
  73         _CTYPE_C, \
  74         _CTYPE_C, \
  75         _CTYPE_C, \
  76         _CTYPE_C, \
  77         _CTYPE_C, \
  78         _CTYPE_C, \
  79         _CTYPE_C, \
  80         /* 20 */ \
  81         _CTYPE_S|_CTYPE_B|_CTYPE_R, \
  82         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  83         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  84         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  85         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  86         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  87         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  88         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  89         /* 28 */ \
  90         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  91         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  92         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  93         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  94         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  95         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  96         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  97         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
  98         /* 30 */ \
  99         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 100         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 101         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 102         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 103         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 104         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 105         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 106         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 107         /* 38 */ \
 108         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 109         _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \
 110         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 111         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 112         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 113         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 114         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 115         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 116         /* 40 */ \
 117         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 118         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 119         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 120         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 121         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 122         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 123         _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 124         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 125         /* 48 */ \
 126         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 127         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 128         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 129         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 130         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 131         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 132         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 133         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 134         /* 50 */ \
 135         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 136         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 137         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 138         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 139         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 140         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 141         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 142         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 143         /* 58 */ \
 144         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 145         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 146         _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 147         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 148         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 149         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 150         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 151         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 152         /* 60 */ \
 153         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 154         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 155         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 156         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 157         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 158         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 159         _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 160         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 161         /* 68 */ \
 162         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 163         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 164         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 165         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 166         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 167         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 168         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 169         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 170         /* 70 */ \
 171         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 172         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 173         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 174         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 175         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 176         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 177         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 178         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 179         /* 78 */ \
 180         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 181         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 182         _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \
 183         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 184         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 185         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 186         _CTYPE_P|_CTYPE_R|_CTYPE_G, \
 187         _CTYPE_C
 188 
 189 #define _DEFMAPLOWER \
 190         0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07, \
 191         0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f, \
 192         0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17, \
 193         0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f, \
 194         0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27, \
 195         0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f, \
 196         0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37, \
 197         0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f, \
 198         0x40,   'a',    'b',    'c',    'd',    'e',    'f',    'g', \
 199         'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o', \
 200         'p',    'q',    'r',    's',    't',    'u',    'v',    'w', \
 201         'x',    'y',    'z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f, \
 202         0x60,   'a',    'b',    'c',    'd',    'e',    'f',    'g', \
 203         'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o', \
 204         'p',    'q',    'r',    's',    't',    'u',    'v',    'w', \
 205         'x',    'y',    'z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f, \
 206         0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87, \
 207         0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f, \
 208         0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97, \
 209         0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f, \
 210         0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7, \
 211         0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf, \
 212         0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7, \
 213         0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf, \
 214         0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7, \
 215         0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf, \
 216         0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7, \
 217         0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf, \
 218         0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7, \
 219         0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef, \
 220         0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7, \
 221         0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff
 222 
 223 #define _DEFMAPUPPER \
 224         0x00,   0x01,   0x02,   0x03,   0x04,   0x05,   0x06,   0x07, \
 225         0x08,   0x09,   0x0a,   0x0b,   0x0c,   0x0d,   0x0e,   0x0f, \
 226         0x10,   0x11,   0x12,   0x13,   0x14,   0x15,   0x16,   0x17, \
 227         0x18,   0x19,   0x1a,   0x1b,   0x1c,   0x1d,   0x1e,   0x1f, \
 228         0x20,   0x21,   0x22,   0x23,   0x24,   0x25,   0x26,   0x27, \
 229         0x28,   0x29,   0x2a,   0x2b,   0x2c,   0x2d,   0x2e,   0x2f, \
 230         0x30,   0x31,   0x32,   0x33,   0x34,   0x35,   0x36,   0x37, \
 231         0x38,   0x39,   0x3a,   0x3b,   0x3c,   0x3d,   0x3e,   0x3f, \
 232         0x40,   'A',    'B',    'C',    'D',    'E',    'F',    'G', \
 233         'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O', \
 234         'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W', \
 235         'X',    'Y',    'Z',    0x5b,   0x5c,   0x5d,   0x5e,   0x5f, \
 236         0x60,   'A',    'B',    'C',    'D',    'E',    'F',    'G', \
 237         'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O', \
 238         'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W', \
 239         'X',    'Y',    'Z',    0x7b,   0x7c,   0x7d,   0x7e,   0x7f, \
 240         0x80,   0x81,   0x82,   0x83,   0x84,   0x85,   0x86,   0x87, \
 241         0x88,   0x89,   0x8a,   0x8b,   0x8c,   0x8d,   0x8e,   0x8f, \
 242         0x90,   0x91,   0x92,   0x93,   0x94,   0x95,   0x96,   0x97, \
 243         0x98,   0x99,   0x9a,   0x9b,   0x9c,   0x9d,   0x9e,   0x9f, \
 244         0xa0,   0xa1,   0xa2,   0xa3,   0xa4,   0xa5,   0xa6,   0xa7, \
 245         0xa8,   0xa9,   0xaa,   0xab,   0xac,   0xad,   0xae,   0xaf, \
 246         0xb0,   0xb1,   0xb2,   0xb3,   0xb4,   0xb5,   0xb6,   0xb7, \
 247         0xb8,   0xb9,   0xba,   0xbb,   0xbc,   0xbd,   0xbe,   0xbf, \
 248         0xc0,   0xc1,   0xc2,   0xc3,   0xc4,   0xc5,   0xc6,   0xc7, \
 249         0xc8,   0xc9,   0xca,   0xcb,   0xcc,   0xcd,   0xce,   0xcf, \
 250         0xd0,   0xd1,   0xd2,   0xd3,   0xd4,   0xd5,   0xd6,   0xd7, \
 251         0xd8,   0xd9,   0xda,   0xdb,   0xdc,   0xdd,   0xde,   0xdf, \
 252         0xe0,   0xe1,   0xe2,   0xe3,   0xe4,   0xe5,   0xe6,   0xe7, \
 253         0xe8,   0xe9,   0xea,   0xeb,   0xec,   0xed,   0xee,   0xef, \
 254         0xf0,   0xf1,   0xf2,   0xf3,   0xf4,   0xf5,   0xf6,   0xf7, \
 255         0xf8,   0xf9,   0xfa,   0xfb,   0xfc,   0xfd,   0xfe,   0xff,
 256 
 257 _RuneLocale _DefaultRuneLocale = {
 258         _RUNE_MAGIC_1,
 259         "NONE",
 260         { _DEFRUNETYPE },
 261         { _DEFMAPLOWER },
 262         { _DEFMAPUPPER },
 263 };
 264 
 265 /*
 266  * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and
 267  * have to stay pointers for binary compatibility, so we provide separate
 268  * storage for them, initialized to "C" locale contents by default.  Note that
 269  * legacy code may dereference __ctype_mask[-1] when checking against EOF,
 270  * relying on that value to be 0.  To allow this, ___ctype_mask is expanded by
 271  * one value and prepended with a leading 0, with __ctype_mask being set to
 272  * point to ___ctype_mask[1].  (__trans_lower and __trans_upper do not suffer
 273  * from this as EOF access was prevented in legacy code by a check against
 274  * isascii(), which always returned 0 for EOF.)
 275  */
 276 static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE };
 277 unsigned int *__ctype_mask = &___ctype_mask[1];
 278 
 279 static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER };
 280 int *__trans_lower = ___trans_lower;
 281 
 282 static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER };
 283 int *__trans_upper = ___trans_upper;