Print this page
3154 Nonconforming tolower and toupper with UTF-8 locales
Reviewed by: Garrett D'Amore <garrett.damore@gmail.com>


 194                 ctn->toupper = wc;
 195                 break;
 196         case T_TOLOWER:
 197                 ctn->tolower = wc;
 198                 break;
 199         default:
 200                 INTERR;
 201                 break;
 202         }
 203 }
 204 
 205 void
 206 dump_ctype(void)
 207 {
 208         FILE            *f;
 209         _FileRuneLocale rl;
 210         ctype_node_t    *ctn, *last_ct, *last_lo, *last_up;
 211         _FileRuneEntry  *ct = NULL;
 212         _FileRuneEntry  *lo = NULL;
 213         _FileRuneEntry  *up = NULL;

 214 
 215         (void) memset(&rl, 0, sizeof (rl));
 216         last_ct = NULL;
 217         last_lo = NULL;
 218         last_up = NULL;
 219 
 220         if ((f = open_category()) == NULL)
 221                 return;
 222 
 223         (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
 224         (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
 225 
 226         for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {






 227 
 228                 wchar_t wc = ctn->wc;
 229                 int conflict = 0;
 230 


 231                 /*
 232                  * POSIX requires certain portable characters have
 233                  * certain types.  Add them if they are missing.
 234                  */
 235                 if ((wc >= 1) && (wc <= 127)) {
 236                         if ((wc >= 'A') && (wc <= 'Z'))
 237                                 ctn->ctype |= _ISUPPER;
 238                         if ((wc >= 'a') && (wc <= 'z'))
 239                                 ctn->ctype |= _ISLOWER;
 240                         if ((wc >= '0') && (wc <= '9'))
 241                                 ctn->ctype |= _ISDIGIT;
 242                         if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
 243                                 ctn->ctype |= _ISSPACE;
 244                         if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
 245                                 ctn->ctype |= _ISXDIGIT;
 246                         if (strchr(" \t", (char)wc))
 247                                 ctn->ctype |= _ISBLANK;
 248 
 249                         /*
 250                          * Technically these settings are only


 288                     (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
 289                         conflict++;
 290                 if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
 291                         conflict++;
 292                 if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
 293                         conflict++;
 294                 if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
 295                         conflict++;
 296 
 297                 if (conflict) {
 298                         warn("conflicting classes for character 0x%x (%x)",
 299                             wc, ctn->ctype);
 300                 }
 301                 /*
 302                  * Handle the lower 256 characters using the simple
 303                  * optimization.  Note that if we have not defined the
 304                  * upper/lower case, then we identity map it.
 305                  */
 306                 if ((unsigned)wc < _CACHED_RUNES) {
 307                         rl.runetype[wc] = ctn->ctype;
 308                         rl.maplower[wc] = ctn->tolower ? ctn->tolower : wc;
 309                         rl.mapupper[wc] = ctn->toupper ? ctn->toupper : wc;


 310                         continue;
 311                 }
 312 
 313                 if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) {
 314                         ct[rl.runetype_ext_nranges-1].max = wc;
 315                         last_ct = ctn;
 316                 } else {
 317                         rl.runetype_ext_nranges++;
 318                         ct = realloc(ct,
 319                             sizeof (*ct) * rl.runetype_ext_nranges);
 320                         ct[rl.runetype_ext_nranges - 1].min = wc;
 321                         ct[rl.runetype_ext_nranges - 1].max = wc;
 322                         ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
 323                         last_ct = ctn;
 324                 }
 325                 if (ctn->tolower == 0) {
 326                         last_lo = NULL;
 327                 } else if ((last_lo != NULL) &&
 328                     (last_lo->tolower + 1 == ctn->tolower)) {
 329                         lo[rl.maplower_ext_nranges-1].max = wc;




 194                 ctn->toupper = wc;
 195                 break;
 196         case T_TOLOWER:
 197                 ctn->tolower = wc;
 198                 break;
 199         default:
 200                 INTERR;
 201                 break;
 202         }
 203 }
 204 
 205 void
 206 dump_ctype(void)
 207 {
 208         FILE            *f;
 209         _FileRuneLocale rl;
 210         ctype_node_t    *ctn, *last_ct, *last_lo, *last_up;
 211         _FileRuneEntry  *ct = NULL;
 212         _FileRuneEntry  *lo = NULL;
 213         _FileRuneEntry  *up = NULL;
 214         wchar_t         wc;
 215 
 216         (void) memset(&rl, 0, sizeof (rl));
 217         last_ct = NULL;
 218         last_lo = NULL;
 219         last_up = NULL;
 220 
 221         if ((f = open_category()) == NULL)
 222                 return;
 223 
 224         (void) memcpy(rl.magic, _FILE_RUNE_MAGIC_1, 8);
 225         (void) strncpy(rl.encoding, get_wide_encoding(), sizeof (rl.encoding));
 226 
 227         /*
 228          * Initialize the identity map.
 229          */
 230         for (wc = 0; (unsigned)wc < _CACHED_RUNES; wc++) {
 231                 rl.maplower[wc] = wc;
 232                 rl.mapupper[wc] = wc;
 233         }
 234 
 235         for (ctn = avl_first(&ctypes); ctn; ctn = AVL_NEXT(&ctypes, ctn)) {
 236                 int conflict = 0;
 237 
 238                 wc = ctn->wc;
 239 
 240                 /*
 241                  * POSIX requires certain portable characters have
 242                  * certain types.  Add them if they are missing.
 243                  */
 244                 if ((wc >= 1) && (wc <= 127)) {
 245                         if ((wc >= 'A') && (wc <= 'Z'))
 246                                 ctn->ctype |= _ISUPPER;
 247                         if ((wc >= 'a') && (wc <= 'z'))
 248                                 ctn->ctype |= _ISLOWER;
 249                         if ((wc >= '0') && (wc <= '9'))
 250                                 ctn->ctype |= _ISDIGIT;
 251                         if (strchr(" \f\n\r\t\v", (char)wc) != NULL)
 252                                 ctn->ctype |= _ISSPACE;
 253                         if (strchr("0123456789ABCDEFabcdef", (char)wc) != NULL)
 254                                 ctn->ctype |= _ISXDIGIT;
 255                         if (strchr(" \t", (char)wc))
 256                                 ctn->ctype |= _ISBLANK;
 257 
 258                         /*
 259                          * Technically these settings are only


 297                     (ctn->ctype & (_ISDIGIT|_ISALPHA|_ISXDIGIT)))
 298                         conflict++;
 299                 if ((ctn->ctype & _ISSPACE) && (ctn->ctype & _ISGRAPH))
 300                         conflict++;
 301                 if ((ctn->ctype & _ISCNTRL) & _ISPRINT)
 302                         conflict++;
 303                 if ((wc == ' ') && (ctn->ctype & (_ISPUNCT|_ISGRAPH)))
 304                         conflict++;
 305 
 306                 if (conflict) {
 307                         warn("conflicting classes for character 0x%x (%x)",
 308                             wc, ctn->ctype);
 309                 }
 310                 /*
 311                  * Handle the lower 256 characters using the simple
 312                  * optimization.  Note that if we have not defined the
 313                  * upper/lower case, then we identity map it.
 314                  */
 315                 if ((unsigned)wc < _CACHED_RUNES) {
 316                         rl.runetype[wc] = ctn->ctype;
 317                         if (ctn->tolower)
 318                                 rl.maplower[wc] = ctn->tolower;
 319                         if (ctn->toupper)
 320                                 rl.mapupper[wc] = ctn->toupper;
 321                         continue;
 322                 }
 323 
 324                 if ((last_ct != NULL) && (last_ct->ctype == ctn->ctype)) {
 325                         ct[rl.runetype_ext_nranges-1].max = wc;
 326                         last_ct = ctn;
 327                 } else {
 328                         rl.runetype_ext_nranges++;
 329                         ct = realloc(ct,
 330                             sizeof (*ct) * rl.runetype_ext_nranges);
 331                         ct[rl.runetype_ext_nranges - 1].min = wc;
 332                         ct[rl.runetype_ext_nranges - 1].max = wc;
 333                         ct[rl.runetype_ext_nranges - 1].map = ctn->ctype;
 334                         last_ct = ctn;
 335                 }
 336                 if (ctn->tolower == 0) {
 337                         last_lo = NULL;
 338                 } else if ((last_lo != NULL) &&
 339                     (last_lo->tolower + 1 == ctn->tolower)) {
 340                         lo[rl.maplower_ext_nranges-1].max = wc;