Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>

*** 1,6 **** --- 1,7 ---- /* + * Copyright 2014 Garrett D'Amore <garrett@damore.org> * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed
*** 36,217 **** * SUCH DAMAGE. */ #include "lint.h" #include <wctype.h> #include "runefile.h" #include "runetype.h" #include "_ctype.h" /* ! * We removed: iswascii, iswhexnumber, and iswnumber, as ! * these are not present on Solaris. Note that the standard requires ! * iswascii to be a macro, so it is defined in our headers. * ! * We renamed (per Solaris) iswideogram, iswspecial, iswspecial to the ! * equivalent values without "w". We added a new isnumber, that looks ! * for non-ASCII numbers. */ static int ! __istype(wint_t c, unsigned int f) { unsigned int rt; - /* Fast path for single byte locales */ if (c < 0 || c >= _CACHED_RUNES) ! rt = ___runetype(c); else ! rt = _CurrentRuneLocale->__runetype[c]; return (rt & f); } static int ! __isctype(wint_t c, unsigned int f) { ! unsigned int rt; ! /* Fast path for single byte locales */ ! if (c < 0 || c >= _CACHED_RUNES) ! return (0); ! else ! rt = _CurrentRuneLocale->__runetype[c]; ! return (rt & f); } #undef iswctype int iswctype(wint_t wc, wctype_t class) { return (__istype(wc, class)); } #undef _iswctype unsigned _iswctype(wchar_t wc, int class) { return (__istype((wint_t)wc, (unsigned int)class)); } ! #undef iswalnum ! int ! iswalnum(wint_t wc) ! { ! return (__istype(wc, _CTYPE_A|_CTYPE_D)); } #undef iswalpha - int - iswalpha(wint_t wc) - { - return (__istype(wc, _CTYPE_A)); - } - #undef iswblank - int - iswblank(wint_t wc) - { - return (__istype(wc, _CTYPE_B)); - } ! #undef iswcntrl ! int ! iswcntrl(wint_t wc) ! { ! return (__istype(wc, _CTYPE_C)); ! } - #undef iswdigit - int - iswdigit(wint_t wc) - { - return (__isctype(wc, _CTYPE_D)); - } ! #undef iswgraph ! int ! iswgraph(wint_t wc) ! { ! return (__istype(wc, _CTYPE_G)); ! } #undef isideogram ! int ! isideogram(wint_t wc) ! { ! return (__istype(wc, _CTYPE_I)); ! } - #undef iswlower - int - iswlower(wint_t wc) - { - return (__istype(wc, _CTYPE_L)); - } - #undef isphonogram ! int ! isphonogram(wint_t wc) ! { ! return (__istype(wc, _CTYPE_Q)); ! } - #undef iswprint - int - iswprint(wint_t wc) - { - return (__istype(wc, _CTYPE_R)); - } - - #undef iswpunct - int - iswpunct(wint_t wc) - { - return (__istype(wc, _CTYPE_P)); - } - - #undef iswspace - int - iswspace(wint_t wc) - { - return (__istype(wc, _CTYPE_S)); - } - - #undef iswupper - int - iswupper(wint_t wc) - { - return (__istype(wc, _CTYPE_U)); - } - - #undef iswxdigit - int - iswxdigit(wint_t wc) - { - return (__isctype(wc, _CTYPE_X)); - } - - #undef isenglish - int - isenglish(wint_t wc) - { - return (__istype(wc, _CTYPE_E)); - } - #undef isspecial ! int ! isspecial(wint_t wc) ! { ! return (__istype(wc, _CTYPE_T)); ! } #undef isnumber ! int ! isnumber(wint_t wc) ! { ! return (__istype(wc, _CTYPE_N)); ! } /* * FreeBSD has iswrune() for use by external programs, and this is used by * the "tr" program. As that program is part of our consolidation, we * provide an _ILLUMOS_PRIVATE version of this function that we can use. --- 37,173 ---- * SUCH DAMAGE. */ #include "lint.h" #include <wctype.h> + #include <locale.h> #include "runefile.h" #include "runetype.h" + #include "localeimpl.h" #include "_ctype.h" /* ! * Note that the standard requires iswascii to be a macro, so it is defined ! * in our headers. * ! * We aliased (per Solaris) iswideogram, iswspecial, iswspecial to the ! * equivalent values without "w". The Solaris specific function isenglish() ! * is here, but does not get an isw* equivalent. ! * ! * Note that various code assumes that "numbers" (iswdigit, iswxdigit) ! * only return true for characters in the portable set. While the assumption ! * is not technically correct, it turns out that for all of our locales this ! * is true. iswhexnumber is aliased to iswxdigit. */ static int ! __istype_l(locale_t loc, wint_t c, unsigned int f) { unsigned int rt; if (c < 0 || c >= _CACHED_RUNES) ! rt = __runetype(loc->runelocale, c); else ! rt = loc->runelocale->__runetype[c]; return (rt & f); } static int ! __istype(wint_t c, unsigned int f) { ! return (__istype_l(uselocale(NULL), c, f)); ! } ! int ! iswctype_l(wint_t wc, wctype_t class, locale_t loc) ! { ! if (iswascii(wc)) ! return (__ctype_mask[wc] & class); ! return (__istype_l(loc, wc, class)); } #undef iswctype int iswctype(wint_t wc, wctype_t class) { + /* + * Note that we don't just call iswctype_l because we optimize for + * the iswascii() case, so that most of the time we have no need to + * call uselocale(). + */ + if (iswascii(wc)) + return (__ctype_mask[wc] & class); return (__istype(wc, class)); } + /* + * This is a legacy version, baked into binaries. + */ #undef _iswctype unsigned _iswctype(wchar_t wc, int class) { + if (iswascii(wc)) + return (__ctype_mask[wc] & class); return (__istype((wint_t)wc, (unsigned int)class)); } ! #define DEFN_ISWTYPE(type, mask) \ ! int \ ! isw##type##_l(wint_t wc, locale_t loc) \ ! { \ ! return (iswascii(wc) ? \ ! (__ctype_mask[wc] & (mask)) : \ ! __istype_l(loc, wc, mask)); \ ! } \ ! \ ! int \ ! isw##type(wint_t wc) \ ! { \ ! return (iswascii(wc) ? \ ! (__ctype_mask[wc] & (mask)) : \ ! __istype(wc, mask)); \ } + /* kill off any macros */ + #undef iswalnum #undef iswalpha #undef iswblank ! DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D) ! DEFN_ISWTYPE(alpha, _CTYPE_A) ! DEFN_ISWTYPE(blank, _CTYPE_B) ! DEFN_ISWTYPE(cntrl, _CTYPE_C) ! DEFN_ISWTYPE(digit, _CTYPE_D) ! DEFN_ISWTYPE(graph, _CTYPE_D) ! DEFN_ISWTYPE(lower, _CTYPE_L) ! DEFN_ISWTYPE(upper, _CTYPE_U) ! DEFN_ISWTYPE(print, _CTYPE_R) ! DEFN_ISWTYPE(punct, _CTYPE_P) ! DEFN_ISWTYPE(space, _CTYPE_S) ! DEFN_ISWTYPE(xdigit, _CTYPE_X) ! DEFN_ISWTYPE(ideogram, _CTYPE_I) ! DEFN_ISWTYPE(phonogram, _CTYPE_Q) ! DEFN_ISWTYPE(special, _CTYPE_T) ! DEFN_ISWTYPE(number, _CTYPE_N) ! #undef iswhexnumber ! #pragma weak iswhexnumber = iswxdigit ! #pragma weak iswhexnumber_l = iswxdigit_l #undef isideogram ! #pragma weak isideogram = iswideogram #undef isphonogram ! #pragma weak isphonogram = iswphonogram #undef isspecial ! #pragma weak isspecial = iswspecial #undef isnumber ! #pragma weak isnumber = iswnumber /* * FreeBSD has iswrune() for use by external programs, and this is used by * the "tr" program. As that program is part of our consolidation, we * provide an _ILLUMOS_PRIVATE version of this function that we can use.
*** 226,233 **** /* * Note, FreeBSD ignored the low order byte, as they encode their * ctype values differently. We can't do that (ctype is baked into * applications), but instead can just check if *any* bit is set in * the ctype. Any bit being set indicates its a valid rune. */ ! return (__istype(wc, 0xffffffffU)); } --- 182,204 ---- /* * Note, FreeBSD ignored the low order byte, as they encode their * ctype values differently. We can't do that (ctype is baked into * applications), but instead can just check if *any* bit is set in * the ctype. Any bit being set indicates its a valid rune. + * + * NB: For ASCII all positions except NULL are runes. */ ! return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU)); ! } ! ! /* ! * isenglish is a Solaris legacy. No isw* equivalent. Note that this most ! * likely doesn't work, as the locale data we have doesn't include it. It ! * specifically is only valid for non-ASCII characters. We're not sure this ! * is in actual use in the wild. ! */ ! #undef isenglish ! int ! isenglish(wint_t wc) ! { ! return (__istype(wc, _CTYPE_E)); }