Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD
@@ -1,6 +1,7 @@
/*
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
@@ -36,182 +37,137 @@
* SUCH DAMAGE.
*/
#include "lint.h"
#include <wctype.h>
+#include <locale.h>
#include "runefile.h"
#include "runetype.h"
+#include "localeimpl.h"
#include "_ctype.h"
/*
- * We removed: iswascii, iswhexnumber, and iswnumber, as
- * these are not present on Solaris. Note that the standard requires
- * iswascii to be a macro, so it is defined in our headers.
+ * Note that the standard requires iswascii to be a macro, so it is defined
+ * in our headers.
*
- * We renamed (per Solaris) iswideogram, iswspecial, iswspecial to the
- * equivalent values without "w". We added a new isnumber, that looks
- * for non-ASCII numbers.
+ * We aliased (per Solaris) iswideogram, iswphonogram, iswspecial, and
+ * iswnumber to the equivalent names without "w". The Solaris specific
+ * function isenglish() is here, but does not get an isw* equivalent.
+ *
+ * Note that various code assumes that "numbers" (iswdigit, iswxdigit)
+ * only return true for characters in the portable set. While the assumption
+ * is not technically correct, it turns out that for all of our locales this
+ * is true. iswhexnumber is aliased to iswxdigit.
*/
static int
-__istype(wint_t c, unsigned int f)
+__istype_l(locale_t loc, wint_t c, unsigned int f)
{
unsigned int rt;
- /* Fast path for single byte locales */
if (c < 0 || c >= _CACHED_RUNES)
- rt = ___runetype(c);
+ rt = __runetype(loc->runelocale, c);
else
- rt = _CurrentRuneLocale->__runetype[c];
+ rt = loc->runelocale->__runetype[c];
return (rt & f);
}
static int
-__isctype(wint_t c, unsigned int f)
+__istype(wint_t c, unsigned int f)
{
- unsigned int rt;
+ return (__istype_l(uselocale(NULL), c, f));
+}
- /* Fast path for single byte locales */
- if (c < 0 || c >= _CACHED_RUNES)
- return (0);
- else
- rt = _CurrentRuneLocale->__runetype[c];
- return (rt & f);
+int
+iswctype_l(wint_t wc, wctype_t class, locale_t loc)
+{
+ if (iswascii(wc))
+ return (__ctype_mask[wc] & class);
+ return (__istype_l(loc, wc, class));
}
#undef iswctype
int
iswctype(wint_t wc, wctype_t class)
{
+ /*
+ * Note that we don't just call iswctype_l because we optimize for
+ * the iswascii() case, so that most of the time we have no need to
+ * call uselocale().
+ */
+ if (iswascii(wc))
+ return (__ctype_mask[wc] & class);
return (__istype(wc, class));
}
+/*
+ * This is a legacy version, baked into binaries.
+ */
#undef _iswctype
unsigned
_iswctype(wchar_t wc, int class)
{
+ if (iswascii(wc))
+ return (__ctype_mask[wc] & class);
return (__istype((wint_t)wc, (unsigned int)class));
}
-#undef iswalnum
-int
-iswalnum(wint_t wc)
-{
- return (__istype(wc, _CTYPE_A|_CTYPE_D));
+#define DEFN_ISWTYPE(type, mask) \
+int \
+isw##type##_l(wint_t wc, locale_t loc) \
+{ \
+ return (iswascii(wc) ? \
+ (__ctype_mask[wc] & (mask)) : \
+ __istype_l(loc, wc, mask)); \
+} \
+ \
+int \
+isw##type(wint_t wc) \
+{ \
+ return (iswascii(wc) ? \
+ (__ctype_mask[wc] & (mask)) : \
+ __istype(wc, mask)); \
}
+/* kill off any macros */
+#undef iswalnum
#undef iswalpha
-int
-iswalpha(wint_t wc)
-{
- return (__istype(wc, _CTYPE_A));
-}
-
#undef iswblank
-int
-iswblank(wint_t wc)
-{
- return (__istype(wc, _CTYPE_B));
-}
-#undef iswcntrl
-int
-iswcntrl(wint_t wc)
-{
- return (__istype(wc, _CTYPE_C));
-}
+DEFN_ISWTYPE(alnum, _CTYPE_A|_CTYPE_D)
+DEFN_ISWTYPE(alpha, _CTYPE_A)
+DEFN_ISWTYPE(blank, _CTYPE_B)
+DEFN_ISWTYPE(cntrl, _CTYPE_C)
+DEFN_ISWTYPE(digit, _CTYPE_D)
+DEFN_ISWTYPE(graph, _CTYPE_G)	/* graph has its own bit; _CTYPE_D is digit */
+DEFN_ISWTYPE(lower, _CTYPE_L)
+DEFN_ISWTYPE(upper, _CTYPE_U)
+DEFN_ISWTYPE(print, _CTYPE_R)
+DEFN_ISWTYPE(punct, _CTYPE_P)
+DEFN_ISWTYPE(space, _CTYPE_S)
+DEFN_ISWTYPE(xdigit, _CTYPE_X)
+DEFN_ISWTYPE(ideogram, _CTYPE_I)
+DEFN_ISWTYPE(phonogram, _CTYPE_Q)
+DEFN_ISWTYPE(special, _CTYPE_T)
+DEFN_ISWTYPE(number, _CTYPE_N)
-#undef iswdigit
-int
-iswdigit(wint_t wc)
-{
- return (__isctype(wc, _CTYPE_D));
-}
-#undef iswgraph
-int
-iswgraph(wint_t wc)
-{
- return (__istype(wc, _CTYPE_G));
-}
+#undef iswhexnumber
+#pragma weak iswhexnumber = iswxdigit
+#pragma weak iswhexnumber_l = iswxdigit_l
#undef isideogram
-int
-isideogram(wint_t wc)
-{
- return (__istype(wc, _CTYPE_I));
-}
+#pragma weak isideogram = iswideogram
-#undef iswlower
-int
-iswlower(wint_t wc)
-{
- return (__istype(wc, _CTYPE_L));
-}
-
#undef isphonogram
-int
-isphonogram(wint_t wc)
-{
- return (__istype(wc, _CTYPE_Q));
-}
+#pragma weak isphonogram = iswphonogram
-#undef iswprint
-int
-iswprint(wint_t wc)
-{
- return (__istype(wc, _CTYPE_R));
-}
-
-#undef iswpunct
-int
-iswpunct(wint_t wc)
-{
- return (__istype(wc, _CTYPE_P));
-}
-
-#undef iswspace
-int
-iswspace(wint_t wc)
-{
- return (__istype(wc, _CTYPE_S));
-}
-
-#undef iswupper
-int
-iswupper(wint_t wc)
-{
- return (__istype(wc, _CTYPE_U));
-}
-
-#undef iswxdigit
-int
-iswxdigit(wint_t wc)
-{
- return (__isctype(wc, _CTYPE_X));
-}
-
-#undef isenglish
-int
-isenglish(wint_t wc)
-{
- return (__istype(wc, _CTYPE_E));
-}
-
#undef isspecial
-int
-isspecial(wint_t wc)
-{
- return (__istype(wc, _CTYPE_T));
-}
+#pragma weak isspecial = iswspecial
#undef isnumber
-int
-isnumber(wint_t wc)
-{
- return (__istype(wc, _CTYPE_N));
-}
+#pragma weak isnumber = iswnumber
/*
* FreeBSD has iswrune() for use by external programs, and this is used by
* the "tr" program. As that program is part of our consolidation, we
* provide an _ILLUMOS_PRIVATE version of this function that we can use.
@@ -226,8 +182,23 @@
/*
* Note, FreeBSD ignored the low order byte, as they encode their
* ctype values differently. We can't do that (ctype is baked into
* applications), but instead can just check if *any* bit is set in
* the ctype. Any bit being set indicates it's a valid rune.
+ *
+ * NB: For ASCII all positions except NUL are runes.
*/
- return (__istype(wc, 0xffffffffU));
+ return (wc == 0 ? 0 : iswascii(wc) ? 1 : __istype(wc, 0xffffffffU));
+}
+
+/*
+ * isenglish is a Solaris legacy. No isw* equivalent. Note that this most
+ * likely doesn't work, as the locale data we have doesn't include it. It
+ * specifically is only valid for non-ASCII characters. We're not sure this
+ * is in actual use in the wild.
+ */
+#undef isenglish
+int
+isenglish(wint_t wc)
+{
+ return (__istype(wc, _CTYPE_E));
}