Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Gordon Ross <gordon.ross@nexenta.com>
Approved by: TBD
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/gb18030.c
+++ new/usr/src/lib/libc/port/locale/gb18030.c
1 1 /*
2 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
2 3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
3 4 * Copyright (c) 2002-2004 Tim J. Robbins
4 5 * All rights reserved.
5 6 *
6 7 * Redistribution and use in source and binary forms, with or without
7 8 * modification, are permitted provided that the following conditions
8 9 * are met:
9 10 * 1. Redistributions of source code must retain the above copyright
10 11 * notice, this list of conditions and the following disclaimer.
11 12 * 2. Redistributions in binary form must reproduce the above copyright
12 13 * notice, this list of conditions and the following disclaimer in the
13 14 * documentation and/or other materials provided with the distribution.
14 15 *
15 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 26 * SUCH DAMAGE.
26 27 */
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
27 28
28 29 /*
29 30 * PRC National Standard GB 18030-2000 encoding of Chinese text.
30 31 *
31 32 * See gb18030(5) for details.
32 33 */
33 34
34 35 #include "lint.h"
35 36 #include <sys/types.h>
36 37 #include <errno.h>
37 -#include "runetype.h"
38 38 #include <stdlib.h>
39 39 #include <string.h>
40 40 #include <wchar.h>
41 41 #include "mblocal.h"
42 +#include "lctype.h"
42 43
43 44
44 45 static size_t _GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD,
45 46 const char *_RESTRICT_KYWD,
46 47 size_t, mbstate_t *_RESTRICT_KYWD);
47 48 static int _GB18030_mbsinit(const mbstate_t *);
48 49 static size_t _GB18030_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
49 50 mbstate_t *_RESTRICT_KYWD);
51 +static size_t _GB18030_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
52 + const char **_RESTRICT_KYWD, size_t, size_t,
53 + mbstate_t *_RESTRICT_KYWD);
54 +static size_t _GB18030_wcsnrtombs(char *_RESTRICT_KYWD,
55 + const wchar_t **_RESTRICT_KYWD, size_t, size_t,
56 + mbstate_t *_RESTRICT_KYWD);
50 57
58 +
51 59 typedef struct {
52 60 int count;
53 61 uchar_t bytes[4];
54 62 } _GB18030State;
55 63
56 -int
57 -_GB18030_init(_RuneLocale *rl)
64 +void
65 +_GB18030_init(struct lc_ctype *lct)
58 66 {
59 67
60 - __mbrtowc = _GB18030_mbrtowc;
61 - __wcrtomb = _GB18030_wcrtomb;
62 - __mbsinit = _GB18030_mbsinit;
63 - _CurrentRuneLocale = rl;
64 - __ctype[520] = 4;
65 - charset_is_ascii = 0;
66 -
67 - return (0);
68 + lct->lc_mbrtowc = _GB18030_mbrtowc;
69 + lct->lc_wcrtomb = _GB18030_wcrtomb;
70 + lct->lc_mbsinit = _GB18030_mbsinit;
71 + lct->lc_mbsnrtowcs = _GB18030_mbsnrtowcs;
72 + lct->lc_wcsnrtombs = _GB18030_wcsnrtombs;
73 + lct->lc_max_mblen = 4;
74 + lct->lc_is_ascii = 0;
68 75 }
69 76
70 77 static int
71 78 _GB18030_mbsinit(const mbstate_t *ps)
72 79 {
73 80
74 81 return (ps == NULL || ((const _GB18030State *)ps)->count == 0);
75 82 }
76 83
77 84 static size_t
78 85 _GB18030_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
79 86 size_t n, mbstate_t *_RESTRICT_KYWD ps)
80 87 {
81 88 _GB18030State *gs;
82 89 wchar_t wch;
83 90 int ch, len, ocount;
84 91 size_t ncopy;
85 92
86 93 gs = (_GB18030State *)ps;
87 94
88 95 if (gs->count < 0 || gs->count > sizeof (gs->bytes)) {
89 96 errno = EINVAL;
90 97 return ((size_t)-1);
91 98 }
92 99
93 100 if (s == NULL) {
94 101 s = "";
95 102 n = 1;
96 103 pwc = NULL;
97 104 }
98 105
99 106 ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof (gs->bytes) - gs->count);
100 107 (void) memcpy(gs->bytes + gs->count, s, ncopy);
101 108 ocount = gs->count;
102 109 gs->count += ncopy;
103 110 s = (char *)gs->bytes;
104 111 n = gs->count;
105 112
106 113 if (n == 0)
107 114 /* Incomplete multibyte sequence */
108 115 return ((size_t)-2);
109 116
110 117 /*
111 118 * Single byte: [00-7f]
112 119 * Two byte: [81-fe][40-7e,80-fe]
113 120 * Four byte: [81-fe][30-39][81-fe][30-39]
114 121 */
115 122 ch = (unsigned char)*s++;
116 123 if (ch <= 0x7f) {
117 124 len = 1;
118 125 wch = ch;
119 126 } else if (ch >= 0x81 && ch <= 0xfe) {
120 127 wch = ch;
121 128 if (n < 2)
122 129 return ((size_t)-2);
123 130 ch = (unsigned char)*s++;
124 131 if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) {
125 132 wch = (wch << 8) | ch;
126 133 len = 2;
127 134 } else if (ch >= 0x30 && ch <= 0x39) {
128 135 /*
129 136 * Strip high bit off the wide character we will
130 137 * eventually output so that it is positive when
131 138 * cast to wint_t on 32-bit twos-complement machines.
132 139 */
133 140 wch = ((wch & 0x7f) << 8) | ch;
134 141 if (n < 3)
135 142 return ((size_t)-2);
136 143 ch = (unsigned char)*s++;
137 144 if (ch < 0x81 || ch > 0xfe)
138 145 goto ilseq;
139 146 wch = (wch << 8) | ch;
140 147 if (n < 4)
141 148 return ((size_t)-2);
142 149 ch = (unsigned char)*s++;
143 150 if (ch < 0x30 || ch > 0x39)
144 151 goto ilseq;
145 152 wch = (wch << 8) | ch;
146 153 len = 4;
147 154 } else
148 155 goto ilseq;
149 156 } else
150 157 goto ilseq;
151 158
152 159 if (pwc != NULL)
153 160 *pwc = wch;
154 161 gs->count = 0;
155 162 return (wch == L'\0' ? 0 : len - ocount);
156 163 ilseq:
157 164 errno = EILSEQ;
158 165 return ((size_t)-1);
159 166 }
160 167
161 168 static size_t
162 169 _GB18030_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
163 170 mbstate_t *_RESTRICT_KYWD ps)
164 171 {
165 172 _GB18030State *gs;
166 173 size_t len;
167 174 int c;
168 175
169 176 gs = (_GB18030State *)ps;
170 177
171 178 if (gs->count != 0) {
172 179 errno = EINVAL;
173 180 return ((size_t)-1);
174 181 }
175 182
176 183 if (s == NULL)
177 184 /* Reset to initial shift state (no-op) */
178 185 return (1);
179 186 if ((wc & ~0x7fffffff) != 0)
180 187 goto ilseq;
181 188 if (wc & 0x7f000000) {
182 189 /* Replace high bit that mbrtowc() removed. */
183 190 wc |= 0x80000000;
184 191 c = (wc >> 24) & 0xff;
185 192 if (c < 0x81 || c > 0xfe)
186 193 goto ilseq;
187 194 *s++ = c;
188 195 c = (wc >> 16) & 0xff;
189 196 if (c < 0x30 || c > 0x39)
190 197 goto ilseq;
191 198 *s++ = c;
192 199 c = (wc >> 8) & 0xff;
193 200 if (c < 0x81 || c > 0xfe)
194 201 goto ilseq;
195 202 *s++ = c;
196 203 c = wc & 0xff;
197 204 if (c < 0x30 || c > 0x39)
198 205 goto ilseq;
199 206 *s++ = c;
200 207 len = 4;
201 208 } else if (wc & 0x00ff0000)
202 209 goto ilseq;
203 210 else if (wc & 0x0000ff00) {
204 211 c = (wc >> 8) & 0xff;
205 212 if (c < 0x81 || c > 0xfe)
206 213 goto ilseq;
207 214 *s++ = c;
208 215 c = wc & 0xff;
209 216 if (c < 0x40 || c == 0x7f || c == 0xff)
210 217 goto ilseq;
211 218 *s++ = c;
212 219 len = 2;
↓ open down ↓ |
135 lines elided |
↑ open up ↑ |
213 220 } else if (wc <= 0x7f) {
214 221 *s++ = wc;
215 222 len = 1;
216 223 } else
217 224 goto ilseq;
218 225
219 226 return (len);
220 227 ilseq:
221 228 errno = EILSEQ;
222 229 return ((size_t)-1);
230 +}
231 +
232 +static size_t
233 +_GB18030_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
234 + const char **_RESTRICT_KYWD src, size_t nms, size_t len,
235 + mbstate_t *_RESTRICT_KYWD ps)
236 +{
237 + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc));
238 +}
239 +
240 +static size_t
241 +_GB18030_wcsnrtombs(char *_RESTRICT_KYWD dst,
242 + const wchar_t **_RESTRICT_KYWD src, size_t nwc, size_t len,
243 + mbstate_t *_RESTRICT_KYWD ps)
244 +{
245 + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb));
223 246 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX