Print this page
2964 need POSIX 2008 locale object support
Reviewed by: Robert Mustacchi <rm@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/euc.c
+++ new/usr/src/lib/libc/port/locale/euc.c
1 1 /*
2 + * Copyright 2013 Garrett D'Amore <garrett@damore.org>
2 3 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
3 4 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
4 5 * Copyright (c) 1993
5 6 * The Regents of the University of California. All rights reserved.
6 7 *
7 8 * This code is derived from software contributed to Berkeley by
8 9 * Paul Borman at Krystal Technologies.
9 10 *
10 11 * Redistribution and use in source and binary forms, with or without
11 12 * modification, are permitted provided that the following conditions
12 13 * are met:
13 14 * 1. Redistributions of source code must retain the above copyright
14 15 * notice, this list of conditions and the following disclaimer.
15 16 * 2. Redistributions in binary form must reproduce the above copyright
16 17 * notice, this list of conditions and the following disclaimer in the
17 18 * documentation and/or other materials provided with the distribution.
18 19 * 4. Neither the name of the University nor the names of its contributors
19 20 * may be used to endorse or promote products derived from this software
20 21 * without specific prior written permission.
21 22 *
22 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 33 * SUCH DAMAGE.
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
33 34 */
34 35
35 36 #include "lint.h"
36 37 #include <errno.h>
37 38 #include <limits.h>
38 39 #include <stdlib.h>
39 40 #include <string.h>
40 41 #include <wchar.h>
41 42 #include <sys/types.h>
42 43 #include <sys/euc.h>
43 -#include "runetype.h"
44 44 #include "mblocal.h"
45 +#include "lctype.h"
45 46
46 47 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD,
47 48 const char *_RESTRICT_KYWD,
48 49 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
49 50 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t,
50 51 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
51 52
52 53 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD,
53 54 const char *_RESTRICT_KYWD,
54 55 size_t, mbstate_t *_RESTRICT_KYWD);
55 56 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD,
56 57 const char *_RESTRICT_KYWD,
57 58 size_t, mbstate_t *_RESTRICT_KYWD);
58 59 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD,
59 60 const char *_RESTRICT_KYWD,
60 61 size_t, mbstate_t *_RESTRICT_KYWD);
61 62 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD,
62 63 const char *_RESTRICT_KYWD,
63 64 size_t, mbstate_t *_RESTRICT_KYWD);
65 +
64 66 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
65 67 mbstate_t *_RESTRICT_KYWD);
66 68 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
67 69 mbstate_t *_RESTRICT_KYWD);
68 70 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
69 71 mbstate_t *_RESTRICT_KYWD);
70 72 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
71 73 mbstate_t *_RESTRICT_KYWD);
74 +
75 +static size_t _EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
76 + const char **_RESTRICT_KYWD, size_t, size_t,
77 + mbstate_t *_RESTRICT_KYWD);
78 +static size_t _EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
79 + const char **_RESTRICT_KYWD, size_t, size_t,
80 + mbstate_t *_RESTRICT_KYWD);
81 +static size_t _EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
82 + const char **_RESTRICT_KYWD, size_t, size_t,
83 + mbstate_t *_RESTRICT_KYWD);
84 +static size_t _EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD,
85 + const char **_RESTRICT_KYWD, size_t, size_t,
86 + mbstate_t *_RESTRICT_KYWD);
87 +
88 +static size_t _EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD,
89 + const wchar_t **_RESTRICT_KYWD, size_t, size_t,
90 + mbstate_t *_RESTRICT_KYWD);
91 +static size_t _EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD,
92 + const wchar_t **_RESTRICT_KYWD, size_t, size_t,
93 + mbstate_t *_RESTRICT_KYWD);
94 +static size_t _EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD,
95 + const wchar_t **_RESTRICT_KYWD, size_t, size_t,
96 + mbstate_t *_RESTRICT_KYWD);
97 +static size_t _EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD,
98 + const wchar_t **_RESTRICT_KYWD, size_t, size_t,
99 + mbstate_t *_RESTRICT_KYWD);
100 +
72 101 static int _EUC_mbsinit(const mbstate_t *);
73 102
/*
 * Conversion state for the EUC routines.  The *_impl functions and
 * _EUC_mbsinit obtain it by casting the caller's mbstate_t pointer.
 */
74 103 typedef struct {
75 104 wchar_t ch;	/* bytes gathered so far for a partially read character */
76 105 int set;	/* not referenced by any code visible in this diff */
77 106 int want;	/* bytes still needed to finish the pending character; 0 = none */
78 107 } _EucState;
79 108
/*
 * Report whether ps describes the initial conversion state: true when ps is
 * NULL or when no bytes of a partial character are outstanding (want == 0).
 * Review note: the definition drops the `static` keyword in the new version.
 */
80 -static int
109 +int
81 110 _EUC_mbsinit(const mbstate_t *ps)
82 111 {
83 112
84 113 return (ps == NULL || ((const _EucState *)ps)->want == 0);
85 114 }
86 115
87 116 /*
88 117 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
89 118 */
/*
 * Install the EUC-CN conversion routines.
 * Review note: the new version stores the hooks and limits in the lc_ctype
 * supplied by the caller instead of assigning the __mbrtowc/__wcrtomb/
 * __mbsinit globals, __ctype[520] and charset_is_ascii, and it no longer
 * returns a status value.
 */
90 -int
91 -_EUC_CN_init(_RuneLocale *rl)
119 +void
120 +_EUC_CN_init(struct lc_ctype *lct)
92 121 {
93 - __mbrtowc = _EUC_CN_mbrtowc;
94 - __wcrtomb = _EUC_CN_wcrtomb;
95 - __mbsinit = _EUC_mbsinit;
122 + lct->lc_mbrtowc = _EUC_CN_mbrtowc;
123 + lct->lc_wcrtomb = _EUC_CN_wcrtomb;
124 + lct->lc_mbsnrtowcs = _EUC_CN_mbsnrtowcs;
125 + lct->lc_wcsnrtombs = _EUC_CN_wcsnrtombs;
126 + lct->lc_mbsinit = _EUC_mbsinit;
96 127
97 - _CurrentRuneLocale = rl;
98 -
99 - __ctype[520] = 4;
100 - charset_is_ascii = 0;
101 - return (0);
/* 4 matches the CS2 width passed to the *_impl helpers by the CN wrappers. */
128 + lct->lc_max_mblen = 4;
129 + lct->lc_is_ascii = 0;
102 130 }
103 131
/*
 * EUC-CN mbrtowc hook: forwards to _EUC_mbrtowc_impl with SS2 as the CS2
 * lead byte (4 bytes in total, lead byte included) and no CS3 (0, 0).
 */
104 132 static size_t
105 133 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
106 134 size_t n, mbstate_t *_RESTRICT_KYWD ps)
107 135 {
108 136 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
109 137 }
110 138
/*
 * EUC-CN mbsnrtowcs hook (new in this change): passes its arguments to
 * __mbsnrtowcs_std, which is defined outside this file, together with
 * _EUC_CN_mbrtowc -- presumably used there as the per-character converter.
 */
111 139 static size_t
140 +_EUC_CN_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
141 + const char **_RESTRICT_KYWD src,
142 + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
143 +{
144 + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
145 +}
146 +
/*
 * EUC-CN wcrtomb hook: forwards to _EUC_wcrtomb_impl with SS2 as the CS2
 * lead byte (4 bytes in total) and no CS3 (0, 0).
 */
147 +static size_t
112 148 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
113 149 mbstate_t *_RESTRICT_KYWD ps)
114 150 {
115 151 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
116 152 }
117 153
/*
 * EUC-CN wcsnrtombs hook (new in this change): passes its arguments to
 * __wcsnrtombs_std, which is defined outside this file, together with
 * _EUC_CN_wcrtomb -- presumably used there as the per-character converter.
 */
154 +static size_t
155 +_EUC_CN_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
156 + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
157 +{
158 + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
159 +}
160 +
118 161 /*
119 162 * EUC-KR uses only CS0 and CS1.
120 163 */
/*
 * Install the EUC-KR conversion routines.
 * Review note: the new version stores the hooks and limits in the lc_ctype
 * supplied by the caller instead of assigning the __mbrtowc/__wcrtomb/
 * __mbsinit globals, __ctype[520] and charset_is_ascii, and it no longer
 * returns a status value.
 */
121 -int
122 -_EUC_KR_init(_RuneLocale *rl)
164 +void
165 +_EUC_KR_init(struct lc_ctype *lct)
123 166 {
124 - __mbrtowc = _EUC_KR_mbrtowc;
125 - __wcrtomb = _EUC_KR_wcrtomb;
126 - __mbsinit = _EUC_mbsinit;
167 + lct->lc_mbrtowc = _EUC_KR_mbrtowc;
168 + lct->lc_wcrtomb = _EUC_KR_wcrtomb;
169 + lct->lc_mbsnrtowcs = _EUC_KR_mbsnrtowcs;
170 + lct->lc_wcsnrtombs = _EUC_KR_wcsnrtombs;
171 + lct->lc_mbsinit = _EUC_mbsinit;
127 172
128 - _CurrentRuneLocale = rl;
129 -
130 - __ctype[520] = 2;
131 - charset_is_ascii = 0;
132 - return (0);
/* 2 matches the CS1 length used by _EUC_mbrtowc_impl; KR passes no CS2/CS3. */
173 + lct->lc_max_mblen = 2;
174 + lct->lc_is_ascii = 0;
133 175 }
134 176
/*
 * EUC-KR mbrtowc hook: forwards to _EUC_mbrtowc_impl with neither CS2 nor
 * CS3 configured (all zeros), leaving only the ASCII and CS1 paths.
 */
135 177 static size_t
136 178 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
137 179 size_t n, mbstate_t *_RESTRICT_KYWD ps)
138 180 {
139 181 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
140 182 }
141 183
/*
 * EUC-KR mbsnrtowcs hook (new in this change): passes its arguments to
 * __mbsnrtowcs_std, which is defined outside this file, together with
 * _EUC_KR_mbrtowc -- presumably used there as the per-character converter.
 */
142 184 static size_t
185 +_EUC_KR_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
186 + const char **_RESTRICT_KYWD src,
187 + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
188 +{
189 + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
190 +}
191 +
/*
 * EUC-KR wcrtomb hook: forwards to _EUC_wcrtomb_impl with neither CS2 nor
 * CS3 configured (all zeros).
 */
192 +static size_t
143 193 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
144 - mbstate_t *_RESTRICT_KYWD ps)
194 + mbstate_t *_RESTRICT_KYWD ps)
145 195 {
146 196 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
147 197 }
148 198
/*
 * EUC-KR wcsnrtombs hook (new in this change): passes its arguments to
 * __wcsnrtombs_std, which is defined outside this file, together with
 * _EUC_KR_wcrtomb -- presumably used there as the per-character converter.
 */
199 +static size_t
200 +_EUC_KR_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
201 + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
202 +{
203 + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
204 +}
205 +
149 206 /*
150 207 * EUC-JP uses CS0, CS1, CS2, and CS3.
151 208 */
/*
 * Install the EUC-JP conversion routines.
 * Review note: the new version stores the hooks and limits in the lc_ctype
 * supplied by the caller instead of assigning the __mbrtowc/__wcrtomb/
 * __mbsinit globals, __ctype[520] and charset_is_ascii, and it no longer
 * returns a status value.
 */
152 -int
153 -_EUC_JP_init(_RuneLocale *rl)
209 +void
210 +_EUC_JP_init(struct lc_ctype *lct)
154 211 {
155 - __mbrtowc = _EUC_JP_mbrtowc;
156 - __wcrtomb = _EUC_JP_wcrtomb;
157 - __mbsinit = _EUC_mbsinit;
212 + lct->lc_mbrtowc = _EUC_JP_mbrtowc;
213 + lct->lc_wcrtomb = _EUC_JP_wcrtomb;
214 + lct->lc_mbsnrtowcs = _EUC_JP_mbsnrtowcs;
215 + lct->lc_wcsnrtombs = _EUC_JP_wcsnrtombs;
216 + lct->lc_mbsinit = _EUC_mbsinit;
158 217
159 - _CurrentRuneLocale = rl;
160 -
161 - __ctype[520] = 3;
162 - charset_is_ascii = 0;
163 - return (0);
/* 3 matches the CS3 width passed to the *_impl helpers by the JP wrappers. */
218 + lct->lc_max_mblen = 3;
219 + lct->lc_is_ascii = 0;
164 220 }
165 221
/*
 * EUC-JP mbrtowc hook: forwards to _EUC_mbrtowc_impl with SS2 introducing
 * 2-byte CS2 sequences and SS3 introducing 3-byte CS3 sequences (widths
 * include the lead byte).
 */
166 222 static size_t
167 223 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
168 224 size_t n, mbstate_t *_RESTRICT_KYWD ps)
169 225 {
170 226 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
171 227 }
172 228
/*
 * EUC-JP mbsnrtowcs hook (new in this change): passes its arguments to
 * __mbsnrtowcs_std, which is defined outside this file, together with
 * _EUC_JP_mbrtowc -- presumably used there as the per-character converter.
 */
173 229 static size_t
230 +_EUC_JP_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
231 + const char **_RESTRICT_KYWD src,
232 + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
233 +{
234 + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
235 +}
236 +
/*
 * EUC-JP wcrtomb hook: forwards to _EUC_wcrtomb_impl with SS2 for 2-byte
 * CS2 values and SS3 for 3-byte CS3 values.
 */
237 +static size_t
174 238 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
175 239 mbstate_t *_RESTRICT_KYWD ps)
176 240 {
177 241 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
178 242 }
179 243
/*
 * EUC-JP wcsnrtombs hook (new in this change): passes its arguments to
 * __wcsnrtombs_std, which is defined outside this file, together with
 * _EUC_JP_wcrtomb -- presumably used there as the per-character converter.
 */
244 +static size_t
245 +_EUC_JP_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
246 + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
247 +{
248 + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
249 +}
250 +
180 251 /*
181 252 * EUC-TW uses CS0, CS1, and CS2.
182 253 */
/*
 * Install the EUC-TW conversion routines.
 * Review note: the new version stores the hooks and limits in the lc_ctype
 * supplied by the caller instead of assigning the __mbrtowc/__wcrtomb/
 * __mbsinit globals, __ctype[520] and charset_is_ascii, and it no longer
 * returns a status value.
 */
183 -int
184 -_EUC_TW_init(_RuneLocale *rl)
254 +void
255 +_EUC_TW_init(struct lc_ctype *lct)
185 256 {
186 - __mbrtowc = _EUC_TW_mbrtowc;
187 - __wcrtomb = _EUC_TW_wcrtomb;
188 - __mbsinit = _EUC_mbsinit;
257 + lct->lc_mbrtowc = _EUC_TW_mbrtowc;
258 + lct->lc_wcrtomb = _EUC_TW_wcrtomb;
259 + lct->lc_mbsnrtowcs = _EUC_TW_mbsnrtowcs;
260 + lct->lc_wcsnrtombs = _EUC_TW_wcsnrtombs;
261 + lct->lc_mbsinit = _EUC_mbsinit;
189 262
190 - _CurrentRuneLocale = rl;
191 -
192 - __ctype[520] = 4;
193 - charset_is_ascii = 0;
194 - return (0);
/* 4 matches the CS2 width passed to the *_impl helpers by the TW wrappers. */
263 + lct->lc_max_mblen = 4;
264 + lct->lc_is_ascii = 0;
195 265 }
196 266
/*
 * EUC-TW mbrtowc hook: forwards to _EUC_mbrtowc_impl with SS2 as the CS2
 * lead byte (4 bytes in total, lead byte included) and no CS3 (0, 0).
 * Review note: the only change here is the parameter-line whitespace.
 */
197 267 static size_t
198 268 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
199 - size_t n, mbstate_t *_RESTRICT_KYWD ps)
269 + size_t n, mbstate_t *_RESTRICT_KYWD ps)
200 270 {
201 271 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
202 272 }
203 273
/*
 * EUC-TW mbsnrtowcs hook (new in this change): passes its arguments to
 * __mbsnrtowcs_std, which is defined outside this file, together with
 * _EUC_TW_mbrtowc -- presumably used there as the per-character converter.
 */
204 274 static size_t
275 +_EUC_TW_mbsnrtowcs(wchar_t *_RESTRICT_KYWD dst,
276 + const char **_RESTRICT_KYWD src,
277 + size_t nms, size_t len, mbstate_t *_RESTRICT_KYWD ps)
278 +{
279 + return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
280 +}
281 +
/*
 * EUC-TW wcrtomb hook: forwards to _EUC_wcrtomb_impl with SS2 as the CS2
 * lead byte (4 bytes in total) and no CS3 (0, 0).
 */
282 +static size_t
205 283 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
206 - mbstate_t *_RESTRICT_KYWD ps)
284 + mbstate_t *_RESTRICT_KYWD ps)
207 285 {
208 286 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
209 287 }
210 288
/*
 * EUC-TW wcsnrtombs hook (new in this change): passes its arguments to
 * __wcsnrtombs_std, which is defined outside this file, together with
 * _EUC_TW_wcrtomb -- presumably used there as the per-character converter.
 */
289 +static size_t
290 +_EUC_TW_wcsnrtombs(char *_RESTRICT_KYWD dst, const wchar_t **_RESTRICT_KYWD src,
291 + size_t nwc, size_t len, mbstate_t *_RESTRICT_KYWD ps)
292 +{
293 + return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
294 +}
295 +
211 296 /*
212 297 * Common EUC code.
213 298 */
214 299
/*
 * Shared multibyte-to-wide conversion for all EUC variants.
 *
 * pwc       - receives the converted character; may be NULL
 * s, n      - input bytes and how many of them may be examined; a NULL s is
 *             treated as the empty string with pwc ignored
 * ps        - conversion state, used as an _EucState; dereferenced
 *             unconditionally, so it must not be NULL here
 * cs2, cs3  - lead bytes that introduce CS2 / CS3 sequences
 * cs2width, cs3width - total sequence length in bytes, lead byte included
 *
 * The wide character is built by packing the sequence's bytes, most
 * significant first.  Returns (size_t)-1 with errno EINVAL for a corrupt
 * state or EILSEQ for an unrecognised lead byte, (size_t)-2 when the input
 * ends before the character is complete, 0 for a NUL character, and
 * otherwise the number of bytes consumed by this call.
 *
 * Review note: only the continuation-line whitespace of the signature
 * changes in this diff.
 */
215 300 static size_t
216 301 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
217 - size_t n, mbstate_t *_RESTRICT_KYWD ps,
218 - uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
302 + size_t n, mbstate_t *_RESTRICT_KYWD ps,
303 + uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
219 304 {
220 305 _EucState *es;
221 306 int i, want;
222 307 wchar_t wc;
223 308 unsigned char ch;
224 309
225 310 es = (_EucState *)ps;
226 311
/* A pending count outside 0..MB_CUR_MAX means the state is not one of ours. */
227 312 if (es->want < 0 || es->want > MB_CUR_MAX) {
228 313 errno = EINVAL;
229 314 return ((size_t)-1);
230 315 }
231 316
232 317 if (s == NULL) {
233 318 s = "";
234 319 n = 1;
235 320 pwc = NULL;
236 321 }
237 322
238 323 if (n == 0)
239 324 /* Incomplete multibyte sequence */
240 325 return ((size_t)-2);
241 326
242 327 if (es->want == 0) {
243 328 /* Fast path for plain ASCII (CS0) */
244 329 if (((ch = (unsigned char)*s) & 0x80) == 0) {
245 330 if (pwc != NULL)
246 331 *pwc = ch;
247 332 return (ch != '\0' ? 1 : 0);
248 333 }
249 334
/*
 * ch has its high bit set from here on, so a cs2/cs3 argument of 0 can never
 * match and effectively disables that code set.
 */
250 335 if (ch >= 0xa1) {
251 336 /* CS1 */
252 337 want = 2;
253 338 } else if (ch == cs2) {
254 339 want = cs2width;
255 340 } else if (ch == cs3) {
256 341 want = cs3width;
257 342 } else {
258 343 errno = EILSEQ;
259 344 return ((size_t)-1);
260 345 }
261 346
262 347
/*
 * NOTE(review): this branch resets es->ch but never assigns the local `wc`,
 * which the loop below shifts and ORs into.  `wc` is therefore read while
 * uninitialised when a fresh character is started; it looks like `wc = 0`
 * is missing here.
 */
263 348 es->want = want;
264 349 es->ch = 0;
265 350 } else {
/* Resuming: pick up the outstanding byte count and the partial value. */
266 351 want = es->want;
267 352 wc = es->ch;
268 353 }
269 354
/* Consume as many of the outstanding bytes as the caller made available. */
270 355 for (i = 0; i < MIN(want, n); i++) {
271 356 wc <<= 8;
/*
 * NOTE(review): *s is a plain char.  Where char is signed, bytes >= 0x80
 * sign-extend before the OR and set every higher bit of wc; a cast to
 * unsigned char looks intended -- confirm against the target ABI.
 */
272 357 wc |= *s;
273 358 s++;
274 359 }
275 360 if (i < want) {
276 361 /* Incomplete multibyte sequence */
277 362 es->want = want - i;
278 363 es->ch = wc;
279 364 return ((size_t)-2);
280 365 }
281 366 if (pwc != NULL)
282 367 *pwc = wc;
283 368 es->want = 0;
284 369 return (wc == L'\0' ? 0 : want);
285 370 }
286 371
/*
 * Shared wide-to-multibyte conversion for all EUC variants.
 *
 * s         - output buffer; NULL requests a state reset, which returns 1
 *             without writing anything
 * wc        - wide character whose bytes, most significant first, are the
 *             multibyte sequence
 * ps        - conversion state, used as an _EucState; dereferenced
 *             unconditionally, so it must not be NULL here
 * cs2, cs3  - lead bytes that introduce CS2 / CS3 sequences
 * cs2width, cs3width - total sequence length in bytes, lead byte included
 *
 * Returns the number of bytes stored, or (size_t)-1 with errno EINVAL when
 * the state holds a partial character, or EILSEQ when wc cannot be encoded.
 */
287 372 static size_t
288 373 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc,
289 374 mbstate_t *_RESTRICT_KYWD ps,
290 375 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
291 376 {
292 377 _EucState *es;
293 378 int i, len;
294 379 wchar_t nm;
295 380
296 381 es = (_EucState *)ps;
297 382
/* A half-read input character in the same state object is an error. */
298 383 if (es->want != 0) {
299 384 errno = EINVAL;
300 385 return ((size_t)-1);
301 386 }
302 387
303 388 if (s == NULL)
304 389 /* Reset to initial shift state (no-op) */
305 390 return (1);
306 391
307 392 if ((wc & ~0x7f) == 0) {
308 393 /* Fast path for plain ASCII (CS0) */
309 394 *s = (char)wc;
310 395 return (1);
311 396 }
312 397
/* The byte count is implied by the position of the highest non-zero byte. */
313 398 /* Determine the "length" */
314 399 if ((unsigned)wc > 0xffffff) {
315 400 len = 4;
316 401 } else if ((unsigned)wc > 0xffff) {
317 402 len = 3;
318 403 } else if ((unsigned)wc > 0xff) {
319 404 len = 2;
320 405 } else {
321 406 len = 1;
322 407 }
323 408
324 409 if (len > MB_CUR_MAX) {
325 410 errno = EILSEQ;
326 411 return ((size_t)-1);
327 412 }
328 413
329 414 /* This first check excludes CS1, which is implicitly valid. */
330 415 if ((wc < 0xa100) || (wc > 0xffff)) {
331 416 /* Check for valid CS2 or CS3 */
/* nm is the leading byte; it must be cs2 or cs3 with the matching width. */
332 417 nm = (wc >> ((len - 1) * 8));
333 418 if (nm == cs2) {
334 419 if (len != cs2width) {
335 420 errno = EILSEQ;
336 421 return ((size_t)-1);
337 422 }
338 423 } else if (nm == cs3) {
339 424 if (len != cs3width) {
340 425 errno = EILSEQ;
341 426 return ((size_t)-1);
342 427 }
343 428 } else {
344 429 errno = EILSEQ;
345 430 return ((size_t)-1);
346 431 }
347 432 }
348 433
349 434 /* Stash the bytes, least significant last */
350 435 for (i = len - 1; i >= 0; i--) {
351 436 s[i] = (wc & 0xff);
352 437 wc >>= 8;
353 438 }
354 439 return (len);
355 440 }
↓ open down ↓ |
127 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX