1 /*
2 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
3 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
4 * Copyright (c) 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Paul Borman at Krystal Technologies.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include "lint.h"
36 #include <errno.h>
37 #include <limits.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <sys/types.h>
42 #include <sys/euc.h>
43 #include "runetype.h"
44 #include "mblocal.h"
45
46 static size_t _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD,
47 const char *_RESTRICT_KYWD,
48 size_t, mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
49 static size_t _EUC_wcrtomb_impl(char *_RESTRICT_KYWD, wchar_t,
50 mbstate_t *_RESTRICT_KYWD, uint8_t, uint8_t, uint8_t, uint8_t);
51
52 static size_t _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD,
53 const char *_RESTRICT_KYWD,
54 size_t, mbstate_t *_RESTRICT_KYWD);
55 static size_t _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD,
56 const char *_RESTRICT_KYWD,
57 size_t, mbstate_t *_RESTRICT_KYWD);
58 static size_t _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD,
59 const char *_RESTRICT_KYWD,
60 size_t, mbstate_t *_RESTRICT_KYWD);
61 static size_t _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD,
62 const char *_RESTRICT_KYWD,
63 size_t, mbstate_t *_RESTRICT_KYWD);
64 static size_t _EUC_CN_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
65 mbstate_t *_RESTRICT_KYWD);
66 static size_t _EUC_JP_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
67 mbstate_t *_RESTRICT_KYWD);
68 static size_t _EUC_KR_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
69 mbstate_t *_RESTRICT_KYWD);
70 static size_t _EUC_TW_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
71 mbstate_t *_RESTRICT_KYWD);
72 static int _EUC_mbsinit(const mbstate_t *);
73
/*
 * Conversion state for the EUC routines in this file; the functions
 * below access it by casting the caller's mbstate_t pointer to this type.
 */
typedef struct {
	wchar_t ch;	/* bytes of a partially decoded character, packed */
	int set;	/* not referenced by the code visible in this file */
	int want;	/* bytes still needed; 0 means initial state */
} _EucState;
79
80 static int
81 _EUC_mbsinit(const mbstate_t *ps)
82 {
83
84 return (ps == NULL || ((const _EucState *)ps)->want == 0);
85 }
86
87 /*
88 * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
89 */
90 int
91 _EUC_CN_init(_RuneLocale *rl)
92 {
93 __mbrtowc = _EUC_CN_mbrtowc;
94 __wcrtomb = _EUC_CN_wcrtomb;
95 __mbsinit = _EUC_mbsinit;
96
97 _CurrentRuneLocale = rl;
98
99 __ctype[520] = 4;
100 charset_is_ascii = 0;
101 return (0);
102 }
103
104 static size_t
105 _EUC_CN_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
106 size_t n, mbstate_t *_RESTRICT_KYWD ps)
107 {
108 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
109 }
110
111 static size_t
112 _EUC_CN_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
113 mbstate_t *_RESTRICT_KYWD ps)
114 {
115 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
116 }
117
118 /*
119 * EUC-KR uses only CS0 and CS1.
120 */
121 int
122 _EUC_KR_init(_RuneLocale *rl)
123 {
124 __mbrtowc = _EUC_KR_mbrtowc;
125 __wcrtomb = _EUC_KR_wcrtomb;
126 __mbsinit = _EUC_mbsinit;
127
128 _CurrentRuneLocale = rl;
129
130 __ctype[520] = 2;
131 charset_is_ascii = 0;
132 return (0);
133 }
134
135 static size_t
136 _EUC_KR_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
137 size_t n, mbstate_t *_RESTRICT_KYWD ps)
138 {
139 return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
140 }
141
142 static size_t
143 _EUC_KR_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
144 mbstate_t *_RESTRICT_KYWD ps)
145 {
146 return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
147 }
148
149 /*
150 * EUC-JP uses CS0, CS1, CS2, and CS3.
151 */
152 int
153 _EUC_JP_init(_RuneLocale *rl)
154 {
155 __mbrtowc = _EUC_JP_mbrtowc;
156 __wcrtomb = _EUC_JP_wcrtomb;
157 __mbsinit = _EUC_mbsinit;
158
159 _CurrentRuneLocale = rl;
160
161 __ctype[520] = 3;
162 charset_is_ascii = 0;
163 return (0);
164 }
165
166 static size_t
167 _EUC_JP_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
168 size_t n, mbstate_t *_RESTRICT_KYWD ps)
169 {
170 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
171 }
172
173 static size_t
174 _EUC_JP_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
175 mbstate_t *_RESTRICT_KYWD ps)
176 {
177 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
178 }
179
180 /*
181 * EUC-TW uses CS0, CS1, and CS2.
182 */
183 int
184 _EUC_TW_init(_RuneLocale *rl)
185 {
186 __mbrtowc = _EUC_TW_mbrtowc;
187 __wcrtomb = _EUC_TW_wcrtomb;
188 __mbsinit = _EUC_mbsinit;
189
190 _CurrentRuneLocale = rl;
191
192 __ctype[520] = 4;
193 charset_is_ascii = 0;
194 return (0);
195 }
196
197 static size_t
198 _EUC_TW_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
199 size_t n, mbstate_t *_RESTRICT_KYWD ps)
200 {
201 return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
202 }
203
204 static size_t
205 _EUC_TW_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc,
206 mbstate_t *_RESTRICT_KYWD ps)
207 {
208 return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
209 }
210
211 /*
212 * Common EUC code.
213 */
214
215 static size_t
216 _EUC_mbrtowc_impl(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
217 size_t n, mbstate_t *_RESTRICT_KYWD ps,
218 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
219 {
220 _EucState *es;
221 int i, want;
222 wchar_t wc;
223 unsigned char ch;
224
225 es = (_EucState *)ps;
226
227 if (es->want < 0 || es->want > MB_CUR_MAX) {
228 errno = EINVAL;
229 return ((size_t)-1);
230 }
231
232 if (s == NULL) {
233 s = "";
234 n = 1;
235 pwc = NULL;
236 }
237
238 if (n == 0)
239 /* Incomplete multibyte sequence */
240 return ((size_t)-2);
241
242 if (es->want == 0) {
243 /* Fast path for plain ASCII (CS0) */
244 if (((ch = (unsigned char)*s) & 0x80) == 0) {
245 if (pwc != NULL)
246 *pwc = ch;
247 return (ch != '\0' ? 1 : 0);
248 }
249
250 if (ch >= 0xa1) {
251 /* CS1 */
252 want = 2;
253 } else if (ch == cs2) {
254 want = cs2width;
255 } else if (ch == cs3) {
256 want = cs3width;
257 } else {
258 errno = EILSEQ;
259 return ((size_t)-1);
260 }
261
262
263 es->want = want;
264 es->ch = 0;
265 } else {
266 want = es->want;
267 wc = es->ch;
268 }
269
270 for (i = 0; i < MIN(want, n); i++) {
271 wc <<= 8;
272 wc |= *s;
273 s++;
274 }
275 if (i < want) {
276 /* Incomplete multibyte sequence */
277 es->want = want - i;
278 es->ch = wc;
279 return ((size_t)-2);
280 }
281 if (pwc != NULL)
282 *pwc = wc;
283 es->want = 0;
284 return (wc == L'\0' ? 0 : want);
285 }
286
287 static size_t
288 _EUC_wcrtomb_impl(char *_RESTRICT_KYWD s, wchar_t wc,
289 mbstate_t *_RESTRICT_KYWD ps,
290 uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
291 {
292 _EucState *es;
293 int i, len;
294 wchar_t nm;
295
296 es = (_EucState *)ps;
297
298 if (es->want != 0) {
299 errno = EINVAL;
300 return ((size_t)-1);
301 }
302
303 if (s == NULL)
304 /* Reset to initial shift state (no-op) */
305 return (1);
306
307 if ((wc & ~0x7f) == 0) {
308 /* Fast path for plain ASCII (CS0) */
309 *s = (char)wc;
310 return (1);
311 }
312
313 /* Determine the "length" */
314 if ((unsigned)wc > 0xffffff) {
315 len = 4;
316 } else if ((unsigned)wc > 0xffff) {
317 len = 3;
318 } else if ((unsigned)wc > 0xff) {
319 len = 2;
320 } else {
321 len = 1;
322 }
323
324 if (len > MB_CUR_MAX) {
325 errno = EILSEQ;
326 return ((size_t)-1);
327 }
328
329 /* This first check excludes CS1, which is implicitly valid. */
330 if ((wc < 0xa100) || (wc > 0xffff)) {
331 /* Check for valid CS2 or CS3 */
332 nm = (wc >> ((len - 1) * 8));
333 if (nm == cs2) {
334 if (len != cs2width) {
335 errno = EILSEQ;
336 return ((size_t)-1);
337 }
338 } else if (nm == cs3) {
339 if (len != cs3width) {
340 errno = EILSEQ;
341 return ((size_t)-1);
342 }
343 } else {
344 errno = EILSEQ;
345 return ((size_t)-1);
346 }
347 }
348
349 /* Stash the bytes, least significant last */
350 for (i = len - 1; i >= 0; i--) {
351 s[i] = (wc & 0xff);
352 wc >>= 8;
353 }
354 return (len);
355 }