1 '\" te
2 .\" Copyright (c) 2007, Sun Microsystems, Inc., All Rights Reserved
3 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License.
4 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License.
5 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
6 .TH KICONV_OPEN 9F "Oct 16, 2007"
7 .SH NAME
8 kiconv_open \- code conversion descriptor allocation function
9 .SH SYNOPSIS
10 .LP
11 .nf
12 #include <sys/sunddi.h>
13
14
15
16 \fBkiconv_t\fR \fBkiconv_open\fR(\fBconst\fR \fBchar *\fR\fItocode\fR, \fBconst\fR \fBchar *\fR\fIfromcode\fR);
17 .fi
18
19 .SH INTERFACE LEVEL
20 .sp
21 .LP
22 Solaris DDI specific (Solaris DDI).
23 .SH PARAMETERS
24 .sp
25 .ne 2
26 .na
27 \fB\fItocode\fR\fR
28 .ad
29 .RS 12n
30 Points to a target codeset name string.
31 .RE
32
33 .sp
34 .ne 2
35 .na
36 \fB\fIfromcode\fR\fR
37 .ad
38 .RS 12n
39 Points to a source codeset name string.
40 .RE
41
42 .SH DESCRIPTION
43 .sp
44 .LP
45 The \fBkiconv_open()\fR function returns a code conversion descriptor that
46 describes a conversion from the codeset specified by \fIfromcode\fR to the
47 codeset specified by \fItocode\fR. For state-dependent encodings, the
48 conversion descriptor is in a codeset-dependent initial state (ready for
49 immediate use with the \fBkiconv()\fR function).
50 .sp
51 .LP
52 Supported code conversions are between \fBUTF-8\fR and the following:
53 .sp
54 .in +2
55 .nf
56 Name Description
57
58 Big5 Traditional Chinese Big5
59 Big5-HKSCS Traditional Chinese Big5-Hong Kong
60 Supplementary Character Set
61 CP720 DOS Arabic
62 CP737 DOS Greek
63 CP850 DOS Latin-1 (Western European)
64 CP852 DOS Latin-2 (Eastern European)
65 CP857 DOS Latin-5 (Turkish)
66 CP862 DOS Hebrew
67 CP866 DOS Cyrillic Russian
68 CP932 Japanese Shift JIS (Windows)
69 CP950-HKSCS Traditional Chinese HKSCS-2001 (Windows)
70 CP1250 Central Europe
71 CP1251 Cyrillic
72 CP1252 Western Europe
73 CP1253 Greek
74 CP1254 Turkish
75 CP1255 Hebrew
76 CP1256 Arabic
77 CP1257 Baltic
78 EUC-CN Simplified Chinese EUC
79 EUC-JP Japanese EUC
80 EUC-JP-MS Japanese EUC MS
81 EUC-KR Korean EUC
82 EUC-TW Traditional Chinese EUC
83 GB18030 Simplified Chinese GB18030
84 GBK Simplified Chinese GBK
85 ISO-8859-1 Latin-1 (Western European)
86 ISO-8859-2 Latin-2 (Eastern European)
87 ISO-8859-3 Latin-3 (Southern European)
88 ISO-8859-4 Latin-4 (Northern European)
89 ISO-8859-5 Cyrillic
90 ISO-8859-6 Arabic
91 ISO-8859-7 Greek
92 ISO-8859-8 Hebrew
93 ISO-8859-9 Latin-5 (Turkish)
94 ISO-8859-10 Latin-6 (Nordic)
95 ISO-8859-13 Latin-7 (Baltic)
96 ISO-8859-15 Latin-9 (Western European with euro sign)
97 KOI8-R Cyrillic
98 Shift_JIS Japanese Shift JIS (JIS)
99 TIS_620 Thai (a.k.a. ISO 8859-11)
100 Unified-Hangul Korean Unified Hangul
101
102 .fi
103 .in -2
104 .sp
105
106 .sp
107 .LP
108 \fBUTF-8\fR and the above names can be used as \fItocode\fR and \fIfromcode\fR
109 to specify the desired code conversion. The following aliases are also
110 supported as alternative names to be used:
111 .sp
112 .in +2
113 .nf
114 Aliases Original Name
115 720 CP720
116 737 CP737
117 850 CP850
118 852 CP852
119 857 CP857
120 862 CP862
121 866 CP866
122 932 CP932
123 936, CP936 GBK
124 949, CP949 Unified-Hangul
125 950, CP950 Big5
126 1250 CP1250
127 1251 CP1251
128 1252 CP1252
129 1253 CP1253
130 1254 CP1254
131 1255 CP1255
132 1256 CP1256
133 1257 CP1257
134 ISO-8859-11 TIS_620
135 PCK, SJIS Shift_JIS
136 .fi
137 .in -2
138 .sp
139
140 .sp
141 .LP
142 A conversion descriptor remains valid until it is closed by using
143 \fBkiconv_close()\fR.
144 .SH RETURN VALUES
145 .sp
146 .LP
147 Upon successful completion, \fBkiconv_open()\fR returns a code conversion
148 descriptor for use on subsequent calls to \fBkiconv()\fR. Otherwise, if the
149 conversion specified by \fIfromcode\fR and \fItocode\fR is not supported or the
150 code conversion descriptor cannot be allocated for any other reason,
151 \fBkiconv_open()\fR returns (\fBkiconv_t\fR)-1 to indicate the error.
152 .SH CONTEXT
153 .sp
154 .LP
155 \fBkiconv_open()\fR can be called from user context only.
156 .SH EXAMPLES
157 .LP
158 \fBExample 1 \fROpening a Code Conversion
159 .sp
160 .LP
161 The following example shows how to open a code conversion from
162 \fBISO-8859-15\fR to \fBUTF-8\fR:
163
164 .sp
165 .in +2
166 .nf
167 #include <sys/sunddi.h>
168
169 kiconv_t cd;
170
171 cd = kiconv_open("UTF-8", "ISO-8859-15");
172 if (cd == (kiconv_t)-1) {
173 /* Cannot open up the code conversion. */
174 return (-1);
175 }
176 .fi
177 .in -2
178
179 .SH ATTRIBUTES
180 .sp
181 .LP
182 See \fBattributes\fR(5) for descriptions of the following attributes:
183 .sp
184
185 .sp
186 .TS
187 box;
188 c | c
189 l | l .
190 ATTRIBUTE TYPE ATTRIBUTE VALUE
191 _
192 Interface Stability Committed
193 .TE
194
195 .SH SEE ALSO
196 .sp
197 .LP
198 \fBiconv\fR(3C), \fBiconv_close\fR(3C), \fBiconv_open\fR(3C),
199 \fBu8_strcmp\fR(3C), \fBu8_textprep_str\fR(3C), \fBu8_validate\fR(3C),
200 \fBuconv_u16tou32\fR(3C), \fBuconv_u16tou8\fR(3C), \fBuconv_u32tou16\fR(3C),
201 \fBuconv_u32tou8\fR(3C), \fBuconv_u8tou16\fR(3C), \fBuconv_u8tou32\fR(3C),
202 \fBattributes\fR(5), \fBkiconv\fR(9F), \fBkiconvstr\fR(9F),
203 \fBkiconv_close\fR(9F), \fBu8_strcmp\fR(9F), \fBu8_textprep_str\fR(9F),
204 \fBu8_validate\fR(9F), \fBuconv_u16tou32\fR(9F), \fBuconv_u16tou8\fR(9F),
205 \fBuconv_u32tou16\fR(9F), \fBuconv_u32tou8\fR(9F), \fBuconv_u8tou16\fR(9F),
206 \fBuconv_u8tou32\fR(9F)
207 .sp
208 .LP
209 The Unicode Standard
210 .sp
211 .LP
212 http://www.unicode.org/standard/standard.html
213 .SH NOTES
214 .sp
215 .LP
216 The code conversions are available between \fBUTF-8\fR and the above noted
217 \fIcodesets\fR. For example, to convert from \fBEUC-JP\fR to \fBShift_JIS\fR,
218 first convert \fBEUC-JP\fR to \fBUTF-8\fR and then convert \fBUTF-8\fR to
219 \fBShift_JIS\fR.
220 .sp
221 .LP
222 The code conversions supported are based on simple one-to-one mappings. There
223 is no special treatment or processing done during code conversions such as case
224 conversion, Unicode Normalization, or mapping between combining or conjoining
225 sequences of \fBUTF-8\fR and pre-composed characters in non-\fBUTF-8\fR
226 codesets.
227 .sp
228 .LP
229 All supported non-\fBUTF-8\fR codesets use pre-composed characters only.
230 However, \fBUTF-8\fR allows combining or conjoining characters too. For this
231 reason, using a form of Unicode Normalization on \fBUTF-8\fR text with
232 \fBu8_textprep_str()\fR before or after doing code conversions might be
233 necessary.