illumos-gate New usr/src/man/man9f/kiconv

   1 '\" te
   2 .\" Copyright (c) 2007, Sun Microsystems, Inc., All Rights Reserved
   3 .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License.
   4 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.  See the License for the specific language governing permissions and limitations under the License.
   5 .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
   6 .TH KICONV_OPEN 9F "Nov 5, 2013"
   7 .SH NAME
   8 kiconv_open \- code conversion descriptor allocation function
   9 .SH SYNOPSIS
  10 .LP
  11 .nf
  12 #include <sys/sunddi.h>
  13 
  14 
  15 
  16 \fBkiconv_t\fR \fBkiconv_open\fR(\fBconst\fR \fBchar *\fR\fItocode\fR, \fBconst\fR \fBchar *\fR\fIfromcode\fR);
  17 .fi
  18 
  19 .SH INTERFACE LEVEL
  20 .sp
  21 .LP
  22 Solaris DDI specific (Solaris DDI).
  23 .SH PARAMETERS
  24 .sp
  25 .ne 2
  26 .na
  27 \fB\fItocode\fR\fR
  28 .ad
  29 .RS 12n
  30 Points to a target codeset name string.
  31 .RE
  32 
  33 .sp
  34 .ne 2
  35 .na
  36 \fB\fIfromcode\fR\fR
  37 .ad
  38 .RS 12n
  39 Points to a source codeset name string.
  40 .RE
  41 
  42 .SH DESCRIPTION
  43 .sp
  44 .LP
  45 The \fBkiconv_open()\fR function returns a code conversion descriptor that
  46 describes a conversion from the codeset specified by \fIfromcode\fR to the
  47 codeset specified by \fItocode\fR. For state-dependent encodings, the
  48 conversion descriptor is in a codeset-dependent initial state (ready for
  49 immediate use with the \fBkiconv()\fR function).
  50 .sp
  51 .LP
  52 Supported code conversions are between \fBUTF-8\fR and the following:
  53 .sp
  54 .in +2
  55 .nf
  56 Name                    Description
  57 
  58  Big5                    Traditional Chinese Big5
  59  Big5-HKSCS              Traditional Chinese Big5-Hong Kong
  60                          Supplementary Character Set
  61  CP720                   DOS Arabic
  62  CP737                   DOS Greek
  63  CP850                   DOS Latin-1 (Western European)
  64  CP852                   DOS Latin-2 (Eastern European)
  65  CP857                   DOS Latin-5 (Turkish)
  66  CP862                   DOS Hebrew
  67  CP866                   DOS Cyrillic Russian
  68  CP932                   Japanese Shift JIS (Windows)
  69  CP950-HKSCS             Traditional Chinese HKSCS-2001 (Windows)
  70  CP1250                  Central Europe
  71  CP1251                  Cyrillic
  72  CP1252                  Western Europe
  73  CP1253                  Greek
  74  CP1254                  Turkish
  75  CP1255                  Hebrew
  76  CP1256                  Arabic
  77  CP1257                  Baltic
  78  EUC-CN                  Simplified Chinese EUC
  79  EUC-JP                  Japanese EUC
  80  EUC-JP-MS               Japanese EUC MS
  81  EUC-KR                  Korean EUC
  82  EUC-TW                  Traditional Chinese EUC
  83  GB18030                 Simplified Chinese GB18030
  84  GBK                     Simplified Chinese GBK
  85  ISO-8859-1              Latin-1 (Western European)
  86  ISO-8859-2              Latin-2 (Eastern European)
  87  ISO-8859-3              Latin-3 (Southern European)
  88  ISO-8859-4              Latin-4 (Northern European)
  89  ISO-8859-5              Cyrillic
  90  ISO-8859-6              Arabic
  91  ISO-8859-7              Greek
  92  ISO-8859-8              Hebrew
  93  ISO-8859-9              Latin-5 (Turkish)
  94  ISO-8859-10             Latin-6 (Nordic)
  95  ISO-8859-13             Latin-7 (Baltic)
  96  ISO-8859-15             Latin-9 (Western European with euro sign)
  97  KOI8-R                  Cyrillic
  98  Shift_JIS               Japanese Shift JIS (JIS)
  99  TIS_620                 Thai (a.k.a. ISO 8859-11)
 100  Unified-Hangul          Korean Unified Hangul
 101 
 102 .fi
 103 .in -2
 104 .sp
 105 
 106 .sp
 107 .LP
 108 \fBUTF-8\fR and the above names can be used as \fItocode\fR and \fIfromcode\fR
 109 to specify the desired code conversion. The following aliases are also
 110 supported as alternative names:
 111 .sp
 112 .in +2
 113 .nf
 114 Aliases                 Original Name
 115   720                     CP720
 116   737                     CP737
 117   850                     CP850
 118   852                     CP852
 119   857                     CP857
 120   862                     CP862
 121   866                     CP866
 122   932                     CP932
 123   936, CP936              GBK
 124   949, CP949              Unified-Hangul
 125   950, CP950              Big5
 126   1250                    CP1250
 127   1251                    CP1251
 128   1252                    CP1252
 129   1253                    CP1253
 130   1254                    CP1254
 131   1255                    CP1255
 132   1256                    CP1256
 133   1257                    CP1257
 134   ISO-8859-11             TIS_620
 135   PCK, SJIS               Shift_JIS
 136 .fi
 137 .in -2
 138 .sp
 139 
 140 .sp
 141 .LP
 142 A conversion descriptor remains valid until it is closed by using
 143 \fBkiconv_close()\fR.
 144 .SH RETURN VALUES
 145 .sp
 146 .LP
 147 Upon successful completion, \fBkiconv_open()\fR returns a code conversion
 148 descriptor for use on subsequent calls to \fBkiconv()\fR. Otherwise, if the
 149 conversion specified by \fIfromcode\fR and \fItocode\fR is not supported or
 150 the code conversion descriptor cannot be allocated for any other reason,
 151 \fBkiconv_open()\fR returns (\fBkiconv_t\fR)-1 to indicate the error.
 152 .SH CONTEXT
 153 .sp
 154 .LP
 155 \fBkiconv_open()\fR can be called from user context only.
 156 .SH EXAMPLES
 157 .LP
 158 \fBExample 1 \fROpening a Code Conversion
 159 .sp
 160 .LP
 161 The following example shows how to open a code conversion from
 162 \fBISO-8859-15\fR to \fBUTF-8\fR:
 163 
 164 .sp
 165 .in +2
 166 .nf
 167 #include <sys/sunddi.h>
 168 
 169 kiconv_t cd;
 170 
 171 cd = kiconv_open("UTF-8", "ISO-8859-15");
 172 if (cd == (kiconv_t)-1) {
 173          /* Cannot open up the code conversion. */
 174          return (-1);
 175 }
 176 .fi
 177 .in -2
 178 
 179 .SH ATTRIBUTES
 180 .sp
 181 .LP
 182 See \fBattributes\fR(5) for descriptions of the following attributes:
 183 .sp
 184 
 185 .sp
 186 .TS
 187 box;
 188 c | c
 189 l | l .
 190 ATTRIBUTE TYPE  ATTRIBUTE VALUE
 191 _
 192 Interface Stability     Committed
 193 .TE
 194 
 195 .SH SEE ALSO
 196 .sp
 197 .LP
 198 \fBiconv\fR(3C), \fBiconv_close\fR(3C), \fBiconv_open\fR(3C),
 199 \fBu8_strcmp\fR(3C), \fBu8_textprep_str\fR(3C), \fBu8_validate\fR(3C),
 200 \fBuconv_u16tou32\fR(3C), \fBuconv_u16tou8\fR(3C), \fBuconv_u32tou16\fR(3C),
 201 \fBuconv_u32tou8\fR(3C), \fBuconv_u8tou16\fR(3C), \fBuconv_u8tou32\fR(3C),
 202 \fBattributes\fR(5), \fBkiconv\fR(9F), \fBkiconvstr\fR(9F),
 203 \fBkiconv_close\fR(9F), \fBu8_strcmp\fR(9F), \fBu8_textprep_str\fR(9F),
 204 \fBu8_validate\fR(9F), \fBuconv_u16tou32\fR(9F), \fBuconv_u16tou8\fR(9F),
 205 \fBuconv_u32tou16\fR(9F), \fBuconv_u32tou8\fR(9F), \fBuconv_u8tou16\fR(9F),
 206 \fBuconv_u8tou32\fR(9F)
 207 .sp
 208 .LP
 209 The Unicode Standard
 210 .sp
 211 .LP
 212 http://www.unicode.org/standard/standard.html
 213 .SH NOTES
 214 .sp
 215 .LP
 216 The code conversions are available between \fBUTF-8\fR and the above noted
 217 \fIcodesets\fR. For example, to convert from \fBEUC-JP\fR to \fBShift_JIS\fR,
 218 first convert \fBEUC-JP\fR to \fBUTF-8\fR and then convert \fBUTF-8\fR to
 219 \fBShift_JIS\fR.
 220 .sp
 221 .LP
 222 The code conversions supported are based on simple one-to-one mappings. There
 223 is no special treatment or processing done during code conversions such as case
 224 conversion, Unicode Normalization, or mapping between combining or conjoining
 225 sequences of \fBUTF-8\fR and pre-composed characters in non-\fBUTF-8\fR
 226 codesets.
 227 .sp
 228 .LP
 229 All supported non-\fBUTF-8\fR codesets use pre-composed characters only.
 230 However, \fBUTF-8\fR allows combining or conjoining characters too. For this
 231 reason, using a form of Unicode Normalization on \fBUTF-8\fR text with
 232 \fBu8_textprep_str()\fR before or after doing code conversions might be
 233 necessary.