1 /*
   2  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
   3  * Copyright 2010 Nexenta Systmes, Inc.  All rights reserved.
   4  * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
   5  *              at Electronni Visti IA, Kiev, Ukraine.
   6  *                      All rights reserved.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in the
  15  *    documentation and/or other materials provided with the distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29 
  30 #ifndef _COLLATE_H_
  31 #define _COLLATE_H_
  32 
  33 #include <sys/types.h>
  34 #include <limits.h>
  35 #include <locale.h>
  36 #include "localeimpl.h"
  37 
  38 #define COLLATE_STR_LEN         24              /* should be 64-bit multiple */
  39 #define COLLATE_VERSION         "IllumosCollate2\n"
  40 
  41 #define COLLATE_MAX_PRIORITY    (0x7fffffff)    /* max signed value */
  42 #define COLLATE_SUBST_PRIORITY  (0x40000000)    /* bit indicates subst table */
  43 
  44 #define DIRECTIVE_UNDEF         0x00
  45 #define DIRECTIVE_FORWARD       0x01
  46 #define DIRECTIVE_BACKWARD      0x02
  47 #define DIRECTIVE_POSITION      0x04
  48 #define DIRECTIVE_UNDEFINED     0x08    /* special last weight for UNDEFINED */
  49 
  50 #define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
  51 
  52 /*
  53  * The collate file format is as follows:
  54  *
  55  * char         version[COLLATE_STR_LEN];       // must be COLLATE_VERSION
  56  * collate_info_t       info;                   // see below, includes padding
 * collate_char_t       char_data[256];         // 8 bit char values
 * collate_subst_t      subst[*];               // 0 or more substitutions
 * collate_chain_t      chains[*];              // 0 or more chains
 * collate_large_t      large[*];               // extended char priorities
  61  *
 * Note that all structures must be 32-bit aligned, as each structure
 * contains 32-bit member fields.  The entire file is mmap'd, so it's
 * critical that alignment be observed.  It is not generally safe to
 * use any 64-bit values in the structures.
  66  */
  67 
  68 typedef struct collate_info {
  69         uint8_t directive_count;
  70         uint8_t directive[COLL_WEIGHTS_MAX];
  71         int32_t pri_count[COLL_WEIGHTS_MAX];
  72         int32_t flags;
  73         int32_t chain_count;
  74         int32_t large_count;
  75         int32_t subst_count[COLL_WEIGHTS_MAX];
  76         int32_t undef_pri[COLL_WEIGHTS_MAX];
  77 } collate_info_t;
  78 
  79 typedef struct collate_char {
  80         int32_t pri[COLL_WEIGHTS_MAX];
  81 } collate_char_t;
  82 
  83 typedef struct collate_chain {
  84         wchar_t str[COLLATE_STR_LEN];
  85         int32_t pri[COLL_WEIGHTS_MAX];
  86 } collate_chain_t;
  87 
  88 typedef struct collate_large {
  89         int32_t val;
  90         collate_char_t pri;
  91 } collate_large_t;
  92 
  93 typedef struct collate_subst {
  94         int32_t key;
  95         int32_t pri[COLLATE_STR_LEN];
  96 } collate_subst_t;
  97 
/*
 * Collation state passed to _collate_lookup() and _collate_wxfrm().
 * The members lc_directive_count through lc_undef_pri repeat the
 * members of collate_info_t (same types, same order, lc_ prefix); the
 * pointers give typed access to the header and to each table.
 *
 * NOTE(review): the tables presumably live inside the mmap'd collate
 * file rather than in separately allocated memory -- confirm in the
 * loader.
 */
struct lc_collate {
        /* NOTE(review): presumably nonzero for the C/POSIX collation */
        int             lc_is_posix;

        /* Counterparts of the collate_info_t members. */
        uint8_t         lc_directive_count;
        uint8_t         lc_directive[COLL_WEIGHTS_MAX];
        int32_t         lc_pri_count[COLL_WEIGHTS_MAX];
        int32_t         lc_flags;
        int32_t         lc_chain_count;
        int32_t         lc_large_count;
        int32_t         lc_subst_count[COLL_WEIGHTS_MAX];
        int32_t         lc_undef_pri[COLL_WEIGHTS_MAX];

        /*
         * Header and tables.  lc_subst_table[] has COLL_WEIGHTS_MAX
         * slots, the same number as lc_subst_count[].
         */
        collate_info_t  *lc_info;
        collate_char_t  *lc_char_table;
        collate_large_t *lc_large_table;
        collate_chain_t *lc_chain_table;
        collate_subst_t *lc_subst_table[COLL_WEIGHTS_MAX];
};
 116 
/*
 * Internal collation entry points; none of them is defined in this
 * header.
 *
 * NOTE(review): the parameters are unnamed here, so their roles have
 * to be confirmed against the definitions.  Going by names and types
 * alone: _collate_wxfrm() and _collate_sxfrm() look like transformation
 * routines producing wide-character and char output respectively, each
 * taking a size_t that is presumably the output capacity, and
 * _collate_range_cmp() looks like a comparison of two wide characters
 * under the given locale.
 */
void    _collate_lookup(const struct lc_collate *, const wchar_t *,
    int *, int *, int, const int **);
size_t  _collate_wxfrm(const struct lc_collate *, const wchar_t *,
    wchar_t *, size_t);
size_t  _collate_sxfrm(const wchar_t *, char *, size_t, locale_t);
int     _collate_range_cmp(wchar_t, wchar_t, locale_t);
 123 
 124 #endif /* !_COLLATE_H_ */