Print this page
Thread safety fixes.
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/locale/collate.c
+++ new/usr/src/lib/libc/port/locale/collate.c
1 1 /*
2 2 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3 3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
4 4 * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
5 5 * at Electronni Visti IA, Kiev, Ukraine.
6 6 * All rights reserved.
7 7 *
8 8 * Redistribution and use in source and binary forms, with or without
9 9 * modification, are permitted provided that the following conditions
10 10 * are met:
11 11 * 1. Redistributions of source code must retain the above copyright
12 12 * notice, this list of conditions and the following disclaimer.
13 13 * 2. Redistributions in binary form must reproduce the above copyright
14 14 * notice, this list of conditions and the following disclaimer in the
15 15 * documentation and/or other materials provided with the distribution.
16 16 *
17 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
18 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
21 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 27 * SUCH DAMAGE.
28 28 */
29 29
30 30 #include "lint.h"
31 31 #include "file64.h"
32 32 #include <stdio.h>
33 33 #include <stdlib.h>
34 34 #include <stddef.h>
35 35 #include <string.h>
36 36 #include <wchar.h>
37 37 #include <errno.h>
38 38 #include <unistd.h>
39 39 #include <ctype.h>
40 40 #include <unistd.h>
41 41 #include <fcntl.h>
42 42 #include <assert.h>
43 43 #include <sys/stat.h>
44 44 #include <sys/mman.h>
45 45
46 46 #include "collate.h"
47 47 #include "setlocale.h"
48 48
49 49 /*
50 50 * See the comments in usr/src/cmd/localedef/collate.c for further
51 51 * information. It would also be very helpful to have a copy of the
52 52 * POSIX standard for collation (in the locale format manual page)
53 53 * handy (www.opengroup.org).
54 54 */
↓ open down ↓ |
54 lines elided |
↑ open up ↑ |
55 55
56 56 /*
57 57 * POSIX uses empty tables and falls down to strcmp.
58 58 */
59 59 struct lc_collate lc_collate_posix = {
60 60 .lc_is_posix = 1,
61 61 };
62 62
63 63 struct locdata __posix_collate_locdata = {
64 64 .l_lname = "C",
65 - .l_refcnt = (uint32_t)-1,
66 65 .l_data = { &lc_collate_posix }
67 66 };
68 67
69 68
70 69 struct locdata *
71 70 __lc_collate_load(const char *locname)
72 71 {
73 72 int i, chains, z;
74 73 char buf[PATH_MAX];
75 74 char *TMP;
76 75 char *map;
77 76 collate_info_t *info;
78 77 struct stat sbuf;
79 78 int fd;
80 79 struct locdata *ldata;
81 80 struct lc_collate *lcc;
82 81
83 82 /*
84 83 * Slurp the locale file into the cache.
85 84 */
86 85
87 86 (void) snprintf(buf, sizeof (buf), "%s/%s/LC_COLLATE/LCL_DATA",
88 87 _PathLocale, locname);
89 88
90 89 if ((fd = open(buf, O_RDONLY)) < 0) {
91 90 errno = EINVAL;
92 91 return (NULL);
93 92 }
94 93 if (fstat(fd, &sbuf) < 0) {
95 94 (void) close(fd);
96 95 errno = EINVAL;
97 96 return (NULL);
98 97 }
99 98 if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
100 99 (void) close(fd);
101 100 errno = EINVAL;
102 101 return (NULL);
103 102 }
104 103 map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
105 104 (void) close(fd);
106 105 if ((TMP = map) == NULL) {
107 106 errno = EINVAL;
108 107 return (NULL);
109 108 }
110 109
111 110 if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
112 111 (void) munmap(map, sbuf.st_size);
113 112 errno = EINVAL;
114 113 return (NULL);
115 114 }
116 115 TMP += COLLATE_STR_LEN;
117 116
118 117 info = (void *)TMP;
119 118 TMP += sizeof (*info);
120 119
121 120 if ((info->directive_count < 1) ||
122 121 (info->directive_count >= COLL_WEIGHTS_MAX) ||
123 122 ((chains = info->chain_count) < 0)) {
124 123 (void) munmap(map, sbuf.st_size);
125 124 errno = EINVAL;
126 125 return (NULL);
127 126 }
128 127
129 128 i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) +
130 129 (sizeof (collate_chain_t) * chains) +
131 130 (sizeof (collate_large_t) * info->large_count);
132 131 for (z = 0; z < info->directive_count; z++) {
133 132 i += sizeof (collate_subst_t) * info->subst_count[z];
134 133 }
135 134 if (i != (sbuf.st_size - (TMP - map))) {
136 135 (void) munmap(map, sbuf.st_size);
137 136 errno = EINVAL;
138 137 return (NULL);
139 138 }
140 139
141 140
142 141 if ((ldata = __locdata_alloc(locname, sizeof (*lcc))) == NULL) {
143 142 (void) munmap(map, sbuf.st_size);
144 143 return (NULL);
145 144 }
146 145 lcc = ldata->l_data[0];
147 146 ldata->l_map = map;
148 147 ldata->l_map_len = sbuf.st_size;
149 148
150 149 lcc->lc_info = info;
151 150 lcc->lc_directive_count = info->directive_count;
152 151 lcc->lc_large_count = info->large_count;
153 152
154 153 for (z = 0; z < COLL_WEIGHTS_MAX; z++) {
155 154 lcc->lc_directive[z] = info->directive[z];
156 155 lcc->lc_subst_count[z] = info->subst_count[z];
157 156 lcc->lc_pri_count[z] = info->pri_count[z];
158 157 lcc->lc_undef_pri[z] = info->undef_pri[z];
159 158 }
160 159
161 160 lcc->lc_char_table = (void *)TMP;
162 161 TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1);
163 162
164 163 for (z = 0; z < lcc->lc_directive_count; z++) {
165 164 int count;
166 165 if ((count = lcc->lc_subst_count[z]) > 0) {
167 166 lcc->lc_subst_table[z] = (void *)TMP;
168 167 TMP += count * sizeof (collate_subst_t);
169 168 } else {
170 169 lcc->lc_subst_table[z] = NULL;
171 170 }
172 171 }
173 172
174 173 if (chains > 0) {
175 174 lcc->lc_chain_table = (void *)TMP;
176 175 TMP += chains * sizeof (collate_chain_t);
177 176 } else
178 177 lcc->lc_chain_table = NULL;
179 178 lcc->lc_chain_count = chains;
180 179 if (lcc->lc_large_count > 0)
181 180 lcc->lc_large_table = (void *)TMP;
182 181 else
183 182 lcc->lc_large_table = NULL;
184 183
185 184 return (ldata);
186 185 }
187 186
188 187 static const int32_t *
189 188 substsearch(const struct lc_collate *lcc, const wchar_t key, int pass)
190 189 {
191 190 const collate_subst_t *p;
192 191 int n = lcc->lc_subst_count[pass];
193 192
194 193 if (n == 0)
195 194 return (NULL);
196 195
197 196 if (pass >= lcc->lc_directive_count)
198 197 return (NULL);
199 198
200 199 if (!(key & COLLATE_SUBST_PRIORITY))
201 200 return (NULL);
202 201
203 202 p = lcc->lc_subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY);
204 203 assert(p->key == key);
205 204 return (p->pri);
206 205 }
207 206
208 207 /*
209 208 * Note: for performance reasons, we have expanded bsearch here. This avoids
210 209 * function call overhead with each comparison.
211 210 */
212 211
213 212 static collate_chain_t *
214 213 chainsearch(const struct lc_collate *lcc, const wchar_t *key, int *len)
215 214 {
216 215 int low;
217 216 int high;
218 217 int next, compar, l;
219 218 collate_chain_t *p;
220 219 collate_chain_t *tab;
221 220
222 221 if (lcc->lc_info->chain_count == 0)
223 222 return (NULL);
224 223
225 224 low = 0;
226 225 high = lcc->lc_info->chain_count - 1;
227 226 tab = lcc->lc_chain_table;
228 227
229 228 while (low <= high) {
230 229 next = (low + high) / 2;
231 230 p = tab + next;
232 231 compar = *key - *p->str;
233 232 if (compar == 0) {
234 233 l = wcsnlen(p->str, COLLATE_STR_LEN);
235 234 compar = wcsncmp(key, p->str, l);
236 235 if (compar == 0) {
237 236 *len = l;
238 237 return (p);
239 238 }
240 239 }
241 240 if (compar > 0)
242 241 low = next + 1;
243 242 else
244 243 high = next - 1;
245 244 }
246 245 return (NULL);
247 246 }
248 247
249 248 static collate_large_t *
250 249 largesearch(const struct lc_collate *lcc, const wchar_t key)
251 250 {
252 251 int low = 0;
253 252 int high = lcc->lc_info->large_count - 1;
254 253 int next, compar;
255 254 collate_large_t *p;
256 255 collate_large_t *tab = lcc->lc_large_table;
257 256
258 257 if (lcc->lc_info->large_count == 0)
259 258 return (NULL);
260 259
261 260 while (low <= high) {
262 261 next = (low + high) / 2;
263 262 p = tab + next;
264 263 compar = key - p->val;
265 264 if (compar == 0)
266 265 return (p);
267 266 if (compar > 0)
268 267 low = next + 1;
269 268 else
270 269 high = next - 1;
271 270 }
272 271 return (NULL);
273 272 }
274 273
275 274 void
276 275 _collate_lookup(const struct lc_collate *lcc, const wchar_t *t,
277 276 int *len, int *pri, int which, const int **state)
278 277 {
279 278 collate_chain_t *p2;
280 279 collate_large_t *match;
281 280 int p, l;
282 281 const int *sptr;
283 282
284 283 /*
285 284 * If this is the "last" pass for the UNDEFINED, then
286 285 * we just return the priority itself.
287 286 */
288 287 if (which >= lcc->lc_directive_count) {
289 288 *pri = *t;
290 289 *len = 1;
291 290 *state = NULL;
292 291 return;
293 292 }
294 293
295 294 /*
296 295 * If we have remaining substitution data from a previous
297 296 * call, consume it first.
298 297 */
299 298 if ((sptr = *state) != NULL) {
300 299 *pri = *sptr;
301 300 sptr++;
302 301 *state = *sptr ? sptr : NULL;
303 302 *len = 0;
304 303 return;
305 304 }
306 305
307 306 /* No active substitutions */
308 307 *len = 1;
309 308
310 309 /*
311 310 * Check for composites such as dipthongs that collate as a
312 311 * single element (aka chains or collating-elements).
313 312 */
314 313 if (((p2 = chainsearch(lcc, t, &l)) != NULL) &&
315 314 ((p = p2->pri[which]) >= 0)) {
316 315
317 316 *len = l;
318 317 *pri = p;
319 318
320 319 } else if (*t <= UCHAR_MAX) {
321 320
322 321 /*
323 322 * Character is a small (8-bit) character.
324 323 * We just look these up directly for speed.
325 324 */
326 325 *pri = lcc->lc_char_table[*t].pri[which];
327 326
328 327 } else if ((lcc->lc_info->large_count > 0) &&
329 328 ((match = largesearch(lcc, *t)) != NULL)) {
330 329
331 330 /*
332 331 * Character was found in the extended table.
333 332 */
334 333 *pri = match->pri.pri[which];
335 334
336 335 } else {
337 336 /*
338 337 * Character lacks a specific definition.
339 338 */
340 339 if (lcc->lc_directive[which] & DIRECTIVE_UNDEFINED) {
341 340 /* Mask off sign bit to prevent ordering confusion. */
342 341 *pri = (*t & COLLATE_MAX_PRIORITY);
343 342 } else {
344 343 *pri = lcc->lc_undef_pri[which];
345 344 }
346 345 /* No substitutions for undefined characters! */
347 346 return;
348 347 }
349 348
350 349 /*
351 350 * Try substituting (expanding) the character. We are
352 351 * currently doing this *after* the chain compression. I
353 352 * think it should not matter, but this way might be slightly
354 353 * faster.
355 354 *
356 355 * We do this after the priority search, as this will help us
357 356 * to identify a single key value. In order for this to work,
358 357 * its important that the priority assigned to a given element
359 358 * to be substituted be unique for that level. The localedef
360 359 * code ensures this for us.
361 360 */
362 361 if ((sptr = substsearch(lcc, *pri, which)) != NULL) {
363 362 if ((*pri = *sptr) != 0) {
364 363 sptr++;
365 364 *state = *sptr ? sptr : NULL;
366 365 }
367 366 }
368 367
369 368 }
370 369
371 370 /*
372 371 * This is the meaty part of wcsxfrm & strxfrm. Note that it does
373 372 * NOT NULL terminate. That is left to the caller.
374 373 */
375 374 size_t
376 375 _collate_wxfrm(const struct lc_collate *lcc, const wchar_t *src, wchar_t *xf,
377 376 size_t room)
378 377 {
379 378 int pri;
380 379 int len;
381 380 const wchar_t *t;
382 381 wchar_t *tr = NULL;
383 382 int direc;
384 383 int pass;
385 384 const int32_t *state;
386 385 size_t want = 0;
387 386 size_t need = 0;
388 387 int ndir = lcc->lc_directive_count;
389 388
390 389 assert(src);
391 390
392 391 for (pass = 0; pass <= ndir; pass++) {
393 392
394 393 state = NULL;
395 394
396 395 if (pass != 0) {
397 396 /* insert level separator from the previous pass */
398 397 if (room) {
399 398 *xf++ = 1;
400 399 room--;
401 400 }
402 401 want++;
403 402 }
404 403
405 404 /* special pass for undefined */
406 405 if (pass == ndir) {
407 406 direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
408 407 } else {
409 408 direc = lcc->lc_directive[pass];
410 409 }
411 410
412 411 t = src;
413 412
414 413 if (direc & DIRECTIVE_BACKWARD) {
415 414 wchar_t *bp, *fp, c;
416 415 if (tr)
417 416 free(tr);
418 417 if ((tr = wcsdup(t)) == NULL) {
419 418 errno = ENOMEM;
420 419 goto fail;
421 420 }
422 421 bp = tr;
423 422 fp = tr + wcslen(tr) - 1;
424 423 while (bp < fp) {
425 424 c = *bp;
426 425 *bp++ = *fp;
427 426 *fp-- = c;
428 427 }
429 428 t = (const wchar_t *)tr;
430 429 }
431 430
432 431 if (direc & DIRECTIVE_POSITION) {
433 432 while (*t || state) {
434 433 _collate_lookup(lcc, t, &len, &pri, pass,
435 434 &state);
436 435 t += len;
437 436 if (pri <= 0) {
438 437 if (pri < 0) {
439 438 errno = EINVAL;
440 439 goto fail;
441 440 }
442 441 pri = COLLATE_MAX_PRIORITY;
443 442 }
444 443 if (room) {
445 444 *xf++ = pri;
446 445 room--;
447 446 }
448 447 want++;
449 448 need = want;
450 449 }
451 450 } else {
452 451 while (*t || state) {
453 452 _collate_lookup(lcc, t, &len, &pri, pass,
454 453 &state);
455 454 t += len;
456 455 if (pri <= 0) {
457 456 if (pri < 0) {
458 457 errno = EINVAL;
459 458 goto fail;
460 459 }
461 460 continue;
462 461 }
463 462 if (room) {
464 463 *xf++ = pri;
465 464 room--;
466 465 }
467 466 want++;
468 467 need = want;
469 468 }
470 469 }
471 470 }
472 471
473 472 end:
474 473 if (tr)
475 474 free(tr);
476 475 return (need);
477 476
478 477 fail:
479 478 if (tr)
480 479 free(tr);
481 480 return ((size_t)(-1));
482 481 }
483 482
484 483 /*
485 484 * In the non-POSIX case, we transform each character into a string of
486 485 * characters representing the character's priority. Since char is usually
487 486 * signed, we are limited by 7 bits per byte. To avoid zero, we need to add
488 487 * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6
489 488 * bits per byte.
490 489 *
491 490 * It turns out that we sometimes have real priorities that are
492 491 * 31-bits wide. (But: be careful using priorities where the high
493 492 * order bit is set -- i.e. the priority is negative. The sort order
494 493 * may be surprising!)
495 494 *
496 495 * TODO: This would be a good area to optimize somewhat. It turns out
497 496 * that real priorities *except for the last UNDEFINED pass* are generally
498 497 * very small. We need the localedef code to precalculate the max
499 498 * priority for us, and ideally also give us a mask, and then we could
500 499 * severely limit what we expand to.
501 500 */
502 501 #define XFRM_BYTES 6
503 502 #define XFRM_OFFSET ('0') /* make all printable characters */
504 503 #define XFRM_SHIFT 6
505 504 #define XFRM_MASK ((1 << XFRM_SHIFT) - 1)
506 505 #define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */
507 506
508 507 static int
509 508 xfrm(locale_t loc, unsigned char *p, int pri, int pass)
510 509 {
511 510 /* we use unsigned to ensure zero fill on right shift */
512 511 uint32_t val = (uint32_t)loc->collate->lc_pri_count[pass];
513 512 int nc = 0;
514 513
515 514 while (val) {
516 515 *p = (pri & XFRM_MASK) + XFRM_OFFSET;
517 516 pri >>= XFRM_SHIFT;
518 517 val >>= XFRM_SHIFT;
519 518 p++;
520 519 nc++;
521 520 }
522 521 return (nc);
523 522 }
524 523
525 524 size_t
526 525 _collate_sxfrm(const wchar_t *src, char *xf, size_t room, locale_t loc)
527 526 {
528 527 int pri;
529 528 int len;
530 529 const wchar_t *t;
531 530 wchar_t *tr = NULL;
532 531 int direc;
533 532 int pass;
534 533 const int32_t *state;
535 534 size_t want = 0;
536 535 size_t need = 0;
537 536 int b;
538 537 uint8_t buf[XFRM_BYTES];
539 538 const struct lc_collate *lcc = loc->collate;
540 539 int ndir = lcc->lc_directive_count;
541 540
542 541 assert(src);
543 542
544 543 for (pass = 0; pass <= ndir; pass++) {
545 544
546 545 state = NULL;
547 546
548 547 if (pass != 0) {
549 548 /* insert level separator from the previous pass */
550 549 if (room) {
551 550 *xf++ = XFRM_SEP;
552 551 room--;
553 552 }
554 553 want++;
555 554 }
556 555
557 556 /* special pass for undefined */
558 557 if (pass == ndir) {
559 558 direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
560 559 } else {
561 560 direc = lcc->lc_directive[pass];
562 561 }
563 562
564 563 t = src;
565 564
566 565 if (direc & DIRECTIVE_BACKWARD) {
567 566 wchar_t *bp, *fp, c;
568 567 if (tr)
569 568 free(tr);
570 569 if ((tr = wcsdup(t)) == NULL) {
571 570 errno = ENOMEM;
572 571 goto fail;
573 572 }
574 573 bp = tr;
575 574 fp = tr + wcslen(tr) - 1;
576 575 while (bp < fp) {
577 576 c = *bp;
578 577 *bp++ = *fp;
579 578 *fp-- = c;
580 579 }
581 580 t = (const wchar_t *)tr;
582 581 }
583 582
584 583 if (direc & DIRECTIVE_POSITION) {
585 584 while (*t || state) {
586 585
587 586 _collate_lookup(lcc, t, &len, &pri, pass,
588 587 &state);
589 588 t += len;
590 589 if (pri <= 0) {
591 590 if (pri < 0) {
592 591 errno = EINVAL;
593 592 goto fail;
594 593 }
595 594 pri = COLLATE_MAX_PRIORITY;
596 595 }
597 596
598 597 b = xfrm(loc, buf, pri, pass);
599 598 want += b;
600 599 if (room) {
601 600 while (b) {
602 601 b--;
603 602 if (room) {
604 603 *xf++ = buf[b];
605 604 room--;
606 605 }
607 606 }
608 607 }
609 608 need = want;
610 609 }
611 610 } else {
612 611 while (*t || state) {
613 612 _collate_lookup(lcc, t, &len, &pri, pass,
614 613 &state);
615 614 t += len;
616 615 if (pri <= 0) {
617 616 if (pri < 0) {
618 617 errno = EINVAL;
619 618 goto fail;
620 619 }
621 620 continue;
622 621 }
623 622
624 623 b = xfrm(loc, buf, pri, pass);
625 624 want += b;
626 625 if (room) {
627 626
628 627 while (b) {
629 628 b--;
630 629 if (room) {
631 630 *xf++ = buf[b];
632 631 room--;
633 632 }
634 633 }
635 634 }
636 635 need = want;
637 636 }
638 637 }
639 638 }
640 639
641 640 end:
642 641 if (tr)
643 642 free(tr);
644 643 return (need);
645 644
646 645 fail:
647 646 if (tr)
648 647 free(tr);
649 648 return ((size_t)(-1));
650 649 }
↓ open down ↓ |
575 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX