1 /* quotearg.c - quote arguments for output 2 3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free 4 Software Foundation, Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20 /* Written by Paul Eggert <eggert@twinsun.com> */ 21 22 #include <config.h> 23 24 #include "quotearg.h" 25 26 #include "xalloc.h" 27 28 #include <ctype.h> 29 #include <errno.h> 30 #include <limits.h> 31 #include <stdbool.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <wchar.h> 35 #include <wctype.h> 36 37 #include "gettext.h" 38 #define _(msgid) gettext (msgid) 39 #define N_(msgid) msgid 40 41 #if !HAVE_MBRTOWC 42 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the 43 other macros are defined only for documentation and to satisfy C 44 syntax. */ 45 # undef MB_CUR_MAX 46 # define MB_CUR_MAX 1 47 # undef mbstate_t 48 # define mbstate_t int 49 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 50 # define iswprint(wc) isprint ((unsigned char) (wc)) 51 # undef HAVE_MBSINIT 52 #endif 53 54 #if !defined mbsinit && !HAVE_MBSINIT 55 # define mbsinit(ps) 1 56 #endif 57 58 #ifndef SIZE_MAX 59 # define SIZE_MAX ((size_t) -1) 60 #endif 61 62 #define INT_BITS (sizeof (int) * CHAR_BIT) 63 64 struct quoting_options 65 { 66 /* Basic quoting style. */ 67 enum quoting_style style; 68 69 /* Quote the characters indicated by this bit vector even if the 70 quoting style would not normally require them to be quoted. */ 71 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; 72 }; 73 74 /* Names of quoting styles. */ 75 char const *const quoting_style_args[] = 76 { 77 "literal", 78 "shell", 79 "shell-always", 80 "c", 81 "escape", 82 "locale", 83 "clocale", 84 0 85 }; 86 87 /* Correspondences to quoting style names. */ 88 enum quoting_style const quoting_style_vals[] = 89 { 90 literal_quoting_style, 91 shell_quoting_style, 92 shell_always_quoting_style, 93 c_quoting_style, 94 escape_quoting_style, 95 locale_quoting_style, 96 clocale_quoting_style 97 }; 98 99 /* The default quoting options. */ 100 static struct quoting_options default_quoting_options; 101 102 /* Allocate a new set of quoting options, with contents initially identical 103 to O if O is not null, or to the default if O is null. 104 It is the caller's responsibility to free the result. */ 105 struct quoting_options * 106 clone_quoting_options (struct quoting_options *o) 107 { 108 int e = errno; 109 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, 110 sizeof *o); 111 errno = e; 112 return p; 113 } 114 115 /* Get the value of O's quoting style. If O is null, use the default. */ 116 enum quoting_style 117 get_quoting_style (struct quoting_options *o) 118 { 119 return (o ? o : &default_quoting_options)->style; 120 } 121 122 /* In O (or in the default if O is null), 123 set the value of the quoting style to S. */ 124 void 125 set_quoting_style (struct quoting_options *o, enum quoting_style s) 126 { 127 (o ? o : &default_quoting_options)->style = s; 128 } 129 130 /* In O (or in the default if O is null), 131 set the value of the quoting options for character C to I. 132 Return the old value. Currently, the only values defined for I are 133 0 (the default) and 1 (which means to quote the character even if 134 it would not otherwise be quoted). */ 135 int 136 set_char_quoting (struct quoting_options *o, char c, int i) 137 { 138 unsigned char uc = c; 139 unsigned int *p = 140 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 141 int shift = uc % INT_BITS; 142 int r = (*p >> shift) & 1; 143 *p ^= ((i & 1) ^ r) << shift; 144 return r; 145 } 146 147 /* MSGID approximates a quotation mark. Return its translation if it 148 has one; otherwise, return either it or "\"", depending on S. */ 149 static char const * 150 gettext_quote (char const *msgid, enum quoting_style s) 151 { 152 char const *translation = _(msgid); 153 if (translation == msgid && s == clocale_quoting_style) 154 translation = "\""; 155 return translation; 156 } 157 158 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 159 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 160 non-quoting-style part of O to control quoting. 161 Terminate the output with a null character, and return the written 162 size of the output, not counting the terminating null. 163 If BUFFERSIZE is too small to store the output string, return the 164 value that would have been returned had BUFFERSIZE been large enough. 165 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. 166 167 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 168 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 169 style specified by O, and O may not be null. */ 170 171 static size_t 172 quotearg_buffer_restyled (char *buffer, size_t buffersize, 173 char const *arg, size_t argsize, 174 enum quoting_style quoting_style, 175 struct quoting_options const *o) 176 { 177 size_t i; 178 size_t len = 0; 179 char const *quote_string = 0; 180 size_t quote_string_len = 0; 181 bool backslash_escapes = false; 182 bool unibyte_locale = MB_CUR_MAX == 1; 183 184 #define STORE(c) \ 185 do \ 186 { \ 187 if (len < buffersize) \ 188 buffer[len] = (c); \ 189 len++; \ 190 } \ 191 while (0) 192 193 switch (quoting_style) 194 { 195 case c_quoting_style: 196 STORE ('"'); 197 backslash_escapes = true; 198 quote_string = "\""; 199 quote_string_len = 1; 200 break; 201 202 case escape_quoting_style: 203 backslash_escapes = true; 204 break; 205 206 case locale_quoting_style: 207 case clocale_quoting_style: 208 { 209 /* TRANSLATORS: 210 Get translations for open and closing quotation marks. 211 212 The message catalog should translate "`" to a left 213 quotation mark suitable for the locale, and similarly for 214 "'". If the catalog has no translation, 215 locale_quoting_style quotes `like this', and 216 clocale_quoting_style quotes "like this". 217 218 For example, an American English Unicode locale should 219 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 220 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 221 MARK). A British English Unicode locale should instead 222 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 223 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. 224 225 If you don't know what to put here, please see 226 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> 227 and use glyphs suitable for your language. */ 228 229 char const *left = gettext_quote (N_("`"), quoting_style); 230 char const *right = gettext_quote (N_("'"), quoting_style); 231 for (quote_string = left; *quote_string; quote_string++) 232 STORE (*quote_string); 233 backslash_escapes = true; 234 quote_string = right; 235 quote_string_len = strlen (quote_string); 236 } 237 break; 238 239 case shell_always_quoting_style: 240 STORE ('\''); 241 quote_string = "'"; 242 quote_string_len = 1; 243 break; 244 245 default: 246 break; 247 } 248 249 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) 250 { 251 unsigned char c; 252 unsigned char esc; 253 254 if (backslash_escapes 255 && quote_string_len 256 && i + quote_string_len <= argsize 257 && memcmp (arg + i, quote_string, quote_string_len) == 0) 258 STORE ('\\'); 259 260 c = arg[i]; 261 switch (c) 262 { 263 case '\0': 264 if (backslash_escapes) 265 { 266 STORE ('\\'); 267 STORE ('0'); 268 STORE ('0'); 269 c = '0'; 270 } 271 break; 272 273 case '?': 274 switch (quoting_style) 275 { 276 case shell_quoting_style: 277 goto use_shell_always_quoting_style; 278 279 case c_quoting_style: 280 if (i + 2 < argsize && arg[i + 1] == '?') 281 switch (arg[i + 2]) 282 { 283 case '!': case '\'': 284 case '(': case ')': case '-': case '/': 285 case '<': case '=': case '>': 286 /* Escape the second '?' in what would otherwise be 287 a trigraph. */ 288 c = arg[i + 2]; 289 i += 2; 290 STORE ('?'); 291 STORE ('\\'); 292 STORE ('?'); 293 break; 294 295 default: 296 break; 297 } 298 break; 299 300 default: 301 break; 302 } 303 break; 304 305 case '\a': esc = 'a'; goto c_escape; 306 case '\b': esc = 'b'; goto c_escape; 307 case '\f': esc = 'f'; goto c_escape; 308 case '\n': esc = 'n'; goto c_and_shell_escape; 309 case '\r': esc = 'r'; goto c_and_shell_escape; 310 case '\t': esc = 't'; goto c_and_shell_escape; 311 case '\v': esc = 'v'; goto c_escape; 312 case '\\': esc = c; goto c_and_shell_escape; 313 314 c_and_shell_escape: 315 if (quoting_style == shell_quoting_style) 316 goto use_shell_always_quoting_style; 317 c_escape: 318 if (backslash_escapes) 319 { 320 c = esc; 321 goto store_escape; 322 } 323 break; 324 325 case '{': case '}': /* sometimes special if isolated */ 326 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) 327 break; 328 /* Fall through. */ 329 case '#': case '~': 330 if (i != 0) 331 break; 332 /* Fall through. */ 333 case ' ': 334 case '!': /* special in bash */ 335 case '"': case '$': case '&': 336 case '(': case ')': case '*': case ';': 337 case '<': 338 case '=': /* sometimes special in 0th or (with "set -k") later args */ 339 case '>': case '[': 340 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 341 case '`': case '|': 342 /* A shell special character. In theory, '$' and '`' could 343 be the first bytes of multibyte characters, which means 344 we should check them with mbrtowc, but in practice this 345 doesn't happen so it's not worth worrying about. */ 346 if (quoting_style == shell_quoting_style) 347 goto use_shell_always_quoting_style; 348 break; 349 350 case '\'': 351 switch (quoting_style) 352 { 353 case shell_quoting_style: 354 goto use_shell_always_quoting_style; 355 356 case shell_always_quoting_style: 357 STORE ('\''); 358 STORE ('\\'); 359 STORE ('\''); 360 break; 361 362 default: 363 break; 364 } 365 break; 366 367 case '%': case '+': case ',': case '-': case '.': case '/': 368 case '0': case '1': case '2': case '3': case '4': case '5': 369 case '6': case '7': case '8': case '9': case ':': 370 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 371 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 372 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 373 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 374 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 375 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 376 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 377 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 378 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 379 /* These characters don't cause problems, no matter what the 380 quoting style is. They cannot start multibyte sequences. */ 381 break; 382 383 default: 384 /* If we have a multibyte sequence, copy it until we reach 385 its end, find an error, or come back to the initial shift 386 state. For C-like styles, if the sequence has 387 unprintable characters, escape the whole sequence, since 388 we can't easily escape single characters within it. */ 389 { 390 /* Length of multibyte sequence found so far. */ 391 size_t m; 392 393 bool printable; 394 395 if (unibyte_locale) 396 { 397 m = 1; 398 printable = isprint (c) != 0; 399 } 400 else 401 { 402 mbstate_t mbstate; 403 memset (&mbstate, 0, sizeof mbstate); 404 405 m = 0; 406 printable = true; 407 if (argsize == SIZE_MAX) 408 argsize = strlen (arg); 409 410 do 411 { 412 wchar_t w; 413 size_t bytes = mbrtowc (&w, &arg[i + m], 414 argsize - (i + m), &mbstate); 415 if (bytes == 0) 416 break; 417 else if (bytes == (size_t) -1) 418 { 419 printable = false; 420 break; 421 } 422 else if (bytes == (size_t) -2) 423 { 424 printable = false; 425 while (i + m < argsize && arg[i + m]) 426 m++; 427 break; 428 } 429 else 430 { 431 /* Work around a bug with older shells that "see" a '\' 432 that is really the 2nd byte of a multibyte character. 433 In practice the problem is limited to ASCII 434 chars >= '@' that are shell special chars. */ 435 if ('[' == 0x5b && quoting_style == shell_quoting_style) 436 { 437 size_t j; 438 for (j = 1; j < bytes; j++) 439 switch (arg[i + m + j]) 440 { 441 case '[': case '\\': case '^': 442 case '`': case '|': 443 goto use_shell_always_quoting_style; 444 445 default: 446 break; 447 } 448 } 449 450 if (! iswprint (w)) 451 printable = false; 452 m += bytes; 453 } 454 } 455 while (! mbsinit (&mbstate)); 456 } 457 458 if (1 < m || (backslash_escapes && ! printable)) 459 { 460 /* Output a multibyte sequence, or an escaped 461 unprintable unibyte character. */ 462 size_t ilim = i + m; 463 464 for (;;) 465 { 466 if (backslash_escapes && ! printable) 467 { 468 STORE ('\\'); 469 STORE ('0' + (c >> 6)); 470 STORE ('0' + ((c >> 3) & 7)); 471 c = '0' + (c & 7); 472 } 473 if (ilim <= i + 1) 474 break; 475 STORE (c); 476 c = arg[++i]; 477 } 478 479 goto store_c; 480 } 481 } 482 } 483 484 if (! (backslash_escapes 485 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 486 goto store_c; 487 488 store_escape: 489 STORE ('\\'); 490 491 store_c: 492 STORE (c); 493 } 494 495 if (i == 0 && quoting_style == shell_quoting_style) 496 goto use_shell_always_quoting_style; 497 498 if (quote_string) 499 for (; *quote_string; quote_string++) 500 STORE (*quote_string); 501 502 if (len < buffersize) 503 buffer[len] = '\0'; 504 return len; 505 506 use_shell_always_quoting_style: 507 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 508 shell_always_quoting_style, o); 509 } 510 511 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 512 argument ARG (of size ARGSIZE), using O to control quoting. 513 If O is null, use the default. 514 Terminate the output with a null character, and return the written 515 size of the output, not counting the terminating null. 516 If BUFFERSIZE is too small to store the output string, return the 517 value that would have been returned had BUFFERSIZE been large enough. 518 If ARGSIZE is SIZE_MAX, use the string length of the argument for 519 ARGSIZE. */ 520 size_t 521 quotearg_buffer (char *buffer, size_t buffersize, 522 char const *arg, size_t argsize, 523 struct quoting_options const *o) 524 { 525 struct quoting_options const *p = o ? o : &default_quoting_options; 526 int e = errno; 527 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 528 p->style, p); 529 errno = e; 530 return r; 531 } 532 533 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly 534 allocated storage containing the quoted string. */ 535 char * 536 quotearg_alloc (char const *arg, size_t argsize, 537 struct quoting_options const *o) 538 { 539 int e = errno; 540 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; 541 char *buf = xcharalloc (bufsize); 542 quotearg_buffer (buf, bufsize, arg, argsize, o); 543 errno = e; 544 return buf; 545 } 546 547 /* A storage slot with size and pointer to a value. */ 548 struct slotvec 549 { 550 size_t size; 551 char *val; 552 }; 553 554 /* Preallocate a slot 0 buffer, so that the caller can always quote 555 one small component of a "memory exhausted" message in slot 0. */ 556 static char slot0[256]; 557 static unsigned int nslots = 1; 558 static struct slotvec slotvec0 = {sizeof slot0, slot0}; 559 static struct slotvec *slotvec = &slotvec0; 560 561 void 562 quotearg_free (void) 563 { 564 struct slotvec *sv = slotvec; 565 unsigned int i; 566 for (i = 1; i < nslots; i++) 567 free (sv[i].val); 568 if (sv[0].val != slot0) 569 { 570 free (sv[0].val); 571 slotvec0.size = sizeof slot0; 572 slotvec0.val = slot0; 573 } 574 if (sv != &slotvec0) 575 { 576 free (sv); 577 slotvec = &slotvec0; 578 } 579 nslots = 1; 580 } 581 582 /* Use storage slot N to return a quoted version of argument ARG. 583 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a 584 null-terminated string. 585 OPTIONS specifies the quoting options. 586 The returned value points to static storage that can be 587 reused by the next call to this function with the same value of N. 588 N must be nonnegative. N is deliberately declared with type "int" 589 to allow for future extensions (using negative values). */ 590 static char * 591 quotearg_n_options (int n, char const *arg, size_t argsize, 592 struct quoting_options const *options) 593 { 594 int e = errno; 595 596 unsigned int n0 = n; 597 struct slotvec *sv = slotvec; 598 599 if (n < 0) 600 abort (); 601 602 if (nslots <= n0) 603 { 604 /* FIXME: technically, the type of n1 should be `unsigned int', 605 but that evokes an unsuppressible warning from gcc-4.0.1 and 606 older. If gcc ever provides an option to suppress that warning, 607 revert to the original type, so that the test in xalloc_oversized 608 is once again performed only at compile time. */ 609 size_t n1 = n0 + 1; 610 bool preallocated = (sv == &slotvec0); 611 612 if (xalloc_oversized (n1, sizeof *sv)) 613 xalloc_die (); 614 615 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); 616 if (preallocated) 617 *sv = slotvec0; 618 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); 619 nslots = n1; 620 } 621 622 { 623 size_t size = sv[n].size; 624 char *val = sv[n].val; 625 size_t qsize = quotearg_buffer (val, size, arg, argsize, options); 626 627 if (size <= qsize) 628 { 629 sv[n].size = size = qsize + 1; 630 if (val != slot0) 631 free (val); 632 sv[n].val = val = xcharalloc (size); 633 quotearg_buffer (val, size, arg, argsize, options); 634 } 635 636 errno = e; 637 return val; 638 } 639 } 640 641 char * 642 quotearg_n (int n, char const *arg) 643 { 644 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); 645 } 646 647 char * 648 quotearg (char const *arg) 649 { 650 return quotearg_n (0, arg); 651 } 652 653 /* Return quoting options for STYLE, with no extra quoting. */ 654 static struct quoting_options 655 quoting_options_from_style (enum quoting_style style) 656 { 657 struct quoting_options o; 658 o.style = style; 659 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 660 return o; 661 } 662 663 char * 664 quotearg_n_style (int n, enum quoting_style s, char const *arg) 665 { 666 struct quoting_options const o = quoting_options_from_style (s); 667 return quotearg_n_options (n, arg, SIZE_MAX, &o); 668 } 669 670 char * 671 quotearg_n_style_mem (int n, enum quoting_style s, 672 char const *arg, size_t argsize) 673 { 674 struct quoting_options const o = quoting_options_from_style (s); 675 return quotearg_n_options (n, arg, argsize, &o); 676 } 677 678 char * 679 quotearg_style (enum quoting_style s, char const *arg) 680 { 681 return quotearg_n_style (0, s, arg); 682 } 683 684 char * 685 quotearg_char (char const *arg, char ch) 686 { 687 struct quoting_options options; 688 options = default_quoting_options; 689 set_char_quoting (&options, ch, 1); 690 return quotearg_n_options (0, arg, SIZE_MAX, &options); 691 } 692 693 char * 694 quotearg_colon (char const *arg) 695 { 696 return quotearg_char (arg, ':'); 697 }