1 /*
   2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
   3  * Copyright (c) 1989, 1993
   4  *      The Regents of the University of California.  All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  * 4. Neither the name of the University nor the names of its contributors
  15  *    may be used to endorse or promote products derived from this software
  16  *    without specific prior written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28  * SUCH DAMAGE.
  29  */
  30 
  31 #include <sys/types.h>
  32 
  33 #include <err.h>
  34 #include <errno.h>
  35 #include <inttypes.h>
  36 #include <limits.h>
  37 #include <stdio.h>
  38 #include <stdlib.h>
  39 #include <string.h>
  40 #include <unistd.h>
  41 #include <locale.h>
  42 #include <note.h>
  43 
/*
 * warnx() wrappers that always take three arguments but forward only the
 * first one, two or three of them; the unused trailing arguments are
 * discarded by the preprocessor.
 */
#define warnx1(a, b, c)         warnx(a)
#define warnx2(a, b, c)         warnx(a, b)
#define warnx3(a, b, c)         warnx(a, b, c)

/* Distance from y to x, computed on the integer values of the pointers. */
#define PTRDIFF(x, y)   ((uintptr_t)(x) - (uintptr_t)(y))

/* Shorthand for looking a message up with gettext(). */
#define _(x)    gettext(x)
  51 
  52 #define PF(f, func) do {                                                \
  53         char *b = NULL;                                                 \
  54         int dollar = 0;                                                 \
  55         if (*f == '$')  {                                               \
  56                 dollar++;                                               \
  57                 *f = '%';                                               \
  58         }                                                               \
  59         if (havewidth)                                                  \
  60                 if (haveprec)                                           \
  61                         (void) asprintf(&b, f, fieldwidth, precision, func); \
  62                 else                                                    \
  63                         (void) asprintf(&b, f, fieldwidth, func);   \
  64         else if (haveprec)                                              \
  65                 (void) asprintf(&b, f, precision, func);            \
  66         else                                                            \
  67                 (void) asprintf(&b, f, func);                               \
  68         if (b) {                                                        \
  69                 (void) fputs(b, stdout);                                \
  70                 free(b);                                                \
  71         }                                                               \
  72         if (dollar)                                                     \
  73                 *f = '$';                                               \
  74 _NOTE(CONSTCOND) } while (0)
  75 
/* Helpers defined later in this file. */
static int       asciicode(void);
static char     *doformat(char *, int *);
static int       escape(char *, int, size_t *);
static int       getchr(void);
static int       getfloating(long double *, int);
static int       getint(int *);
static int       getnum(intmax_t *, uintmax_t *, int);
static const char
                *getstr(void);
static char     *mknum(char *, char);
static void      usage(void);

/*
 * Operand cursor shared by main() and the conversion helpers.  At the start
 * of every pass over the format, main() points myargv at the first operand
 * still unconsumed and sets myargc to the number of operands from there to
 * the terminating NULL; doformat() uses both to resolve "n$" indexes.
 * gargv is the next operand to be consumed and is advanced by the get*()
 * helpers and by asciicode().
 */
static int  myargc;
static char **myargv;
static char **gargv;
  91 
  92 int
  93 main(int argc, char *argv[])
  94 {
  95         size_t len;
  96         int chopped, end, rval;
  97         char *format, *fmt, *start;
  98 
  99         (void) setlocale(LC_ALL, "");
 100 
 101         argv++;
 102         argc--;
 103 
 104         /*
 105          * POSIX says: Standard utilities that do not accept options,
 106          * but that do accept operands, shall recognize "--" as a
 107          * first argument to be discarded.
 108          */
 109         if (argc && strcmp(argv[0], "--") == 0) {
 110                 argc--;
 111                 argv++;
 112         }
 113 
 114         if (argc < 1) {
 115                 usage();
 116                 return (1);
 117         }
 118 
 119         /*
 120          * Basic algorithm is to scan the format string for conversion
 121          * specifications -- once one is found, find out if the field
 122          * width or precision is a '*'; if it is, gather up value.  Note,
 123          * format strings are reused as necessary to use up the provided
 124          * arguments, arguments of zero/null string are provided to use
 125          * up the format string.
 126          */
 127         fmt = format = *argv;
 128         chopped = escape(fmt, 1, &len);             /* backslash interpretation */
 129         rval = end = 0;
 130         gargv = ++argv;
 131 
 132         for (;;) {
 133                 char **maxargv = gargv;
 134 
 135                 myargv = gargv;
 136                 for (myargc = 0; gargv[myargc]; myargc++)
 137                         /* nop */;
 138                 start = fmt;
 139                 while (fmt < format + len) {
 140                         if (fmt[0] == '%') {
 141                                 (void) fwrite(start, 1, PTRDIFF(fmt, start),
 142                                     stdout);
 143                                 if (fmt[1] == '%') {
 144                                         /* %% prints a % */
 145                                         (void) putchar('%');
 146                                         fmt += 2;
 147                                 } else {
 148                                         fmt = doformat(fmt, &rval);
 149                                         if (fmt == NULL)
 150                                                 return (1);
 151                                         end = 0;
 152                                 }
 153                                 start = fmt;
 154                         } else
 155                                 fmt++;
 156                         if (gargv > maxargv)
 157                                 maxargv = gargv;
 158                 }
 159                 gargv = maxargv;
 160 
 161                 if (end == 1) {
 162                         warnx1(_("missing format character"), NULL, NULL);
 163                         return (1);
 164                 }
 165                 (void) fwrite(start, 1, PTRDIFF(fmt, start), stdout);
 166                 if (chopped || !*gargv)
 167                         return (rval);
 168                 /* Restart at the beginning of the format string. */
 169                 fmt = format;
 170                 end = 1;
 171         }
 172         /* NOTREACHED */
 173 }
 174 
 175 
 176 static char *
 177 doformat(char *start, int *rval)
 178 {
 179         static const char skip1[] = "#'-+ 0";
 180         static const char skip2[] = "0123456789";
 181         char *fmt;
 182         int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
 183         char convch, nextch;
 184 
 185         fmt = start + 1;
 186 
 187         /* look for "n$" field index specifier */
 188         fmt += strspn(fmt, skip2);
 189         if ((*fmt == '$') && (fmt != (start + 1))) {
 190                 int idx = atoi(start + 1);
 191                 if (idx <= myargc) {
 192                         gargv = &myargv[idx - 1];
 193                 } else {
 194                         gargv = &myargv[myargc];
 195                 }
 196                 start = fmt;
 197                 fmt++;
 198         } else {
 199                 fmt = start + 1;
 200         }
 201 
 202         /* skip to field width */
 203         fmt += strspn(fmt, skip1);
 204         if (*fmt == '*') {
 205                 if (getint(&fieldwidth))
 206                         return (NULL);
 207                 havewidth = 1;
 208                 ++fmt;
 209         } else {
 210                 havewidth = 0;
 211 
 212                 /* skip to possible '.', get following precision */
 213                 fmt += strspn(fmt, skip2);
 214         }
 215         if (*fmt == '.') {
 216                 /* precision present? */
 217                 ++fmt;
 218                 if (*fmt == '*') {
 219                         if (getint(&precision))
 220                                 return (NULL);
 221                         haveprec = 1;
 222                         ++fmt;
 223                 } else {
 224                         haveprec = 0;
 225 
 226                         /* skip to conversion char */
 227                         fmt += strspn(fmt, skip2);
 228                 }
 229         } else
 230                 haveprec = 0;
 231         if (!*fmt) {
 232                 warnx1(_("missing format character"), NULL, NULL);
 233                 return (NULL);
 234         }
 235 
 236         /*
 237          * Look for a length modifier.  POSIX doesn't have these, so
 238          * we only support them for floating-point conversions, which
 239          * are extensions.  This is useful because the L modifier can
 240          * be used to gain extra range and precision, while omitting
 241          * it is more likely to produce consistent results on different
 242          * architectures.  This is not so important for integers
 243          * because overflow is the only bad thing that can happen to
 244          * them, but consider the command  printf %a 1.1
 245          */
 246         if (*fmt == 'L') {
 247                 mod_ldbl = 1;
 248                 fmt++;
 249                 if (!strchr("aAeEfFgG", *fmt)) {
 250                         warnx2(_("bad modifier L for %%%c"), *fmt, NULL);
 251                         return (NULL);
 252                 }
 253         } else {
 254                 mod_ldbl = 0;
 255         }
 256 
 257         convch = *fmt;
 258         nextch = *++fmt;
 259         *fmt = '\0';
 260         switch (convch) {
 261         case 'b': {
 262                 size_t len;
 263                 char *p;
 264                 int getout;
 265 
 266                 p = strdup(getstr());
 267                 if (p == NULL) {
 268                         warnx2("%s", strerror(ENOMEM), NULL);
 269                         return (NULL);
 270                 }
 271                 getout = escape(p, 0, &len);
 272                 *(fmt - 1) = 's';
 273                 PF(start, p);
 274                 *(fmt - 1) = 'b';
 275                 free(p);
 276 
 277                 if (getout)
 278                         return (fmt);
 279                 break;
 280         }
 281         case 'c': {
 282                 char p;
 283 
 284                 p = getchr();
 285                 PF(start, p);
 286                 break;
 287         }
 288         case 's': {
 289                 const char *p;
 290 
 291                 p = getstr();
 292                 PF(start, p);
 293                 break;
 294         }
 295         case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
 296                 char *f;
 297                 intmax_t val;
 298                 uintmax_t uval;
 299                 int signedconv;
 300 
 301                 signedconv = (convch == 'd' || convch == 'i');
 302                 if ((f = mknum(start, convch)) == NULL)
 303                         return (NULL);
 304                 if (getnum(&val, &uval, signedconv))
 305                         *rval = 1;
 306                 if (signedconv)
 307                         PF(f, val);
 308                 else
 309                         PF(f, uval);
 310                 break;
 311         }
 312         case 'e': case 'E':
 313         case 'f': case 'F':
 314         case 'g': case 'G':
 315         case 'a': case 'A': {
 316                 long double p;
 317 
 318                 if (getfloating(&p, mod_ldbl))
 319                         *rval = 1;
 320                 if (mod_ldbl)
 321                         PF(start, p);
 322                 else
 323                         PF(start, (double)p);
 324                 break;
 325         }
 326         default:
 327                 warnx2(_("illegal format character %c"), convch, NULL);
 328                 return (NULL);
 329         }
 330         *fmt = nextch;
 331         return (fmt);
 332 }
 333 
 334 static char *
 335 mknum(char *str, char ch)
 336 {
 337         static char *copy;
 338         static size_t copy_size;
 339         char *newcopy;
 340         size_t len, newlen;
 341 
 342         len = strlen(str) + 2;
 343         if (len > copy_size) {
 344                 newlen = ((len + 1023) >> 10) << 10;
 345                 if ((newcopy = realloc(copy, newlen)) == NULL) {
 346                         warnx2("%s", strerror(ENOMEM), NULL);
 347                         return (NULL);
 348                 }
 349                 copy = newcopy;
 350                 copy_size = newlen;
 351         }
 352 
 353         (void) memmove(copy, str, len - 3);
 354         copy[len - 3] = 'j';
 355         copy[len - 2] = ch;
 356         copy[len - 1] = '\0';
 357         return (copy);
 358 }
 359 
 360 static int
 361 escape(char *fmt, int percent, size_t *len)
 362 {
 363         char *save, *store, c;
 364         int value;
 365 
 366         for (save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store) {
 367                 if (c != '\\') {
 368                         *store = c;
 369                         continue;
 370                 }
 371                 switch (*++fmt) {
 372                 case '\0':              /* EOS, user error */
 373                         *store = '\\';
 374                         *++store = '\0';
 375                         *len = PTRDIFF(store, save);
 376                         return (0);
 377                 case '\\':              /* backslash */
 378                 case '\'':              /* single quote */
 379                         *store = *fmt;
 380                         break;
 381                 case 'a':               /* bell/alert */
 382                         *store = '\a';
 383                         break;
 384                 case 'b':               /* backspace */
 385                         *store = '\b';
 386                         break;
 387                 case 'c':
 388                         *store = '\0';
 389                         *len = PTRDIFF(store, save);
 390                         return (1);
 391                 case 'f':               /* form-feed */
 392                         *store = '\f';
 393                         break;
 394                 case 'n':               /* newline */
 395                         *store = '\n';
 396                         break;
 397                 case 'r':               /* carriage-return */
 398                         *store = '\r';
 399                         break;
 400                 case 't':               /* horizontal tab */
 401                         *store = '\t';
 402                         break;
 403                 case 'v':               /* vertical tab */
 404                         *store = '\v';
 405                         break;
 406                                         /* octal constant */
 407                 case '0': case '1': case '2': case '3':
 408                 case '4': case '5': case '6': case '7':
 409                         c = (!percent && *fmt == '0') ? 4 : 3;
 410                         for (value = 0;
 411                             c-- && *fmt >= '0' && *fmt <= '7'; ++fmt) {
 412                                 value <<= 3;
 413                                 value += *fmt - '0';
 414                         }
 415                         --fmt;
 416                         if (percent && value == '%') {
 417                                 *store++ = '%';
 418                                 *store = '%';
 419                         } else
 420                                 *store = (char)value;
 421                         break;
 422                 default:
 423                         *store = *fmt;
 424                         break;
 425                 }
 426         }
 427         *store = '\0';
 428         *len = PTRDIFF(store, save);
 429         return (0);
 430 }
 431 
 432 static int
 433 getchr(void)
 434 {
 435         if (!*gargv)
 436                 return ('\0');
 437         return ((int)**gargv++);
 438 }
 439 
 440 static const char *
 441 getstr(void)
 442 {
 443         if (!*gargv)
 444                 return ("");
 445         return (*gargv++);
 446 }
 447 
 448 static int
 449 getint(int *ip)
 450 {
 451         intmax_t val;
 452         uintmax_t uval;
 453         int rval;
 454 
 455         if (getnum(&val, &uval, 1))
 456                 return (1);
 457         rval = 0;
 458         if (val < INT_MIN || val > INT_MAX) {
 459                 warnx3("%s: %s", *gargv, strerror(ERANGE));
 460                 rval = 1;
 461         }
 462         *ip = (int)val;
 463         return (rval);
 464 }
 465 
 466 static int
 467 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
 468 {
 469         char *ep;
 470         int rval;
 471 
 472         if (!*gargv) {
 473                 *ip = 0;
 474                 return (0);
 475         }
 476         if (**gargv == '"' || **gargv == '\'') {
 477                 if (signedconv)
 478                         *ip = asciicode();
 479                 else
 480                         *uip = asciicode();
 481                 return (0);
 482         }
 483         rval = 0;
 484         errno = 0;
 485         if (signedconv)
 486                 *ip = strtoimax(*gargv, &ep, 0);
 487         else
 488                 *uip = strtoumax(*gargv, &ep, 0);
 489         if (ep == *gargv) {
 490                 warnx2(_("%s: expected numeric value"), *gargv, NULL);
 491                 rval = 1;
 492         } else if (*ep != '\0') {
 493                 warnx2(_("%s: not completely converted"), *gargv, NULL);
 494                 rval = 1;
 495         }
 496         if (errno == ERANGE) {
 497                 warnx3("%s: %s", *gargv, strerror(ERANGE));
 498                 rval = 1;
 499         }
 500         ++gargv;
 501         return (rval);
 502 }
 503 
 504 static int
 505 getfloating(long double *dp, int mod_ldbl)
 506 {
 507         char *ep;
 508         int rval;
 509 
 510         if (!*gargv) {
 511                 *dp = 0.0;
 512                 return (0);
 513         }
 514         if (**gargv == '"' || **gargv == '\'') {
 515                 *dp = asciicode();
 516                 return (0);
 517         }
 518         rval = 0;
 519         errno = 0;
 520         if (mod_ldbl)
 521                 *dp = strtold(*gargv, &ep);
 522         else
 523                 *dp = strtod(*gargv, &ep);
 524         if (ep == *gargv) {
 525                 warnx2(_("%s: expected numeric value"), *gargv, NULL);
 526                 rval = 1;
 527         } else if (*ep != '\0') {
 528                 warnx2(_("%s: not completely converted"), *gargv, NULL);
 529                 rval = 1;
 530         }
 531         if (errno == ERANGE) {
 532                 warnx3("%s: %s", *gargv, strerror(ERANGE));
 533                 rval = 1;
 534         }
 535         ++gargv;
 536         return (rval);
 537 }
 538 
 539 static int
 540 asciicode(void)
 541 {
 542         int ch;
 543 
 544         ch = **gargv;
 545         if (ch == '\'' || ch == '"')
 546                 ch = (*gargv)[1];
 547         ++gargv;
 548         return (ch);
 549 }
 550 
 551 static void
 552 usage(void)
 553 {
 554         (void) fprintf(stderr, _("usage: printf format [arguments ...]\n"));
 555 }