1 /*
2 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
3 * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/types.h>
33
34 #include <err.h>
35 #include <errno.h>
36 #include <inttypes.h>
37 #include <limits.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 #include <alloca.h>
43 #include <ctype.h>
44 #include <locale.h>
45 #include <note.h>
46
/*
 * Fixed-arity wrappers around warnx().  Every wrapper takes three
 * arguments; warnxN forwards only the first N of them and drops the rest.
 */
#define	warnx1(a, b, c)		warnx(a)
#define	warnx2(a, b, c)		warnx(a, b)
#define	warnx3(a, b, c)		warnx(a, b, c)

/* Distance from y to x, computed on the pointers' integer values. */
#define	PTRDIFF(x, y)	((uintptr_t)(x) - (uintptr_t)(y))

/* Shorthand for looking up the translation of a message via gettext(). */
#define	_(x)	gettext(x)
54
/*
 * PF(f, func): format the single value `func' according to the format
 * string `f' with asprintf() and write the resulting text to stdout.
 *
 * The macro reads havewidth, haveprec, fieldwidth and precision from the
 * enclosing scope: fieldwidth and/or precision are passed ahead of the
 * value only when the matching have* flag is set (i.e. when `f' contains
 * a '*' for that position).  If asprintf() leaves the buffer pointer
 * NULL, nothing is written.
 */
#define	PF(f, func) do {						\
	char *pf_text_ = NULL;						\
	if (havewidth && haveprec) {					\
		(void) asprintf(&pf_text_, f, fieldwidth, precision,	\
		    func);						\
	} else if (havewidth) {						\
		(void) asprintf(&pf_text_, f, fieldwidth, func);	\
	} else if (haveprec) {						\
		(void) asprintf(&pf_text_, f, precision, func);		\
	} else {							\
		(void) asprintf(&pf_text_, f, func);			\
	}								\
	if (pf_text_ != NULL) {						\
		(void) fputs(pf_text_, stdout);				\
		free(pf_text_);						\
	}								\
_NOTE(CONSTCOND) } while (0)
71
/* Forward declarations for the file-local helpers defined below. */
static int	asciicode(void);
static char	*doformat(char *, int *);
static int	escape(char *, int, size_t *);
static int	getchr(void);
static int	getfloating(long double *, int);
static int	getint(int *);
static int	getnum(intmax_t *, uintmax_t *, int);
static const char
		*getstr(void);
static char	*mknum(char *, char);
static void	usage(void);

/* Character set used with strspn() to measure a run of decimal digits. */
static const char digits[] = "0123456789";

/*
 * Operand cursor state shared by main(), doformat() and the get*()
 * helpers.  main() resets myargv/myargc/maxargv at the start of every
 * pass over the format string.
 */
static int	myargc;		/* operands available to the current pass */
static char	**myargv;	/* first operand of the current pass */
static char	**gargv;	/* next operand to be consumed */
static char	**maxargv;	/* furthest gargv position seen this pass */
90
91 int
92 main(int argc, char *argv[])
93 {
94 size_t len;
95 int chopped, end, rval;
96 char *format, *fmt, *start;
97
98 (void) setlocale(LC_ALL, "");
99
100 argv++;
101 argc--;
102
103 /*
104 * POSIX says: Standard utilities that do not accept options,
105 * but that do accept operands, shall recognize "--" as a
106 * first argument to be discarded.
107 */
108 if (argc && strcmp(argv[0], "--") == 0) {
109 argc--;
110 argv++;
111 }
112
113 if (argc < 1) {
114 usage();
115 return (1);
116 }
117
118 /*
119 * Basic algorithm is to scan the format string for conversion
120 * specifications -- once one is found, find out if the field
121 * width or precision is a '*'; if it is, gather up value. Note,
122 * format strings are reused as necessary to use up the provided
123 * arguments, arguments of zero/null string are provided to use
124 * up the format string.
125 */
126 fmt = format = *argv;
127 chopped = escape(fmt, 1, &len); /* backslash interpretation */
128 rval = end = 0;
129 gargv = ++argv;
130
131 for (;;) {
132 maxargv = gargv;
133
134 myargv = gargv;
135 for (myargc = 0; gargv[myargc]; myargc++)
136 /* nop */;
137 start = fmt;
138 while (fmt < format + len) {
139 if (fmt[0] == '%') {
140 (void) fwrite(start, 1, PTRDIFF(fmt, start),
141 stdout);
142 if (fmt[1] == '%') {
143 /* %% prints a % */
144 (void) putchar('%');
145 fmt += 2;
146 } else {
147 fmt = doformat(fmt, &rval);
148 if (fmt == NULL)
149 return (1);
150 end = 0;
151 }
152 start = fmt;
153 } else
154 fmt++;
155 if (gargv > maxargv)
156 maxargv = gargv;
157 }
158 gargv = maxargv;
159
160 if (end == 1) {
161 warnx1(_("missing format character"), NULL, NULL);
162 return (1);
163 }
164 (void) fwrite(start, 1, PTRDIFF(fmt, start), stdout);
165 if (chopped || !*gargv)
166 return (rval);
167 /* Restart at the beginning of the format string. */
168 fmt = format;
169 end = 1;
170 }
171 /* NOTREACHED */
172 }
173
174
175 static char *
176 doformat(char *fmt, int *rval)
177 {
178 static const char skip1[] = "#'-+ 0";
179 int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
180 char convch, nextch;
181 char *start;
182 char **fargv;
183 char *dptr;
184 int l;
185
186 start = alloca(strlen(fmt) + 1);
187
188 dptr = start;
189 *dptr++ = '%';
190 *dptr = 0;
191
192 fmt++;
193
194 /* look for "n$" field index specifier */
195 l = strspn(fmt, digits);
196 if ((l > 0) && (fmt[l] == '$')) {
197 int idx = atoi(fmt);
198 if (idx <= myargc) {
199 gargv = &myargv[idx - 1];
200 } else {
201 gargv = &myargv[myargc];
202 }
203 if (gargv > maxargv) {
204 maxargv = gargv;
205 }
206 fmt += l + 1;
207
208 /* save format argument */
209 fargv = gargv;
210 } else {
211 fargv = NULL;
212 }
213
214 /* skip to field width */
215 while (strchr(skip1, *fmt) != NULL) {
216 *dptr++ = *fmt++;
217 *dptr = 0;
218 }
219
220
221 if (*fmt == '*') {
222
223 fmt++;
224 l = strspn(fmt, digits);
225 if ((l > 0) && (fmt[l] == '$')) {
226 int idx = atoi(fmt);
227 if (idx <= myargc) {
228 gargv = &myargv[idx - 1];
229 } else {
230 gargv = &myargv[myargc];
231 }
232 fmt += l + 1;
233 }
234
235 if (getint(&fieldwidth))
236 return (NULL);
237 if (gargv > maxargv) {
238 maxargv = gargv;
239 }
240 havewidth = 1;
241
242 *dptr++ = '*';
243 *dptr = 0;
244 } else {
245 havewidth = 0;
246
247 /* skip to possible '.', get following precision */
248 while (isdigit(*fmt)) {
249 *dptr++ = *fmt++;
250 *dptr = 0;
251 }
252 }
253
254 if (*fmt == '.') {
255 /* precision present? */
256 fmt++;
257 *dptr++ = '.';
258
259 if (*fmt == '*') {
260
261 fmt++;
262 l = strspn(fmt, digits);
263 if ((l > 0) && (fmt[l] == '$')) {
264 int idx = atoi(fmt);
265 if (idx <= myargc) {
266 gargv = &myargv[idx - 1];
267 } else {
268 gargv = &myargv[myargc];
269 }
270 fmt += l + 1;
271 }
272
273 if (getint(&precision))
274 return (NULL);
275 if (gargv > maxargv) {
276 maxargv = gargv;
277 }
278 haveprec = 1;
279 *dptr++ = '*';
280 *dptr = 0;
281 } else {
282 haveprec = 0;
283
284 /* skip to conversion char */
285 while (isdigit(*fmt)) {
286 *dptr++ = *fmt++;
287 *dptr = 0;
288 }
289 }
290 } else
291 haveprec = 0;
292 if (!*fmt) {
293 warnx1(_("missing format character"), NULL, NULL);
294 return (NULL);
295 }
296 *dptr++ = *fmt;
297 *dptr = 0;
298
299 /*
300 * Look for a length modifier. POSIX doesn't have these, so
301 * we only support them for floating-point conversions, which
302 * are extensions. This is useful because the L modifier can
303 * be used to gain extra range and precision, while omitting
304 * it is more likely to produce consistent results on different
305 * architectures. This is not so important for integers
306 * because overflow is the only bad thing that can happen to
307 * them, but consider the command printf %a 1.1
308 */
309 if (*fmt == 'L') {
310 mod_ldbl = 1;
311 fmt++;
312 if (!strchr("aAeEfFgG", *fmt)) {
313 warnx2(_("bad modifier L for %%%c"), *fmt, NULL);
314 return (NULL);
315 }
316 } else {
317 mod_ldbl = 0;
318 }
319
320 /* save the current arg offset, and set to the format arg */
321 if (fargv != NULL) {
322 gargv = fargv;
323 }
324
325 convch = *fmt;
326 nextch = *++fmt;
327
328 *fmt = '\0';
329 switch (convch) {
330 case 'b': {
331 size_t len;
332 char *p;
333 int getout;
334
335 p = strdup(getstr());
336 if (p == NULL) {
337 warnx2("%s", strerror(ENOMEM), NULL);
338 return (NULL);
339 }
340 getout = escape(p, 0, &len);
341 *(fmt - 1) = 's';
342 PF(start, p);
343 *(fmt - 1) = 'b';
344 free(p);
345
346 if (getout)
347 return (fmt);
348 break;
349 }
350 case 'c': {
351 char p;
352
353 p = getchr();
354 PF(start, p);
355 break;
356 }
357 case 's': {
358 const char *p;
359
360 p = getstr();
361 PF(start, p);
362 break;
363 }
364 case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': {
365 char *f;
366 intmax_t val;
367 uintmax_t uval;
368 int signedconv;
369
370 signedconv = (convch == 'd' || convch == 'i');
371 if ((f = mknum(start, convch)) == NULL)
372 return (NULL);
373 if (getnum(&val, &uval, signedconv))
374 *rval = 1;
375 if (signedconv)
376 PF(f, val);
377 else
378 PF(f, uval);
379 break;
380 }
381 case 'e': case 'E':
382 case 'f': case 'F':
383 case 'g': case 'G':
384 case 'a': case 'A': {
385 long double p;
386
387 if (getfloating(&p, mod_ldbl))
388 *rval = 1;
389 if (mod_ldbl)
390 PF(start, p);
391 else
392 PF(start, (double)p);
393 break;
394 }
395 default:
396 warnx2(_("illegal format character %c"), convch, NULL);
397 return (NULL);
398 }
399 *fmt = nextch;
400
401 /* return the gargv to the next element */
402 return (fmt);
403 }
404
/*
 * Build an integer conversion specification that takes an (u)intmax_t
 * argument: the result is str with its final character replaced by 'j'
 * followed by ch (e.g. "%5d" with ch 'd' becomes "%5jd").
 *
 * str must hold at least one character.  The result lives in a static
 * buffer that is reused (and grown in 1 KiB steps) by later calls, so
 * it is only valid until the next call.  Returns NULL after a warning
 * if the buffer cannot be grown.
 */
static char *
mknum(char *str, char ch)
{
	static char *copy;
	static size_t copy_size;
	size_t need, keep;

	/* Room for str, one extra character ('j') and the terminator. */
	need = strlen(str) + 2;
	if (need > copy_size) {
		size_t rounded = (need + 1023) & ~(size_t)1023;
		char *grown = realloc(copy, rounded);

		if (grown == NULL) {
			warnx("%s", strerror(ENOMEM));
			return (NULL);
		}
		copy = grown;
		copy_size = rounded;
	}

	/* Everything except the original conversion character. */
	keep = need - 3;
	(void) memmove(copy, str, keep);
	copy[keep] = 'j';
	copy[keep + 1] = ch;
	copy[keep + 2] = '\0';
	return (copy);
}
430
/*
 * Expand backslash escape sequences in fmt, in place.
 *
 * fmt     - NUL-terminated string to rewrite.
 * percent - non-zero when fmt is the format operand: octal escapes are
 *           then at most three digits long and one that yields '%' is
 *           stored as "%%".  When zero, an octal escape that starts
 *           with '0' may be four digits long.
 * len     - receives the number of bytes stored (which may include NUL
 *           bytes produced by octal escapes).
 *
 * Returns 1 if \c was found, in which case the string is cut at that
 * point, and 0 otherwise.  A lone trailing backslash is kept as is; an
 * unrecognised escape yields the escaped character itself.
 */
static int
escape(char *fmt, int percent, size_t *len)
{
	static const char names[] = "abfnrtv";
	static const char bytes[] = "\a\b\f\n\r\t\v";
	char *const base = fmt;
	char *in = fmt;
	char *out = fmt;

	while (*in != '\0') {
		const char *hit;

		if (*in != '\\') {
			*out++ = *in++;
			continue;
		}

		in++;			/* the character after the backslash */

		if (*in == '\0') {
			/* EOS, user error: keep the backslash */
			*out++ = '\\';
			*out = '\0';
			*len = (size_t)(out - base);
			return (0);
		}

		if (*in == 'c') {
			*out = '\0';
			*len = (size_t)(out - base);
			return (1);
		}

		if (*in >= '0' && *in <= '7') {
			/* octal constant */
			int budget = (!percent && *in == '0') ? 4 : 3;
			int value = 0;

			while (budget-- > 0 && *in >= '0' && *in <= '7') {
				value = (value << 3) + (*in - '0');
				in++;
			}
			in--;		/* back onto the last digit used */

			if (percent && value == '%') {
				*out++ = '%';
				*out = '%';
			} else {
				*out = (char)value;
			}
		} else if ((hit = strchr(names, *in)) != NULL) {
			/* alert, backspace, form-feed, newline, CR, tab, VT */
			*out = bytes[hit - names];
		} else {
			/* backslash, single quote, or anything unrecognised */
			*out = *in;
		}

		in++;
		out++;
	}

	*out = '\0';
	*len = (size_t)(out - base);
	return (0);
}
502
503 static int
504 getchr(void)
505 {
506 if (!*gargv)
507 return ('\0');
508 return ((int)**gargv++);
509 }
510
511 static const char *
512 getstr(void)
513 {
514 if (!*gargv)
515 return ("");
516 return (*gargv++);
517 }
518
519 static int
520 getint(int *ip)
521 {
522 intmax_t val;
523 uintmax_t uval;
524 int rval;
525
526 if (getnum(&val, &uval, 1))
527 return (1);
528 rval = 0;
529 if (val < INT_MIN || val > INT_MAX) {
530 warnx3("%s: %s", *gargv, strerror(ERANGE));
531 rval = 1;
532 }
533 *ip = (int)val;
534 return (rval);
535 }
536
537 static int
538 getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
539 {
540 char *ep;
541 int rval;
542
543 if (!*gargv) {
544 *ip = 0;
545 return (0);
546 }
547 if (**gargv == '"' || **gargv == '\'') {
548 if (signedconv)
549 *ip = asciicode();
550 else
551 *uip = asciicode();
552 return (0);
553 }
554 rval = 0;
555 errno = 0;
556 if (signedconv)
557 *ip = strtoimax(*gargv, &ep, 0);
558 else
559 *uip = strtoumax(*gargv, &ep, 0);
560 if (ep == *gargv) {
561 warnx2(_("%s: expected numeric value"), *gargv, NULL);
562 rval = 1;
563 } else if (*ep != '\0') {
564 warnx2(_("%s: not completely converted"), *gargv, NULL);
565 rval = 1;
566 }
567 if (errno == ERANGE) {
568 warnx3("%s: %s", *gargv, strerror(ERANGE));
569 rval = 1;
570 }
571 ++gargv;
572 return (rval);
573 }
574
575 static int
576 getfloating(long double *dp, int mod_ldbl)
577 {
578 char *ep;
579 int rval;
580
581 if (!*gargv) {
582 *dp = 0.0;
583 return (0);
584 }
585 if (**gargv == '"' || **gargv == '\'') {
586 *dp = asciicode();
587 return (0);
588 }
589 rval = 0;
590 errno = 0;
591 if (mod_ldbl)
592 *dp = strtold(*gargv, &ep);
593 else
594 *dp = strtod(*gargv, &ep);
595 if (ep == *gargv) {
596 warnx2(_("%s: expected numeric value"), *gargv, NULL);
597 rval = 1;
598 } else if (*ep != '\0') {
599 warnx2(_("%s: not completely converted"), *gargv, NULL);
600 rval = 1;
601 }
602 if (errno == ERANGE) {
603 warnx3("%s: %s", *gargv, strerror(ERANGE));
604 rval = 1;
605 }
606 ++gargv;
607 return (rval);
608 }
609
610 static int
611 asciicode(void)
612 {
613 int ch;
614
615 ch = **gargv;
616 if (ch == '\'' || ch == '"')
617 ch = (*gargv)[1];
618 ++gargv;
619 return (ch);
620 }
621
/*
 * Write the usage synopsis to stderr.
 *
 * The translated message is written with fputs() rather than being
 * used as a printf-style format, so a '%' in a message catalog entry
 * cannot be interpreted as a conversion.
 */
static void
usage(void)
{
	(void) fputs(_("usage: printf format [arguments ...]\n"), stderr);
}