Print this page
9718 update mandoc to 1.14.4
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/mandoc/mandoc.c
+++ new/usr/src/cmd/mandoc/mandoc.c
1 -/* $Id: mandoc.c,v 1.103 2017/07/03 13:40:19 schwarze Exp $ */
1 +/* $Id: mandoc.c,v 1.104 2018/07/28 18:34:15 schwarze Exp $ */
2 2 /*
3 3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 - * Copyright (c) 2011-2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
4 + * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5 5 *
6 6 * Permission to use, copy, modify, and distribute this software for any
7 7 * purpose with or without fee is hereby granted, provided that the above
8 8 * copyright notice and this permission notice appear in all copies.
9 9 *
10 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 17 */
18 18 #include "config.h"
19 19
20 20 #include <sys/types.h>
21 21
22 22 #include <assert.h>
23 23 #include <ctype.h>
24 24 #include <errno.h>
25 25 #include <limits.h>
26 26 #include <stdlib.h>
27 27 #include <stdio.h>
28 28 #include <string.h>
29 29 #include <time.h>
30 30
31 31 #include "mandoc_aux.h"
32 32 #include "mandoc.h"
33 33 #include "roff.h"
34 34 #include "libmandoc.h"
35 35
36 36 static int a2time(time_t *, const char *, const char *);
37 37 static char *time2a(time_t);
38 38
39 39
/*
 * Parse a single roff escape sequence and classify it.
 *
 * On entry, *end points at the first character after the backslash.
 * For sequences that parse successfully, *end is advanced past the
 * whole sequence, and *start / *sz describe the argument substring
 * (glyph name, font name, number, ...).  Either of start and sz may
 * be NULL, in which case local scratch storage is used instead.
 *
 * Three argument shapes are handled by the common tail of the function:
 * a fixed length (*sz preset, term == '\0'), or a terminated string
 * (term set to the closing delimiter), inside which nested escapes
 * are parsed recursively.
 *
 * Returns the escape class, or ESCAPE_ERROR if the sequence is
 * malformed or runs into the end of the input.
 */
40 40 enum mandoc_esc
41 41 mandoc_escape(const char **end, const char **start, int *sz)
42 42 {
43 43 const char *local_start;
44 44 int local_sz;
45 45 char term;
46 46 enum mandoc_esc gly;
47 47
48 48 /*
49 49 * When the caller doesn't provide return storage,
50 50 * use local storage.
51 51 */
52 52
53 53 if (NULL == start)
54 54 start = &local_start;
55 55 if (NULL == sz)
56 56 sz = &local_sz;
57 57
58 58 /*
59 59 * Beyond the backslash, at least one input character
60 60 * is part of the escape sequence. With one exception
61 61 * (see below), that character won't be returned.
62 62 */
63 63
64 64 gly = ESCAPE_ERROR;
65 65 *start = ++*end;
66 66 *sz = 0;
67 67 term = '\0';
68 68
/* (*start)[-1] is the escape name: the character right after the backslash. */
69 69 switch ((*start)[-1]) {
70 70 /*
71 71 * First the glyphs. There are several different forms of
72 72 * these, but each eventually returns a substring of the glyph
73 73 * name.
74 74 */
75 75 case '(':
76 76 gly = ESCAPE_SPECIAL;
77 77 *sz = 2;
78 78 break;
79 79 case '[':
80 80 gly = ESCAPE_SPECIAL;
81 81 term = ']';
82 82 break;
83 83 case 'C':
84 84 if ('\'' != **start)
85 85 return ESCAPE_ERROR;
86 86 *start = ++*end;
87 87 gly = ESCAPE_SPECIAL;
88 88 term = '\'';
89 89 break;
90 90
91 91 /*
92 92 * Escapes taking no arguments at all.
93 93 */
94 94 case 'd':
95 95 case 'u':
96 96 case ',':
97 97 case '/':
98 98 return ESCAPE_IGNORE;
99 99 case 'p':
100 100 return ESCAPE_BREAK;
101 101
102 102 /*
103 103 * The \z escape is supposed to output the following
104 104 * character without advancing the cursor position.
105 105 * Since we are mostly dealing with terminal mode,
106 106 * let us just skip the next character.
107 107 */
108 108 case 'z':
109 109 return ESCAPE_SKIPCHAR;
110 110
111 111 /*
112 112 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
113 113 * 'X' is the trigger. These have opaque sub-strings.
114 114 */
115 115 case 'F':
116 116 case 'g':
117 117 case 'k':
118 118 case 'M':
119 119 case 'm':
120 120 case 'n':
121 121 case 'V':
122 122 case 'Y':
123 123 gly = ESCAPE_IGNORE;
124 124 /* FALLTHROUGH */
125 125 case 'f':
/* gly is still ESCAPE_ERROR only when we arrived here directly via 'f'. */
126 126 if (ESCAPE_ERROR == gly)
127 127 gly = ESCAPE_FONT;
128 128 switch (**start) {
129 129 case '(':
130 130 *start = ++*end;
131 131 *sz = 2;
132 132 break;
133 133 case '[':
134 134 *start = ++*end;
135 135 term = ']';
136 136 break;
137 137 default:
138 138 *sz = 1;
139 139 break;
140 140 }
141 141 break;
142 142
143 143 /*
144 144 * These escapes are of the form \X'Y', where 'X' is the trigger
145 145 * and 'Y' is any string. These have opaque sub-strings.
146 146 * The \B and \w escapes are handled in roff.c, roff_res().
147 147 */
148 148 case 'A':
149 149 case 'b':
150 150 case 'D':
151 151 case 'R':
152 152 case 'X':
153 153 case 'Z':
154 154 gly = ESCAPE_IGNORE;
155 155 /* FALLTHROUGH */
156 156 case 'o':
157 157 if (**start == '\0')
158 158 return ESCAPE_ERROR;
159 159 if (gly == ESCAPE_ERROR)
160 160 gly = ESCAPE_OVERSTRIKE;
/* Whatever character opens the argument also has to close it. */
161 161 term = **start;
162 162 *start = ++*end;
163 163 break;
164 164
165 165 /*
166 166 * These escapes are of the form \X'N', where 'X' is the trigger
167 167 * and 'N' resolves to a numerical expression.
168 168 */
169 169 case 'h':
170 170 case 'H':
171 171 case 'L':
172 172 case 'l':
173 173 case 'S':
174 174 case 'v':
175 175 case 'x':
/*
 * Reject delimiters that could be part of a numerical expression.
 * strchr() also matches the terminating NUL of its first argument,
 * so input ending right after the trigger lands here as well; the
 * inner test then avoids stepping past that NUL.
 */
176 176 if (strchr(" %&()*+-./0123456789:<=>", **start)) {
177 177 if ('\0' != **start)
178 178 ++*end;
179 179 return ESCAPE_ERROR;
180 180 }
181 181 switch ((*start)[-1]) {
182 182 case 'h':
183 183 gly = ESCAPE_HORIZ;
184 184 break;
185 185 case 'l':
186 186 gly = ESCAPE_HLINE;
187 187 break;
188 188 default:
189 189 gly = ESCAPE_IGNORE;
190 190 break;
191 191 }
192 192 term = **start;
193 193 *start = ++*end;
194 194 break;
195 195
196 196 /*
197 197 * Special handling for the numbered character escape.
198 198 * XXX Do any other escapes need similar handling?
199 199 */
200 200 case 'N':
201 201 if ('\0' == **start)
202 202 return ESCAPE_ERROR;
203 203 (*end)++;
/* A digit in the delimiter position: consume just that one character. */
204 204 if (isdigit((unsigned char)**start)) {
205 205 *sz = 1;
206 206 return ESCAPE_IGNORE;
207 207 }
/* Otherwise skip the opening delimiter and collect the run of digits. */
208 208 (*start)++;
209 209 while (isdigit((unsigned char)**end))
210 210 (*end)++;
211 211 *sz = *end - *start;
/* Step over the character following the digits unless it is the NUL. */
212 212 if ('\0' != **end)
213 213 (*end)++;
214 214 return ESCAPE_NUMBERED;
215 215
216 216 /*
217 217 * Sizes get a special category of their own.
218 218 */
219 219 case 's':
220 220 gly = ESCAPE_IGNORE;
221 221
222 222 /* See +/- counts as a sign. */
223 223 if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
224 224 *start = ++*end;
225 225
226 226 switch (**end) {
227 227 case '(':
228 228 *start = ++*end;
229 229 *sz = 2;
230 230 break;
231 231 case '[':
232 232 *start = ++*end;
233 233 term = ']';
234 234 break;
235 235 case '\'':
236 236 *start = ++*end;
237 237 term = '\'';
238 238 break;
239 239 case '3':
240 240 case '2':
241 241 case '1':
/*
 * Take two digits only when no sign was consumed (the previous
 * character is still the 's') and a second digit follows.
 */
242 242 *sz = (*end)[-1] == 's' &&
243 243 isdigit((unsigned char)(*end)[1]) ? 2 : 1;
244 244 break;
245 245 default:
246 246 *sz = 1;
247 247 break;
248 248 }
249 249
250 250 break;
251 251
252 252 /*
253 253 * Anything else is assumed to be a glyph.
254 254 * In this case, pass back the character after the backslash.
255 255 */
256 256 default:
257 257 gly = ESCAPE_SPECIAL;
/* Undo the pre-increment from above so *start is the character itself. */
258 258 *start = --*end;
259 259 *sz = 1;
260 260 break;
261 261 }
262 262
263 263 assert(ESCAPE_ERROR != gly);
264 264
265 265 /*
266 266 * Read up to the terminating character,
267 267 * paying attention to nested escapes.
268 268 */
269 269
270 270 if ('\0' != term) {
271 271 while (**end != term) {
272 272 switch (**end) {
273 273 case '\0':
274 274 return ESCAPE_ERROR;
275 275 case '\\':
276 276 (*end)++;
277 277 if (ESCAPE_ERROR ==
278 278 mandoc_escape(end, NULL, NULL))
279 279 return ESCAPE_ERROR;
280 280 break;
281 281 default:
282 282 (*end)++;
283 283 break;
284 284 }
285 285 }
/* Length excludes the terminator; *end is then moved past it. */
286 286 *sz = (*end)++ - *start;
287 287 } else {
288 288 assert(*sz > 0);
/* A fixed-length argument must fit into what is left of the input. */
289 289 if ((size_t)*sz > strlen(*start))
290 290 return ESCAPE_ERROR;
291 291 *end += *sz;
292 292 }
293 293
294 294 /* Run post-processors. */
295 295
296 296 switch (gly) {
297 297 case ESCAPE_FONT:
298 298 if (2 == *sz) {
299 299 if ('C' == **start) {
300 300 /*
301 301 * Treat constant-width font modes
302 302 * just like regular font modes.
303 303 */
304 304 (*start)++;
305 305 (*sz)--;
306 306 } else {
307 307 if ('B' == (*start)[0] && 'I' == (*start)[1])
308 308 gly = ESCAPE_FONTBI;
309 309 break;
310 310 }
311 311 } else if (1 != *sz)
312 312 break;
313 313
314 314 switch (**start) {
315 315 case '3':
316 316 case 'B':
317 317 gly = ESCAPE_FONTBOLD;
318 318 break;
319 319 case '2':
320 320 case 'I':
321 321 gly = ESCAPE_FONTITALIC;
322 322 break;
323 323 case 'P':
324 324 gly = ESCAPE_FONTPREV;
325 325 break;
326 326 case '1':
327 327 case 'R':
328 328 gly = ESCAPE_FONTROMAN;
329 329 break;
330 330 }
331 331 break;
332 332 case ESCAPE_SPECIAL:
333 333 if (1 == *sz && 'c' == **start)
334 334 gly = ESCAPE_NOSPACE;
335 335 /*
336 336 * Unicode escapes are defined in groff as \[u0000]
337 337 * to \[u10FFFF], where the contained value must be
338 338 * a valid Unicode codepoint. Here, however, only
339 339 * check the length and range.
340 340 */
341 341 if (**start != 'u' || *sz < 5 || *sz > 7)
342 342 break;
343 343 if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
344 344 break;
345 345 if (*sz == 6 && (*start)[1] == '0')
346 346 break;
347 347 if (*sz == 5 && (*start)[1] == 'D' &&
348 348 strchr("89ABCDEF", (*start)[2]) != NULL)
349 349 break;
350 350 if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
351 351 + 1 == *sz)
352 352 gly = ESCAPE_UNICODE;
353 353 break;
354 354 default:
355 355 break;
356 356 }
357 357
358 358 return gly;
359 359 }
360 360
361 361 /*
362 362 * Parse a quoted or unquoted roff-style request or macro argument.
363 363 * Return a pointer to the parsed argument, which is either the original
364 364 * pointer or advanced by one byte in case the argument is quoted.
365 365 * NUL-terminate the argument in place.
366 366 * Collapse pairs of quotes inside quoted arguments.
367 367 * Advance the argument pointer to the next argument,
368 368 * or to the NUL byte terminating the argument line.
369 369 */
370 370 char *
371 371 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
372 372 {
373 373 char *start, *cp;
374 374 int quoted, pairs, white;
375 375
376 376 /* Quoting can only start with a new word. */
377 377 start = *cpp;
378 378 quoted = 0;
379 379 if ('"' == *start) {
380 380 quoted = 1;
381 381 start++;
382 382 }
383 383
384 384 pairs = 0;
385 385 white = 0;
386 386 for (cp = start; '\0' != *cp; cp++) {
387 387
388 388 /*
389 389 * Move the following text left
390 390 * after quoted quotes and after "\\" and "\t".
391 391 */
392 392 if (pairs)
393 393 cp[-pairs] = cp[0];
394 394
395 395 if ('\\' == cp[0]) {
396 396 /*
397 397 * In copy mode, translate double to single
398 398 * backslashes and backslash-t to literal tabs.
399 399 */
400 400 switch (cp[1]) {
401 401 case 't':
402 402 cp[0] = '\t';
403 403 /* FALLTHROUGH */
404 404 case '\\':
405 405 pairs++;
406 406 cp++;
407 407 break;
408 408 case ' ':
409 409 /* Skip escaped blanks. */
410 410 if (0 == quoted)
411 411 cp++;
412 412 break;
413 413 default:
414 414 break;
415 415 }
416 416 } else if (0 == quoted) {
417 417 if (' ' == cp[0]) {
418 418 /* Unescaped blanks end unquoted args. */
419 419 white = 1;
420 420 break;
421 421 }
422 422 } else if ('"' == cp[0]) {
423 423 if ('"' == cp[1]) {
424 424 /* Quoted quotes collapse. */
425 425 pairs++;
426 426 cp++;
427 427 } else {
428 428 /* Unquoted quotes end quoted args. */
429 429 quoted = 2;
430 430 break;
431 431 }
432 432 }
433 433 }
434 434
435 435 /* Quoted argument without a closing quote. */
436 436 if (1 == quoted)
437 437 mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL);
438 438
439 439 /* NUL-terminate this argument and move to the next one. */
440 440 if (pairs)
441 441 cp[-pairs] = '\0';
442 442 if ('\0' != *cp) {
443 443 *cp++ = '\0';
444 444 while (' ' == *cp)
445 445 cp++;
446 446 }
447 447 *pos += (int)(cp - start) + (quoted ? 1 : 0);
448 448 *cpp = cp;
449 449
450 450 if ('\0' == *cp && (white || ' ' == cp[-1]))
451 451 mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL);
452 452
453 453 return start;
454 454 }
455 455
/*
 * Parse the date string p according to the strptime(3) format fmt.
 * On success, store the resulting time in *t and return 1.
 * Return 0 and leave *t untouched if the string does not match the
 * format completely, or if strptime(3) is not available in this build.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest;

	memset(&parsed, 0, sizeof(parsed));

	rest = NULL;
#if HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif

	/* Parsing must have succeeded and consumed the whole input. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
475 475
/*
 * Format t in local time as "Month D, YYYY" with an unpadded day.
 * Returns a buffer obtained from mandoc_malloc() that the caller
 * must free, or NULL if the time cannot be converted or formatted.
 */
static char *
time2a(time_t t)
{
	struct tm	*broken;
	char		*out, *cursor;
	size_t		 nmonth;
	int		 nday;

	if ((broken = localtime(&t)) == NULL)
		return NULL;

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	out = mandoc_malloc(10 + 4 + 4 + 1);
	cursor = out;

	nmonth = strftime(cursor, 10 + 1, "%B ", broken);
	if (nmonth != 0) {
		cursor += (int)nmonth;

		/*
		 * The day is printed with a plain "%d", neither "%2d"
		 * nor "%02d", which is why the date is assembled in
		 * pieces rather than with a single "%B %e, %Y" or
		 * "%B %d, %Y".  The piecewise approach also keeps each
		 * write bounded, in case anybody should ever introduce
		 * the bug of looking at LC_TIME.
		 */

		nday = snprintf(cursor, 4 + 1, "%d, ", broken->tm_mday);
		if (nday != -1) {
			cursor += nday;
			if (strftime(cursor, 4 + 1, "%Y", broken) != 0)
				return out;
		}
	}

	/* One of the formatting steps failed. */
	free(out);
	return NULL;
}
522 522
523 523 char *
524 524 mandoc_normdate(struct roff_man *man, char *in, int ln, int pos)
525 525 {
526 526 char *cp;
527 527 time_t t;
528 528
529 529 /* No date specified: use today's date. */
530 530
531 531 if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) {
532 532 mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, ln, pos, NULL);
533 533 return time2a(time(NULL));
↓ open down ↓ |
519 lines elided |
↑ open up ↑ |
534 534 }
535 535
536 536 /* Valid mdoc(7) date format. */
537 537
538 538 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) ||
539 539 a2time(&t, "%b %d, %Y", in)) {
540 540 cp = time2a(t);
541 541 if (t > time(NULL) + 86400)
542 542 mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse,
543 543 ln, pos, cp);
544 + else if (*in != '$' && strcmp(in, cp) != 0)
545 + mandoc_msg(MANDOCERR_DATE_NORM, man->parse,
546 + ln, pos, cp);
544 547 return cp;
545 548 }
546 549
547 550 /* In man(7), do not warn about the legacy format. */
548 551
549 552 if (a2time(&t, "%Y-%m-%d", in) == 0)
550 553 mandoc_msg(MANDOCERR_DATE_BAD, man->parse, ln, pos, in);
551 554 else if (t > time(NULL) + 86400)
552 555 mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, ln, pos, in);
553 556 else if (man->macroset == MACROSET_MDOC)
554 557 mandoc_vmsg(MANDOCERR_DATE_LEGACY, man->parse,
555 558 ln, pos, "Dd %s", in);
556 559
557 560 /* Use any non-mdoc(7) date verbatim. */
558 561
559 562 return mandoc_strdup(in);
560 563 }
561 564
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards.  Closing quotes, brackets and
 * parentheses after the punctuation are skipped, so that prior
 * end-of-sentence punctuation can propagate outward through symbols
 * such as `)'.  Returns 1 if the text ends a sentence, 0 otherwise;
 * an empty buffer never ends a sentence.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t	 i;
	int	 enclosed, found;

	if (0 == sz)
		return 0;

	/*
	 * Walk backwards with an index rather than a pointer: a
	 * pointer loop would have to step to p - 1 to terminate,
	 * which is undefined, and an int cast of sz would truncate.
	 */

	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/* Only closers after the punctuation count. */
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			/*
			 * First ordinary character: enclosed punctuation
			 * only ends a sentence right after a letter or digit.
			 */
			return found &&
			    (!enclosed || isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
600 603
/*
 * Convert the first sz bytes of p to an int, using strtol(3)
 * with the given base.
 *
 * Returns -1 if sz exceeds 31 bytes, if the text is empty, if it is
 * not a complete number, or if the base is not accepted by strtol(3).
 * Values outside the range of int are clamped to INT_MIN or INT_MAX.
 * Note that valid negative input is returned as is, so a caller that
 * wants to reject all failures has to test for "< 0".
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 buf[32];
	char	*ep;
	long	 v;

	if (sz > sizeof(buf) - 1)
		return -1;

	/* strtol(3) needs a NUL-terminated string; p need not be one. */
	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * Preset ep: when strtol(3) rejects the base, it is not
	 * guaranteed to store the end pointer on every libc.
	 */
	ep = buf;
	errno = 0;
	v = strtol(buf, &ep, base);

	if (errno == EINVAL || buf[0] == '\0' || *ep != '\0')
		return -1;

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return (int)v;
}
↓ open down ↓ |
77 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX