Print this page
5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/mandoc/read.c
+++ new/usr/src/cmd/mandoc/read.c
1 -/* $Id: read.c,v 1.28 2012/02/16 20:51:31 joerg Exp $ */
1 +/* $Id: read.c,v 1.39 2013/09/16 00:25:07 schwarze Exp $ */
2 2 /*
3 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 - * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
4 + * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5 5 *
6 6 * Permission to use, copy, modify, and distribute this software for any
7 7 * purpose with or without fee is hereby granted, provided that the above
8 8 * copyright notice and this permission notice appear in all copies.
9 9 *
10 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 17 */
18 18 #ifdef HAVE_CONFIG_H
19 19 #include "config.h"
20 20 #endif
21 21
22 22 #ifdef HAVE_MMAP
23 23 # include <sys/stat.h>
24 24 # include <sys/mman.h>
25 25 #endif
26 26
27 27 #include <assert.h>
28 28 #include <ctype.h>
29 29 #include <fcntl.h>
30 30 #include <stdarg.h>
31 31 #include <stdint.h>
32 32 #include <stdio.h>
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
33 33 #include <stdlib.h>
34 34 #include <string.h>
35 35 #include <unistd.h>
36 36
37 37 #include "mandoc.h"
38 38 #include "libmandoc.h"
39 39 #include "mdoc.h"
40 40 #include "man.h"
41 41 #include "main.h"
42 42
43 -#ifndef MAP_FILE
44 -#define MAP_FILE 0
45 -#endif
46 -
47 43 #define REPARSE_LIMIT 1000
48 44
49 45 struct buf {
50 46 char *buf; /* binary input buffer */
51 47 size_t sz; /* size of binary buffer */
52 48 };
53 49
54 50 struct mparse {
55 51 enum mandoclevel file_status; /* status of current parse */
56 52 enum mandoclevel wlevel; /* ignore messages below this */
57 53 int line; /* line number in the file */
58 54 enum mparset inttype; /* which parser to use */
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
59 55 struct man *pman; /* persistent man parser */
60 56 struct mdoc *pmdoc; /* persistent mdoc parser */
61 57 struct man *man; /* man parser */
62 58 struct mdoc *mdoc; /* mdoc parser */
63 59 struct roff *roff; /* roff parser (!NULL) */
64 60 int reparse_count; /* finite interp. stack */
65 61 mandocmsg mmsg; /* warning/error message handler */
66 62 void *arg; /* argument to mmsg */
67 63 const char *file;
68 64 struct buf *secondary;
65 + char *defos; /* default operating system */
69 66 };
70 67
71 68 static void resize_buf(struct buf *, size_t);
72 69 static void mparse_buf_r(struct mparse *, struct buf, int);
73 -static void mparse_readfd_r(struct mparse *, int, const char *, int);
74 70 static void pset(const char *, int, struct mparse *);
75 71 static int read_whole_file(const char *, int, struct buf *, int *);
76 72 static void mparse_end(struct mparse *);
73 +static void mparse_parse_buffer(struct mparse *, struct buf,
74 + const char *);
77 75
78 76 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
79 77 MANDOCERR_OK,
80 78 MANDOCERR_WARNING,
81 79 MANDOCERR_WARNING,
82 80 MANDOCERR_ERROR,
83 81 MANDOCERR_FATAL,
84 82 MANDOCERR_MAX,
85 83 MANDOCERR_MAX
86 84 };
87 85
88 86 static const char * const mandocerrs[MANDOCERR_MAX] = {
89 87 "ok",
90 88
91 89 "generic warning",
92 90
93 91 /* related to the prologue */
94 92 "no title in document",
95 93 "document title should be all caps",
96 94 "unknown manual section",
95 + "unknown manual volume or arch",
97 96 "date missing, using today's date",
98 97 "cannot parse date, using it verbatim",
99 98 "prologue macros out of order",
100 99 "duplicate prologue macro",
101 100 "macro not allowed in prologue",
102 101 "macro not allowed in body",
103 102
104 103 /* related to document structure */
105 104 ".so is fragile, better use ln(1)",
106 105 "NAME section must come first",
107 106 "bad NAME section contents",
108 - "manual name not yet set",
109 107 "sections out of conventional order",
110 108 "duplicate section name",
111 - "section not in conventional manual section",
109 + "section header suited to sections 2, 3, and 9 only",
112 110
113 111 /* related to macros and nesting */
114 112 "skipping obsolete macro",
115 113 "skipping paragraph macro",
114 + "moving paragraph macro out of list",
116 115 "skipping no-space macro",
117 116 "blocks badly nested",
118 117 "child violates parent syntax",
119 118 "nested displays are not portable",
120 119 "already in literal mode",
121 120 "line scope broken",
122 121
123 122 /* related to missing macro arguments */
124 123 "skipping empty macro",
125 124 "argument count wrong",
126 125 "missing display type",
127 126 "list type must come first",
128 127 "tag lists require a width argument",
129 128 "missing font type",
130 129 "skipping end of block that is not open",
131 130
132 131 /* related to bad macro arguments */
133 132 "skipping argument",
134 133 "duplicate argument",
135 134 "duplicate display type",
136 135 "duplicate list type",
137 136 "unknown AT&T UNIX version",
138 137 "bad Boolean value",
139 138 "unknown font",
140 139 "unknown standard specifier",
141 140 "bad width argument",
142 141
143 142 /* related to plain text */
144 143 "blank line in non-literal context",
145 144 "tab in non-literal context",
146 145 "end of line whitespace",
147 146 "bad comment style",
148 147 "bad escape sequence",
149 148 "unterminated quoted string",
150 149
151 150 /* related to equations */
152 151 "unexpected literal in equation",
153 152
154 153 "generic error",
155 154
156 155 /* related to equations */
157 156 "unexpected equation scope closure",
158 157 "equation scope open on exit",
159 158 "overlapping equation scopes",
160 159 "unexpected end of equation",
161 160 "equation syntax error",
162 161
163 162 /* related to tables */
164 163 "bad table syntax",
165 164 "bad table option",
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
166 165 "bad table layout",
167 166 "no table layout cells specified",
168 167 "no table data cells specified",
169 168 "ignore data in cell",
170 169 "data block still open",
171 170 "ignoring extra data cells",
172 171
173 172 "input stack limit exceeded, infinite loop?",
174 173 "skipping bad character",
175 174 "escaped character not allowed in a name",
175 + "manual name not yet set",
176 176 "skipping text before the first section header",
177 177 "skipping unknown macro",
178 178 "NOT IMPLEMENTED, please use groff: skipping request",
179 179 "argument count wrong",
180 + "skipping column outside column list",
180 181 "skipping end of block that is not open",
181 182 "missing end of block",
182 183 "scope open on exit",
183 184 "uname(3) system call failed",
184 185 "macro requires line argument(s)",
185 186 "macro requires body argument(s)",
186 187 "macro requires argument(s)",
188 + "request requires a numeric argument",
187 189 "missing list type",
188 190 "line argument(s) will be lost",
189 191 "body argument(s) will be lost",
190 192
191 193 "generic fatal error",
192 194
193 195 "not a manual",
194 196 "column syntax is inconsistent",
195 197 "NOT IMPLEMENTED: .Bd -file",
196 198 "argument count wrong, violates syntax",
197 199 "child violates parent syntax",
198 200 "argument count wrong, violates syntax",
199 201 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
200 202 "no document body",
201 203 "no document prologue",
202 204 "static buffer exhausted",
203 205 };
204 206
205 207 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
206 208 "SUCCESS",
207 209 "RESERVED",
208 210 "WARNING",
209 211 "ERROR",
210 212 "FATAL",
211 213 "BADARG",
212 214 "SYSERR"
213 215 };
214 216
215 217 static void
216 218 resize_buf(struct buf *buf, size_t initial)
217 219 {
218 220
219 221 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
220 222 buf->buf = mandoc_realloc(buf->buf, buf->sz);
221 223 }
222 224
223 225 static void
224 226 pset(const char *buf, int pos, struct mparse *curp)
225 227 {
226 228 int i;
227 229
228 230 /*
229 231 * Try to intuit which kind of manual parser should be used. If
230 232 * passed in by command-line (-man, -mdoc), then use that
231 233 * explicitly. If passed as -mandoc, then try to guess from the
232 234 * line: either skip dot-lines, use -mdoc when finding `.Dd', or
233 235 * default to -man, which is more lenient.
234 236 *
235 237 * Separate out pmdoc/pman from mdoc/man: the first persists
236 238 * through all parsers, while the latter is used per-parse.
237 239 */
238 240
239 241 if ('.' == buf[0] || '\'' == buf[0]) {
↓ open down ↓ |
43 lines elided |
↑ open up ↑ |
240 242 for (i = 1; buf[i]; i++)
241 243 if (' ' != buf[i] && '\t' != buf[i])
242 244 break;
243 245 if ('\0' == buf[i])
244 246 return;
245 247 }
246 248
247 249 switch (curp->inttype) {
248 250 case (MPARSE_MDOC):
249 251 if (NULL == curp->pmdoc)
250 - curp->pmdoc = mdoc_alloc(curp->roff, curp);
252 + curp->pmdoc = mdoc_alloc(curp->roff, curp,
253 + curp->defos);
251 254 assert(curp->pmdoc);
252 255 curp->mdoc = curp->pmdoc;
253 256 return;
254 257 case (MPARSE_MAN):
255 258 if (NULL == curp->pman)
256 259 curp->pman = man_alloc(curp->roff, curp);
257 260 assert(curp->pman);
258 261 curp->man = curp->pman;
259 262 return;
260 263 default:
261 264 break;
262 265 }
263 266
264 267 if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) {
265 268 if (NULL == curp->pmdoc)
266 - curp->pmdoc = mdoc_alloc(curp->roff, curp);
269 + curp->pmdoc = mdoc_alloc(curp->roff, curp,
270 + curp->defos);
267 271 assert(curp->pmdoc);
268 272 curp->mdoc = curp->pmdoc;
269 273 return;
270 274 }
271 275
272 276 if (NULL == curp->pman)
273 277 curp->pman = man_alloc(curp->roff, curp);
274 278 assert(curp->pman);
275 279 curp->man = curp->pman;
276 280 }
277 281
278 282 /*
279 283 * Main parse routine for an opened file. This is called for each
280 284 * opened file and simply loops around the full input file, possibly
281 285 * nesting (i.e., with `so').
282 286 */
283 287 static void
284 288 mparse_buf_r(struct mparse *curp, struct buf blk, int start)
285 289 {
286 290 const struct tbl_span *span;
287 291 struct buf ln;
288 292 enum rofferr rr;
289 293 int i, of, rc;
290 294 int pos; /* byte number in the ln buffer */
291 295 int lnn; /* line number in the real file */
292 296 unsigned char c;
293 297
294 298 memset(&ln, 0, sizeof(struct buf));
295 299
296 300 lnn = curp->line;
297 301 pos = 0;
298 302
299 303 for (i = 0; i < (int)blk.sz; ) {
300 304 if (0 == pos && '\0' == blk.buf[i])
301 305 break;
302 306
303 307 if (start) {
304 308 curp->line = lnn;
305 309 curp->reparse_count = 0;
306 310 }
307 311
308 312 while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) {
309 313
310 314 /*
311 315 * When finding an unescaped newline character,
312 316 * leave the character loop to process the line.
313 317 * Skip a preceding carriage return, if any.
314 318 */
↓ open down ↓ |
38 lines elided |
↑ open up ↑ |
315 319
316 320 if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz &&
317 321 '\n' == blk.buf[i + 1])
318 322 ++i;
319 323 if ('\n' == blk.buf[i]) {
320 324 ++i;
321 325 ++lnn;
322 326 break;
323 327 }
324 328
329 + /*
330 + * Make sure we have space for at least
331 + * one backslash and one other character
332 + * and the trailing NUL byte.
333 + */
334 +
335 + if (pos + 2 >= (int)ln.sz)
336 + resize_buf(&ln, 256);
337 +
325 338 /*
326 339 * Warn about bogus characters. If you're using
327 340 * non-ASCII encoding, you're screwing your
328 341 * readers. Since I'd rather this not happen,
329 342 * I'll be helpful and replace these characters
330 343 * with "?", so we don't display gibberish.
331 344 * Note to manual writers: use special characters.
332 345 */
333 346
334 347 c = (unsigned char) blk.buf[i];
335 348
336 349 if ( ! (isascii(c) &&
337 350 (isgraph(c) || isblank(c)))) {
338 351 mandoc_msg(MANDOCERR_BADCHAR, curp,
339 352 curp->line, pos, NULL);
340 353 i++;
341 - if (pos >= (int)ln.sz)
342 - resize_buf(&ln, 256);
343 354 ln.buf[pos++] = '?';
344 355 continue;
345 356 }
346 357
347 358 /* Trailing backslash = a plain char. */
348 359
349 360 if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) {
350 - if (pos >= (int)ln.sz)
351 - resize_buf(&ln, 256);
352 361 ln.buf[pos++] = blk.buf[i++];
353 362 continue;
354 363 }
355 364
356 365 /*
357 366 * Found escape and at least one other character.
358 367 * When it's a newline character, skip it.
359 368 * When there is a carriage return in between,
360 369 * skip that one as well.
361 370 */
362 371
363 372 if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz &&
364 373 '\n' == blk.buf[i + 2])
365 374 ++i;
366 375 if ('\n' == blk.buf[i + 1]) {
367 376 i += 2;
368 377 ++lnn;
369 378 continue;
370 379 }
371 380
372 381 if ('"' == blk.buf[i + 1] || '#' == blk.buf[i + 1]) {
373 382 i += 2;
374 383 /* Comment, skip to end of line */
375 384 for (; i < (int)blk.sz; ++i) {
376 385 if ('\n' == blk.buf[i]) {
377 386 ++i;
378 387 ++lnn;
379 388 break;
380 389 }
381 390 }
382 391
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
383 392 /* Back out trailing whitespace */
384 393 for (; pos > 0; --pos) {
385 394 if (ln.buf[pos - 1] != ' ')
386 395 break;
387 396 if (pos > 2 && ln.buf[pos - 2] == '\\')
388 397 break;
389 398 }
390 399 break;
391 400 }
392 401
393 - /* Some other escape sequence, copy & cont. */
402 + /* Catch escaped bogus characters. */
394 403
395 - if (pos + 1 >= (int)ln.sz)
396 - resize_buf(&ln, 256);
404 + c = (unsigned char) blk.buf[i+1];
397 405
406 + if ( ! (isascii(c) &&
407 + (isgraph(c) || isblank(c)))) {
408 + mandoc_msg(MANDOCERR_BADCHAR, curp,
409 + curp->line, pos, NULL);
410 + i += 2;
411 + ln.buf[pos++] = '?';
412 + continue;
413 + }
414 +
415 + /* Some other escape sequence, copy & cont. */
416 +
398 417 ln.buf[pos++] = blk.buf[i++];
399 418 ln.buf[pos++] = blk.buf[i++];
400 419 }
401 420
402 421 if (pos >= (int)ln.sz)
403 422 resize_buf(&ln, 256);
404 423
405 424 ln.buf[pos] = '\0';
406 425
407 426 /*
408 427 * A significant amount of complexity is contained by
409 428 * the roff preprocessor. It's line-oriented but can be
410 429 * expressed on one line, so we need at times to
411 430 * readjust our starting point and re-run it. The roff
412 431 * preprocessor can also readjust the buffers with new
413 432 * data, so we pass them in wholesale.
414 433 */
415 434
416 435 of = 0;
417 436
418 437 /*
419 438 * Maintain a lookaside buffer of all parsed lines. We
420 439 * only do this if mparse_keep() has been invoked (the
421 440 * buffer may be accessed with mparse_getkeep()).
422 441 */
423 442
424 443 if (curp->secondary) {
425 444 curp->secondary->buf =
426 445 mandoc_realloc
427 446 (curp->secondary->buf,
428 447 curp->secondary->sz + pos + 2);
429 448 memcpy(curp->secondary->buf +
430 449 curp->secondary->sz,
431 450 ln.buf, pos);
432 451 curp->secondary->sz += pos;
433 452 curp->secondary->buf
434 453 [curp->secondary->sz] = '\n';
435 454 curp->secondary->sz++;
436 455 curp->secondary->buf
437 456 [curp->secondary->sz] = '\0';
438 457 }
439 458 rerun:
440 459 rr = roff_parseln
441 460 (curp->roff, curp->line,
442 461 &ln.buf, &ln.sz, of, &of);
443 462
444 463 switch (rr) {
445 464 case (ROFF_REPARSE):
446 465 if (REPARSE_LIMIT >= ++curp->reparse_count)
447 466 mparse_buf_r(curp, ln, 0);
448 467 else
449 468 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
450 469 curp->line, pos, NULL);
451 470 pos = 0;
452 471 continue;
453 472 case (ROFF_APPEND):
454 473 pos = (int)strlen(ln.buf);
455 474 continue;
456 475 case (ROFF_RERUN):
457 476 goto rerun;
458 477 case (ROFF_IGN):
459 478 pos = 0;
460 479 continue;
461 480 case (ROFF_ERR):
↓ open down ↓ |
54 lines elided |
↑ open up ↑ |
462 481 assert(MANDOCLEVEL_FATAL <= curp->file_status);
463 482 break;
464 483 case (ROFF_SO):
465 484 /*
466 485 * We remove `so' clauses from our lookaside
467 486 * buffer because we're going to descend into
468 487 * the file recursively.
469 488 */
470 489 if (curp->secondary)
471 490 curp->secondary->sz -= pos + 1;
472 - mparse_readfd_r(curp, -1, ln.buf + of, 1);
491 + mparse_readfd(curp, -1, ln.buf + of);
473 492 if (MANDOCLEVEL_FATAL <= curp->file_status)
474 493 break;
475 494 pos = 0;
476 495 continue;
477 496 default:
478 497 break;
479 498 }
480 499
481 500 /*
482 501 * If we encounter errors in the recursive parse, make
483 502 * sure we don't continue parsing.
484 503 */
485 504
486 505 if (MANDOCLEVEL_FATAL <= curp->file_status)
487 506 break;
488 507
489 508 /*
490 509 * If input parsers have not been allocated, do so now.
491 510 * We keep these instanced between parsers, but set them
492 511 * locally per parse routine since we can use different
493 512 * parsers with each one.
494 513 */
495 514
496 515 if ( ! (curp->man || curp->mdoc))
497 516 pset(ln.buf + of, pos - of, curp);
498 517
499 518 /*
500 519 * Lastly, push down into the parsers themselves. One
501 520 * of these will have already been set in the pset()
502 521 * routine.
503 522 * If libroff returns ROFF_TBL, then add it to the
504 523 * currently open parse. Since we only get here if
505 524 * there does exist data (see tbl_data.c), we're
506 525 * guaranteed that something's been allocated.
507 526 * Do the same for ROFF_EQN.
508 527 */
509 528
510 529 rc = -1;
511 530
512 531 if (ROFF_TBL == rr)
513 532 while (NULL != (span = roff_span(curp->roff))) {
514 533 rc = curp->man ?
515 534 man_addspan(curp->man, span) :
516 535 mdoc_addspan(curp->mdoc, span);
517 536 if (0 == rc)
518 537 break;
519 538 }
520 539 else if (ROFF_EQN == rr)
521 540 rc = curp->mdoc ?
522 541 mdoc_addeqn(curp->mdoc,
523 542 roff_eqn(curp->roff)) :
524 543 man_addeqn(curp->man,
525 544 roff_eqn(curp->roff));
526 545 else if (curp->man || curp->mdoc)
527 546 rc = curp->man ?
528 547 man_parseln(curp->man,
529 548 curp->line, ln.buf, of) :
530 549 mdoc_parseln(curp->mdoc,
531 550 curp->line, ln.buf, of);
532 551
533 552 if (0 == rc) {
534 553 assert(MANDOCLEVEL_FATAL <= curp->file_status);
535 554 break;
536 555 }
537 556
538 557 /* Temporary buffers typically are not full. */
539 558
540 559 if (0 == start && '\0' == blk.buf[i])
541 560 break;
542 561
543 562 /* Start the next input line. */
544 563
545 564 pos = 0;
546 565 }
547 566
548 567 free(ln.buf);
549 568 }
550 569
551 570 static int
552 571 read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap)
553 572 {
554 573 size_t off;
555 574 ssize_t ssz;
556 575
557 576 #ifdef HAVE_MMAP
558 577 struct stat st;
559 578 if (-1 == fstat(fd, &st)) {
560 579 perror(file);
561 580 return(0);
562 581 }
563 582
564 583 /*
565 584 * If we're a regular file, try just reading in the whole entry
566 585 * via mmap(). This is faster than reading it into blocks, and
567 586 * since each file is only a few bytes to begin with, I'm not
↓ open down ↓ |
85 lines elided |
↑ open up ↑ |
568 587 * concerned that this is going to tank any machines.
569 588 */
570 589
571 590 if (S_ISREG(st.st_mode)) {
572 591 if (st.st_size >= (1U << 31)) {
573 592 fprintf(stderr, "%s: input too large\n", file);
574 593 return(0);
575 594 }
576 595 *with_mmap = 1;
577 596 fb->sz = (size_t)st.st_size;
578 - fb->buf = mmap(NULL, fb->sz, PROT_READ,
579 - MAP_FILE|MAP_SHARED, fd, 0);
597 + fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
580 598 if (fb->buf != MAP_FAILED)
581 599 return(1);
582 600 }
583 601 #endif
584 602
585 603 /*
586 604 * If this isn't a regular file (like, say, stdin), then we must
587 605 * go the old way and just read things in bit by bit.
588 606 */
589 607
590 608 *with_mmap = 0;
591 609 off = 0;
592 610 fb->sz = 0;
593 611 fb->buf = NULL;
594 612 for (;;) {
595 613 if (off == fb->sz) {
596 614 if (fb->sz == (1U << 31)) {
597 615 fprintf(stderr, "%s: input too large\n", file);
598 616 break;
599 617 }
600 618 resize_buf(fb, 65536);
601 619 }
602 620 ssz = read(fd, fb->buf + (int)off, fb->sz - off);
603 621 if (ssz == 0) {
604 622 fb->sz = off;
605 623 return(1);
606 624 }
607 625 if (ssz == -1) {
608 626 perror(file);
609 627 break;
610 628 }
611 629 off += (size_t)ssz;
612 630 }
613 631
614 632 free(fb->buf);
615 633 fb->buf = NULL;
616 634 return(0);
617 635 }
618 636
619 637 static void
620 638 mparse_end(struct mparse *curp)
621 639 {
622 640
623 641 if (MANDOCLEVEL_FATAL <= curp->file_status)
624 642 return;
625 643
626 644 if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) {
627 645 assert(MANDOCLEVEL_FATAL <= curp->file_status);
628 646 return;
629 647 }
630 648
631 649 if (curp->man && ! man_endparse(curp->man)) {
632 650 assert(MANDOCLEVEL_FATAL <= curp->file_status);
633 651 return;
634 652 }
635 653
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
636 654 if ( ! (curp->man || curp->mdoc)) {
637 655 mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL);
638 656 curp->file_status = MANDOCLEVEL_FATAL;
639 657 return;
640 658 }
641 659
642 660 roff_endparse(curp->roff);
643 661 }
644 662
645 663 static void
646 -mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file,
647 - int re)
664 +mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
648 665 {
649 666 const char *svfile;
667 + static int recursion_depth;
650 668
669 + if (64 < recursion_depth) {
670 + mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
671 + return;
672 + }
673 +
651 674 /* Line number is per-file. */
652 675 svfile = curp->file;
653 676 curp->file = file;
654 677 curp->line = 1;
678 + recursion_depth++;
655 679
656 680 mparse_buf_r(curp, blk, 1);
657 681
658 - if (0 == re && MANDOCLEVEL_FATAL > curp->file_status)
682 + if (0 == --recursion_depth && MANDOCLEVEL_FATAL > curp->file_status)
659 683 mparse_end(curp);
660 684
661 685 curp->file = svfile;
662 686 }
663 687
664 688 enum mandoclevel
665 689 mparse_readmem(struct mparse *curp, const void *buf, size_t len,
666 690 const char *file)
667 691 {
668 692 struct buf blk;
669 693
670 694 blk.buf = UNCONST(buf);
671 695 blk.sz = len;
672 696
673 - mparse_parse_buffer(curp, blk, file, 0);
697 + mparse_parse_buffer(curp, blk, file);
674 698 return(curp->file_status);
675 699 }
676 700
677 -static void
678 -mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re)
701 +enum mandoclevel
702 +mparse_readfd(struct mparse *curp, int fd, const char *file)
679 703 {
680 704 struct buf blk;
681 705 int with_mmap;
682 706
683 707 if (-1 == fd)
684 708 if (-1 == (fd = open(file, O_RDONLY, 0))) {
685 709 perror(file);
686 710 curp->file_status = MANDOCLEVEL_SYSERR;
687 - return;
711 + goto out;
688 712 }
689 713 /*
690 714 * Run for each opened file; may be called more than once for
691 715 * each full parse sequence if the opened file is nested (i.e.,
692 716 * from `so'). Simply sucks in the whole file and moves into
693 717 * the parse phase for the file.
694 718 */
695 719
696 720 if ( ! read_whole_file(file, fd, &blk, &with_mmap)) {
697 721 curp->file_status = MANDOCLEVEL_SYSERR;
698 - return;
722 + goto out;
699 723 }
700 724
701 - mparse_parse_buffer(curp, blk, file, re);
725 + mparse_parse_buffer(curp, blk, file);
702 726
703 727 #ifdef HAVE_MMAP
704 728 if (with_mmap)
705 729 munmap(blk.buf, blk.sz);
706 730 else
707 731 #endif
708 732 free(blk.buf);
709 733
710 734 if (STDIN_FILENO != fd && -1 == close(fd))
711 735 perror(file);
712 -}
713 -
714 -enum mandoclevel
715 -mparse_readfd(struct mparse *curp, int fd, const char *file)
716 -{
717 -
718 - mparse_readfd_r(curp, fd, file, 0);
736 +out:
719 737 return(curp->file_status);
720 738 }
721 739
722 740 struct mparse *
723 -mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg)
741 +mparse_alloc(enum mparset inttype, enum mandoclevel wlevel,
742 + mandocmsg mmsg, void *arg, char *defos)
724 743 {
725 744 struct mparse *curp;
726 745
727 746 assert(wlevel <= MANDOCLEVEL_FATAL);
728 747
729 748 curp = mandoc_calloc(1, sizeof(struct mparse));
730 749
731 750 curp->wlevel = wlevel;
732 751 curp->mmsg = mmsg;
733 752 curp->arg = arg;
734 753 curp->inttype = inttype;
754 + curp->defos = defos;
735 755
736 - curp->roff = roff_alloc(curp);
756 + curp->roff = roff_alloc(inttype, curp);
737 757 return(curp);
738 758 }
739 759
740 760 void
741 761 mparse_reset(struct mparse *curp)
742 762 {
743 763
744 764 roff_reset(curp->roff);
745 765
746 766 if (curp->mdoc)
747 767 mdoc_reset(curp->mdoc);
748 768 if (curp->man)
749 769 man_reset(curp->man);
750 770 if (curp->secondary)
751 771 curp->secondary->sz = 0;
752 772
753 773 curp->file_status = MANDOCLEVEL_OK;
754 774 curp->mdoc = NULL;
755 775 curp->man = NULL;
756 776 }
757 777
758 778 void
759 779 mparse_free(struct mparse *curp)
760 780 {
761 781
762 782 if (curp->pmdoc)
763 783 mdoc_free(curp->pmdoc);
764 784 if (curp->pman)
765 785 man_free(curp->pman);
766 786 if (curp->roff)
767 787 roff_free(curp->roff);
768 788 if (curp->secondary)
769 789 free(curp->secondary->buf);
770 790
771 791 free(curp->secondary);
772 792 free(curp);
773 793 }
774 794
775 795 void
776 796 mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man)
777 797 {
778 798
779 799 if (mdoc)
780 800 *mdoc = curp->mdoc;
781 801 if (man)
782 802 *man = curp->man;
783 803 }
784 804
785 805 void
786 806 mandoc_vmsg(enum mandocerr t, struct mparse *m,
787 807 int ln, int pos, const char *fmt, ...)
788 808 {
789 809 char buf[256];
790 810 va_list ap;
791 811
792 812 va_start(ap, fmt);
793 813 vsnprintf(buf, sizeof(buf) - 1, fmt, ap);
794 814 va_end(ap);
795 815
796 816 mandoc_msg(t, m, ln, pos, buf);
797 817 }
798 818
799 819 void
800 820 mandoc_msg(enum mandocerr er, struct mparse *m,
801 821 int ln, int col, const char *msg)
802 822 {
803 823 enum mandoclevel level;
804 824
805 825 level = MANDOCLEVEL_FATAL;
806 826 while (er < mandoclimits[level])
807 827 level--;
808 828
809 829 if (level < m->wlevel)
810 830 return;
811 831
812 832 if (m->mmsg)
813 833 (*m->mmsg)(er, level, m->file, ln, col, msg);
814 834
815 835 if (m->file_status < level)
816 836 m->file_status = level;
817 837 }
818 838
819 839 const char *
820 840 mparse_strerror(enum mandocerr er)
821 841 {
822 842
823 843 return(mandocerrs[er]);
824 844 }
825 845
826 846 const char *
827 847 mparse_strlevel(enum mandoclevel lvl)
828 848 {
829 849 return(mandoclevels[lvl]);
830 850 }
831 851
832 852 void
833 853 mparse_keep(struct mparse *p)
834 854 {
835 855
836 856 assert(NULL == p->secondary);
837 857 p->secondary = mandoc_calloc(1, sizeof(struct buf));
838 858 }
839 859
840 860 const char *
841 861 mparse_getkeep(const struct mparse *p)
842 862 {
843 863
844 864 assert(p->secondary);
845 865 return(p->secondary->sz ? p->secondary->buf : NULL);
846 866 }
↓ open down ↓ |
100 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX