Print this page
9718 update mandoc to 1.14.4
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/mandoc/read.c
+++ new/usr/src/cmd/mandoc/read.c
1 -/* $Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */
1 +/* $Id: read.c,v 1.196 2018/07/28 18:34:15 schwarze Exp $ */
2 2 /*
3 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 - * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
4 + * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5 5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 6 *
7 7 * Permission to use, copy, modify, and distribute this software for any
8 8 * purpose with or without fee is hereby granted, provided that the above
9 9 * copyright notice and this permission notice appear in all copies.
10 10 *
11 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 18 */
19 19 #include "config.h"
20 20
21 21 #include <sys/types.h>
22 22 #include <sys/mman.h>
23 23 #include <sys/stat.h>
24 24
25 25 #include <assert.h>
26 26 #include <ctype.h>
27 27 #include <errno.h>
28 28 #include <fcntl.h>
29 29 #include <stdarg.h>
30 30 #include <stdio.h>
31 31 #include <stdlib.h>
32 32 #include <string.h>
33 33 #include <unistd.h>
34 34 #include <zlib.h>
35 35
36 36 #include "mandoc_aux.h"
37 37 #include "mandoc.h"
38 38 #include "roff.h"
39 39 #include "mdoc.h"
40 40 #include "man.h"
41 41 #include "libmandoc.h"
42 42
/*
 * Maximum number of ROFF_REPARSE results mparse_buf_r() tolerates
 * (counted in reparse_count) before it reports MANDOCERR_ROFFLOOP.
 */
43 43 #define REPARSE_LIMIT 1000
44 44
/*
 * State of one parser instance.  Allocated by mparse_alloc(),
 * reset between files by mparse_reset(), released by mparse_free().
 */
45 45 struct mparse {
46 46 struct roff *roff; /* roff parser (!NULL) */
47 47 struct roff_man *man; /* man parser */
48 48 char *sodest; /* filename pointed to by .so */
49 49 const char *file; /* filename of current input file */
50 50 struct buf *primary; /* buffer currently being parsed */
51 51 struct buf *secondary; /* preprocessed copy of input */
52 52 const char *os_s; /* default operating system */
53 53 mandocmsg mmsg; /* warning/error message handler */
54 54 enum mandoclevel file_status; /* status of current parse */
55 55 enum mandocerr mmin; /* ignore messages below this */
56 56 int options; /* parser options */
57 57 int gzip; /* current input file is gzipped */
58 58 int filenc; /* encoding of the current file */
59 59 int reparse_count; /* finite interp. stack */
60 60 int line; /* line number in the file */
61 61 };
62 62
/* Forward declarations of the file-local helpers defined below. */
63 63 static void choose_parser(struct mparse *);
64 64 static void resize_buf(struct buf *, size_t);
65 65 static int mparse_buf_r(struct mparse *, struct buf, size_t, int);
66 66 static int read_whole_file(struct mparse *, const char *, int,
67 67 struct buf *, int *);
68 68 static void mparse_end(struct mparse *);
69 69 static void mparse_parse_buffer(struct mparse *, struct buf,
70 70 const char *);
71 71
/*
 * Indexed by enum mandoclevel: the threshold enum mandocerr value
 * for each level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and
 * steps down while the error code is below the entry, which yields
 * the level assigned to that error.
 */
72 72 static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = {
73 73 MANDOCERR_OK,
74 74 MANDOCERR_OK,
75 75 MANDOCERR_WARNING,
76 76 MANDOCERR_ERROR,
77 77 MANDOCERR_UNSUPP,
78 78 MANDOCERR_MAX,
79 79 MANDOCERR_MAX
80 80 };
81 81
/*
 * Message text indexed by enum mandocerr; mparse_strerror() returns
 * entries of this table.  The order of the strings therefore has to
 * follow the order of the enum constants (declared outside this
 * file), and one slot is deliberately NULL.
 */
82 82 static const char * const mandocerrs[MANDOCERR_MAX] = {
83 83 "ok",
84 84
85 85 "base system convention",
86 86
↓ open down ↓ |
72 lines elided |
↑ open up ↑ |
87 87 "Mdocdate found",
88 88 "Mdocdate missing",
89 89 "unknown architecture",
90 90 "operating system explicitly specified",
91 91 "RCS id missing",
92 92 "referenced manual not found",
93 93
94 94 "generic style suggestion",
95 95
96 96 "legacy man(7) date format",
97 + "normalizing date format to",
97 98 "lower case character in document title",
98 99 "duplicate RCS id",
99 - "typo in section name",
100 + "possible typo in section name",
100 101 "unterminated quoted argument",
101 102 "useless macro",
102 103 "consider using OS macro",
103 104 "errnos out of order",
104 105 "duplicate errno",
105 106 "trailing delimiter",
106 107 "no blank before trailing delimiter",
107 108 "fill mode already enabled, skipping",
108 109 "fill mode already disabled, skipping",
110 + "verbatim \"--\", maybe consider using \\(em",
109 111 "function name without markup",
110 112 "whitespace at end of input line",
111 113 "bad comment style",
112 114
113 115 "generic warning",
114 116
115 117 /* related to the prologue */
116 118 "missing manual title, using UNTITLED",
117 119 "missing manual title, using \"\"",
118 120 "missing manual section, using \"\"",
119 121 "unknown manual section",
120 122 "missing date, using today's date",
121 123 "cannot parse date, using it verbatim",
122 124 "date in the future, using it anyway",
123 125 "missing Os macro, using \"\"",
124 126 "late prologue macro",
125 127 "prologue macros out of order",
126 128
127 129 /* related to document structure */
128 130 ".so is fragile, better use ln(1)",
129 131 "no document body",
130 132 "content before first section header",
131 133 "first section is not \"NAME\"",
132 134 "NAME section without Nm before Nd",
133 135 "NAME section without description",
134 136 "description not at the end of NAME",
135 137 "bad NAME section content",
136 138 "missing comma before name",
137 139 "missing description line, using \"\"",
138 140 "description line outside NAME section",
139 141 "sections out of conventional order",
140 142 "duplicate section title",
141 143 "unexpected section",
142 144 "cross reference to self",
143 145 "unusual Xr order",
144 146 "unusual Xr punctuation",
145 147 "AUTHORS section without An macro",
146 148
147 149 /* related to macros and nesting */
148 150 "obsolete macro",
149 151 "macro neither callable nor escaped",
150 152 "skipping paragraph macro",
151 153 "moving paragraph macro out of list",
152 154 "skipping no-space macro",
153 155 "blocks badly nested",
154 156 "nested displays are not portable",
155 157 "moving content out of list",
156 158 "first macro on line",
157 159 "line scope broken",
158 160 "skipping blank line in line scope",
159 161
160 162 /* related to missing macro arguments */
161 163 "skipping empty request",
162 164 "conditional request controls empty scope",
163 165 "skipping empty macro",
164 166 "empty block",
165 167 "empty argument, using 0n",
166 168 "missing display type, using -ragged",
167 169 "list type is not the first argument",
168 170 "missing -width in -tag list, using 6n",
169 171 "missing utility name, using \"\"",
170 172 "missing function name, using \"\"",
171 173 "empty head in list item",
172 174 "empty list item",
173 175 "missing argument, using next line",
174 176 "missing font type, using \\fR",
175 177 "unknown font type, using \\fR",
176 178 "nothing follows prefix",
177 179 "empty reference block",
178 180 "missing section argument",
179 181 "missing -std argument, adding it",
180 182 "missing option string, using \"\"",
181 183 "missing resource identifier, using \"\"",
182 184 "missing eqn box, using \"\"",
183 185
184 186 /* related to bad macro arguments */
185 187 "duplicate argument",
186 188 "skipping duplicate argument",
187 189 "skipping duplicate display type",
188 190 "skipping duplicate list type",
189 191 "skipping -width argument",
190 192 "wrong number of cells",
191 193 "unknown AT&T UNIX version",
192 194 "comma in function argument",
193 195 "parenthesis in function name",
194 196 "unknown library name",
195 197 "invalid content in Rs block",
196 198 "invalid Boolean argument",
197 199 "unknown font, skipping request",
198 200 "odd number of characters in request",
199 201
200 202 /* related to plain text */
201 203 "blank line in fill mode, using .sp",
202 204 "tab in filled text",
203 205 "new sentence, new line",
204 206 "invalid escape sequence",
205 207 "undefined string, using \"\"",
206 208
207 209 /* related to tables */
208 210 "tbl line starts with span",
209 211 "tbl column starts with span",
210 212 "skipping vertical bar in tbl layout",
211 213
212 214 "generic error",
213 215
214 216 /* related to tables */
215 217 "non-alphabetic character in tbl options",
216 218 "skipping unknown tbl option",
217 219 "missing tbl option argument",
218 220 "wrong tbl option argument size",
219 221 "empty tbl layout",
220 222 "invalid character in tbl layout",
221 223 "unmatched parenthesis in tbl layout",
222 224 "tbl without any data cells",
223 225 "ignoring data in spanned tbl cell",
224 226 "ignoring extra tbl data cells",
225 227 "data block open at end of tbl",
226 228
227 229 /* related to document structure and macros */
228 230 NULL,
229 231 "duplicate prologue macro",
230 232 "skipping late title macro",
231 233 "input stack limit exceeded, infinite loop?",
232 234 "skipping bad character",
233 235 "skipping unknown macro",
234 236 "skipping insecure request",
235 237 "skipping item outside list",
236 238 "skipping column outside column list",
237 239 "skipping end of block that is not open",
238 240 "fewer RS blocks open, skipping",
239 241 "inserting missing end of block",
240 242 "appending missing end of block",
241 243
242 244 /* related to request and macro arguments */
243 245 "escaped character not allowed in a name",
244 246 "NOT IMPLEMENTED: Bd -file",
245 247 "skipping display without arguments",
246 248 "missing list type, using -item",
247 249 "argument is not numeric, using 1",
248 250 "missing manual name, using \"\"",
249 251 "uname(3) system call failed, using UNKNOWN",
250 252 "unknown standard specifier",
251 253 "skipping request without numeric argument",
252 254 "NOT IMPLEMENTED: .so with absolute path or \"..\"",
253 255 ".so request failed",
254 256 "skipping all arguments",
255 257 "skipping excess arguments",
256 258 "divide by zero",
257 259
258 260 "unsupported feature",
259 261 "input too large",
260 262 "unsupported control character",
261 263 "unsupported roff request",
262 264 "eqn delim option in tbl",
263 265 "unsupported tbl layout modifier",
264 266 "ignoring macro in table",
265 267 };
266 268
/*
 * Name of each enum mandoclevel value, indexed by the enum;
 * returned by mparse_strlevel().
 */
267 269 static const char * const mandoclevels[MANDOCLEVEL_MAX] = {
268 270 "SUCCESS",
269 271 "STYLE",
270 272 "WARNING",
271 273 "ERROR",
272 274 "UNSUPP",
273 275 "BADARG",
274 276 "SYSERR"
275 277 };
276 278
277 279
278 280 static void
279 281 resize_buf(struct buf *buf, size_t initial)
280 282 {
281 283
282 284 buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
283 285 buf->buf = mandoc_realloc(buf->buf, buf->sz);
284 286 }
285 287
286 288 static void
287 289 choose_parser(struct mparse *curp)
288 290 {
289 291 char *cp, *ep;
290 292 int format;
291 293
292 294 /*
293 295 * If neither command line arguments -mdoc or -man select
294 296 * a parser nor the roff parser found a .Dd or .TH macro
295 297 * yet, look ahead in the main input buffer.
296 298 */
297 299
298 300 if ((format = roff_getformat(curp->roff)) == 0) {
299 301 cp = curp->primary->buf;
300 302 ep = cp + curp->primary->sz;
301 303 while (cp < ep) {
302 304 if (*cp == '.' || *cp == '\'') {
303 305 cp++;
304 306 if (cp[0] == 'D' && cp[1] == 'd') {
305 307 format = MPARSE_MDOC;
306 308 break;
307 309 }
308 310 if (cp[0] == 'T' && cp[1] == 'H') {
309 311 format = MPARSE_MAN;
310 312 break;
311 313 }
312 314 }
313 315 cp = memchr(cp, '\n', ep - cp);
314 316 if (cp == NULL)
315 317 break;
316 318 cp++;
317 319 }
318 320 }
319 321
320 322 if (format == MPARSE_MDOC) {
321 323 curp->man->macroset = MACROSET_MDOC;
322 324 if (curp->man->mdocmac == NULL)
323 325 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
324 326 } else {
325 327 curp->man->macroset = MACROSET_MAN;
326 328 if (curp->man->manmac == NULL)
327 329 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
328 330 }
329 331 curp->man->first->tok = TOKEN_NONE;
330 332 }
331 333
332 334 /*
333 335 * Main parse routine for a buffer.
334 336 * It assumes encoding and line numbering are already set up.
335 337 * It can recurse directly (for invocations of user-defined
336 338 * macros, inline equations, and input line traps)
337 339 * and indirectly (for .so file inclusion).
338 340 */
/*
 * curp:  parser state; curp->line supplies the starting line number.
 * blk:   input buffer, passed by value.
 * i:     byte offset in blk at which reading starts.
 * start: non-zero for the top-level call on a file buffer (see
 *        mparse_parse_buffer()), zero for the nested calls made
 *        below on the temporary line buffer.
 * Returns 0 only from the ROFF_REPARSE case (limit exceeded, or a
 * nested call did not return 1 while start != 1); otherwise 1.
 */
339 341 static int
340 342 mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
341 343 {
342 344 struct buf ln;
343 345 const char *save_file;
344 346 char *cp;
345 347 size_t pos; /* byte number in the ln buffer */
346 348 enum rofferr rr;
347 349 int of;
348 350 int lnn; /* line number in the real file */
349 351 int fd;
350 352 unsigned char c;
351 353
352 354 memset(&ln, 0, sizeof(ln));
353 355
354 356 lnn = curp->line;
355 357 pos = 0;
356 358
357 359 while (i < blk.sz) {
/* A NUL byte at the beginning of a line ends parsing of this buffer. */
358 360 if (0 == pos && '\0' == blk.buf[i])
359 361 break;
360 362
361 363 if (start) {
362 364 curp->line = lnn;
363 365 curp->reparse_count = 0;
364 366
365 367 if (lnn < 3 &&
366 368 curp->filenc & MPARSE_UTF8 &&
367 369 curp->filenc & MPARSE_LATIN1)
368 370 curp->filenc = preconv_cue(&blk, i);
369 371 }
370 372
371 373 while (i < blk.sz && (start || blk.buf[i] != '\0')) {
372 374
373 375 /*
374 376 * When finding an unescaped newline character,
375 377 * leave the character loop to process the line.
376 378 * Skip a preceding carriage return, if any.
377 379 */
378 380
379 381 if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
380 382 '\n' == blk.buf[i + 1])
381 383 ++i;
382 384 if ('\n' == blk.buf[i]) {
383 385 ++i;
384 386 ++lnn;
385 387 break;
386 388 }
387 389
388 390 /*
389 391 * Make sure we have space for the worst
390 392 * case of 11 bytes: "\\[u10ffff]\0"
391 393 */
392 394
393 395 if (pos + 11 > ln.sz)
394 396 resize_buf(&ln, 256);
395 397
396 398 /*
397 399 * Encode 8-bit input.
398 400 */
399 401
400 402 c = blk.buf[i];
401 403 if (c & 0x80) {
402 404 if ( ! (curp->filenc && preconv_encode(
403 405 &blk, &i, &ln, &pos, &curp->filenc))) {
404 406 mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
405 407 curp->line, pos, "0x%x", c);
406 408 ln.buf[pos++] = '?';
407 409 i++;
408 410 }
409 411 continue;
410 412 }
411 413
412 414 /*
413 415 * Exclude control characters.
414 416 */
415 417
416 418 if (c == 0x7f || (c < 0x20 && c != 0x09)) {
417 419 mandoc_vmsg(c == 0x00 || c == 0x04 ||
418 420 c > 0x0a ? MANDOCERR_CHAR_BAD :
419 421 MANDOCERR_CHAR_UNSUPP,
420 422 curp, curp->line, pos, "0x%x", c);
421 423 i++;
422 424 if (c != '\r')
423 425 ln.buf[pos++] = '?';
424 426 continue;
425 427 }
426 428
427 429 ln.buf[pos++] = blk.buf[i++];
428 430 }
429 431
430 432 if (pos + 1 >= ln.sz)
431 433 resize_buf(&ln, 256);
432 434
/* At the end of the buffer (or at a NUL), terminate the line with a newline. */
433 435 if (i == blk.sz || blk.buf[i] == '\0')
434 436 ln.buf[pos++] = '\n';
435 437 ln.buf[pos] = '\0';
436 438
437 439 /*
438 440 * A significant amount of complexity is contained by
439 441 * the roff preprocessor. It's line-oriented but can be
440 442 * expressed on one line, so we need at times to
441 443 * readjust our starting point and re-run it. The roff
442 444 * preprocessor can also readjust the buffers with new
443 445 * data, so we pass them in wholesale.
444 446 */
445 447
446 448 of = 0;
447 449
448 450 /*
449 451 * Maintain a lookaside buffer of all parsed lines. We
450 452 * only do this if mparse_keep() has been invoked (the
451 453 * buffer may be accessed with mparse_getkeep()).
452 454 */
453 455
454 456 if (curp->secondary) {
455 457 curp->secondary->buf = mandoc_realloc(
456 458 curp->secondary->buf,
457 459 curp->secondary->sz + pos + 2);
458 460 memcpy(curp->secondary->buf +
459 461 curp->secondary->sz,
460 462 ln.buf, pos);
461 463 curp->secondary->sz += pos;
462 464 curp->secondary->buf
463 465 [curp->secondary->sz] = '\n';
464 466 curp->secondary->sz++;
465 467 curp->secondary->buf
466 468 [curp->secondary->sz] = '\0';
467 469 }
468 470 rerun:
469 471 rr = roff_parseln(curp->roff, curp->line, &ln, &of);
470 472
471 473 switch (rr) {
472 474 case ROFF_REPARSE:
473 475 if (++curp->reparse_count > REPARSE_LIMIT)
474 476 mandoc_msg(MANDOCERR_ROFFLOOP, curp,
475 477 curp->line, pos, NULL);
476 478 else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
477 479 start == 1) {
478 480 pos = 0;
479 481 continue;
480 482 }
481 483 free(ln.buf);
482 484 return 0;
483 485 case ROFF_APPEND:
484 486 pos = strlen(ln.buf);
485 487 continue;
486 488 case ROFF_RERUN:
487 489 goto rerun;
488 490 case ROFF_IGN:
489 491 pos = 0;
490 492 continue;
491 493 case ROFF_SO:
/*
 * Without MPARSE_SO, a .so on the last input line is only
 * recorded in curp->sodest instead of being followed.
 */
492 494 if ( ! (curp->options & MPARSE_SO) &&
493 495 (i >= blk.sz || blk.buf[i] == '\0')) {
494 496 curp->sodest = mandoc_strdup(ln.buf + of);
495 497 free(ln.buf);
496 498 return 1;
497 499 }
498 500 /*
499 501 * We remove `so' clauses from our lookaside
500 502 * buffer because we're going to descend into
501 503 * the file recursively.
502 504 */
503 505 if (curp->secondary)
504 506 curp->secondary->sz -= pos + 1;
505 507 save_file = curp->file;
506 508 if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
507 509 mparse_readfd(curp, fd, ln.buf + of);
508 510 close(fd);
509 511 curp->file = save_file;
510 512 } else {
511 513 curp->file = save_file;
512 514 mandoc_vmsg(MANDOCERR_SO_FAIL,
513 515 curp, curp->line, pos,
514 516 ".so %s", ln.buf + of);
515 517 ln.sz = mandoc_asprintf(&cp,
516 518 ".sp\nSee the file %s.\n.sp",
517 519 ln.buf + of);
518 520 free(ln.buf);
519 521 ln.buf = cp;
520 522 of = 0;
521 523 mparse_buf_r(curp, ln, of, 0);
522 524 }
523 525 pos = 0;
524 526 continue;
525 527 default:
526 528 break;
527 529 }
528 530
529 531 if (curp->man->macroset == MACROSET_NONE)
530 532 choose_parser(curp);
531 533
532 534 if ((curp->man->macroset == MACROSET_MDOC ?
533 535 mdoc_parseln(curp->man, curp->line, ln.buf, of) :
534 536 man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
535 537 break;
536 538
537 539 /* Temporary buffers typically are not full. */
538 540
539 541 if (0 == start && '\0' == blk.buf[i])
540 542 break;
541 543
542 544 /* Start the next input line. */
543 545
544 546 pos = 0;
545 547 }
546 548
547 549 free(ln.buf);
548 550 return 1;
↓ open down ↓ |
430 lines elided |
↑ open up ↑ |
549 551 }
550 552
/*
 * Load the complete contents of fd into *fb.
 *
 * A regular, non-gzip file is mapped with mmap() and *with_mmap is
 * set to 1.  In every other case, and also when mmap() fails, the
 * data is read in chunks with read() or gzread() into a buffer
 * grown by resize_buf(), and *with_mmap is set to 0.
 *
 * Returns 1 on success and 0 on failure; failures are reported
 * through mandoc_msg()/mandoc_vmsg().  When the chunked read fails,
 * fb->buf is freed and set to NULL.  In the new ("+") version the
 * gzip stream operates on a dup() of fd and is always closed with
 * gzclose() before returning, so the caller's fd stays open.
 *
 * The "file" argument is not referenced in the body shown here.
 */
551 553 static int
552 554 read_whole_file(struct mparse *curp, const char *file, int fd,
553 555 struct buf *fb, int *with_mmap)
554 556 {
555 557 struct stat st;
556 558 gzFile gz;
557 559 size_t off;
558 560 ssize_t ssz;
561 + int gzerrnum, retval;
559 562
560 563 if (fstat(fd, &st) == -1) {
561 564 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
562 565 "fstat: %s", strerror(errno));
563 566 return 0;
564 567 }
565 568
566 569 /*
567 570 * If we're a regular file, try just reading in the whole entry
568 571 * via mmap(). This is faster than reading it into blocks, and
569 572 * since each file is only a few bytes to begin with, I'm not
570 573 * concerned that this is going to tank any machines.
571 574 */
572 575
573 576 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
574 577 if (st.st_size > 0x7fffffff) {
575 578 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
576 579 return 0;
577 580 }
578 581 *with_mmap = 1;
579 582 fb->sz = (size_t)st.st_size;
580 583 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
581 584 if (fb->buf != MAP_FAILED)
582 585 return 1;
583 586 }
584 587
585 588 if (curp->gzip) {
589 + /*
590 + * Duplicating the file descriptor is required
591 + * because we will have to call gzclose(3)
592 + * to free memory used internally by zlib,
593 + * but that will also close the file descriptor,
594 + * which this function must not do.
595 + */
596 + if ((fd = dup(fd)) == -1) {
597 + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
598 + "dup: %s", strerror(errno));
599 + return 0;
600 + }
586 601 if ((gz = gzdopen(fd, "rb")) == NULL) {
587 602 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
588 603 "gzdopen: %s", strerror(errno));
604 + close(fd);
589 605 return 0;
590 606 }
591 607 } else
592 608 gz = NULL;
593 609
594 610 /*
595 611 * If this isn't a regular file (like, say, stdin), then we must
596 612 * go the old way and just read things in bit by bit.
597 613 */
598 614
599 615 *with_mmap = 0;
600 616 off = 0;
617 + retval = 0;
601 618 fb->sz = 0;
602 619 fb->buf = NULL;
603 620 for (;;) {
604 621 if (off == fb->sz) {
/* Refuse to grow the buffer beyond 2^31 bytes. */
605 622 if (fb->sz == (1U << 31)) {
606 623 mandoc_msg(MANDOCERR_TOOLARGE, curp,
607 624 0, 0, NULL);
608 625 break;
609 626 }
610 627 resize_buf(fb, 65536);
611 628 }
612 629 ssz = curp->gzip ?
613 630 gzread(gz, fb->buf + (int)off, fb->sz - off) :
614 631 read(fd, fb->buf + (int)off, fb->sz - off);
/* End of input: trim the recorded size to the bytes actually read. */
615 632 if (ssz == 0) {
616 633 fb->sz = off;
617 - return 1;
634 + retval = 1;
635 + break;
618 636 }
619 637 if (ssz == -1) {
620 - mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
621 - "read: %s", strerror(errno));
638 + if (curp->gzip)
639 + (void)gzerror(gz, &gzerrnum);
640 + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
641 + curp->gzip && gzerrnum != Z_ERRNO ?
642 + zError(gzerrnum) : strerror(errno));
622 643 break;
623 644 }
624 645 off += (size_t)ssz;
625 646 }
626 647
627 - free(fb->buf);
628 - fb->buf = NULL;
629 - return 0;
648 + if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
649 + mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
650 + gzerrnum == Z_ERRNO ? strerror(errno) :
651 + zError(gzerrnum));
652 + if (retval == 0) {
653 + free(fb->buf);
654 + fb->buf = NULL;
655 + }
656 + return retval;
630 657 }
631 658
632 659 static void
633 660 mparse_end(struct mparse *curp)
634 661 {
635 662 if (curp->man->macroset == MACROSET_NONE)
636 663 curp->man->macroset = MACROSET_MAN;
637 664 if (curp->man->macroset == MACROSET_MDOC)
638 665 mdoc_endparse(curp->man);
639 666 else
640 667 man_endparse(curp->man);
641 668 roff_endparse(curp->roff);
642 669 }
643 670
644 671 static void
645 672 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
646 673 {
647 674 struct buf *svprimary;
648 675 const char *svfile;
649 676 size_t offset;
650 677 static int recursion_depth;
651 678
652 679 if (64 < recursion_depth) {
653 680 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
654 681 return;
655 682 }
656 683
657 684 /* Line number is per-file. */
658 685 svfile = curp->file;
659 686 curp->file = file;
660 687 svprimary = curp->primary;
661 688 curp->primary = &blk;
662 689 curp->line = 1;
663 690 recursion_depth++;
664 691
665 692 /* Skip an UTF-8 byte order mark. */
666 693 if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
667 694 (unsigned char)blk.buf[0] == 0xef &&
668 695 (unsigned char)blk.buf[1] == 0xbb &&
669 696 (unsigned char)blk.buf[2] == 0xbf) {
670 697 offset = 3;
671 698 curp->filenc &= ~MPARSE_LATIN1;
672 699 } else
673 700 offset = 0;
674 701
675 702 mparse_buf_r(curp, blk, offset, 1);
676 703
677 704 if (--recursion_depth == 0)
678 705 mparse_end(curp);
679 706
680 707 curp->primary = svprimary;
681 708 curp->file = svfile;
682 709 }
683 710
684 711 enum mandoclevel
685 712 mparse_readmem(struct mparse *curp, void *buf, size_t len,
686 713 const char *file)
687 714 {
688 715 struct buf blk;
689 716
690 717 blk.buf = buf;
691 718 blk.sz = len;
692 719
693 720 mparse_parse_buffer(curp, blk, file);
694 721 return curp->file_status;
695 722 }
696 723
697 724 /*
698 725 * Read the whole file into memory and call the parsers.
699 726 * Called recursively when an .so request is encountered.
700 727 */
701 728 enum mandoclevel
702 729 mparse_readfd(struct mparse *curp, int fd, const char *file)
703 730 {
704 731 struct buf blk;
705 732 int with_mmap;
706 733 int save_filenc;
707 734
708 735 if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
709 736 save_filenc = curp->filenc;
710 737 curp->filenc = curp->options &
711 738 (MPARSE_UTF8 | MPARSE_LATIN1);
712 739 mparse_parse_buffer(curp, blk, file);
713 740 curp->filenc = save_filenc;
714 741 if (with_mmap)
715 742 munmap(blk.buf, blk.sz);
716 743 else
717 744 free(blk.buf);
718 745 }
719 746 return curp->file_status;
720 747 }
721 748
722 749 int
723 750 mparse_open(struct mparse *curp, const char *file)
724 751 {
725 752 char *cp;
726 753 int fd;
727 754
728 755 curp->file = file;
729 756 cp = strrchr(file, '.');
730 757 curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
731 758
732 759 /* First try to use the filename as it is. */
733 760
734 761 if ((fd = open(file, O_RDONLY)) != -1)
735 762 return fd;
736 763
737 764 /*
738 765 * If that doesn't work and the filename doesn't
739 766 * already end in .gz, try appending .gz.
740 767 */
741 768
742 769 if ( ! curp->gzip) {
743 770 mandoc_asprintf(&cp, "%s.gz", file);
744 771 fd = open(cp, O_RDONLY);
745 772 free(cp);
746 773 if (fd != -1) {
747 774 curp->gzip = 1;
748 775 return fd;
749 776 }
750 777 }
751 778
752 779 /* Neither worked, give up. */
753 780
754 781 mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
755 782 return -1;
756 783 }
757 784
758 785 struct mparse *
759 786 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
760 787 enum mandoc_os os_e, const char *os_s)
761 788 {
762 789 struct mparse *curp;
763 790
764 791 curp = mandoc_calloc(1, sizeof(struct mparse));
765 792
766 793 curp->options = options;
767 794 curp->mmin = mmin;
768 795 curp->mmsg = mmsg;
769 796 curp->os_s = os_s;
770 797
771 798 curp->roff = roff_alloc(curp, options);
772 799 curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
773 800 curp->options & MPARSE_QUICK ? 1 : 0);
774 801 if (curp->options & MPARSE_MDOC) {
775 802 curp->man->macroset = MACROSET_MDOC;
776 803 if (curp->man->mdocmac == NULL)
777 804 curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
778 805 } else if (curp->options & MPARSE_MAN) {
779 806 curp->man->macroset = MACROSET_MAN;
780 807 if (curp->man->manmac == NULL)
781 808 curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
782 809 }
783 810 curp->man->first->tok = TOKEN_NONE;
784 811 curp->man->meta.os_e = os_e;
785 812 return curp;
786 813 }
787 814
788 815 void
789 816 mparse_reset(struct mparse *curp)
790 817 {
791 818 roff_reset(curp->roff);
792 819 roff_man_reset(curp->man);
793 820
794 821 free(curp->sodest);
795 822 curp->sodest = NULL;
796 823
797 824 if (curp->secondary)
798 825 curp->secondary->sz = 0;
799 826
800 827 curp->file_status = MANDOCLEVEL_OK;
801 828 curp->gzip = 0;
802 829 }
803 830
804 831 void
805 832 mparse_free(struct mparse *curp)
806 833 {
807 834
808 835 roffhash_free(curp->man->mdocmac);
809 836 roffhash_free(curp->man->manmac);
810 837 roff_man_free(curp->man);
811 838 roff_free(curp->roff);
812 839 if (curp->secondary)
813 840 free(curp->secondary->buf);
814 841
815 842 free(curp->secondary);
816 843 free(curp->sodest);
817 844 free(curp);
818 845 }
819 846
820 847 void
821 848 mparse_result(struct mparse *curp, struct roff_man **man,
822 849 char **sodest)
823 850 {
824 851
825 852 if (sodest && NULL != (*sodest = curp->sodest)) {
826 853 *man = NULL;
827 854 return;
828 855 }
829 856 if (man)
830 857 *man = curp->man;
831 858 }
832 859
833 860 void
834 861 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
835 862 {
836 863 if (curp->file_status > *rc)
837 864 *rc = curp->file_status;
838 865 }
839 866
840 867 void
841 868 mandoc_vmsg(enum mandocerr t, struct mparse *m,
842 869 int ln, int pos, const char *fmt, ...)
843 870 {
844 871 char buf[256];
845 872 va_list ap;
846 873
847 874 va_start(ap, fmt);
848 875 (void)vsnprintf(buf, sizeof(buf), fmt, ap);
849 876 va_end(ap);
850 877
851 878 mandoc_msg(t, m, ln, pos, buf);
852 879 }
853 880
854 881 void
855 882 mandoc_msg(enum mandocerr er, struct mparse *m,
856 883 int ln, int col, const char *msg)
857 884 {
858 885 enum mandoclevel level;
859 886
860 887 if (er < m->mmin && er != MANDOCERR_FILE)
861 888 return;
862 889
863 890 level = MANDOCLEVEL_UNSUPP;
864 891 while (er < mandoclimits[level])
865 892 level--;
866 893
867 894 if (m->mmsg)
868 895 (*m->mmsg)(er, level, m->file, ln, col, msg);
869 896
870 897 if (m->file_status < level)
871 898 m->file_status = level;
872 899 }
873 900
874 901 const char *
875 902 mparse_strerror(enum mandocerr er)
876 903 {
877 904
878 905 return mandocerrs[er];
879 906 }
880 907
881 908 const char *
882 909 mparse_strlevel(enum mandoclevel lvl)
883 910 {
884 911 return mandoclevels[lvl];
885 912 }
886 913
887 914 void
888 915 mparse_keep(struct mparse *p)
889 916 {
890 917
891 918 assert(NULL == p->secondary);
892 919 p->secondary = mandoc_calloc(1, sizeof(struct buf));
893 920 }
894 921
895 922 const char *
896 923 mparse_getkeep(const struct mparse *p)
897 924 {
898 925
899 926 assert(p->secondary);
900 927 return p->secondary->sz ? p->secondary->buf : NULL;
901 928 }
↓ open down ↓ |
262 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX