1 /*      $Id: read.c,v 1.196 2018/07/28 18:34:15 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
   5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
   6  *
   7  * Permission to use, copy, modify, and distribute this software for any
   8  * purpose with or without fee is hereby granted, provided that the above
   9  * copyright notice and this permission notice appear in all copies.
  10  *
  11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18  */
  19 #include "config.h"
  20 
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #include <sys/stat.h>
  24 
  25 #include <assert.h>
  26 #include <ctype.h>
  27 #include <errno.h>
  28 #include <fcntl.h>
  29 #include <stdarg.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <unistd.h>
  34 #include <zlib.h>
  35 
  36 #include "mandoc_aux.h"
  37 #include "mandoc.h"
  38 #include "roff.h"
  39 #include "mdoc.h"
  40 #include "man.h"
  41 #include "libmandoc.h"
  42 
  43 #define REPARSE_LIMIT   1000
  44 
/*
 * State of one parse: the roff preprocessor, the macro parser,
 * the message handler, and bookkeeping for the input file that is
 * currently being read.  Allocated by mparse_alloc() and released
 * by mparse_free().
 */
struct  mparse {
        struct roff      *roff; /* roff parser (!NULL) */
        struct roff_man  *man; /* man parser */
        char             *sodest; /* filename pointed to by .so */
        const char       *file; /* filename of current input file */
        struct buf       *primary; /* buffer currently being parsed */
        struct buf       *secondary; /* preprocessed copy of input; */
                                     /* NULL unless mparse_keep() ran */
        const char       *os_s; /* default operating system */
        mandocmsg         mmsg; /* warning/error message handler */
        enum mandoclevel  file_status; /* status of current parse */
        enum mandocerr    mmin; /* ignore messages below this */
        int               options; /* parser options */
        int               gzip; /* current input file is gzipped */
        int               filenc; /* encoding of the current file */
        int               reparse_count; /* reparses of the current */
                                         /* line, see REPARSE_LIMIT */
        int               line; /* line number in the file */
};
  62 
  63 static  void      choose_parser(struct mparse *);
  64 static  void      resize_buf(struct buf *, size_t);
  65 static  int       mparse_buf_r(struct mparse *, struct buf, size_t, int);
  66 static  int       read_whole_file(struct mparse *, const char *, int,
  67                                 struct buf *, int *);
  68 static  void      mparse_end(struct mparse *);
  69 static  void      mparse_parse_buffer(struct mparse *, struct buf,
  70                         const char *);
  71 
/*
 * Indexed by enum mandoclevel: the threshold mandocerr value for
 * each level.  mandoc_msg() starts at MANDOCLEVEL_UNSUPP and steps
 * down until the error code is no longer below the threshold, so
 * the entries are expected to be in non-decreasing order.
 */
static  const enum mandocerr    mandoclimits[MANDOCLEVEL_MAX] = {
        MANDOCERR_OK,
        MANDOCERR_OK,
        MANDOCERR_WARNING,
        MANDOCERR_ERROR,
        MANDOCERR_UNSUPP,
        MANDOCERR_MAX,
        MANDOCERR_MAX
};
  81 
/*
 * Message text for each mandocerr code, looked up by index in
 * mparse_strerror().
 * NOTE(review): the order must match enum mandocerr, which is
 * declared outside this file (presumably in mandoc.h) - confirm
 * before inserting or moving entries.
 */
static  const char * const      mandocerrs[MANDOCERR_MAX] = {
        "ok",

        "base system convention",

        "Mdocdate found",
        "Mdocdate missing",
        "unknown architecture",
        "operating system explicitly specified",
        "RCS id missing",
        "referenced manual not found",

        "generic style suggestion",

        "legacy man(7) date format",
        "normalizing date format to",
        "lower case character in document title",
        "duplicate RCS id",
        "possible typo in section name",
        "unterminated quoted argument",
        "useless macro",
        "consider using OS macro",
        "errnos out of order",
        "duplicate errno",
        "trailing delimiter",
        "no blank before trailing delimiter",
        "fill mode already enabled, skipping",
        "fill mode already disabled, skipping",
        "verbatim \"--\", maybe consider using \\(em",
        "function name without markup",
        "whitespace at end of input line",
        "bad comment style",

        "generic warning",

        /* related to the prologue */
        "missing manual title, using UNTITLED",
        "missing manual title, using \"\"",
        "missing manual section, using \"\"",
        "unknown manual section",
        "missing date, using today's date",
        "cannot parse date, using it verbatim",
        "date in the future, using it anyway",
        "missing Os macro, using \"\"",
        "late prologue macro",
        "prologue macros out of order",

        /* related to document structure */
        ".so is fragile, better use ln(1)",
        "no document body",
        "content before first section header",
        "first section is not \"NAME\"",
        "NAME section without Nm before Nd",
        "NAME section without description",
        "description not at the end of NAME",
        "bad NAME section content",
        "missing comma before name",
        "missing description line, using \"\"",
        "description line outside NAME section",
        "sections out of conventional order",
        "duplicate section title",
        "unexpected section",
        "cross reference to self",
        "unusual Xr order",
        "unusual Xr punctuation",
        "AUTHORS section without An macro",

        /* related to macros and nesting */
        "obsolete macro",
        "macro neither callable nor escaped",
        "skipping paragraph macro",
        "moving paragraph macro out of list",
        "skipping no-space macro",
        "blocks badly nested",
        "nested displays are not portable",
        "moving content out of list",
        "first macro on line",
        "line scope broken",
        "skipping blank line in line scope",

        /* related to missing macro arguments */
        "skipping empty request",
        "conditional request controls empty scope",
        "skipping empty macro",
        "empty block",
        "empty argument, using 0n",
        "missing display type, using -ragged",
        "list type is not the first argument",
        "missing -width in -tag list, using 6n",
        "missing utility name, using \"\"",
        "missing function name, using \"\"",
        "empty head in list item",
        "empty list item",
        "missing argument, using next line",
        "missing font type, using \\fR",
        "unknown font type, using \\fR",
        "nothing follows prefix",
        "empty reference block",
        "missing section argument",
        "missing -std argument, adding it",
        "missing option string, using \"\"",
        "missing resource identifier, using \"\"",
        "missing eqn box, using \"\"",

        /* related to bad macro arguments */
        "duplicate argument",
        "skipping duplicate argument",
        "skipping duplicate display type",
        "skipping duplicate list type",
        "skipping -width argument",
        "wrong number of cells",
        "unknown AT&T UNIX version",
        "comma in function argument",
        "parenthesis in function name",
        "unknown library name",
        "invalid content in Rs block",
        "invalid Boolean argument",
        "unknown font, skipping request",
        "odd number of characters in request",

        /* related to plain text */
        "blank line in fill mode, using .sp",
        "tab in filled text",
        "new sentence, new line",
        "invalid escape sequence",
        "undefined string, using \"\"",

        /* related to tables */
        "tbl line starts with span",
        "tbl column starts with span",
        "skipping vertical bar in tbl layout",

        "generic error",

        /* related to tables */
        "non-alphabetic character in tbl options",
        "skipping unknown tbl option",
        "missing tbl option argument",
        "wrong tbl option argument size",
        "empty tbl layout",
        "invalid character in tbl layout",
        "unmatched parenthesis in tbl layout",
        "tbl without any data cells",
        "ignoring data in spanned tbl cell",
        "ignoring extra tbl data cells",
        "data block open at end of tbl",

        /* related to document structure and macros */
        /*
         * No fixed text for this code.
         * NOTE(review): presumably the MANDOCERR_FILE slot, whose
         * text callers in this file supply themselves (strerror()
         * output) - verify against the enum.
         */
        NULL,
        "duplicate prologue macro",
        "skipping late title macro",
        "input stack limit exceeded, infinite loop?",
        "skipping bad character",
        "skipping unknown macro",
        "skipping insecure request",
        "skipping item outside list",
        "skipping column outside column list",
        "skipping end of block that is not open",
        "fewer RS blocks open, skipping",
        "inserting missing end of block",
        "appending missing end of block",

        /* related to request and macro arguments */
        "escaped character not allowed in a name",
        "NOT IMPLEMENTED: Bd -file",
        "skipping display without arguments",
        "missing list type, using -item",
        "argument is not numeric, using 1",
        "missing manual name, using \"\"",
        "uname(3) system call failed, using UNKNOWN",
        "unknown standard specifier",
        "skipping request without numeric argument",
        "NOT IMPLEMENTED: .so with absolute path or \"..\"",
        ".so request failed",
        "skipping all arguments",
        "skipping excess arguments",
        "divide by zero",

        "unsupported feature",
        "input too large",
        "unsupported control character",
        "unsupported roff request",
        "eqn delim option in tbl",
        "unsupported tbl layout modifier",
        "ignoring macro in table",
};
 268 
/*
 * Printable name of each enum mandoclevel value, in enum order;
 * returned by mparse_strlevel().
 */
static  const char * const      mandoclevels[MANDOCLEVEL_MAX] = {
        "SUCCESS",
        "STYLE",
        "WARNING",
        "ERROR",
        "UNSUPP",
        "BADARG",
        "SYSERR"
};
 278 
 279 
 280 static void
 281 resize_buf(struct buf *buf, size_t initial)
 282 {
 283 
 284         buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
 285         buf->buf = mandoc_realloc(buf->buf, buf->sz);
 286 }
 287 
 288 static void
 289 choose_parser(struct mparse *curp)
 290 {
 291         char            *cp, *ep;
 292         int              format;
 293 
 294         /*
 295          * If neither command line arguments -mdoc or -man select
 296          * a parser nor the roff parser found a .Dd or .TH macro
 297          * yet, look ahead in the main input buffer.
 298          */
 299 
 300         if ((format = roff_getformat(curp->roff)) == 0) {
 301                 cp = curp->primary->buf;
 302                 ep = cp + curp->primary->sz;
 303                 while (cp < ep) {
 304                         if (*cp == '.' || *cp == '\'') {
 305                                 cp++;
 306                                 if (cp[0] == 'D' && cp[1] == 'd') {
 307                                         format = MPARSE_MDOC;
 308                                         break;
 309                                 }
 310                                 if (cp[0] == 'T' && cp[1] == 'H') {
 311                                         format = MPARSE_MAN;
 312                                         break;
 313                                 }
 314                         }
 315                         cp = memchr(cp, '\n', ep - cp);
 316                         if (cp == NULL)
 317                                 break;
 318                         cp++;
 319                 }
 320         }
 321 
 322         if (format == MPARSE_MDOC) {
 323                 curp->man->macroset = MACROSET_MDOC;
 324                 if (curp->man->mdocmac == NULL)
 325                         curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
 326         } else {
 327                 curp->man->macroset = MACROSET_MAN;
 328                 if (curp->man->manmac == NULL)
 329                         curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
 330         }
 331         curp->man->first->tok = TOKEN_NONE;
 332 }
 333 
/*
 * Main parse routine for a buffer.
 * It assumes encoding and line numbering are already set up.
 * It can recurse directly (for invocations of user-defined
 * macros, inline equations, and input line traps)
 * and indirectly (for .so file inclusion).
 *
 * blk is the input buffer and i the byte offset at which to begin.
 * start is non-zero when blk is the buffer of an input file
 * (mparse_parse_buffer() passes 1) and zero when this function
 * calls itself on a temporary line buffer.
 *
 * Returns 0 when a ROFF_REPARSE could not be completed: either the
 * reparse limit was exceeded, or the nested call returned 0 and
 * start is not 1.  Returns 1 in all other cases.
 */
static int
mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
{
        struct buf       ln;
        const char      *save_file;
        char            *cp;
        size_t           pos; /* byte number in the ln buffer */
        enum rofferr     rr;
        int              of;
        int              lnn; /* line number in the real file */
        int              fd;
        unsigned char    c;

        memset(&ln, 0, sizeof(ln));

        lnn = curp->line;
        pos = 0;

        while (i < blk.sz) {
                /* A NUL byte at the start of a line ends the input. */
                if (0 == pos && '\0' == blk.buf[i])
                        break;

                /*
                 * Only file buffers update the current line number
                 * and reset the per-line reparse counter.
                 */
                if (start) {
                        curp->line = lnn;
                        curp->reparse_count = 0;

                        /*
                         * While both encodings are still possible,
                         * let preconv_cue() choose for lnn < 3.
                         */
                        if (lnn < 3 &&
                            curp->filenc & MPARSE_UTF8 &&
                            curp->filenc & MPARSE_LATIN1)
                                curp->filenc = preconv_cue(&blk, i);
                }

                /*
                 * Copy one input line into ln.  In a temporary
                 * buffer (start == 0), a NUL byte also ends the line.
                 */
                while (i < blk.sz && (start || blk.buf[i] != '\0')) {

                        /*
                         * When finding an unescaped newline character,
                         * leave the character loop to process the line.
                         * Skip a preceding carriage return, if any.
                         */

                        if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
                            '\n' == blk.buf[i + 1])
                                ++i;
                        if ('\n' == blk.buf[i]) {
                                ++i;
                                ++lnn;
                                break;
                        }

                        /*
                         * Make sure we have space for the worst
                         * case of 11 bytes: "\\[u10ffff]\0"
                         */

                        if (pos + 11 > ln.sz)
                                resize_buf(&ln, 256);

                        /*
                         * Encode 8-bit input.
                         * If no encoding is selected or
                         * preconv_encode() returns 0, report the
                         * byte and replace it with '?'.  Otherwise
                         * i and pos are not touched here;
                         * preconv_encode() gets pointers to both
                         * (presumably advancing them itself).
                         */

                        c = blk.buf[i];
                        if (c & 0x80) {
                                if ( ! (curp->filenc && preconv_encode(
                                    &blk, &i, &ln, &pos, &curp->filenc))) {
                                        mandoc_vmsg(MANDOCERR_CHAR_BAD, curp,
                                            curp->line, pos, "0x%x", c);
                                        ln.buf[pos++] = '?';
                                        i++;
                                }
                                continue;
                        }

                        /*
                         * Exclude control characters.
                         * Everything below 0x20 except tab, and 0x7f,
                         * is reported; all but a carriage return are
                         * replaced with '?', a carriage return is
                         * dropped.
                         */

                        if (c == 0x7f || (c < 0x20 && c != 0x09)) {
                                mandoc_vmsg(c == 0x00 || c == 0x04 ||
                                    c > 0x0a ? MANDOCERR_CHAR_BAD :
                                    MANDOCERR_CHAR_UNSUPP,
                                    curp, curp->line, pos, "0x%x", c);
                                i++;
                                if (c != '\r')
                                        ln.buf[pos++] = '?';
                                continue;
                        }

                        ln.buf[pos++] = blk.buf[i++];
                }

                /* Room for an optional newline plus the final NUL. */
                if (pos + 1 >= ln.sz)
                        resize_buf(&ln, 256);

                /*
                 * At the end of the buffer, or at a NUL byte,
                 * a newline is appended to the line.
                 */
                if (i == blk.sz || blk.buf[i] == '\0')
                        ln.buf[pos++] = '\n';
                ln.buf[pos] = '\0';

                /*
                 * A significant amount of complexity is contained by
                 * the roff preprocessor.  It's line-oriented but can be
                 * expressed on one line, so we need at times to
                 * readjust our starting point and re-run it.  The roff
                 * preprocessor can also readjust the buffers with new
                 * data, so we pass them in wholesale.
                 */

                /* Offset into ln; passed by address to roff_parseln(). */
                of = 0;

                /*
                 * Maintain a lookaside buffer of all parsed lines.  We
                 * only do this if mparse_keep() has been invoked (the
                 * buffer may be accessed with mparse_getkeep()).
                 * The extra two bytes hold the newline and the NUL.
                 */

                if (curp->secondary) {
                        curp->secondary->buf = mandoc_realloc(
                            curp->secondary->buf,
                            curp->secondary->sz + pos + 2);
                        memcpy(curp->secondary->buf +
                            curp->secondary->sz,
                            ln.buf, pos);
                        curp->secondary->sz += pos;
                        curp->secondary->buf
                                [curp->secondary->sz] = '\n';
                        curp->secondary->sz++;
                        curp->secondary->buf
                                [curp->secondary->sz] = '\0';
                }
rerun:
                rr = roff_parseln(curp->roff, curp->line, &ln, &of);

                switch (rr) {
                case ROFF_REPARSE:
                        /*
                         * Parse ln itself as input, starting at of.
                         * A file buffer (start == 1) carries on with
                         * its next line even if the nested call
                         * returned 0; exceeding the limit always
                         * aborts with 0.
                         */
                        if (++curp->reparse_count > REPARSE_LIMIT)
                                mandoc_msg(MANDOCERR_ROFFLOOP, curp,
                                    curp->line, pos, NULL);
                        else if (mparse_buf_r(curp, ln, of, 0) == 1 ||
                            start == 1) {
                                pos = 0;
                                continue;
                        }
                        free(ln.buf);
                        return 0;
                case ROFF_APPEND:
                        /* Keep ln; the next input line is appended. */
                        pos = strlen(ln.buf);
                        continue;
                case ROFF_RERUN:
                        /* Feed the same line buffer to roff again. */
                        goto rerun;
                case ROFF_IGN:
                        /* Discard this line. */
                        pos = 0;
                        continue;
                case ROFF_SO:
                        /*
                         * Without MPARSE_SO, a .so request at the very
                         * end of the input is not followed: only its
                         * target is recorded in sodest.
                         */
                        if ( ! (curp->options & MPARSE_SO) &&
                            (i >= blk.sz || blk.buf[i] == '\0')) {
                                curp->sodest = mandoc_strdup(ln.buf + of);
                                free(ln.buf);
                                return 1;
                        }
                        /*
                         * We remove `so' clauses from our lookaside
                         * buffer because we're going to descend into
                         * the file recursively.
                         */
                        if (curp->secondary)
                                curp->secondary->sz -= pos + 1;
                        /* mparse_open() overwrites curp->file. */
                        save_file = curp->file;
                        if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
                                mparse_readfd(curp, fd, ln.buf + of);
                                close(fd);
                                curp->file = save_file;
                        } else {
                                /*
                                 * The file cannot be opened: report
                                 * it and parse a short replacement
                                 * text naming the file instead.
                                 */
                                curp->file = save_file;
                                mandoc_vmsg(MANDOCERR_SO_FAIL,
                                    curp, curp->line, pos,
                                    ".so %s", ln.buf + of);
                                ln.sz = mandoc_asprintf(&cp,
                                    ".sp\nSee the file %s.\n.sp",
                                    ln.buf + of);
                                free(ln.buf);
                                ln.buf = cp;
                                of = 0;
                                mparse_buf_r(curp, ln, of, 0);
                        }
                        pos = 0;
                        continue;
                default:
                        break;
                }

                /* No macro set chosen yet: decide between mdoc and man. */
                if (curp->man->macroset == MACROSET_NONE)
                        choose_parser(curp);

                /*
                 * Hand the line to the selected macro parser.
                 * A return value of 2 ends the loop.
                 */
                if ((curp->man->macroset == MACROSET_MDOC ?
                    mdoc_parseln(curp->man, curp->line, ln.buf, of) :
                    man_parseln(curp->man, curp->line, ln.buf, of)) == 2)
                                break;

                /* Temporary buffers typically are not full. */

                if (0 == start && '\0' == blk.buf[i])
                        break;

                /* Start the next input line. */

                pos = 0;
        }

        free(ln.buf);
        return 1;
}
 552 
 553 static int
 554 read_whole_file(struct mparse *curp, const char *file, int fd,
 555                 struct buf *fb, int *with_mmap)
 556 {
 557         struct stat      st;
 558         gzFile           gz;
 559         size_t           off;
 560         ssize_t          ssz;
 561         int              gzerrnum, retval;
 562 
 563         if (fstat(fd, &st) == -1) {
 564                 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 565                     "fstat: %s", strerror(errno));
 566                 return 0;
 567         }
 568 
 569         /*
 570          * If we're a regular file, try just reading in the whole entry
 571          * via mmap().  This is faster than reading it into blocks, and
 572          * since each file is only a few bytes to begin with, I'm not
 573          * concerned that this is going to tank any machines.
 574          */
 575 
 576         if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
 577                 if (st.st_size > 0x7fffffff) {
 578                         mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
 579                         return 0;
 580                 }
 581                 *with_mmap = 1;
 582                 fb->sz = (size_t)st.st_size;
 583                 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
 584                 if (fb->buf != MAP_FAILED)
 585                         return 1;
 586         }
 587 
 588         if (curp->gzip) {
 589                 /*
 590                  * Duplicating the file descriptor is required
 591                  * because we will have to call gzclose(3)
 592                  * to free memory used internally by zlib,
 593                  * but that will also close the file descriptor,
 594                  * which this function must not do.
 595                  */
 596                 if ((fd = dup(fd)) == -1) {
 597                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 598                             "dup: %s", strerror(errno));
 599                         return 0;
 600                 }
 601                 if ((gz = gzdopen(fd, "rb")) == NULL) {
 602                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 603                             "gzdopen: %s", strerror(errno));
 604                         close(fd);
 605                         return 0;
 606                 }
 607         } else
 608                 gz = NULL;
 609 
 610         /*
 611          * If this isn't a regular file (like, say, stdin), then we must
 612          * go the old way and just read things in bit by bit.
 613          */
 614 
 615         *with_mmap = 0;
 616         off = 0;
 617         retval = 0;
 618         fb->sz = 0;
 619         fb->buf = NULL;
 620         for (;;) {
 621                 if (off == fb->sz) {
 622                         if (fb->sz == (1U << 31)) {
 623                                 mandoc_msg(MANDOCERR_TOOLARGE, curp,
 624                                     0, 0, NULL);
 625                                 break;
 626                         }
 627                         resize_buf(fb, 65536);
 628                 }
 629                 ssz = curp->gzip ?
 630                     gzread(gz, fb->buf + (int)off, fb->sz - off) :
 631                     read(fd, fb->buf + (int)off, fb->sz - off);
 632                 if (ssz == 0) {
 633                         fb->sz = off;
 634                         retval = 1;
 635                         break;
 636                 }
 637                 if (ssz == -1) {
 638                         if (curp->gzip)
 639                                 (void)gzerror(gz, &gzerrnum);
 640                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
 641                             curp->gzip && gzerrnum != Z_ERRNO ?
 642                             zError(gzerrnum) : strerror(errno));
 643                         break;
 644                 }
 645                 off += (size_t)ssz;
 646         }
 647 
 648         if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
 649                 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
 650                     gzerrnum == Z_ERRNO ? strerror(errno) :
 651                     zError(gzerrnum));
 652         if (retval == 0) {
 653                 free(fb->buf);
 654                 fb->buf = NULL;
 655         }
 656         return retval;
 657 }
 658 
 659 static void
 660 mparse_end(struct mparse *curp)
 661 {
 662         if (curp->man->macroset == MACROSET_NONE)
 663                 curp->man->macroset = MACROSET_MAN;
 664         if (curp->man->macroset == MACROSET_MDOC)
 665                 mdoc_endparse(curp->man);
 666         else
 667                 man_endparse(curp->man);
 668         roff_endparse(curp->roff);
 669 }
 670 
 671 static void
 672 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
 673 {
 674         struct buf      *svprimary;
 675         const char      *svfile;
 676         size_t           offset;
 677         static int       recursion_depth;
 678 
 679         if (64 < recursion_depth) {
 680                 mandoc_msg(MANDOCERR_ROFFLOOP, curp, curp->line, 0, NULL);
 681                 return;
 682         }
 683 
 684         /* Line number is per-file. */
 685         svfile = curp->file;
 686         curp->file = file;
 687         svprimary = curp->primary;
 688         curp->primary = &blk;
 689         curp->line = 1;
 690         recursion_depth++;
 691 
 692         /* Skip an UTF-8 byte order mark. */
 693         if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
 694             (unsigned char)blk.buf[0] == 0xef &&
 695             (unsigned char)blk.buf[1] == 0xbb &&
 696             (unsigned char)blk.buf[2] == 0xbf) {
 697                 offset = 3;
 698                 curp->filenc &= ~MPARSE_LATIN1;
 699         } else
 700                 offset = 0;
 701 
 702         mparse_buf_r(curp, blk, offset, 1);
 703 
 704         if (--recursion_depth == 0)
 705                 mparse_end(curp);
 706 
 707         curp->primary = svprimary;
 708         curp->file = svfile;
 709 }
 710 
 711 enum mandoclevel
 712 mparse_readmem(struct mparse *curp, void *buf, size_t len,
 713                 const char *file)
 714 {
 715         struct buf blk;
 716 
 717         blk.buf = buf;
 718         blk.sz = len;
 719 
 720         mparse_parse_buffer(curp, blk, file);
 721         return curp->file_status;
 722 }
 723 
 724 /*
 725  * Read the whole file into memory and call the parsers.
 726  * Called recursively when an .so request is encountered.
 727  */
 728 enum mandoclevel
 729 mparse_readfd(struct mparse *curp, int fd, const char *file)
 730 {
 731         struct buf       blk;
 732         int              with_mmap;
 733         int              save_filenc;
 734 
 735         if (read_whole_file(curp, file, fd, &blk, &with_mmap)) {
 736                 save_filenc = curp->filenc;
 737                 curp->filenc = curp->options &
 738                     (MPARSE_UTF8 | MPARSE_LATIN1);
 739                 mparse_parse_buffer(curp, blk, file);
 740                 curp->filenc = save_filenc;
 741                 if (with_mmap)
 742                         munmap(blk.buf, blk.sz);
 743                 else
 744                         free(blk.buf);
 745         }
 746         return curp->file_status;
 747 }
 748 
 749 int
 750 mparse_open(struct mparse *curp, const char *file)
 751 {
 752         char             *cp;
 753         int               fd;
 754 
 755         curp->file = file;
 756         cp = strrchr(file, '.');
 757         curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
 758 
 759         /* First try to use the filename as it is. */
 760 
 761         if ((fd = open(file, O_RDONLY)) != -1)
 762                 return fd;
 763 
 764         /*
 765          * If that doesn't work and the filename doesn't
 766          * already  end in .gz, try appending .gz.
 767          */
 768 
 769         if ( ! curp->gzip) {
 770                 mandoc_asprintf(&cp, "%s.gz", file);
 771                 fd = open(cp, O_RDONLY);
 772                 free(cp);
 773                 if (fd != -1) {
 774                         curp->gzip = 1;
 775                         return fd;
 776                 }
 777         }
 778 
 779         /* Neither worked, give up. */
 780 
 781         mandoc_msg(MANDOCERR_FILE, curp, 0, 0, strerror(errno));
 782         return -1;
 783 }
 784 
 785 struct mparse *
 786 mparse_alloc(int options, enum mandocerr mmin, mandocmsg mmsg,
 787     enum mandoc_os os_e, const char *os_s)
 788 {
 789         struct mparse   *curp;
 790 
 791         curp = mandoc_calloc(1, sizeof(struct mparse));
 792 
 793         curp->options = options;
 794         curp->mmin = mmin;
 795         curp->mmsg = mmsg;
 796         curp->os_s = os_s;
 797 
 798         curp->roff = roff_alloc(curp, options);
 799         curp->man = roff_man_alloc(curp->roff, curp, curp->os_s,
 800                 curp->options & MPARSE_QUICK ? 1 : 0);
 801         if (curp->options & MPARSE_MDOC) {
 802                 curp->man->macroset = MACROSET_MDOC;
 803                 if (curp->man->mdocmac == NULL)
 804                         curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
 805         } else if (curp->options & MPARSE_MAN) {
 806                 curp->man->macroset = MACROSET_MAN;
 807                 if (curp->man->manmac == NULL)
 808                         curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
 809         }
 810         curp->man->first->tok = TOKEN_NONE;
 811         curp->man->meta.os_e = os_e;
 812         return curp;
 813 }
 814 
 815 void
 816 mparse_reset(struct mparse *curp)
 817 {
 818         roff_reset(curp->roff);
 819         roff_man_reset(curp->man);
 820 
 821         free(curp->sodest);
 822         curp->sodest = NULL;
 823 
 824         if (curp->secondary)
 825                 curp->secondary->sz = 0;
 826 
 827         curp->file_status = MANDOCLEVEL_OK;
 828         curp->gzip = 0;
 829 }
 830 
 831 void
 832 mparse_free(struct mparse *curp)
 833 {
 834 
 835         roffhash_free(curp->man->mdocmac);
 836         roffhash_free(curp->man->manmac);
 837         roff_man_free(curp->man);
 838         roff_free(curp->roff);
 839         if (curp->secondary)
 840                 free(curp->secondary->buf);
 841 
 842         free(curp->secondary);
 843         free(curp->sodest);
 844         free(curp);
 845 }
 846 
 847 void
 848 mparse_result(struct mparse *curp, struct roff_man **man,
 849         char **sodest)
 850 {
 851 
 852         if (sodest && NULL != (*sodest = curp->sodest)) {
 853                 *man = NULL;
 854                 return;
 855         }
 856         if (man)
 857                 *man = curp->man;
 858 }
 859 
 860 void
 861 mparse_updaterc(struct mparse *curp, enum mandoclevel *rc)
 862 {
 863         if (curp->file_status > *rc)
 864                 *rc = curp->file_status;
 865 }
 866 
 867 void
 868 mandoc_vmsg(enum mandocerr t, struct mparse *m,
 869                 int ln, int pos, const char *fmt, ...)
 870 {
 871         char             buf[256];
 872         va_list          ap;
 873 
 874         va_start(ap, fmt);
 875         (void)vsnprintf(buf, sizeof(buf), fmt, ap);
 876         va_end(ap);
 877 
 878         mandoc_msg(t, m, ln, pos, buf);
 879 }
 880 
 881 void
 882 mandoc_msg(enum mandocerr er, struct mparse *m,
 883                 int ln, int col, const char *msg)
 884 {
 885         enum mandoclevel level;
 886 
 887         if (er < m->mmin && er != MANDOCERR_FILE)
 888                 return;
 889 
 890         level = MANDOCLEVEL_UNSUPP;
 891         while (er < mandoclimits[level])
 892                 level--;
 893 
 894         if (m->mmsg)
 895                 (*m->mmsg)(er, level, m->file, ln, col, msg);
 896 
 897         if (m->file_status < level)
 898                 m->file_status = level;
 899 }
 900 
 901 const char *
 902 mparse_strerror(enum mandocerr er)
 903 {
 904 
 905         return mandocerrs[er];
 906 }
 907 
 908 const char *
 909 mparse_strlevel(enum mandoclevel lvl)
 910 {
 911         return mandoclevels[lvl];
 912 }
 913 
 914 void
 915 mparse_keep(struct mparse *p)
 916 {
 917 
 918         assert(NULL == p->secondary);
 919         p->secondary = mandoc_calloc(1, sizeof(struct buf));
 920 }
 921 
 922 const char *
 923 mparse_getkeep(const struct mparse *p)
 924 {
 925 
 926         assert(p->secondary);
 927         return p->secondary->sz ? p->secondary->buf : NULL;
 928 }