Print this page
9718 update mandoc to 1.14.4
   1 /*      $Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
   5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
   6  *
   7  * Permission to use, copy, modify, and distribute this software for any
   8  * purpose with or without fee is hereby granted, provided that the above
   9  * copyright notice and this permission notice appear in all copies.
  10  *
  11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18  */
  19 #include "config.h"
  20 
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #include <sys/stat.h>
  24 


  77         MANDOCERR_UNSUPP,
  78         MANDOCERR_MAX,
  79         MANDOCERR_MAX
  80 };
  81 
  82 static  const char * const      mandocerrs[MANDOCERR_MAX] = {
  83         "ok",
  84 
  85         "base system convention",
  86 
  87         "Mdocdate found",
  88         "Mdocdate missing",
  89         "unknown architecture",
  90         "operating system explicitly specified",
  91         "RCS id missing",
  92         "referenced manual not found",
  93 
  94         "generic style suggestion",
  95 
  96         "legacy man(7) date format",

  97         "lower case character in document title",
  98         "duplicate RCS id",
  99         "typo in section name",
 100         "unterminated quoted argument",
 101         "useless macro",
 102         "consider using OS macro",
 103         "errnos out of order",
 104         "duplicate errno",
 105         "trailing delimiter",
 106         "no blank before trailing delimiter",
 107         "fill mode already enabled, skipping",
 108         "fill mode already disabled, skipping",

 109         "function name without markup",
 110         "whitespace at end of input line",
 111         "bad comment style",
 112 
 113         "generic warning",
 114 
 115         /* related to the prologue */
 116         "missing manual title, using UNTITLED",
 117         "missing manual title, using \"\"",
 118         "missing manual section, using \"\"",
 119         "unknown manual section",
 120         "missing date, using today's date",
 121         "cannot parse date, using it verbatim",
 122         "date in the future, using it anyway",
 123         "missing Os macro, using \"\"",
 124         "late prologue macro",
 125         "prologue macros out of order",
 126 
 127         /* related to document structure */
 128         ".so is fragile, better use ln(1)",


 539                 if (0 == start && '\0' == blk.buf[i])
 540                         break;
 541 
 542                 /* Start the next input line. */
 543 
 544                 pos = 0;
 545         }
 546 
 547         free(ln.buf);
 548         return 1;
 549 }
 550 
 551 static int
 552 read_whole_file(struct mparse *curp, const char *file, int fd,
 553                 struct buf *fb, int *with_mmap)
 554 {
 555         struct stat      st;
 556         gzFile           gz;
 557         size_t           off;
 558         ssize_t          ssz;

 559 
 560         if (fstat(fd, &st) == -1) {
 561                 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 562                     "fstat: %s", strerror(errno));
 563                 return 0;
 564         }
 565 
 566         /*
 567          * If we're a regular file, try just reading in the whole entry
 568          * via mmap().  This is faster than reading it into blocks, and
 569          * since each file is only a few bytes to begin with, I'm not
 570          * concerned that this is going to tank any machines.
 571          */
 572 
 573         if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
 574                 if (st.st_size > 0x7fffffff) {
 575                         mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
 576                         return 0;
 577                 }
 578                 *with_mmap = 1;
 579                 fb->sz = (size_t)st.st_size;
 580                 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
 581                 if (fb->buf != MAP_FAILED)
 582                         return 1;
 583         }
 584 
 585         if (curp->gzip) {












 586                 if ((gz = gzdopen(fd, "rb")) == NULL) {
 587                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 588                             "gzdopen: %s", strerror(errno));

 589                         return 0;
 590                 }
 591         } else
 592                 gz = NULL;
 593 
 594         /*
 595          * If this isn't a regular file (like, say, stdin), then we must
 596          * go the old way and just read things in bit by bit.
 597          */
 598 
 599         *with_mmap = 0;
 600         off = 0;

 601         fb->sz = 0;
 602         fb->buf = NULL;
 603         for (;;) {
 604                 if (off == fb->sz) {
 605                         if (fb->sz == (1U << 31)) {
 606                                 mandoc_msg(MANDOCERR_TOOLARGE, curp,
 607                                     0, 0, NULL);
 608                                 break;
 609                         }
 610                         resize_buf(fb, 65536);
 611                 }
 612                 ssz = curp->gzip ?
 613                     gzread(gz, fb->buf + (int)off, fb->sz - off) :
 614                     read(fd, fb->buf + (int)off, fb->sz - off);
 615                 if (ssz == 0) {
 616                         fb->sz = off;
 617                         return 1;

 618                 }
 619                 if (ssz == -1) {
 620                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 621                             "read: %s", strerror(errno));



 622                         break;
 623                 }
 624                 off += (size_t)ssz;
 625         }
 626 





 627         free(fb->buf);
 628         fb->buf = NULL;
 629         return 0;

 630 }
 631 
 632 static void
 633 mparse_end(struct mparse *curp)
 634 {
 635         if (curp->man->macroset == MACROSET_NONE)
 636                 curp->man->macroset = MACROSET_MAN;
 637         if (curp->man->macroset == MACROSET_MDOC)
 638                 mdoc_endparse(curp->man);
 639         else
 640                 man_endparse(curp->man);
 641         roff_endparse(curp->roff);
 642 }
 643 
 644 static void
 645 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
 646 {
 647         struct buf      *svprimary;
 648         const char      *svfile;
 649         size_t           offset;


   1 /*      $Id: read.c,v 1.196 2018/07/28 18:34:15 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
   5  * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
   6  *
   7  * Permission to use, copy, modify, and distribute this software for any
   8  * purpose with or without fee is hereby granted, provided that the above
   9  * copyright notice and this permission notice appear in all copies.
  10  *
  11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  18  */
  19 #include "config.h"
  20 
  21 #include <sys/types.h>
  22 #include <sys/mman.h>
  23 #include <sys/stat.h>
  24 


  77         MANDOCERR_UNSUPP,
  78         MANDOCERR_MAX,
  79         MANDOCERR_MAX
  80 };
  81 
  82 static  const char * const      mandocerrs[MANDOCERR_MAX] = {
  83         "ok",
  84 
  85         "base system convention",
  86 
  87         "Mdocdate found",
  88         "Mdocdate missing",
  89         "unknown architecture",
  90         "operating system explicitly specified",
  91         "RCS id missing",
  92         "referenced manual not found",
  93 
  94         "generic style suggestion",
  95 
  96         "legacy man(7) date format",
  97         "normalizing date format to",
  98         "lower case character in document title",
  99         "duplicate RCS id",
 100         "possible typo in section name",
 101         "unterminated quoted argument",
 102         "useless macro",
 103         "consider using OS macro",
 104         "errnos out of order",
 105         "duplicate errno",
 106         "trailing delimiter",
 107         "no blank before trailing delimiter",
 108         "fill mode already enabled, skipping",
 109         "fill mode already disabled, skipping",
 110         "verbatim \"--\", maybe consider using \\(em",
 111         "function name without markup",
 112         "whitespace at end of input line",
 113         "bad comment style",
 114 
 115         "generic warning",
 116 
 117         /* related to the prologue */
 118         "missing manual title, using UNTITLED",
 119         "missing manual title, using \"\"",
 120         "missing manual section, using \"\"",
 121         "unknown manual section",
 122         "missing date, using today's date",
 123         "cannot parse date, using it verbatim",
 124         "date in the future, using it anyway",
 125         "missing Os macro, using \"\"",
 126         "late prologue macro",
 127         "prologue macros out of order",
 128 
 129         /* related to document structure */
 130         ".so is fragile, better use ln(1)",


 541                 if (0 == start && '\0' == blk.buf[i])
 542                         break;
 543 
 544                 /* Start the next input line. */
 545 
 546                 pos = 0;
 547         }
 548 
 549         free(ln.buf);
 550         return 1;
 551 }
 552 
 553 static int
 554 read_whole_file(struct mparse *curp, const char *file, int fd,
 555                 struct buf *fb, int *with_mmap)
 556 {
 557         struct stat      st;
 558         gzFile           gz;
 559         size_t           off;
 560         ssize_t          ssz;
 561         int              gzerrnum, retval;
 562 
 563         if (fstat(fd, &st) == -1) {
 564                 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 565                     "fstat: %s", strerror(errno));
 566                 return 0;
 567         }
 568 
 569         /*
 570          * If we're a regular file, try just reading in the whole entry
 571          * via mmap().  This is faster than reading it into blocks, and
 572          * since each file is only a few bytes to begin with, I'm not
 573          * concerned that this is going to tank any machines.
 574          */
 575 
 576         if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
 577                 if (st.st_size > 0x7fffffff) {
 578                         mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
 579                         return 0;
 580                 }
 581                 *with_mmap = 1;
 582                 fb->sz = (size_t)st.st_size;
 583                 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
 584                 if (fb->buf != MAP_FAILED)
 585                         return 1;
 586         }
 587 
 588         if (curp->gzip) {
 589                 /*
 590                  * Duplicating the file descriptor is required
 591                  * because we will have to call gzclose(3)
 592                  * to free memory used internally by zlib,
 593                  * but that will also close the file descriptor,
 594                  * which this function must not do.
 595                  */
 596                 if ((fd = dup(fd)) == -1) {
 597                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 598                             "dup: %s", strerror(errno));
 599                         return 0;
 600                 }
 601                 if ((gz = gzdopen(fd, "rb")) == NULL) {
 602                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
 603                             "gzdopen: %s", strerror(errno));
 604                         close(fd);
 605                         return 0;
 606                 }
 607         } else
 608                 gz = NULL;
 609 
 610         /*
 611          * If this isn't a regular file (like, say, stdin), then we must
 612          * go the old way and just read things in bit by bit.
 613          */
 614 
 615         *with_mmap = 0;
 616         off = 0;
 617         retval = 0;
 618         fb->sz = 0;
 619         fb->buf = NULL;
 620         for (;;) {
 621                 if (off == fb->sz) {
 622                         if (fb->sz == (1U << 31)) {
 623                                 mandoc_msg(MANDOCERR_TOOLARGE, curp,
 624                                     0, 0, NULL);
 625                                 break;
 626                         }
 627                         resize_buf(fb, 65536);
 628                 }
 629                 ssz = curp->gzip ?
 630                     gzread(gz, fb->buf + (int)off, fb->sz - off) :
 631                     read(fd, fb->buf + (int)off, fb->sz - off);
 632                 if (ssz == 0) {
 633                         fb->sz = off;
 634                         retval = 1;
 635                         break;
 636                 }
 637                 if (ssz == -1) {
 638                         if (curp->gzip)
 639                                 (void)gzerror(gz, &gzerrnum);
 640                         mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
 641                             curp->gzip && gzerrnum != Z_ERRNO ?
 642                             zError(gzerrnum) : strerror(errno));
 643                         break;
 644                 }
 645                 off += (size_t)ssz;
 646         }
 647 
 648         if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
 649                 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
 650                     gzerrnum == Z_ERRNO ? strerror(errno) :
 651                     zError(gzerrnum));
 652         if (retval == 0) {
 653                 free(fb->buf);
 654                 fb->buf = NULL;
 655         }
 656         return retval;
 657 }
 658 
 659 static void
 660 mparse_end(struct mparse *curp)
 661 {
 662         if (curp->man->macroset == MACROSET_NONE)
 663                 curp->man->macroset = MACROSET_MAN;
 664         if (curp->man->macroset == MACROSET_MDOC)
 665                 mdoc_endparse(curp->man);
 666         else
 667                 man_endparse(curp->man);
 668         roff_endparse(curp->roff);
 669 }
 670 
 671 static void
 672 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
 673 {
 674         struct buf      *svprimary;
 675         const char      *svfile;
 676         size_t           offset;