1 /* $Id: read.c,v 1.192 2017/07/20 14:36:36 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
77 MANDOCERR_UNSUPP,
78 MANDOCERR_MAX,
79 MANDOCERR_MAX
80 };
81
82 static const char * const mandocerrs[MANDOCERR_MAX] = {
83 "ok",
84
85 "base system convention",
86
87 "Mdocdate found",
88 "Mdocdate missing",
89 "unknown architecture",
90 "operating system explicitly specified",
91 "RCS id missing",
92 "referenced manual not found",
93
94 "generic style suggestion",
95
96 "legacy man(7) date format",
97 "lower case character in document title",
98 "duplicate RCS id",
99 "typo in section name",
100 "unterminated quoted argument",
101 "useless macro",
102 "consider using OS macro",
103 "errnos out of order",
104 "duplicate errno",
105 "trailing delimiter",
106 "no blank before trailing delimiter",
107 "fill mode already enabled, skipping",
108 "fill mode already disabled, skipping",
109 "function name without markup",
110 "whitespace at end of input line",
111 "bad comment style",
112
113 "generic warning",
114
115 /* related to the prologue */
116 "missing manual title, using UNTITLED",
117 "missing manual title, using \"\"",
118 "missing manual section, using \"\"",
119 "unknown manual section",
120 "missing date, using today's date",
121 "cannot parse date, using it verbatim",
122 "date in the future, using it anyway",
123 "missing Os macro, using \"\"",
124 "late prologue macro",
125 "prologue macros out of order",
126
127 /* related to document structure */
128 ".so is fragile, better use ln(1)",
539 if (0 == start && '\0' == blk.buf[i])
540 break;
541
542 /* Start the next input line. */
543
544 pos = 0;
545 }
546
547 free(ln.buf);
548 return 1;
549 }
550
551 static int
552 read_whole_file(struct mparse *curp, const char *file, int fd,
553 struct buf *fb, int *with_mmap)
554 {
555 struct stat st;
556 gzFile gz;
557 size_t off;
558 ssize_t ssz;
559
560 if (fstat(fd, &st) == -1) {
561 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
562 "fstat: %s", strerror(errno));
563 return 0;
564 }
565
566 /*
567 * If we're a regular file, try just reading in the whole entry
568 * via mmap(). This is faster than reading it into blocks, and
569 * since each file is only a few bytes to begin with, I'm not
570 * concerned that this is going to tank any machines.
571 */
572
573 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
574 if (st.st_size > 0x7fffffff) {
575 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
576 return 0;
577 }
578 *with_mmap = 1;
579 fb->sz = (size_t)st.st_size;
580 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
581 if (fb->buf != MAP_FAILED)
582 return 1;
583 }
584
585 if (curp->gzip) {
586 if ((gz = gzdopen(fd, "rb")) == NULL) {
587 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
588 "gzdopen: %s", strerror(errno));
589 return 0;
590 }
591 } else
592 gz = NULL;
593
594 /*
595 * If this isn't a regular file (like, say, stdin), then we must
596 * go the old way and just read things in bit by bit.
597 */
598
599 *with_mmap = 0;
600 off = 0;
601 fb->sz = 0;
602 fb->buf = NULL;
603 for (;;) {
604 if (off == fb->sz) {
605 if (fb->sz == (1U << 31)) {
606 mandoc_msg(MANDOCERR_TOOLARGE, curp,
607 0, 0, NULL);
608 break;
609 }
610 resize_buf(fb, 65536);
611 }
612 ssz = curp->gzip ?
613 gzread(gz, fb->buf + (int)off, fb->sz - off) :
614 read(fd, fb->buf + (int)off, fb->sz - off);
615 if (ssz == 0) {
616 fb->sz = off;
617 return 1;
618 }
619 if (ssz == -1) {
620 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
621 "read: %s", strerror(errno));
622 break;
623 }
624 off += (size_t)ssz;
625 }
626
627 free(fb->buf);
628 fb->buf = NULL;
629 return 0;
630 }
631
632 static void
633 mparse_end(struct mparse *curp)
634 {
635 if (curp->man->macroset == MACROSET_NONE)
636 curp->man->macroset = MACROSET_MAN;
637 if (curp->man->macroset == MACROSET_MDOC)
638 mdoc_endparse(curp->man);
639 else
640 man_endparse(curp->man);
641 roff_endparse(curp->roff);
642 }
643
644 static void
645 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
646 {
647 struct buf *svprimary;
648 const char *svfile;
649 size_t offset;
|
1 /* $Id: read.c,v 1.196 2018/07/28 18:34:15 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19 #include "config.h"
20
21 #include <sys/types.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24
77 MANDOCERR_UNSUPP,
78 MANDOCERR_MAX,
79 MANDOCERR_MAX
80 };
81
82 static const char * const mandocerrs[MANDOCERR_MAX] = {
83 "ok",
84
85 "base system convention",
86
87 "Mdocdate found",
88 "Mdocdate missing",
89 "unknown architecture",
90 "operating system explicitly specified",
91 "RCS id missing",
92 "referenced manual not found",
93
94 "generic style suggestion",
95
96 "legacy man(7) date format",
97 "normalizing date format to",
98 "lower case character in document title",
99 "duplicate RCS id",
100 "possible typo in section name",
101 "unterminated quoted argument",
102 "useless macro",
103 "consider using OS macro",
104 "errnos out of order",
105 "duplicate errno",
106 "trailing delimiter",
107 "no blank before trailing delimiter",
108 "fill mode already enabled, skipping",
109 "fill mode already disabled, skipping",
110 "verbatim \"--\", maybe consider using \\(em",
111 "function name without markup",
112 "whitespace at end of input line",
113 "bad comment style",
114
115 "generic warning",
116
117 /* related to the prologue */
118 "missing manual title, using UNTITLED",
119 "missing manual title, using \"\"",
120 "missing manual section, using \"\"",
121 "unknown manual section",
122 "missing date, using today's date",
123 "cannot parse date, using it verbatim",
124 "date in the future, using it anyway",
125 "missing Os macro, using \"\"",
126 "late prologue macro",
127 "prologue macros out of order",
128
129 /* related to document structure */
130 ".so is fragile, better use ln(1)",
541 if (0 == start && '\0' == blk.buf[i])
542 break;
543
544 /* Start the next input line. */
545
546 pos = 0;
547 }
548
549 free(ln.buf);
550 return 1;
551 }
552
553 static int
554 read_whole_file(struct mparse *curp, const char *file, int fd,
555 struct buf *fb, int *with_mmap)
556 {
557 struct stat st;
558 gzFile gz;
559 size_t off;
560 ssize_t ssz;
561 int gzerrnum, retval;
562
563 if (fstat(fd, &st) == -1) {
564 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
565 "fstat: %s", strerror(errno));
566 return 0;
567 }
568
569 /*
570 * If we're a regular file, try just reading in the whole entry
571 * via mmap(). This is faster than reading it into blocks, and
572 * since each file is only a few bytes to begin with, I'm not
573 * concerned that this is going to tank any machines.
574 */
575
576 if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
577 if (st.st_size > 0x7fffffff) {
578 mandoc_msg(MANDOCERR_TOOLARGE, curp, 0, 0, NULL);
579 return 0;
580 }
581 *with_mmap = 1;
582 fb->sz = (size_t)st.st_size;
583 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
584 if (fb->buf != MAP_FAILED)
585 return 1;
586 }
587
588 if (curp->gzip) {
589 /*
590 * Duplicating the file descriptor is required
591 * because we will have to call gzclose(3)
592 * to free memory used internally by zlib,
593 * but that will also close the file descriptor,
594 * which this function must not do.
595 */
596 if ((fd = dup(fd)) == -1) {
597 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
598 "dup: %s", strerror(errno));
599 return 0;
600 }
601 if ((gz = gzdopen(fd, "rb")) == NULL) {
602 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0,
603 "gzdopen: %s", strerror(errno));
604 close(fd);
605 return 0;
606 }
607 } else
608 gz = NULL;
609
610 /*
611 * If this isn't a regular file (like, say, stdin), then we must
612 * go the old way and just read things in bit by bit.
613 */
614
615 *with_mmap = 0;
616 off = 0;
617 retval = 0;
618 fb->sz = 0;
619 fb->buf = NULL;
620 for (;;) {
621 if (off == fb->sz) {
622 if (fb->sz == (1U << 31)) {
623 mandoc_msg(MANDOCERR_TOOLARGE, curp,
624 0, 0, NULL);
625 break;
626 }
627 resize_buf(fb, 65536);
628 }
629 ssz = curp->gzip ?
630 gzread(gz, fb->buf + (int)off, fb->sz - off) :
631 read(fd, fb->buf + (int)off, fb->sz - off);
632 if (ssz == 0) {
633 fb->sz = off;
634 retval = 1;
635 break;
636 }
637 if (ssz == -1) {
638 if (curp->gzip)
639 (void)gzerror(gz, &gzerrnum);
640 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "read: %s",
641 curp->gzip && gzerrnum != Z_ERRNO ?
642 zError(gzerrnum) : strerror(errno));
643 break;
644 }
645 off += (size_t)ssz;
646 }
647
648 if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
649 mandoc_vmsg(MANDOCERR_FILE, curp, 0, 0, "gzclose: %s",
650 gzerrnum == Z_ERRNO ? strerror(errno) :
651 zError(gzerrnum));
652 if (retval == 0) {
653 free(fb->buf);
654 fb->buf = NULL;
655 }
656 return retval;
657 }
658
659 static void
660 mparse_end(struct mparse *curp)
661 {
662 if (curp->man->macroset == MACROSET_NONE)
663 curp->man->macroset = MACROSET_MAN;
664 if (curp->man->macroset == MACROSET_MDOC)
665 mdoc_endparse(curp->man);
666 else
667 man_endparse(curp->man);
668 roff_endparse(curp->roff);
669 }
670
671 static void
672 mparse_parse_buffer(struct mparse *curp, struct buf blk, const char *file)
673 {
674 struct buf *svprimary;
675 const char *svfile;
676 size_t offset;
|