1 /*
2 * Copyright (c) 2002 John Rochester
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
31 */
32
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 #include <sys/param.h>
36
37 #include <ctype.h>
38 #include <dirent.h>
39 #include <err.h>
40 #include <signal.h>
41 #include <stddef.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46
47 #include "man.h"
48 #include "stringlist.h"
49
50
/*
 * Information collected about each man page in a section.
 * The three strings are heap-allocated and released by free_page_info().
 */
struct page_info {
	char *filename;	/* "<section dir>/<directory entry name>" */
	char *name;	/* entry name up to (not including) its final '.' */
	char *suffix;	/* text following the final '.' */
	ino_t inode;	/* st_ino from stat(); lets hard links be skipped */
};
58
/*
 * An expanding string.  The content is not kept null-terminated;
 * sbuf_content() writes the terminator on demand.
 */
struct sbuf {
	char *content; /* the start of the buffer */
	char *end; /* just past the end of the content */
	char *last; /* the last allocated character */
};
65
/* Remove the last amount characters from the sbuf; no underflow check */
#define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount))
/* Return the length of the sbuf content */
#define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content)

/*
 * A copy routine usable with sbuf_append_edited(): it is handed length
 * characters at from, writes its edited version at to, and returns a
 * pointer just past the last character it wrote.
 */
typedef char *edited_copy(char *from, char *to, int length);

/*
 * While the whatis line is being formed, it is stored in whatis_proto.
 * When finished, it is reformatted into whatis_final and then appended
 * to whatis_lines.
 */
static struct sbuf *whatis_proto;
static struct sbuf *whatis_final;
static stringlist *whatis_lines; /* collected output lines */

static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */

/* Two-letter mdoc macro names, concatenated; used by process_mdoc_line() */
#define MDOC_COMMANDS "ArDvErEvFlLiNmPa"
85
86
87 /* Free a struct page_info and its content */
88 static void
89 free_page_info(struct page_info *info)
90 {
91
92 free(info->filename);
93 free(info->name);
94 free(info->suffix);
95 free(info);
96 }
97
98 /*
99 * Allocate and fill in a new struct page_info given the
100 * name of the man section directory and the dirent of the file.
101 * If the file is not a man page, return NULL.
102 */
103 static struct page_info *
104 new_page_info(char *dir, struct dirent *dirent)
105 {
106 struct page_info *info;
107 int basename_length;
108 char *suffix;
109 struct stat st;
110
111 if ((info = malloc(sizeof (struct page_info))) == NULL)
112 err(1, "malloc");
113 basename_length = strlen(dirent->d_name);
114 suffix = &dirent->d_name[basename_length];
115 if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1)
116 err(1, "asprintf");
117 for (;;) {
118 if (--suffix == dirent->d_name || !isalnum(*suffix)) {
119 if (*suffix == '.')
120 break;
121 free(info->filename);
122 free(info);
123 return (NULL);
124 }
125 }
126 *suffix++ = '\0';
127 info->name = strdup(dirent->d_name);
128 info->suffix = strdup(suffix);
129 if (stat(info->filename, &st) < 0) {
130 warn("%s", info->filename);
131 free_page_info(info);
132 return (NULL);
133 }
134 if (!S_ISREG(st.st_mode)) {
135 free_page_info(info);
136 return (NULL);
137 }
138 info->inode = st.st_ino;
139 return (info);
140 }
141
142 /*
143 * Reset sbuf length to 0.
144 */
145 static void
146 sbuf_clear(struct sbuf *sbuf)
147 {
148
149 sbuf->end = sbuf->content;
150 }
151
152 /*
153 * Allocate a new sbuf.
154 */
155 static struct sbuf *
156 new_sbuf(void)
157 {
158 struct sbuf *sbuf;
159
160 if ((sbuf = malloc(sizeof (struct sbuf))) == NULL)
161 err(1, "malloc");
162 if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL)
163 err(1, "malloc");
164 sbuf->last = sbuf->content + LINE_ALLOC - 1;
165 sbuf_clear(sbuf);
166
167 return (sbuf);
168 }
169
170 /*
171 * Ensure that there is enough room in the sbuf
172 * for nchars more characters.
173 */
174 static void
175 sbuf_need(struct sbuf *sbuf, int nchars)
176 {
177 char *new_content;
178 size_t size, cntsize;
179
180 /* Double the size of the allocation until the buffer is big enough */
181 while (sbuf->end + nchars > sbuf->last) {
182 size = sbuf->last + 1 - sbuf->content;
183 size *= 2;
184 cntsize = sbuf->end - sbuf->content;
185
186 new_content = (char *)malloc(size);
187 (void) memcpy(new_content, sbuf->content, cntsize);
188 free(sbuf->content);
189 sbuf->content = new_content;
190 sbuf->end = new_content + cntsize;
191 sbuf->last = new_content + size - 1;
192 }
193 }
194
195 /*
196 * Append a string of a given length to the sbuf.
197 */
198 static void
199 sbuf_append(struct sbuf *sbuf, const char *text, int length)
200 {
201 if (length > 0) {
202 sbuf_need(sbuf, length);
203 (void) memcpy(sbuf->end, text, length);
204 sbuf->end += length;
205 }
206 }
207
/*
 * Add a null-terminated string (without its terminator) to the sbuf.
 */
static void
sbuf_append_str(struct sbuf *sbuf, char *text)
{
	int text_length = strlen(text);

	sbuf_append(sbuf, text, text_length);
}
217
218 /*
219 * Append an edited null-terminated string to the sbuf.
220 */
221 static void
222 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy)
223 {
224 int length;
225
226 if ((length = strlen(text)) > 0) {
227 sbuf_need(sbuf, length);
228 sbuf->end = copy(text, sbuf->end, length);
229 }
230 }
231
232 /*
233 * Strip any of a set of chars from the end of the sbuf.
234 */
235 static void
236 sbuf_strip(struct sbuf *sbuf, const char *set)
237 {
238
239 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL)
240 sbuf->end--;
241 }
242
243 /*
244 * Return the null-terminated string built by the sbuf.
245 */
246 static char *
247 sbuf_content(struct sbuf *sbuf)
248 {
249
250 *sbuf->end = '\0';
251 return (sbuf->content);
252 }
253
254 /*
255 * Return true if no man page exists in the directory with
256 * any of the names in the stringlist.
257 */
258 static int
259 no_page_exists(char *dir, stringlist *names, char *suffix)
260 {
261 char path[MAXPATHLEN];
262 size_t i;
263
264 for (i = 0; i < names->sl_cur; i++) {
265 (void) snprintf(path, MAXPATHLEN, "%s/%s.%s.gz",
266 dir, names->sl_str[i], suffix);
267 if (access(path, F_OK) < 0) {
268 path[strlen(path) - 3] = '\0';
269 if (access(path, F_OK) < 0)
270 continue;
271 }
272 return (0);
273 }
274 return (1);
275 }
276
277 /* ARGSUSED sig */
278 static void
279 trap_signal(int sig)
280 {
281
282 if (tempfile[0] != '\0')
283 (void) unlink(tempfile);
284
285 exit(1);
286 }
287
288 /*
289 * Attempt to open an output file.
290 * Return NULL if unsuccessful.
291 */
292 static FILE *
293 open_output(char *name)
294 {
295 FILE *output;
296
297 whatis_lines = sl_init();
298 (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name);
299 name = tempfile;
300 if ((output = fopen(name, "w")) == NULL) {
301 warn("%s", name);
302 return (NULL);
303 }
304 return (output);
305 }
306
/*
 * qsort() comparator for an array of C strings: each argument points
 * at a char pointer, and the strings are ordered with strcmp().
 */
static int
linesort(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*lhs, *rhs));
}
313
314 /*
315 * Write the unique sorted lines to the output file.
316 */
317 static void
318 finish_output(FILE *output, char *name)
319 {
320 size_t i;
321 char *prev = NULL;
322
323 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *),
324 linesort);
325 for (i = 0; i < whatis_lines->sl_cur; i++) {
326 char *line = whatis_lines->sl_str[i];
327 if (i > 0 && strcmp(line, prev) == 0)
328 continue;
329 prev = line;
330 (void) fputs(line, output);
331 (void) putc('\n', output);
332 }
333 (void) fclose(output);
334 sl_free(whatis_lines, 1);
335 (void) rename(tempfile, name);
336 (void) unlink(tempfile);
337 }
338
339 static FILE *
340 open_whatis(char *mandir)
341 {
342 char filename[MAXPATHLEN];
343
344 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
345 return (open_output(filename));
346 }
347
348 static void
349 finish_whatis(FILE *output, char *mandir)
350 {
351 char filename[MAXPATHLEN];
352
353 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS);
354 finish_output(output, filename);
355 }
356
/*
 * Remove trailing white space from a string in place, returning a
 * pointer to just beyond the new last character (that is, to the new
 * terminator).
 *
 * An empty or all-blank string becomes empty and str itself is
 * returned.  The scan never steps before the start of the string.
 */
static char *
trim_rhs(char *str)
{
	char *rhs = str + strlen(str);

	/* The cast keeps isspace() defined for bytes >= 0x80. */
	while (rhs > str && isspace((unsigned char)rhs[-1]))
		rhs--;
	*rhs = '\0';
	return (rhs);
}
372
/*
 * Return a pointer to the next non-space character in the string,
 * or to its terminator if only white space remains.
 */
static char *
skip_spaces(char *s)
{

	/*
	 * isspace('\0') is false, so the terminator ends the loop; the
	 * cast keeps the call defined for bytes >= 0x80.
	 */
	while (isspace((unsigned char)*s))
		s++;

	return (s);
}
385
/*
 * Decide whether a line opens the NAME section, e.g.
 *	.Sh NAME
 *	.Sh "NAME"
 * where section_start (".Sh" here) is compared against the first three
 * characters of the line.
 *
 * Trailing white space is trimmed from the line in place, and a closing
 * quote is overwritten when the title is quoted.  Returns 1 on a match
 * and 0 otherwise.
 */
static int
name_section_line(char *line, const char *section_start)
{
	char *title;
	char *title_end;

	if (strncmp(line, section_start, 3) != 0)
		return (0);

	title = skip_spaces(line + 3);
	title_end = trim_rhs(title);
	if (*title == '"') {
		title++;
		title_end--;
		if (*title_end == '"')
			*title_end = '\0';
	}

	return (strcmp(title, "NAME") == 0);
}
412
/*
 * Copy characters while removing the most common nroff/troff markup:
 *	\(em, \(mi, \s[+-]N, \&
 *	\fF, \f(fo, \f[font]
 *	\*s, \*(st, \*[stringvar]
 * For any other escape the backslash is dropped and the character after
 * it is copied (so "\-" becomes "-").
 *
 * from/fromlen give the input, to receives the output, and the return
 * value points just past the last character written.  The output is
 * never longer than the input and is not null-terminated.  The input
 * is never read at or beyond from + fromlen, and a lone trailing
 * backslash is dropped.
 */
static char *
de_nroff_copy(char *from, char *to, int fromlen)
{
	char *from_end = &from[fromlen];

	while (from < from_end) {
		if (*from != '\\') {
			*to++ = *from++;
			continue;
		}
		/* Step over the backslash; nothing follows a trailing one. */
		if (++from >= from_end)
			break;
		switch (*from) {
		case '(':
			if (from_end - from >= 3 &&
			    (strncmp(&from[1], "em", 2) == 0 ||
			    strncmp(&from[1], "mi", 2) == 0)) {
				from += 3;
				continue;
			}
			/* Other \(xx escapes: only the backslash is dropped */
			break;
		case 's':
			from++;
			if (from < from_end && (*from == '-' || *from == '+'))
				from++;
			while (from < from_end &&
			    isdigit((unsigned char)*from))
				from++;
			continue;
		case 'f':
		case '*':
			if (++from >= from_end)
				continue;
			if (*from == '(') {
				/* '(' plus a two-character name */
				if (from_end - from >= 3)
					from += 3;
				else
					from = from_end;
			} else if (*from == '[') {
				while (++from < from_end && *from != ']')
					;
				if (from < from_end)
					from++;
			} else {
				from++;
			}
			continue;
		case '&':
			from++;
			continue;
		default:
			break;
		}
		*to++ = *from++;
	}
	return (to);
}
464
465 /*
466 * Append a string with the nroff formatting removed.
467 */
468 static void
469 add_nroff(char *text)
470 {
471
472 sbuf_append_edited(whatis_proto, text, de_nroff_copy);
473 }
474
475 /*
476 * Appends "name(suffix), " to whatis_final
477 */
478 static void
479 add_whatis_name(char *name, char *suffix)
480 {
481
482 if (*name != '\0') {
483 sbuf_append_str(whatis_final, name);
484 sbuf_append(whatis_final, "(", 1);
485 sbuf_append_str(whatis_final, suffix);
486 sbuf_append(whatis_final, "), ", 3);
487 }
488 }
489
490 /*
491 * Processes an old-style man(7) line. This ignores commands with only
492 * a single number argument.
493 */
494 static void
495 process_man_line(char *line)
496 {
497 char *p;
498
499 if (*line == '.') {
500 while (isalpha(*++line))
501 ;
502 p = line = skip_spaces(line);
503 while (*p != '\0') {
504 if (!isdigit(*p))
505 break;
506 p++;
507 }
508 if (*p == '\0')
509 return;
510 } else
511 line = skip_spaces(line);
512 if (*line != '\0') {
513 add_nroff(line);
514 sbuf_append(whatis_proto, " ", 1);
515 }
516 }
517
518 /*
519 * Processes a new-style mdoc(7) line.
520 */
521 static void
522 process_mdoc_line(char *line)
523 {
524 int xref;
525 int arg = 0;
526 char *line_end = &line[strlen(line)];
527 int orig_length = sbuf_length(whatis_proto);
528 char *next;
529
530 if (*line == '\0')
531 return;
532 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) {
533 add_nroff(skip_spaces(line));
534 sbuf_append(whatis_proto, " ", 1);
535 return;
536 }
537 xref = strncmp(line, ".Xr", 3) == 0;
538 line += 3;
539 while ((line = skip_spaces(line)) < line_end) {
540 if (*line == '"') {
541 next = ++line;
542 for (;;) {
543 next = strchr(next, '"');
544 if (next == NULL)
545 break;
546 (void) memmove(next, next + 1, strlen(next));
547 line_end--;
548 if (*next != '"')
549 break;
550 next++;
551 }
552 } else {
553 next = strpbrk(line, " \t");
554 }
555 if (next != NULL)
556 *next++ = '\0';
557 else
558 next = line_end;
559 if (isupper(*line) && islower(line[1]) && line[2] == '\0') {
560 if (strcmp(line, "Ns") == 0) {
561 arg = 0;
562 line = next;
563 continue;
564 }
565 if (strstr(line, MDOC_COMMANDS) != NULL) {
566 line = next;
567 continue;
568 }
569 }
570 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) {
571 if (xref) {
572 sbuf_append(whatis_proto, "(", 1);
573 add_nroff(line);
574 sbuf_append(whatis_proto, ")", 1);
575 xref = 0;
576 } else {
577 sbuf_append(whatis_proto, " ", 1);
578 }
579 }
580 add_nroff(line);
581 arg++;
582 line = next;
583 }
584 if (sbuf_length(whatis_proto) > orig_length)
585 sbuf_append(whatis_proto, " ", 1);
586 }
587
588 /*
589 * Collect a list of comma-separated names from the text.
590 */
591 static void
592 collect_names(stringlist *names, char *text)
593 {
594 char *arg;
595
596 for (;;) {
597 arg = text;
598 text = strchr(text, ',');
599 if (text != NULL)
600 *text++ = '\0';
601 (void) sl_add(names, arg);
602 if (text == NULL)
603 return;
604 if (*text == ' ')
605 text++;
606 }
607 }
608
/* Parser states used by process_page() while scanning a page. */
enum {
	STATE_UNKNOWN,		/* NAME section not reached yet */
	STATE_MANSTYLE,		/* inside an old-style .SH NAME section */
	STATE_MDOCNAME,		/* inside .Sh NAME, reading the .Nm lines */
	STATE_MDOCDESC		/* inside .Sh NAME, after the .Nm lines */
};
610
611 /*
612 * Process a man page source into a single whatis line and add it
613 * to whatis_lines.
614 */
615 static void
616 process_page(struct page_info *page, char *section_dir)
617 {
618 FILE *fp;
619 stringlist *names;
620 char *descr;
621 int state = STATE_UNKNOWN;
622 size_t i;
623 char *line = NULL;
624 size_t linecap = 0;
625
626 sbuf_clear(whatis_proto);
627 if ((fp = fopen(page->filename, "r")) == NULL) {
628 warn("%s", page->filename);
629 return;
630 }
631 while (getline(&line, &linecap, fp) > 0) {
632 /* Skip comments */
633 if (strncmp(line, ".\\\"", 3) == 0)
634 continue;
635 switch (state) {
636 /* Haven't reached the NAME section yet */
637 case STATE_UNKNOWN:
638 if (name_section_line(line, ".SH"))
639 state = STATE_MANSTYLE;
640 else if (name_section_line(line, ".Sh"))
641 state = STATE_MDOCNAME;
642 continue;
643 /* Inside an old-style .SH NAME section */
644 case STATE_MANSTYLE:
645 if (strncmp(line, ".SH", 3) == 0 ||
646 strncmp(line, ".SS", 3) == 0)
647 break;
648 (void) trim_rhs(line);
649 if (strcmp(line, ".") == 0)
650 continue;
651 if (strncmp(line, ".IX", 3) == 0) {
652 line += 3;
653 line = skip_spaces(line);
654 }
655 process_man_line(line);
656 continue;
657 /* Inside a new-style .Sh NAME section (the .Nm part) */
658 case STATE_MDOCNAME:
659 (void) trim_rhs(line);
660 if (strncmp(line, ".Nm", 3) == 0) {
661 process_mdoc_line(line);
662 continue;
663 } else {
664 if (strcmp(line, ".") == 0)
665 continue;
666 sbuf_append(whatis_proto, "- ", 2);
667 state = STATE_MDOCDESC;
668 }
669 /* FALLTHROUGH */
670 /* Inside a new-style .Sh NAME section (after the .Nm-s) */
671 case STATE_MDOCDESC:
672 if (strncmp(line, ".Sh", 3) == 0)
673 break;
674 (void) trim_rhs(line);
675 if (strcmp(line, ".") == 0)
676 continue;
677 process_mdoc_line(line);
678 continue;
679 }
680 break;
681 }
682 (void) fclose(fp);
683 sbuf_strip(whatis_proto, " \t.-");
684 line = sbuf_content(whatis_proto);
685 /*
686 * Line now contains the appropriate data, but without the
687 * proper indentation or the section appended to each name.
688 */
689 descr = strstr(line, " - ");
690 if (descr == NULL) {
691 descr = strchr(line, ' ');
692 if (descr == NULL)
693 return;
694 *descr++ = '\0';
695 } else {
696 *descr = '\0';
697 descr += 3;
698 }
699 names = sl_init();
700 collect_names(names, line);
701 sbuf_clear(whatis_final);
702 if (!sl_find(names, page->name) &&
703 no_page_exists(section_dir, names, page->suffix)) {
704 /*
705 * Add the page name since that's the only
706 * thing that man(1) will find.
707 */
708 add_whatis_name(page->name, page->suffix);
709 }
710 for (i = 0; i < names->sl_cur; i++)
711 add_whatis_name(names->sl_str[i], page->suffix);
712 sl_free(names, 0);
713 /* Remove last ", " */
714 sbuf_retract(whatis_final, 2);
715 while (sbuf_length(whatis_final) < INDENT)
716 sbuf_append(whatis_final, " ", 1);
717 sbuf_append(whatis_final, " - ", 3);
718 sbuf_append_str(whatis_final, skip_spaces(descr));
719 (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final)));
720 }
721
722 /*
723 * Sort pages first by inode number, then by name.
724 */
725 static int
726 pagesort(const void *a, const void *b)
727 {
728 const struct page_info *p1 = *(struct page_info * const *) a;
729 const struct page_info *p2 = *(struct page_info * const *) b;
730
731 if (p1->inode == p2->inode)
732 return (strcmp(p1->name, p2->name));
733
734 return (p1->inode - p2->inode);
735 }
736
737 /*
738 * Process a single man section.
739 */
740 static void
741 process_section(char *section_dir)
742 {
743 struct dirent **entries;
744 int nentries;
745 struct page_info **pages;
746 int npages = 0;
747 int i;
748 ino_t prev_inode = 0;
749
750 /* Scan the man section directory for pages */
751 nentries = scandir(section_dir, &entries, NULL, alphasort);
752
753 /* Collect information about man pages */
754 pages = (struct page_info **)calloc(nentries,
755 sizeof (struct page_info *));
756 for (i = 0; i < nentries; i++) {
757 struct page_info *info = new_page_info(section_dir, entries[i]);
758 if (info != NULL)
759 pages[npages++] = info;
760 free(entries[i]);
761 }
762 free(entries);
763 qsort(pages, npages, sizeof (struct page_info *), pagesort);
764
765 /* Process each unique page */
766 for (i = 0; i < npages; i++) {
767 struct page_info *page = pages[i];
768 if (page->inode != prev_inode) {
769 prev_inode = page->inode;
770 process_page(page, section_dir);
771 }
772 free_page_info(page);
773 }
774 free(pages);
775 }
776
/*
 * Return whether the directory entry is a man page section.
 *
 * scandir() filter: accepts names made of "man" followed by zero or
 * more alphanumeric characters.  Returns 1 to keep the entry, 0 to
 * drop it.
 */
static int
select_sections(const struct dirent *entry)
{
	const char *p;

	if (strncmp(entry->d_name, "man", 3) != 0)
		return (0);
	/* The cast keeps isalnum() defined for bytes >= 0x80. */
	for (p = &entry->d_name[3]; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p))
			return (0);
	}
	return (1);
}
793
794 /*
795 * Process a single top-level man directory by finding all the
796 * sub-directories named man* and processing each one in turn.
797 */
798 void
799 mwpath(char *path)
800 {
801 FILE *fp = NULL;
802 struct dirent **entries;
803 int nsections;
804 int i;
805
806 (void) signal(SIGINT, trap_signal);
807 (void) signal(SIGHUP, trap_signal);
808 (void) signal(SIGQUIT, trap_signal);
809 (void) signal(SIGTERM, trap_signal);
810
811 whatis_proto = new_sbuf();
812 whatis_final = new_sbuf();
813
814 nsections = scandir(path, &entries, select_sections, alphasort);
815 if ((fp = open_whatis(path)) == NULL)
816 return;
817 for (i = 0; i < nsections; i++) {
818 char section_dir[MAXPATHLEN];
819
820 (void) snprintf(section_dir, MAXPATHLEN, "%s/%s",
821 path, entries[i]->d_name);
822 process_section(section_dir);
823 free(entries[i]);
824 }
825 free(entries);
826 finish_whatis(fp, path);
827 }