1 /* 2 * Copyright (c) 2002 John Rochester 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 31 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 32 */ 33 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <sys/param.h> 37 38 #include <ctype.h> 39 #include <dirent.h> 40 #include <err.h> 41 #include <signal.h> 42 #include <stddef.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 48 #include "man.h" 49 #include "stringlist.h" 50 51 52 /* Information collected about each man page in a section */ 53 struct page_info { 54 char *filename; 55 char *name; 56 char *suffix; 57 ino_t inode; 58 }; 59 60 /* An expanding string */ 61 struct sbuf { 62 char *content; /* the start of the buffer */ 63 char *end; /* just past the end of the content */ 64 char *last; /* the last allocated character */ 65 }; 66 67 /* Remove the last amount characters from the sbuf */ 68 #define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) 69 /* Return the length of the sbuf content */ 70 #define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) 71 72 typedef char *edited_copy(char *from, char *to, int length); 73 74 /* 75 * While the whatis line is being formed, it is stored in whatis_proto. 76 * When finished, it is reformatted into whatis_final and then appended 77 * to whatis_lines. 78 */ 79 static struct sbuf *whatis_proto; 80 static struct sbuf *whatis_final; 81 static stringlist *whatis_lines; /* collected output lines */ 82 83 static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */ 84 85 #define MDOC_COMMANDS "ArDvErEvFlLiNmPa" 86 87 88 /* Free a struct page_info and its content */ 89 static void 90 free_page_info(struct page_info *info) 91 { 92 93 free(info->filename); 94 free(info->name); 95 free(info->suffix); 96 free(info); 97 } 98 99 /* 100 * Allocate and fill in a new struct page_info given the 101 * name of the man section directory and the dirent of the file. 102 * If the file is not a man page, return NULL. 103 */ 104 static struct page_info * 105 new_page_info(char *dir, struct dirent *dirent) 106 { 107 struct page_info *info; 108 int basename_length; 109 char *suffix; 110 struct stat st; 111 112 if ((info = malloc(sizeof (struct page_info))) == NULL) 113 err(1, "malloc"); 114 basename_length = strlen(dirent->d_name); 115 suffix = &dirent->d_name[basename_length]; 116 if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1) 117 err(1, "asprintf"); 118 for (;;) { 119 if (--suffix == dirent->d_name || !isalnum(*suffix)) { 120 if (*suffix == '.') 121 break; 122 free(info->filename); 123 free(info); 124 return (NULL); 125 } 126 } 127 *suffix++ = '\0'; 128 info->name = strdup(dirent->d_name); 129 info->suffix = strdup(suffix); 130 if (stat(info->filename, &st) < 0) { 131 warn("%s", info->filename); 132 free_page_info(info); 133 return (NULL); 134 } 135 if (!S_ISREG(st.st_mode)) { 136 free_page_info(info); 137 return (NULL); 138 } 139 info->inode = st.st_ino; 140 return (info); 141 } 142 143 /* 144 * Reset sbuf length to 0. 145 */ 146 static void 147 sbuf_clear(struct sbuf *sbuf) 148 { 149 150 sbuf->end = sbuf->content; 151 } 152 153 /* 154 * Allocate a new sbuf. 155 */ 156 static struct sbuf * 157 new_sbuf(void) 158 { 159 struct sbuf *sbuf; 160 161 if ((sbuf = malloc(sizeof (struct sbuf))) == NULL) 162 err(1, "malloc"); 163 if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL) 164 err(1, "malloc"); 165 sbuf->last = sbuf->content + LINE_ALLOC - 1; 166 sbuf_clear(sbuf); 167 168 return (sbuf); 169 } 170 171 /* 172 * Ensure that there is enough room in the sbuf 173 * for nchars more characters. 174 */ 175 static void 176 sbuf_need(struct sbuf *sbuf, int nchars) 177 { 178 char *new_content; 179 size_t size, cntsize; 180 size_t grow = 128; 181 182 while (grow < nchars) { 183 grow += 128; /* we grow in chunks of 128 bytes */ 184 } 185 186 /* Grow if the buffer isn't big enough */ 187 if (sbuf->end + nchars > sbuf->last) { 188 size = sbuf->last + 1 - sbuf->content; 189 size += grow; 190 cntsize = sbuf->end - sbuf->content; 191 192 if ((new_content = realloc(sbuf->content, size)) == NULL) { 193 perror("realloc"); 194 if (tempfile[0] != '\0') 195 (void) unlink(tempfile); 196 exit(1); 197 } 198 sbuf->content = new_content; 199 sbuf->end = new_content + cntsize; 200 sbuf->last = new_content + size - 1; 201 } 202 } 203 204 /* 205 * Append a string of a given length to the sbuf. 206 */ 207 static void 208 sbuf_append(struct sbuf *sbuf, const char *text, int length) 209 { 210 if (length > 0) { 211 sbuf_need(sbuf, length); 212 (void) memcpy(sbuf->end, text, length); 213 sbuf->end += length; 214 } 215 } 216 217 /* 218 * Append a null-terminated string to the sbuf. 219 */ 220 static void 221 sbuf_append_str(struct sbuf *sbuf, char *text) 222 { 223 224 sbuf_append(sbuf, text, strlen(text)); 225 } 226 227 /* 228 * Append an edited null-terminated string to the sbuf. 229 */ 230 static void 231 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) 232 { 233 int length; 234 235 if ((length = strlen(text)) > 0) { 236 sbuf_need(sbuf, length); 237 sbuf->end = copy(text, sbuf->end, length); 238 } 239 } 240 241 /* 242 * Strip any of a set of chars from the end of the sbuf. 243 */ 244 static void 245 sbuf_strip(struct sbuf *sbuf, const char *set) 246 { 247 248 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) 249 sbuf->end--; 250 } 251 252 /* 253 * Return the null-terminated string built by the sbuf. 254 */ 255 static char * 256 sbuf_content(struct sbuf *sbuf) 257 { 258 259 *sbuf->end = '\0'; 260 return (sbuf->content); 261 } 262 263 /* 264 * Return true if no man page exists in the directory with 265 * any of the names in the stringlist. 266 */ 267 static int 268 no_page_exists(char *dir, stringlist *names, char *suffix) 269 { 270 char path[MAXPATHLEN]; 271 char *suffixes[] = { "", ".gz", ".bz2", NULL }; 272 size_t i; 273 int j; 274 275 for (i = 0; i < names->sl_cur; i++) { 276 for (j = 0; suffixes[j] != NULL; j++) { 277 (void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s", 278 dir, names->sl_str[i], suffix, suffixes[j]); 279 if (access(path, F_OK) == 0) { 280 return (0); 281 } 282 } 283 } 284 return (1); 285 } 286 287 /* ARGSUSED sig */ 288 static void 289 trap_signal(int sig) 290 { 291 292 if (tempfile[0] != '\0') 293 (void) unlink(tempfile); 294 295 exit(1); 296 } 297 298 /* 299 * Attempt to open an output file. 300 * Return NULL if unsuccessful. 301 */ 302 static FILE * 303 open_output(char *name) 304 { 305 FILE *output; 306 307 whatis_lines = sl_init(); 308 (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name); 309 name = tempfile; 310 if ((output = fopen(name, "w")) == NULL) { 311 warn("%s", name); 312 return (NULL); 313 } 314 return (output); 315 } 316 317 static int 318 linesort(const void *a, const void *b) 319 { 320 321 return (strcmp((*(const char * const *)a), (*(const char * const *)b))); 322 } 323 324 /* 325 * Write the unique sorted lines to the output file. 326 */ 327 static void 328 finish_output(FILE *output, char *name) 329 { 330 size_t i; 331 char *prev = NULL; 332 333 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *), 334 linesort); 335 for (i = 0; i < whatis_lines->sl_cur; i++) { 336 char *line = whatis_lines->sl_str[i]; 337 if (i > 0 && strcmp(line, prev) == 0) 338 continue; 339 prev = line; 340 (void) fputs(line, output); 341 (void) putc('\n', output); 342 } 343 (void) fclose(output); 344 sl_free(whatis_lines, 1); 345 (void) rename(tempfile, name); 346 (void) unlink(tempfile); 347 } 348 349 static FILE * 350 open_whatis(char *mandir) 351 { 352 char filename[MAXPATHLEN]; 353 354 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 355 return (open_output(filename)); 356 } 357 358 static void 359 finish_whatis(FILE *output, char *mandir) 360 { 361 char filename[MAXPATHLEN]; 362 363 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 364 finish_output(output, filename); 365 } 366 367 /* 368 * Remove trailing spaces from a string, returning a pointer to just 369 * beyond the new last character. 370 */ 371 static char * 372 trim_rhs(char *str) 373 { 374 char *rhs; 375 376 rhs = &str[strlen(str)]; 377 while (--rhs > str && isspace(*rhs)) 378 ; 379 *++rhs = '\0'; 380 return (rhs); 381 } 382 383 /* 384 * Return a pointer to the next non-space character in the string. 385 */ 386 static char * 387 skip_spaces(char *s) 388 { 389 390 while (*s != '\0' && isspace(*s)) 391 s++; 392 393 return (s); 394 } 395 396 /* 397 * Return whether the line is of one of the forms: 398 * .Sh NAME 399 * .Sh "NAME" 400 * etc. 401 * assuming that section_start is ".Sh". 402 */ 403 static int 404 name_section_line(char *line, const char *section_start) 405 { 406 char *rhs; 407 408 if (strncmp(line, section_start, 3) != 0) 409 return (0); 410 line = skip_spaces(line + 3); 411 rhs = trim_rhs(line); 412 if (*line == '"') { 413 line++; 414 if (*--rhs == '"') 415 *rhs = '\0'; 416 } 417 if (strcmp(line, "NAME") == 0) 418 return (1); 419 420 return (0); 421 } 422 423 /* 424 * Copy characters while removing the most common nroff/troff markup: 425 * \(em, \(mi, \s[+-N], \& 426 * \fF, \f(fo, \f[font] 427 * \*s, \*(st, \*[stringvar] 428 */ 429 static char * 430 de_nroff_copy(char *from, char *to, int fromlen) 431 { 432 char *from_end = &from[fromlen]; 433 434 while (from < from_end) { 435 switch (*from) { 436 case '\\': 437 switch (*++from) { 438 case '(': 439 if (strncmp(&from[1], "em", 2) == 0 || 440 strncmp(&from[1], "mi", 2) == 0) { 441 from += 3; 442 continue; 443 } 444 break; 445 case 's': 446 if (*++from == '-') 447 from++; 448 while (isdigit(*from)) 449 from++; 450 continue; 451 case 'f': 452 case '*': 453 if (*++from == '(') { 454 from += 3; 455 } else if (*from == '[') { 456 while (*++from != ']' && 457 from < from_end) 458 ; 459 from++; 460 } else { 461 from++; 462 } 463 continue; 464 case '&': 465 from++; 466 continue; 467 } 468 break; 469 } 470 *to++ = *from++; 471 } 472 return (to); 473 } 474 475 /* 476 * Append a string with the nroff formatting removed. 477 */ 478 static void 479 add_nroff(char *text) 480 { 481 482 sbuf_append_edited(whatis_proto, text, de_nroff_copy); 483 } 484 485 /* 486 * Appends "name(suffix), " to whatis_final 487 */ 488 static void 489 add_whatis_name(char *name, char *suffix) 490 { 491 492 if (*name != '\0') { 493 sbuf_append_str(whatis_final, name); 494 sbuf_append(whatis_final, "(", 1); 495 sbuf_append_str(whatis_final, suffix); 496 sbuf_append(whatis_final, "), ", 3); 497 } 498 } 499 500 /* 501 * Processes an old-style man(7) line. This ignores commands with only 502 * a single number argument. 503 */ 504 static void 505 process_man_line(char *line) 506 { 507 char *p; 508 509 if (*line == '.') { 510 while (isalpha(*++line)) 511 ; 512 p = line = skip_spaces(line); 513 while (*p != '\0') { 514 if (!isdigit(*p)) 515 break; 516 p++; 517 } 518 if (*p == '\0') 519 return; 520 } else 521 line = skip_spaces(line); 522 if (*line != '\0') { 523 add_nroff(line); 524 sbuf_append(whatis_proto, " ", 1); 525 } 526 } 527 528 /* 529 * Processes a new-style mdoc(7) line. 530 */ 531 static void 532 process_mdoc_line(char *line) 533 { 534 int xref; 535 int arg = 0; 536 char *line_end = &line[strlen(line)]; 537 int orig_length = sbuf_length(whatis_proto); 538 char *next; 539 540 if (*line == '\0') 541 return; 542 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { 543 add_nroff(skip_spaces(line)); 544 sbuf_append(whatis_proto, " ", 1); 545 return; 546 } 547 xref = strncmp(line, ".Xr", 3) == 0; 548 line += 3; 549 while ((line = skip_spaces(line)) < line_end) { 550 if (*line == '"') { 551 next = ++line; 552 for (;;) { 553 next = strchr(next, '"'); 554 if (next == NULL) 555 break; 556 (void) memmove(next, next + 1, strlen(next)); 557 line_end--; 558 if (*next != '"') 559 break; 560 next++; 561 } 562 } else { 563 next = strpbrk(line, " \t"); 564 } 565 if (next != NULL) 566 *next++ = '\0'; 567 else 568 next = line_end; 569 if (isupper(*line) && islower(line[1]) && line[2] == '\0') { 570 if (strcmp(line, "Ns") == 0) { 571 arg = 0; 572 line = next; 573 continue; 574 } 575 if (strstr(line, MDOC_COMMANDS) != NULL) { 576 line = next; 577 continue; 578 } 579 } 580 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { 581 if (xref) { 582 sbuf_append(whatis_proto, "(", 1); 583 add_nroff(line); 584 sbuf_append(whatis_proto, ")", 1); 585 xref = 0; 586 } else { 587 sbuf_append(whatis_proto, " ", 1); 588 } 589 } 590 add_nroff(line); 591 arg++; 592 line = next; 593 } 594 if (sbuf_length(whatis_proto) > orig_length) 595 sbuf_append(whatis_proto, " ", 1); 596 } 597 598 /* 599 * Collect a list of comma-separated names from the text. 600 */ 601 static void 602 collect_names(stringlist *names, char *text) 603 { 604 char *arg; 605 606 for (;;) { 607 arg = text; 608 text = strchr(text, ','); 609 if (text != NULL) 610 *text++ = '\0'; 611 (void) sl_add(names, arg); 612 if (text == NULL) 613 return; 614 if (*text == ' ') 615 text++; 616 } 617 } 618 619 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; 620 621 /* 622 * Process a man page source into a single whatis line and add it 623 * to whatis_lines. 624 */ 625 static void 626 process_page(struct page_info *page, char *section_dir) 627 { 628 FILE *fp; 629 stringlist *names; 630 char *descr; 631 int state = STATE_UNKNOWN; 632 size_t i; 633 char *line = NULL; 634 size_t linecap = 0; 635 636 sbuf_clear(whatis_proto); 637 if ((fp = fopen(page->filename, "r")) == NULL) { 638 warn("%s", page->filename); 639 return; 640 } 641 while (getline(&line, &linecap, fp) > 0) { 642 /* Skip comments */ 643 if (strncmp(line, ".\\\"", 3) == 0) 644 continue; 645 switch (state) { 646 /* Haven't reached the NAME section yet */ 647 case STATE_UNKNOWN: 648 if (name_section_line(line, ".SH")) 649 state = STATE_MANSTYLE; 650 else if (name_section_line(line, ".Sh")) 651 state = STATE_MDOCNAME; 652 continue; 653 /* Inside an old-style .SH NAME section */ 654 case STATE_MANSTYLE: 655 if (strncmp(line, ".SH", 3) == 0 || 656 strncmp(line, ".SS", 3) == 0) 657 break; 658 (void) trim_rhs(line); 659 if (strcmp(line, ".") == 0) 660 continue; 661 if (strncmp(line, ".IX", 3) == 0) { 662 line += 3; 663 line = skip_spaces(line); 664 } 665 process_man_line(line); 666 continue; 667 /* Inside a new-style .Sh NAME section (the .Nm part) */ 668 case STATE_MDOCNAME: 669 (void) trim_rhs(line); 670 if (strncmp(line, ".Nm", 3) == 0) { 671 process_mdoc_line(line); 672 continue; 673 } else { 674 if (strcmp(line, ".") == 0) 675 continue; 676 sbuf_append(whatis_proto, "- ", 2); 677 state = STATE_MDOCDESC; 678 } 679 /* FALLTHROUGH */ 680 /* Inside a new-style .Sh NAME section (after the .Nm-s) */ 681 case STATE_MDOCDESC: 682 if (strncmp(line, ".Sh", 3) == 0) 683 break; 684 (void) trim_rhs(line); 685 if (strcmp(line, ".") == 0) 686 continue; 687 process_mdoc_line(line); 688 continue; 689 } 690 break; 691 } 692 (void) fclose(fp); 693 sbuf_strip(whatis_proto, " \t.-"); 694 line = sbuf_content(whatis_proto); 695 /* 696 * Line now contains the appropriate data, but without the 697 * proper indentation or the section appended to each name. 698 */ 699 descr = strstr(line, " - "); 700 if (descr == NULL) { 701 descr = strchr(line, ' '); 702 if (descr == NULL) 703 return; 704 *descr++ = '\0'; 705 } else { 706 *descr = '\0'; 707 descr += 3; 708 } 709 names = sl_init(); 710 collect_names(names, line); 711 sbuf_clear(whatis_final); 712 if (!sl_find(names, page->name) && 713 no_page_exists(section_dir, names, page->suffix)) { 714 /* 715 * Add the page name since that's the only 716 * thing that man(1) will find. 717 */ 718 add_whatis_name(page->name, page->suffix); 719 } 720 for (i = 0; i < names->sl_cur; i++) 721 add_whatis_name(names->sl_str[i], page->suffix); 722 sl_free(names, 0); 723 /* Remove last ", " */ 724 sbuf_retract(whatis_final, 2); 725 while (sbuf_length(whatis_final) < INDENT) 726 sbuf_append(whatis_final, " ", 1); 727 sbuf_append(whatis_final, " - ", 3); 728 sbuf_append_str(whatis_final, skip_spaces(descr)); 729 (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); 730 } 731 732 /* 733 * Sort pages first by inode number, then by name. 734 */ 735 static int 736 pagesort(const void *a, const void *b) 737 { 738 const struct page_info *p1 = *(struct page_info * const *) a; 739 const struct page_info *p2 = *(struct page_info * const *) b; 740 741 if (p1->inode == p2->inode) 742 return (strcmp(p1->name, p2->name)); 743 744 return (p1->inode - p2->inode); 745 } 746 747 /* 748 * Process a single man section. 749 */ 750 static void 751 process_section(char *section_dir) 752 { 753 struct dirent **entries; 754 int nentries; 755 struct page_info **pages; 756 int npages = 0; 757 int i; 758 ino_t prev_inode = 0; 759 760 /* Scan the man section directory for pages */ 761 nentries = scandir(section_dir, &entries, NULL, alphasort); 762 763 /* Collect information about man pages */ 764 pages = (struct page_info **)calloc(nentries, 765 sizeof (struct page_info *)); 766 for (i = 0; i < nentries; i++) { 767 struct page_info *info = new_page_info(section_dir, entries[i]); 768 if (info != NULL) 769 pages[npages++] = info; 770 free(entries[i]); 771 } 772 free(entries); 773 qsort(pages, npages, sizeof (struct page_info *), pagesort); 774 775 /* Process each unique page */ 776 for (i = 0; i < npages; i++) { 777 struct page_info *page = pages[i]; 778 if (page->inode != prev_inode) { 779 prev_inode = page->inode; 780 process_page(page, section_dir); 781 } 782 free_page_info(page); 783 } 784 free(pages); 785 } 786 787 /* 788 * Return whether the directory entry is a man page section. 789 */ 790 static int 791 select_sections(const struct dirent *entry) 792 { 793 const char *p = &entry->d_name[3]; 794 795 if (strncmp(entry->d_name, "man", 3) != 0) 796 return (0); 797 while (*p != '\0') { 798 if (!isalnum(*p++)) 799 return (0); 800 } 801 return (1); 802 } 803 804 /* 805 * Process a single top-level man directory by finding all the 806 * sub-directories named man* and processing each one in turn. 807 */ 808 void 809 mwpath(char *path) 810 { 811 FILE *fp = NULL; 812 struct dirent **entries; 813 int nsections; 814 int i; 815 816 (void) signal(SIGINT, trap_signal); 817 (void) signal(SIGHUP, trap_signal); 818 (void) signal(SIGQUIT, trap_signal); 819 (void) signal(SIGTERM, trap_signal); 820 821 whatis_proto = new_sbuf(); 822 whatis_final = new_sbuf(); 823 824 nsections = scandir(path, &entries, select_sections, alphasort); 825 if ((fp = open_whatis(path)) == NULL) 826 return; 827 for (i = 0; i < nsections; i++) { 828 char section_dir[MAXPATHLEN]; 829 830 (void) snprintf(section_dir, MAXPATHLEN, "%s/%s", 831 path, entries[i]->d_name); 832 process_section(section_dir); 833 free(entries[i]); 834 } 835 free(entries); 836 finish_whatis(fp, path); 837 }