1 /* 2 * Copyright (c) 2002 John Rochester 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer, 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/stat.h> 35 #include <sys/param.h> 36 37 #include <ctype.h> 38 #include <dirent.h> 39 #include <err.h> 40 #include <signal.h> 41 #include <stddef.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 #include "man.h" 48 #include "stringlist.h" 49 50 51 /* Information collected about each man page in a section */ 52 struct page_info { 53 char *filename; 54 char *name; 55 char *suffix; 56 ino_t inode; 57 }; 58 59 /* An expanding string */ 60 struct sbuf { 61 char *content; /* the start of the buffer */ 62 char *end; /* just past the end of the content */ 63 char *last; /* the last allocated character */ 64 }; 65 66 /* Remove the last amount characters from the sbuf */ 67 #define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) 68 /* Return the length of the sbuf content */ 69 #define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) 70 71 typedef char *edited_copy(char *from, char *to, int length); 72 73 /* 74 * While the whatis line is being formed, it is stored in whatis_proto. 75 * When finished, it is reformatted into whatis_final and then appended 76 * to whatis_lines. 77 */ 78 static struct sbuf *whatis_proto; 79 static struct sbuf *whatis_final; 80 static stringlist *whatis_lines; /* collected output lines */ 81 82 static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */ 83 84 #define MDOC_COMMANDS "ArDvErEvFlLiNmPa" 85 86 87 /* Free a struct page_info and its content */ 88 static void 89 free_page_info(struct page_info *info) 90 { 91 92 free(info->filename); 93 free(info->name); 94 free(info->suffix); 95 free(info); 96 } 97 98 /* 99 * Allocate and fill in a new struct page_info given the 100 * name of the man section directory and the dirent of the file. 101 * If the file is not a man page, return NULL. 102 */ 103 static struct page_info * 104 new_page_info(char *dir, struct dirent *dirent) 105 { 106 struct page_info *info; 107 int basename_length; 108 char *suffix; 109 struct stat st; 110 111 if ((info = malloc(sizeof (struct page_info))) == NULL) 112 err(1, "malloc"); 113 basename_length = strlen(dirent->d_name); 114 suffix = &dirent->d_name[basename_length]; 115 if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1) 116 err(1, "asprintf"); 117 for (;;) { 118 if (--suffix == dirent->d_name || !isalnum(*suffix)) { 119 if (*suffix == '.') 120 break; 121 free(info->filename); 122 free(info); 123 return (NULL); 124 } 125 } 126 *suffix++ = '\0'; 127 info->name = strdup(dirent->d_name); 128 info->suffix = strdup(suffix); 129 if (stat(info->filename, &st) < 0) { 130 warn("%s", info->filename); 131 free_page_info(info); 132 return (NULL); 133 } 134 if (!S_ISREG(st.st_mode)) { 135 free_page_info(info); 136 return (NULL); 137 } 138 info->inode = st.st_ino; 139 return (info); 140 } 141 142 /* 143 * Reset sbuf length to 0. 144 */ 145 static void 146 sbuf_clear(struct sbuf *sbuf) 147 { 148 149 sbuf->end = sbuf->content; 150 } 151 152 /* 153 * Allocate a new sbuf. 154 */ 155 static struct sbuf * 156 new_sbuf(void) 157 { 158 struct sbuf *sbuf; 159 160 if ((sbuf = malloc(sizeof (struct sbuf))) == NULL) 161 err(1, "malloc"); 162 if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL) 163 err(1, "malloc"); 164 sbuf->last = sbuf->content + LINE_ALLOC - 1; 165 sbuf_clear(sbuf); 166 167 return (sbuf); 168 } 169 170 /* 171 * Ensure that there is enough room in the sbuf 172 * for nchars more characters. 173 */ 174 static void 175 sbuf_need(struct sbuf *sbuf, int nchars) 176 { 177 char *new_content; 178 size_t size, cntsize; 179 180 /* Double the size of the allocation until the buffer is big enough */ 181 while (sbuf->end + nchars > sbuf->last) { 182 size = sbuf->last + 1 - sbuf->content; 183 size *= 2; 184 cntsize = sbuf->end - sbuf->content; 185 186 new_content = (char *)malloc(size); 187 (void) memcpy(new_content, sbuf->content, cntsize); 188 free(sbuf->content); 189 sbuf->content = new_content; 190 sbuf->end = new_content + cntsize; 191 sbuf->last = new_content + size - 1; 192 } 193 } 194 195 /* 196 * Append a string of a given length to the sbuf. 197 */ 198 static void 199 sbuf_append(struct sbuf *sbuf, const char *text, int length) 200 { 201 if (length > 0) { 202 sbuf_need(sbuf, length); 203 (void) memcpy(sbuf->end, text, length); 204 sbuf->end += length; 205 } 206 } 207 208 /* 209 * Append a null-terminated string to the sbuf. 210 */ 211 static void 212 sbuf_append_str(struct sbuf *sbuf, char *text) 213 { 214 215 sbuf_append(sbuf, text, strlen(text)); 216 } 217 218 /* 219 * Append an edited null-terminated string to the sbuf. 220 */ 221 static void 222 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) 223 { 224 int length; 225 226 if ((length = strlen(text)) > 0) { 227 sbuf_need(sbuf, length); 228 sbuf->end = copy(text, sbuf->end, length); 229 } 230 } 231 232 /* 233 * Strip any of a set of chars from the end of the sbuf. 234 */ 235 static void 236 sbuf_strip(struct sbuf *sbuf, const char *set) 237 { 238 239 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) 240 sbuf->end--; 241 } 242 243 /* 244 * Return the null-terminated string built by the sbuf. 245 */ 246 static char * 247 sbuf_content(struct sbuf *sbuf) 248 { 249 250 *sbuf->end = '\0'; 251 return (sbuf->content); 252 } 253 254 /* 255 * Return true if no man page exists in the directory with 256 * any of the names in the stringlist. 257 */ 258 static int 259 no_page_exists(char *dir, stringlist *names, char *suffix) 260 { 261 char path[MAXPATHLEN]; 262 size_t i; 263 264 for (i = 0; i < names->sl_cur; i++) { 265 (void) snprintf(path, MAXPATHLEN, "%s/%s.%s.gz", 266 dir, names->sl_str[i], suffix); 267 if (access(path, F_OK) < 0) { 268 path[strlen(path) - 3] = '\0'; 269 if (access(path, F_OK) < 0) 270 continue; 271 } 272 return (0); 273 } 274 return (1); 275 } 276 277 /* ARGSUSED sig */ 278 static void 279 trap_signal(int sig) 280 { 281 282 if (tempfile[0] != '\0') 283 (void) unlink(tempfile); 284 285 exit(1); 286 } 287 288 /* 289 * Attempt to open an output file. 290 * Return NULL if unsuccessful. 291 */ 292 static FILE * 293 open_output(char *name) 294 { 295 FILE *output; 296 297 whatis_lines = sl_init(); 298 (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name); 299 name = tempfile; 300 if ((output = fopen(name, "w")) == NULL) { 301 warn("%s", name); 302 return (NULL); 303 } 304 return (output); 305 } 306 307 static int 308 linesort(const void *a, const void *b) 309 { 310 311 return (strcmp((*(const char * const *)a), (*(const char * const *)b))); 312 } 313 314 /* 315 * Write the unique sorted lines to the output file. 316 */ 317 static void 318 finish_output(FILE *output, char *name) 319 { 320 size_t i; 321 char *prev = NULL; 322 323 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *), 324 linesort); 325 for (i = 0; i < whatis_lines->sl_cur; i++) { 326 char *line = whatis_lines->sl_str[i]; 327 if (i > 0 && strcmp(line, prev) == 0) 328 continue; 329 prev = line; 330 (void) fputs(line, output); 331 (void) putc('\n', output); 332 } 333 (void) fclose(output); 334 sl_free(whatis_lines, 1); 335 (void) rename(tempfile, name); 336 (void) unlink(tempfile); 337 } 338 339 static FILE * 340 open_whatis(char *mandir) 341 { 342 char filename[MAXPATHLEN]; 343 344 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 345 return (open_output(filename)); 346 } 347 348 static void 349 finish_whatis(FILE *output, char *mandir) 350 { 351 char filename[MAXPATHLEN]; 352 353 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 354 finish_output(output, filename); 355 } 356 357 /* 358 * Remove trailing spaces from a string, returning a pointer to just 359 * beyond the new last character. 360 */ 361 static char * 362 trim_rhs(char *str) 363 { 364 char *rhs; 365 366 rhs = &str[strlen(str)]; 367 while (--rhs > str && isspace(*rhs)) 368 ; 369 *++rhs = '\0'; 370 return (rhs); 371 } 372 373 /* 374 * Return a pointer to the next non-space character in the string. 375 */ 376 static char * 377 skip_spaces(char *s) 378 { 379 380 while (*s != '\0' && isspace(*s)) 381 s++; 382 383 return (s); 384 } 385 386 /* 387 * Return whether the line is of one of the forms: 388 * .Sh NAME 389 * .Sh "NAME" 390 * etc. 391 * assuming that section_start is ".Sh". 392 */ 393 static int 394 name_section_line(char *line, const char *section_start) 395 { 396 char *rhs; 397 398 if (strncmp(line, section_start, 3) != 0) 399 return (0); 400 line = skip_spaces(line + 3); 401 rhs = trim_rhs(line); 402 if (*line == '"') { 403 line++; 404 if (*--rhs == '"') 405 *rhs = '\0'; 406 } 407 if (strcmp(line, "NAME") == 0) 408 return (1); 409 410 return (0); 411 } 412 413 /* 414 * Copy characters while removing the most common nroff/troff markup: 415 * \(em, \(mi, \s[+-N], \& 416 * \fF, \f(fo, \f[font] 417 * \*s, \*(st, \*[stringvar] 418 */ 419 static char * 420 de_nroff_copy(char *from, char *to, int fromlen) 421 { 422 char *from_end = &from[fromlen]; 423 424 while (from < from_end) { 425 switch (*from) { 426 case '\\': 427 switch (*++from) { 428 case '(': 429 if (strncmp(&from[1], "em", 2) == 0 || 430 strncmp(&from[1], "mi", 2) == 0) { 431 from += 3; 432 continue; 433 } 434 break; 435 case 's': 436 if (*++from == '-') 437 from++; 438 while (isdigit(*from)) 439 from++; 440 continue; 441 case 'f': 442 case '*': 443 if (*++from == '(') { 444 from += 3; 445 } else if (*from == '[') { 446 while (*++from != ']' && 447 from < from_end) 448 ; 449 from++; 450 } else { 451 from++; 452 } 453 continue; 454 case '&': 455 from++; 456 continue; 457 } 458 break; 459 } 460 *to++ = *from++; 461 } 462 return (to); 463 } 464 465 /* 466 * Append a string with the nroff formatting removed. 467 */ 468 static void 469 add_nroff(char *text) 470 { 471 472 sbuf_append_edited(whatis_proto, text, de_nroff_copy); 473 } 474 475 /* 476 * Appends "name(suffix), " to whatis_final 477 */ 478 static void 479 add_whatis_name(char *name, char *suffix) 480 { 481 482 if (*name != '\0') { 483 sbuf_append_str(whatis_final, name); 484 sbuf_append(whatis_final, "(", 1); 485 sbuf_append_str(whatis_final, suffix); 486 sbuf_append(whatis_final, "), ", 3); 487 } 488 } 489 490 /* 491 * Processes an old-style man(7) line. This ignores commands with only 492 * a single number argument. 493 */ 494 static void 495 process_man_line(char *line) 496 { 497 char *p; 498 499 if (*line == '.') { 500 while (isalpha(*++line)) 501 ; 502 p = line = skip_spaces(line); 503 while (*p != '\0') { 504 if (!isdigit(*p)) 505 break; 506 p++; 507 } 508 if (*p == '\0') 509 return; 510 } else 511 line = skip_spaces(line); 512 if (*line != '\0') { 513 add_nroff(line); 514 sbuf_append(whatis_proto, " ", 1); 515 } 516 } 517 518 /* 519 * Processes a new-style mdoc(7) line. 520 */ 521 static void 522 process_mdoc_line(char *line) 523 { 524 int xref; 525 int arg = 0; 526 char *line_end = &line[strlen(line)]; 527 int orig_length = sbuf_length(whatis_proto); 528 char *next; 529 530 if (*line == '\0') 531 return; 532 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { 533 add_nroff(skip_spaces(line)); 534 sbuf_append(whatis_proto, " ", 1); 535 return; 536 } 537 xref = strncmp(line, ".Xr", 3) == 0; 538 line += 3; 539 while ((line = skip_spaces(line)) < line_end) { 540 if (*line == '"') { 541 next = ++line; 542 for (;;) { 543 next = strchr(next, '"'); 544 if (next == NULL) 545 break; 546 (void) memmove(next, next + 1, strlen(next)); 547 line_end--; 548 if (*next != '"') 549 break; 550 next++; 551 } 552 } else { 553 next = strpbrk(line, " \t"); 554 } 555 if (next != NULL) 556 *next++ = '\0'; 557 else 558 next = line_end; 559 if (isupper(*line) && islower(line[1]) && line[2] == '\0') { 560 if (strcmp(line, "Ns") == 0) { 561 arg = 0; 562 line = next; 563 continue; 564 } 565 if (strstr(line, MDOC_COMMANDS) != NULL) { 566 line = next; 567 continue; 568 } 569 } 570 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { 571 if (xref) { 572 sbuf_append(whatis_proto, "(", 1); 573 add_nroff(line); 574 sbuf_append(whatis_proto, ")", 1); 575 xref = 0; 576 } else { 577 sbuf_append(whatis_proto, " ", 1); 578 } 579 } 580 add_nroff(line); 581 arg++; 582 line = next; 583 } 584 if (sbuf_length(whatis_proto) > orig_length) 585 sbuf_append(whatis_proto, " ", 1); 586 } 587 588 /* 589 * Collect a list of comma-separated names from the text. 590 */ 591 static void 592 collect_names(stringlist *names, char *text) 593 { 594 char *arg; 595 596 for (;;) { 597 arg = text; 598 text = strchr(text, ','); 599 if (text != NULL) 600 *text++ = '\0'; 601 (void) sl_add(names, arg); 602 if (text == NULL) 603 return; 604 if (*text == ' ') 605 text++; 606 } 607 } 608 609 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; 610 611 /* 612 * Process a man page source into a single whatis line and add it 613 * to whatis_lines. 614 */ 615 static void 616 process_page(struct page_info *page, char *section_dir) 617 { 618 FILE *fp; 619 stringlist *names; 620 char *descr; 621 int state = STATE_UNKNOWN; 622 size_t i; 623 char *line = NULL; 624 size_t linecap = 0; 625 626 sbuf_clear(whatis_proto); 627 if ((fp = fopen(page->filename, "r")) == NULL) { 628 warn("%s", page->filename); 629 return; 630 } 631 while (getline(&line, &linecap, fp) > 0) { 632 /* Skip comments */ 633 if (strncmp(line, ".\\\"", 3) == 0) 634 continue; 635 switch (state) { 636 /* Haven't reached the NAME section yet */ 637 case STATE_UNKNOWN: 638 if (name_section_line(line, ".SH")) 639 state = STATE_MANSTYLE; 640 else if (name_section_line(line, ".Sh")) 641 state = STATE_MDOCNAME; 642 continue; 643 /* Inside an old-style .SH NAME section */ 644 case STATE_MANSTYLE: 645 if (strncmp(line, ".SH", 3) == 0 || 646 strncmp(line, ".SS", 3) == 0) 647 break; 648 (void) trim_rhs(line); 649 if (strcmp(line, ".") == 0) 650 continue; 651 if (strncmp(line, ".IX", 3) == 0) { 652 line += 3; 653 line = skip_spaces(line); 654 } 655 process_man_line(line); 656 continue; 657 /* Inside a new-style .Sh NAME section (the .Nm part) */ 658 case STATE_MDOCNAME: 659 (void) trim_rhs(line); 660 if (strncmp(line, ".Nm", 3) == 0) { 661 process_mdoc_line(line); 662 continue; 663 } else { 664 if (strcmp(line, ".") == 0) 665 continue; 666 sbuf_append(whatis_proto, "- ", 2); 667 state = STATE_MDOCDESC; 668 } 669 /* FALLTHROUGH */ 670 /* Inside a new-style .Sh NAME section (after the .Nm-s) */ 671 case STATE_MDOCDESC: 672 if (strncmp(line, ".Sh", 3) == 0) 673 break; 674 (void) trim_rhs(line); 675 if (strcmp(line, ".") == 0) 676 continue; 677 process_mdoc_line(line); 678 continue; 679 } 680 break; 681 } 682 (void) fclose(fp); 683 sbuf_strip(whatis_proto, " \t.-"); 684 line = sbuf_content(whatis_proto); 685 /* 686 * Line now contains the appropriate data, but without the 687 * proper indentation or the section appended to each name. 688 */ 689 descr = strstr(line, " - "); 690 if (descr == NULL) { 691 descr = strchr(line, ' '); 692 if (descr == NULL) 693 return; 694 *descr++ = '\0'; 695 } else { 696 *descr = '\0'; 697 descr += 3; 698 } 699 names = sl_init(); 700 collect_names(names, line); 701 sbuf_clear(whatis_final); 702 if (!sl_find(names, page->name) && 703 no_page_exists(section_dir, names, page->suffix)) { 704 /* 705 * Add the page name since that's the only 706 * thing that man(1) will find. 707 */ 708 add_whatis_name(page->name, page->suffix); 709 } 710 for (i = 0; i < names->sl_cur; i++) 711 add_whatis_name(names->sl_str[i], page->suffix); 712 sl_free(names, 0); 713 /* Remove last ", " */ 714 sbuf_retract(whatis_final, 2); 715 while (sbuf_length(whatis_final) < INDENT) 716 sbuf_append(whatis_final, " ", 1); 717 sbuf_append(whatis_final, " - ", 3); 718 sbuf_append_str(whatis_final, skip_spaces(descr)); 719 (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); 720 } 721 722 /* 723 * Sort pages first by inode number, then by name. 724 */ 725 static int 726 pagesort(const void *a, const void *b) 727 { 728 const struct page_info *p1 = *(struct page_info * const *) a; 729 const struct page_info *p2 = *(struct page_info * const *) b; 730 731 if (p1->inode == p2->inode) 732 return (strcmp(p1->name, p2->name)); 733 734 return (p1->inode - p2->inode); 735 } 736 737 /* 738 * Process a single man section. 739 */ 740 static void 741 process_section(char *section_dir) 742 { 743 struct dirent **entries; 744 int nentries; 745 struct page_info **pages; 746 int npages = 0; 747 int i; 748 ino_t prev_inode = 0; 749 750 /* Scan the man section directory for pages */ 751 nentries = scandir(section_dir, &entries, NULL, alphasort); 752 753 /* Collect information about man pages */ 754 pages = (struct page_info **)calloc(nentries, 755 sizeof (struct page_info *)); 756 for (i = 0; i < nentries; i++) { 757 struct page_info *info = new_page_info(section_dir, entries[i]); 758 if (info != NULL) 759 pages[npages++] = info; 760 free(entries[i]); 761 } 762 free(entries); 763 qsort(pages, npages, sizeof (struct page_info *), pagesort); 764 765 /* Process each unique page */ 766 for (i = 0; i < npages; i++) { 767 struct page_info *page = pages[i]; 768 if (page->inode != prev_inode) { 769 prev_inode = page->inode; 770 process_page(page, section_dir); 771 } 772 free_page_info(page); 773 } 774 free(pages); 775 } 776 777 /* 778 * Return whether the directory entry is a man page section. 779 */ 780 static int 781 select_sections(const struct dirent *entry) 782 { 783 const char *p = &entry->d_name[3]; 784 785 if (strncmp(entry->d_name, "man", 3) != 0) 786 return (0); 787 while (*p != '\0') { 788 if (!isalnum(*p++)) 789 return (0); 790 } 791 return (1); 792 } 793 794 /* 795 * Process a single top-level man directory by finding all the 796 * sub-directories named man* and processing each one in turn. 797 */ 798 void 799 mwpath(char *path) 800 { 801 FILE *fp = NULL; 802 struct dirent **entries; 803 int nsections; 804 int i; 805 806 (void) signal(SIGINT, trap_signal); 807 (void) signal(SIGHUP, trap_signal); 808 (void) signal(SIGQUIT, trap_signal); 809 (void) signal(SIGTERM, trap_signal); 810 811 whatis_proto = new_sbuf(); 812 whatis_final = new_sbuf(); 813 814 nsections = scandir(path, &entries, select_sections, alphasort); 815 if ((fp = open_whatis(path)) == NULL) 816 return; 817 for (i = 0; i < nsections; i++) { 818 char section_dir[MAXPATHLEN]; 819 820 (void) snprintf(section_dir, MAXPATHLEN, "%s/%s", 821 path, entries[i]->d_name); 822 process_section(section_dir); 823 free(entries[i]); 824 } 825 free(entries); 826 finish_whatis(fp, path); 827 }