1 /* $Id: term.c,v 1.201 2011/09/21 09:57:13 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc.h" 32 #include "out.h" 33 #include "term.h" 34 #include "main.h" 35 36 static void adjbuf(struct termp *p, int); 37 static void bufferc(struct termp *, char); 38 static void encode(struct termp *, const char *, size_t); 39 static void encode1(struct termp *, int); 40 41 void 42 term_free(struct termp *p) 43 { 44 45 if (p->buf) 46 free(p->buf); 47 if (p->symtab) 48 mchars_free(p->symtab); 49 50 free(p); 51 } 52 53 54 void 55 term_begin(struct termp *p, term_margin head, 56 term_margin foot, const void *arg) 57 { 58 59 p->headf = head; 60 p->footf = foot; 61 p->argf = arg; 62 (*p->begin)(p); 63 } 64 65 66 void 67 term_end(struct termp *p) 68 { 69 70 (*p->end)(p); 71 } 72 73 /* 74 * Flush a line of text. A "line" is loosely defined as being something 75 * that should be followed by a newline, regardless of whether it's 76 * broken apart by newlines getting there. A line can also be a 77 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does 78 * not have a trailing newline. 79 * 80 * The following flags may be specified: 81 * 82 * - TERMP_NOBREAK: this is the most important and is used when making 83 * columns. In short: don't print a newline and instead expect the 84 * next call to do the padding up to the start of the next column. 85 * 86 * - TERMP_TWOSPACE: make sure there is room for at least two space 87 * characters of padding. Otherwise, rather break the line. 88 * 89 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and 90 * the line is overrun, and don't pad-right if it's underrun. 91 * 92 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when 93 * overrunning, instead save the position and continue at that point 94 * when the next invocation. 95 * 96 * In-line line breaking: 97 * 98 * If TERMP_NOBREAK is specified and the line overruns the right 99 * margin, it will break and pad-right to the right margin after 100 * writing. If maxrmargin is violated, it will break and continue 101 * writing from the right-margin, which will lead to the above scenario 102 * upon exit. Otherwise, the line will break at the right margin. 103 */ 104 void 105 term_flushln(struct termp *p) 106 { 107 int i; /* current input position in p->buf */ 108 size_t vis; /* current visual position on output */ 109 size_t vbl; /* number of blanks to prepend to output */ 110 size_t vend; /* end of word visual position on output */ 111 size_t bp; /* visual right border position */ 112 size_t dv; /* temporary for visual pos calculations */ 113 int j; /* temporary loop index for p->buf */ 114 int jhy; /* last hyph before overflow w/r/t j */ 115 size_t maxvis; /* output position of visible boundary */ 116 size_t mmax; /* used in calculating bp */ 117 118 /* 119 * First, establish the maximum columns of "visible" content. 120 * This is usually the difference between the right-margin and 121 * an indentation, but can be, for tagged lists or columns, a 122 * small set of values. 123 */ 124 assert (p->rmargin >= p->offset); 125 dv = p->rmargin - p->offset; 126 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 127 dv = p->maxrmargin - p->offset; 128 mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 129 130 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; 131 132 /* 133 * Calculate the required amount of padding. 134 */ 135 vbl = p->offset + p->overstep > p->viscol ? 136 p->offset + p->overstep - p->viscol : 0; 137 138 vis = vend = 0; 139 i = 0; 140 141 while (i < p->col) { 142 /* 143 * Handle literal tab characters: collapse all 144 * subsequent tabs into a single huge set of spaces. 145 */ 146 while (i < p->col && '\t' == p->buf[i]) { 147 vend = (vis / p->tabwidth + 1) * p->tabwidth; 148 vbl += vend - vis; 149 vis = vend; 150 i++; 151 } 152 153 /* 154 * Count up visible word characters. Control sequences 155 * (starting with the CSI) aren't counted. A space 156 * generates a non-printing word, which is valid (the 157 * space is printed according to regular spacing rules). 158 */ 159 160 for (j = i, jhy = 0; j < p->col; j++) { 161 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) 162 break; 163 164 /* Back over the the last printed character. */ 165 if (8 == p->buf[j]) { 166 assert(j); 167 vend -= (*p->width)(p, p->buf[j - 1]); 168 continue; 169 } 170 171 /* Regular word. */ 172 /* Break at the hyphen point if we overrun. */ 173 if (vend > vis && vend < bp && 174 ASCII_HYPH == p->buf[j]) 175 jhy = j; 176 177 vend += (*p->width)(p, p->buf[j]); 178 } 179 180 /* 181 * Find out whether we would exceed the right margin. 182 * If so, break to the next line. 183 */ 184 if (vend > bp && 0 == jhy && vis > 0) { 185 vend -= vis; 186 (*p->endline)(p); 187 p->viscol = 0; 188 if (TERMP_NOBREAK & p->flags) { 189 vbl = p->rmargin; 190 vend += p->rmargin - p->offset; 191 } else 192 vbl = p->offset; 193 194 /* Remove the p->overstep width. */ 195 196 bp += (size_t)p->overstep; 197 p->overstep = 0; 198 } 199 200 /* Write out the [remaining] word. */ 201 for ( ; i < p->col; i++) { 202 if (vend > bp && jhy > 0 && i > jhy) 203 break; 204 if ('\t' == p->buf[i]) 205 break; 206 if (' ' == p->buf[i]) { 207 j = i; 208 while (' ' == p->buf[i]) 209 i++; 210 dv = (size_t)(i - j) * (*p->width)(p, ' '); 211 vbl += dv; 212 vend += dv; 213 break; 214 } 215 if (ASCII_NBRSP == p->buf[i]) { 216 vbl += (*p->width)(p, ' '); 217 continue; 218 } 219 220 /* 221 * Now we definitely know there will be 222 * printable characters to output, 223 * so write preceding white space now. 224 */ 225 if (vbl) { 226 (*p->advance)(p, vbl); 227 p->viscol += vbl; 228 vbl = 0; 229 } 230 231 if (ASCII_HYPH == p->buf[i]) { 232 (*p->letter)(p, '-'); 233 p->viscol += (*p->width)(p, '-'); 234 continue; 235 } 236 237 (*p->letter)(p, p->buf[i]); 238 if (8 == p->buf[i]) 239 p->viscol -= (*p->width)(p, p->buf[i-1]); 240 else 241 p->viscol += (*p->width)(p, p->buf[i]); 242 } 243 vis = vend; 244 } 245 246 /* 247 * If there was trailing white space, it was not printed; 248 * so reset the cursor position accordingly. 249 */ 250 if (vis) 251 vis -= vbl; 252 253 p->col = 0; 254 p->overstep = 0; 255 256 if ( ! (TERMP_NOBREAK & p->flags)) { 257 p->viscol = 0; 258 (*p->endline)(p); 259 return; 260 } 261 262 if (TERMP_HANG & p->flags) { 263 /* We need one blank after the tag. */ 264 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' ')); 265 266 /* 267 * Behave exactly the same way as groff: 268 * If we have overstepped the margin, temporarily move 269 * it to the right and flag the rest of the line to be 270 * shorter. 271 * If we landed right at the margin, be happy. 272 * If we are one step before the margin, temporarily 273 * move it one step LEFT and flag the rest of the line 274 * to be longer. 275 */ 276 if (p->overstep < -1) 277 p->overstep = 0; 278 return; 279 280 } else if (TERMP_DANGLE & p->flags) 281 return; 282 283 /* If the column was overrun, break the line. */ 284 if (maxvis <= vis + 285 ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) { 286 (*p->endline)(p); 287 p->viscol = 0; 288 } 289 } 290 291 292 /* 293 * A newline only breaks an existing line; it won't assert vertical 294 * space. All data in the output buffer is flushed prior to the newline 295 * assertion. 296 */ 297 void 298 term_newln(struct termp *p) 299 { 300 301 p->flags |= TERMP_NOSPACE; 302 if (p->col || p->viscol) 303 term_flushln(p); 304 } 305 306 307 /* 308 * Asserts a vertical space (a full, empty line-break between lines). 309 * Note that if used twice, this will cause two blank spaces and so on. 310 * All data in the output buffer is flushed prior to the newline 311 * assertion. 312 */ 313 void 314 term_vspace(struct termp *p) 315 { 316 317 term_newln(p); 318 p->viscol = 0; 319 (*p->endline)(p); 320 } 321 322 void 323 term_fontlast(struct termp *p) 324 { 325 enum termfont f; 326 327 f = p->fontl; 328 p->fontl = p->fontq[p->fonti]; 329 p->fontq[p->fonti] = f; 330 } 331 332 333 void 334 term_fontrepl(struct termp *p, enum termfont f) 335 { 336 337 p->fontl = p->fontq[p->fonti]; 338 p->fontq[p->fonti] = f; 339 } 340 341 342 void 343 term_fontpush(struct termp *p, enum termfont f) 344 { 345 346 assert(p->fonti + 1 < 10); 347 p->fontl = p->fontq[p->fonti]; 348 p->fontq[++p->fonti] = f; 349 } 350 351 352 const void * 353 term_fontq(struct termp *p) 354 { 355 356 return(&p->fontq[p->fonti]); 357 } 358 359 360 enum termfont 361 term_fonttop(struct termp *p) 362 { 363 364 return(p->fontq[p->fonti]); 365 } 366 367 368 void 369 term_fontpopq(struct termp *p, const void *key) 370 { 371 372 while (p->fonti >= 0 && key != &p->fontq[p->fonti]) 373 p->fonti--; 374 assert(p->fonti >= 0); 375 } 376 377 378 void 379 term_fontpop(struct termp *p) 380 { 381 382 assert(p->fonti); 383 p->fonti--; 384 } 385 386 /* 387 * Handle pwords, partial words, which may be either a single word or a 388 * phrase that cannot be broken down (such as a literal string). This 389 * handles word styling. 390 */ 391 void 392 term_word(struct termp *p, const char *word) 393 { 394 const char *seq, *cp; 395 char c; 396 int sz, uc; 397 size_t ssz; 398 enum mandoc_esc esc; 399 400 if ( ! (TERMP_NOSPACE & p->flags)) { 401 if ( ! (TERMP_KEEP & p->flags)) { 402 if (TERMP_PREKEEP & p->flags) 403 p->flags |= TERMP_KEEP; 404 bufferc(p, ' '); 405 if (TERMP_SENTENCE & p->flags) 406 bufferc(p, ' '); 407 } else 408 bufferc(p, ASCII_NBRSP); 409 } 410 411 if ( ! (p->flags & TERMP_NONOSPACE)) 412 p->flags &= ~TERMP_NOSPACE; 413 else 414 p->flags |= TERMP_NOSPACE; 415 416 p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); 417 418 while ('\0' != *word) { 419 if ((ssz = strcspn(word, "\\")) > 0) 420 encode(p, word, ssz); 421 422 word += (int)ssz; 423 if ('\\' != *word) 424 continue; 425 426 word++; 427 esc = mandoc_escape(&word, &seq, &sz); 428 if (ESCAPE_ERROR == esc) 429 break; 430 431 if (TERMENC_ASCII != p->enc) 432 switch (esc) { 433 case (ESCAPE_UNICODE): 434 uc = mchars_num2uc(seq + 1, sz - 1); 435 if ('\0' == uc) 436 break; 437 encode1(p, uc); 438 continue; 439 case (ESCAPE_SPECIAL): 440 uc = mchars_spec2cp(p->symtab, seq, sz); 441 if (uc <= 0) 442 break; 443 encode1(p, uc); 444 continue; 445 default: 446 break; 447 } 448 449 switch (esc) { 450 case (ESCAPE_UNICODE): 451 encode1(p, '?'); 452 break; 453 case (ESCAPE_NUMBERED): 454 c = mchars_num2char(seq, sz); 455 if ('\0' != c) 456 encode(p, &c, 1); 457 break; 458 case (ESCAPE_SPECIAL): 459 cp = mchars_spec2str(p->symtab, seq, sz, &ssz); 460 if (NULL != cp) 461 encode(p, cp, ssz); 462 else if (1 == ssz) 463 encode(p, seq, sz); 464 break; 465 case (ESCAPE_FONTBOLD): 466 term_fontrepl(p, TERMFONT_BOLD); 467 break; 468 case (ESCAPE_FONTITALIC): 469 term_fontrepl(p, TERMFONT_UNDER); 470 break; 471 case (ESCAPE_FONT): 472 /* FALLTHROUGH */ 473 case (ESCAPE_FONTROMAN): 474 term_fontrepl(p, TERMFONT_NONE); 475 break; 476 case (ESCAPE_FONTPREV): 477 term_fontlast(p); 478 break; 479 case (ESCAPE_NOSPACE): 480 if ('\0' == *word) 481 p->flags |= TERMP_NOSPACE; 482 break; 483 default: 484 break; 485 } 486 } 487 } 488 489 static void 490 adjbuf(struct termp *p, int sz) 491 { 492 493 if (0 == p->maxcols) 494 p->maxcols = 1024; 495 while (sz >= p->maxcols) 496 p->maxcols <<= 2; 497 498 p->buf = mandoc_realloc 499 (p->buf, sizeof(int) * (size_t)p->maxcols); 500 } 501 502 static void 503 bufferc(struct termp *p, char c) 504 { 505 506 if (p->col + 1 >= p->maxcols) 507 adjbuf(p, p->col + 1); 508 509 p->buf[p->col++] = c; 510 } 511 512 /* 513 * See encode(). 514 * Do this for a single (probably unicode) value. 515 * Does not check for non-decorated glyphs. 516 */ 517 static void 518 encode1(struct termp *p, int c) 519 { 520 enum termfont f; 521 522 if (p->col + 4 >= p->maxcols) 523 adjbuf(p, p->col + 4); 524 525 f = term_fonttop(p); 526 527 if (TERMFONT_NONE == f) { 528 p->buf[p->col++] = c; 529 return; 530 } else if (TERMFONT_UNDER == f) { 531 p->buf[p->col++] = '_'; 532 } else 533 p->buf[p->col++] = c; 534 535 p->buf[p->col++] = 8; 536 p->buf[p->col++] = c; 537 } 538 539 static void 540 encode(struct termp *p, const char *word, size_t sz) 541 { 542 enum termfont f; 543 int i, len; 544 545 /* LINTED */ 546 len = sz; 547 548 /* 549 * Encode and buffer a string of characters. If the current 550 * font mode is unset, buffer directly, else encode then buffer 551 * character by character. 552 */ 553 554 if (TERMFONT_NONE == (f = term_fonttop(p))) { 555 if (p->col + len >= p->maxcols) 556 adjbuf(p, p->col + len); 557 for (i = 0; i < len; i++) 558 p->buf[p->col++] = word[i]; 559 return; 560 } 561 562 /* Pre-buffer, assuming worst-case. */ 563 564 if (p->col + 1 + (len * 3) >= p->maxcols) 565 adjbuf(p, p->col + 1 + (len * 3)); 566 567 for (i = 0; i < len; i++) { 568 if (ASCII_HYPH != word[i] && 569 ! isgraph((unsigned char)word[i])) { 570 p->buf[p->col++] = word[i]; 571 continue; 572 } 573 574 if (TERMFONT_UNDER == f) 575 p->buf[p->col++] = '_'; 576 else if (ASCII_HYPH == word[i]) 577 p->buf[p->col++] = '-'; 578 else 579 p->buf[p->col++] = word[i]; 580 581 p->buf[p->col++] = 8; 582 p->buf[p->col++] = word[i]; 583 } 584 } 585 586 size_t 587 term_len(const struct termp *p, size_t sz) 588 { 589 590 return((*p->width)(p, ' ') * sz); 591 } 592 593 594 size_t 595 term_strlen(const struct termp *p, const char *cp) 596 { 597 size_t sz, rsz, i; 598 int ssz, c; 599 const char *seq, *rhs; 600 enum mandoc_esc esc; 601 static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; 602 603 /* 604 * Account for escaped sequences within string length 605 * calculations. This follows the logic in term_word() as we 606 * must calculate the width of produced strings. 607 */ 608 609 sz = 0; 610 while ('\0' != *cp) { 611 rsz = strcspn(cp, rej); 612 for (i = 0; i < rsz; i++) 613 sz += (*p->width)(p, *cp++); 614 615 c = 0; 616 switch (*cp) { 617 case ('\\'): 618 cp++; 619 esc = mandoc_escape(&cp, &seq, &ssz); 620 if (ESCAPE_ERROR == esc) 621 return(sz); 622 623 if (TERMENC_ASCII != p->enc) 624 switch (esc) { 625 case (ESCAPE_UNICODE): 626 c = mchars_num2uc 627 (seq + 1, ssz - 1); 628 if ('\0' == c) 629 break; 630 sz += (*p->width)(p, c); 631 continue; 632 case (ESCAPE_SPECIAL): 633 c = mchars_spec2cp 634 (p->symtab, seq, ssz); 635 if (c <= 0) 636 break; 637 sz += (*p->width)(p, c); 638 continue; 639 default: 640 break; 641 } 642 643 rhs = NULL; 644 645 switch (esc) { 646 case (ESCAPE_UNICODE): 647 sz += (*p->width)(p, '?'); 648 break; 649 case (ESCAPE_NUMBERED): 650 c = mchars_num2char(seq, ssz); 651 if ('\0' != c) 652 sz += (*p->width)(p, c); 653 break; 654 case (ESCAPE_SPECIAL): 655 rhs = mchars_spec2str 656 (p->symtab, seq, ssz, &rsz); 657 658 if (ssz != 1 || rhs) 659 break; 660 661 rhs = seq; 662 rsz = ssz; 663 break; 664 default: 665 break; 666 } 667 668 if (NULL == rhs) 669 break; 670 671 for (i = 0; i < rsz; i++) 672 sz += (*p->width)(p, *rhs++); 673 break; 674 case (ASCII_NBRSP): 675 sz += (*p->width)(p, ' '); 676 cp++; 677 break; 678 case (ASCII_HYPH): 679 sz += (*p->width)(p, '-'); 680 cp++; 681 break; 682 default: 683 break; 684 } 685 } 686 687 return(sz); 688 } 689 690 /* ARGSUSED */ 691 size_t 692 term_vspan(const struct termp *p, const struct roffsu *su) 693 { 694 double r; 695 696 switch (su->unit) { 697 case (SCALE_CM): 698 r = su->scale * 2; 699 break; 700 case (SCALE_IN): 701 r = su->scale * 6; 702 break; 703 case (SCALE_PC): 704 r = su->scale; 705 break; 706 case (SCALE_PT): 707 r = su->scale / 8; 708 break; 709 case (SCALE_MM): 710 r = su->scale / 1000; 711 break; 712 case (SCALE_VS): 713 r = su->scale; 714 break; 715 default: 716 r = su->scale - 1; 717 break; 718 } 719 720 if (r < 0.0) 721 r = 0.0; 722 return(/* LINTED */(size_t) 723 r); 724 } 725 726 size_t 727 term_hspan(const struct termp *p, const struct roffsu *su) 728 { 729 double v; 730 731 v = ((*p->hspan)(p, su)); 732 if (v < 0.0) 733 v = 0.0; 734 return((size_t) /* LINTED */ 735 v); 736 }