illumos-gate Udiff usr/src/cmd/mandoc/term.c

Print this page

5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD

@@ -1,9 +1,9 @@
-/*      $Id: term.c,v 1.201 2011/09/21 09:57:13 schwarze Exp $ */
+/*      $Id: term.c,v 1.214 2013/12/25 00:39:31 schwarze Exp $ */
 /*
  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
  * copyright notice and this permission notice appear in all copies.
  *

@@ -31,11 +31,12 @@
 #include "mandoc.h"
 #include "out.h"
 #include "term.h"
 #include "main.h"
 
-static  void             adjbuf(struct termp *p, int);
+static  size_t           cond_width(const struct termp *, int, int *);
+static  void             adjbuf(struct termp *p, size_t);
 static  void             bufferc(struct termp *, char);
 static  void             encode(struct termp *, const char *, size_t);
 static  void             encode1(struct termp *, int);
 
 void

@@ -80,14 +81,13 @@
  * The following flags may be specified:
  *
  *  - TERMP_NOBREAK: this is the most important and is used when making
  *    columns.  In short: don't print a newline and instead expect the
  *    next call to do the padding up to the start of the next column.
+ *    p->trailspace may be set to 0, 1, or 2, depending on how many
+ *    space characters are required at the end of the column.
  *
- *  - TERMP_TWOSPACE: make sure there is room for at least two space
- *    characters of padding.  Otherwise, rather break the line.
- *
  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
  *    the line is overrun, and don't pad-right if it's underrun.
  *
  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
  *    overrunning, instead save the position and continue at that point

@@ -102,26 +102,32 @@
  *  upon exit.  Otherwise, the line will break at the right margin.
  */
 void
 term_flushln(struct termp *p)
 {
-        int              i;     /* current input position in p->buf */
+        size_t           i;     /* current input position in p->buf */
+        int              ntab;  /* number of tabs to prepend */
         size_t           vis;   /* current visual position on output */
         size_t           vbl;   /* number of blanks to prepend to output */
         size_t           vend;  /* end of word visual position on output */
         size_t           bp;    /* visual right border position */
         size_t           dv;    /* temporary for visual pos calculations */
-        int              j;     /* temporary loop index for p->buf */
-        int              jhy;   /* last hyph before overflow w/r/t j */
+        size_t           j;     /* temporary loop index for p->buf */
+        size_t           jhy;   /* last hyph before overflow w/r/t j */
         size_t           maxvis; /* output position of visible boundary */
         size_t           mmax; /* used in calculating bp */
 
         /*
          * First, establish the maximum columns of "visible" content.
          * This is usually the difference between the right-margin and
          * an indentation, but can be, for tagged lists or columns, a
          * small set of values. 
+         *
+         * The following unsigned-signed subtractions look strange,
+         * but they are actually correct.  If the int p->overstep
+         * is negative, it gets sign extended.  Subtracting that
+         * very large size_t effectively adds a small number to dv.
          */
         assert  (p->rmargin >= p->offset);
         dv     = p->rmargin - p->offset;
         maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
         dv     = p->maxrmargin - p->offset;

@@ -141,14 +147,16 @@
         while (i < p->col) {
                 /*
                  * Handle literal tab characters: collapse all
                  * subsequent tabs into a single huge set of spaces.
                  */
+                ntab = 0;
                 while (i < p->col && '\t' == p->buf[i]) {
                         vend = (vis / p->tabwidth + 1) * p->tabwidth;
                         vbl += vend - vis;
                         vis = vend;
+                        ntab++;
                         i++;
                 }
 
                 /*
                  * Count up visible word characters.  Control sequences

@@ -156,11 +164,11 @@
                  * generates a non-printing word, which is valid (the
                  * space is printed according to regular spacing rules).
                  */
 
                 for (j = i, jhy = 0; j < p->col; j++) {
-                        if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
+                        if (' ' == p->buf[j] || '\t' == p->buf[j])
                                 break;
 
                         /* Back over the last printed character. */
                         if (8 == p->buf[j]) {
                                 assert(j);

@@ -189,12 +197,21 @@
                                 vbl = p->rmargin;
                                 vend += p->rmargin - p->offset;
                         } else
                                 vbl = p->offset;
 
-                        /* Remove the p->overstep width. */
+                        /* use pending tabs on the new line */
 
+                        if (0 < ntab)
+                                vbl += ntab * p->tabwidth;
+
+                        /*
+                         * Remove the p->overstep width.
+                         * Again, if p->overstep is negative,
+                         * sign extension does the right thing.
+                         */
+
                         bp += (size_t)p->overstep;
                         p->overstep = 0;
                 }
 
                 /* Write out the [remaining] word. */

@@ -205,11 +222,11 @@
                                 break;
                         if (' ' == p->buf[i]) {
                                 j = i;
                                 while (' ' == p->buf[i])
                                         i++;
-                                dv = (size_t)(i - j) * (*p->width)(p, ' ');
+                                dv = (i - j) * (*p->width)(p, ' ');
                                 vbl += dv;
                                 vend += dv;
                                 break;
                         }
                         if (ASCII_NBRSP == p->buf[i]) {

@@ -258,33 +275,29 @@
                 (*p->endline)(p);
                 return;
         }
 
         if (TERMP_HANG & p->flags) {
-                /* We need one blank after the tag. */
-                p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
+                p->overstep = (int)(vis - maxvis +
+                                p->trailspace * (*p->width)(p, ' '));
 
                 /*
-                 * Behave exactly the same way as groff:
                  * If we have overstepped the margin, temporarily move
                  * it to the right and flag the rest of the line to be
                  * shorter.
-                 * If we landed right at the margin, be happy.
-                 * If we are one step before the margin, temporarily
-                 * move it one step LEFT and flag the rest of the line
-                 * to be longer.
+                 * If there is a request to keep the columns together,
+                 * allow negative overstep when the column is not full.
                  */
-                if (p->overstep < -1)
+                if (p->trailspace && p->overstep < 0)
                         p->overstep = 0;
                 return;
 
         } else if (TERMP_DANGLE & p->flags)
                 return;
 
         /* If the column was overrun, break the line. */
-        if (maxvis <= vis +
-            ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
+        if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) {
                 (*p->endline)(p);
                 p->viscol = 0;
         }
 }

@@ -314,10 +327,13 @@
 term_vspace(struct termp *p)
 {
 
         term_newln(p);
         p->viscol = 0;
+        if (0 < p->skipvsp)
+                p->skipvsp--;
+        else
         (*p->endline)(p);
 }
 
 void
 term_fontlast(struct termp *p)

@@ -367,11 +383,11 @@
 
 void
 term_fontpopq(struct termp *p, const void *key)
 {
 
-        while (p->fonti >= 0 && key != &p->fontq[p->fonti])
+        while (p->fonti >= 0 && key < (void *)(p->fontq + p->fonti))
                 p->fonti--;
         assert(p->fonti >= 0);
 }

@@ -389,41 +405,55 @@
  * handles word styling.
  */
 void
 term_word(struct termp *p, const char *word)
 {
+        const char       nbrsp[2] = { ASCII_NBRSP, 0 };
         const char      *seq, *cp;
         char             c;
         int              sz, uc;
         size_t           ssz;
         enum mandoc_esc  esc;
 
         if ( ! (TERMP_NOSPACE & p->flags)) {
                 if ( ! (TERMP_KEEP & p->flags)) {
-                        if (TERMP_PREKEEP & p->flags)
-                                p->flags |= TERMP_KEEP;
                         bufferc(p, ' ');
                         if (TERMP_SENTENCE & p->flags)
                                 bufferc(p, ' ');
                 } else
                         bufferc(p, ASCII_NBRSP);
         }
+        if (TERMP_PREKEEP & p->flags)
+                p->flags |= TERMP_KEEP;
 
         if ( ! (p->flags & TERMP_NONOSPACE))
                 p->flags &= ~TERMP_NOSPACE;
         else
                 p->flags |= TERMP_NOSPACE;
 
-        p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
+        p->flags &= ~TERMP_SENTENCE;
 
         while ('\0' != *word) {
-                if ((ssz = strcspn(word, "\\")) > 0)
+                if ('\\' != *word) {
+                        if (TERMP_SKIPCHAR & p->flags) {
+                                p->flags &= ~TERMP_SKIPCHAR;
+                                word++;
+                                continue;
+                        }
+                        if (TERMP_NBRWORD & p->flags) {
+                                if (' ' == *word) {
+                                        encode(p, nbrsp, 1);
+                                        word++;
+                                        continue;
+                                }
+                                ssz = strcspn(word, "\\ ");
+                        } else
+                                ssz = strcspn(word, "\\");
                         encode(p, word, ssz);
-
                 word += (int)ssz;
-                if ('\\' != *word)
                         continue;
+                }
 
                 word++;
                 esc = mandoc_escape(&word, &seq, &sz);
                 if (ESCAPE_ERROR == esc)
                         break;

@@ -466,39 +496,47 @@
                         term_fontrepl(p, TERMFONT_BOLD);
                         break;
                 case (ESCAPE_FONTITALIC):
                         term_fontrepl(p, TERMFONT_UNDER);
                         break;
+                case (ESCAPE_FONTBI):
+                        term_fontrepl(p, TERMFONT_BI);
+                        break;
                 case (ESCAPE_FONT):
                         /* FALLTHROUGH */
                 case (ESCAPE_FONTROMAN):
                         term_fontrepl(p, TERMFONT_NONE);
                         break;
                 case (ESCAPE_FONTPREV):
                         term_fontlast(p);
                         break;
                 case (ESCAPE_NOSPACE):
-                        if ('\0' == *word)
+                        if (TERMP_SKIPCHAR & p->flags)
+                                p->flags &= ~TERMP_SKIPCHAR;
+                        else if ('\0' == *word)
                                 p->flags |= TERMP_NOSPACE;
                         break;
+                case (ESCAPE_SKIPCHAR):
+                        p->flags |= TERMP_SKIPCHAR;
+                        break;
                 default:
                         break;
                 }
         }
+        p->flags &= ~TERMP_NBRWORD;
 }
 
 static void
-adjbuf(struct termp *p, int sz)
+adjbuf(struct termp *p, size_t sz)
 {
 
         if (0 == p->maxcols)
                 p->maxcols = 1024;
         while (sz >= p->maxcols)
                 p->maxcols <<= 2;
 
-        p->buf = mandoc_realloc
-                (p->buf, sizeof(int) * (size_t)p->maxcols);
+        p->buf = mandoc_realloc(p->buf, sizeof(int) * p->maxcols);
 }
 
 static void
 bufferc(struct termp *p, char c)
 {

@@ -517,71 +555,69 @@
 static void
 encode1(struct termp *p, int c)
 {
         enum termfont     f;
 
-        if (p->col + 4 >= p->maxcols)
-                adjbuf(p, p->col + 4);
+        if (TERMP_SKIPCHAR & p->flags) {
+                p->flags &= ~TERMP_SKIPCHAR;
+                return;
+        }
 
+        if (p->col + 6 >= p->maxcols)
+                adjbuf(p, p->col + 6);
+
         f = term_fonttop(p);
 
-        if (TERMFONT_NONE == f) {
-                p->buf[p->col++] = c;
-                return;
-        } else if (TERMFONT_UNDER == f) {
+        if (TERMFONT_UNDER == f || TERMFONT_BI == f) {
                 p->buf[p->col++] = '_';
-        } else
+                p->buf[p->col++] = 8;
+        }
+        if (TERMFONT_BOLD == f || TERMFONT_BI == f) {
+                if (ASCII_HYPH == c)
+                        p->buf[p->col++] = '-';
+                else
                 p->buf[p->col++] = c;
-
         p->buf[p->col++] = 8;
+        }
         p->buf[p->col++] = c;
 }
 
 static void
 encode(struct termp *p, const char *word, size_t sz)
 {
-        enum termfont     f;
-        int               i, len;
+        size_t            i;
 
-        /* LINTED */
-        len = sz;
+        if (TERMP_SKIPCHAR & p->flags) {
+                p->flags &= ~TERMP_SKIPCHAR;
+                return;
+        }
 
         /*
          * Encode and buffer a string of characters.  If the current
          * font mode is unset, buffer directly, else encode then buffer
          * character by character.
          */
 
-        if (TERMFONT_NONE == (f = term_fonttop(p))) {
-                if (p->col + len >= p->maxcols) 
-                        adjbuf(p, p->col + len);
-                for (i = 0; i < len; i++)
+        if (TERMFONT_NONE == term_fonttop(p)) {
+                if (p->col + sz >= p->maxcols) 
+                        adjbuf(p, p->col + sz);
+                for (i = 0; i < sz; i++)
                         p->buf[p->col++] = word[i];
                 return;
         }
 
         /* Pre-buffer, assuming worst-case. */
 
-        if (p->col + 1 + (len * 3) >= p->maxcols)
-                adjbuf(p, p->col + 1 + (len * 3));
+        if (p->col + 1 + (sz * 5) >= p->maxcols)
+                adjbuf(p, p->col + 1 + (sz * 5));
 
-        for (i = 0; i < len; i++) {
-                if (ASCII_HYPH != word[i] &&
-                    ! isgraph((unsigned char)word[i])) {
-                        p->buf[p->col++] = word[i];
-                        continue;
-                }
-
-                if (TERMFONT_UNDER == f)
-                        p->buf[p->col++] = '_';
-                else if (ASCII_HYPH == word[i])
-                        p->buf[p->col++] = '-';
+        for (i = 0; i < sz; i++) {
+                if (ASCII_HYPH == word[i] ||
+                    isgraph((unsigned char)word[i]))
+                        encode1(p, word[i]);
                 else
                         p->buf[p->col++] = word[i];
-
-                p->buf[p->col++] = 8;
-                p->buf[p->col++] = word[i];
         }
 }
 
 size_t
 term_len(const struct termp *p, size_t sz)

@@ -588,16 +624,26 @@
 {
 
         return((*p->width)(p, ' ') * sz);
 }
 
+static size_t
+cond_width(const struct termp *p, int c, int *skip)
+{
 
+        if (*skip) {
+                (*skip) = 0;
+                return(0);
+        } else
+                return((*p->width)(p, c));
+}
+
 size_t
 term_strlen(const struct termp *p, const char *cp)
 {
         size_t           sz, rsz, i;
-        int              ssz, c;
+        int              ssz, skip, c;
         const char      *seq, *rhs;
         enum mandoc_esc  esc;
         static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
 
         /*

@@ -605,14 +651,15 @@
          * calculations.  This follows the logic in term_word() as we
          * must calculate the width of produced strings.
          */
 
         sz = 0;
+        skip = 0;
         while ('\0' != *cp) {
                 rsz = strcspn(cp, rej);
                 for (i = 0; i < rsz; i++)
-                        sz += (*p->width)(p, *cp++);
+                        sz += cond_width(p, *cp++, &skip);
 
                 c = 0;
                 switch (*cp) {
                 case ('\\'):
                         cp++;

@@ -625,33 +672,33 @@
                                 case (ESCAPE_UNICODE):
                                         c = mchars_num2uc
                                                 (seq + 1, ssz - 1);
                                         if ('\0' == c)
                                                 break;
-                                        sz += (*p->width)(p, c);
+                                        sz += cond_width(p, c, &skip);
                                         continue;
                                 case (ESCAPE_SPECIAL):
                                         c = mchars_spec2cp
                                                 (p->symtab, seq, ssz);
                                         if (c <= 0)
                                                 break;
-                                        sz += (*p->width)(p, c);
+                                        sz += cond_width(p, c, &skip);
                                         continue;
                                 default:
                                         break;
                                 }
 
                         rhs = NULL;
 
                         switch (esc) {
                         case (ESCAPE_UNICODE):
-                                sz += (*p->width)(p, '?');
+                                sz += cond_width(p, '?', &skip);
                                 break;
                         case (ESCAPE_NUMBERED):
                                 c = mchars_num2char(seq, ssz);
                                 if ('\0' != c)
-                                        sz += (*p->width)(p, c);
+                                        sz += cond_width(p, c, &skip);
                                 break;
                         case (ESCAPE_SPECIAL):
                                 rhs = mchars_spec2str
                                         (p->symtab, seq, ssz, &rsz);

@@ -659,26 +706,34 @@
                                         break;
 
                                 rhs = seq;
                                 rsz = ssz;
                                 break;
+                        case (ESCAPE_SKIPCHAR):
+                                skip = 1;
+                                break;
                         default:
                                 break;
                         }
 
                         if (NULL == rhs)
                                 break;
 
+                        if (skip) {
+                                skip = 0;
+                                break;
+                        }
+
                         for (i = 0; i < rsz; i++)
                                 sz += (*p->width)(p, *rhs++);
                         break;
                 case (ASCII_NBRSP):
-                        sz += (*p->width)(p, ' ');
+                        sz += cond_width(p, ' ', &skip);
                         cp++;
                         break;
                 case (ASCII_HYPH):
-                        sz += (*p->width)(p, '-');
+                        sz += cond_width(p, '-', &skip);
                         cp++;
                         break;
                 default:
                         break;
                 }