Print this page
5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD
   1 /*      $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21 
  22 #include <sys/types.h>
  23 
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <errno.h>
  27 #include <limits.h>
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <time.h>
  32 
  33 #include "mandoc.h"
  34 #include "libmandoc.h"
  35 
  36 #define DATESIZE 32
  37 
  38 static  int      a2time(time_t *, const char *, const char *);
  39 static  char    *time2a(time_t);
  40 static  int      numescape(const char *);
  41 
  42 /*
  43  * Pass over recursive numerical expressions.  The context of this
  44  * function is important: it's only called within character-terminating
  45  * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
  46  * recursion: we don't care about what's in these blocks.
  47  * This returns the number of characters skipped or -1 if an error
  48  * occurs (the caller should bail).
      *
      * Three shapes are recognised at "start":
      *  - a backslash: the nested escape is handed to mandoc_escape() and
      *    the returned count includes the backslash itself;
      *  - an opening parenthesis: everything up to and including the next
      *    ')' that is not part of a nested escape is skipped;
      *  - anything else: nothing is skipped and 0 is returned.
      * -1 is returned when mandoc_escape() rejects a nested escape or when
      * the string ends before the closing parenthesis.
  49  */
  50 static int
  51 numescape(const char *start)
  52 {
  53         int              i;
  54         size_t           sz;
  55         const char      *cp;

  56 
  57         i = 0;
  58 
  59         /* The expression consists of a subexpression. */
  60 
  61         if ('\\' == start[i]) {
  62                 cp = &start[++i];
  63                 /*
  64                  * Read past the end of the subexpression.
  65                  * Bail immediately on errors.
  66                  */
  67                 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
  68                         return(-1);
                     /* i is 1 here, so this is the distance from start to cp. */
  69                 return(i + cp - &start[i]);
  70         } 
  71 
  72         if ('(' != start[i++])
  73                 return(0);


  74 
  75         /*
  76          * A parenthesised subexpression.  Read until the closing
  77          * parenthesis, making sure to handle any nested subexpressions
  78          * that might ruin our parse.
  79          */
  80 
  81         while (')' != start[i]) {
                     /* Jump to the next ')' or backslash, or to the end of the string. */
  82                 sz = strcspn(&start[i], ")\\");
  83                 i += (int)sz;
  84 
  85                 if ('\0' == start[i])
  86                         return(-1);
                     /* Landed on ')': let the loop condition end the scan. */
  87                 else if ('\\' != start[i])
  88                         continue;
  89 
                     /* Step over the backslash, then over the nested escape. */
  90                 cp = &start[++i];
  91                 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
  92                         return(-1);
  93                 i += cp - &start[i];
  94         }
  95 
  96         /* Read past the terminating ')'. */
  97         return(++i);
  98 }
  99 
 100 enum mandoc_esc
 101 mandoc_escape(const char **end, const char **start, int *sz)
 102 {
             /*
              * Classify the roff escape sequence whose first character
              * (the one following the backslash) is at *end.
              *
              * end:   in/out.  On success it is moved past the last
              *        character of the escape.  When a closing delimiter
              *        is missing, *end has already been overwritten with
              *        NULL by the time ESCAPE_ERROR is returned.
              * start: optional out (may be NULL).  Receives the position
              *        where the escape's argument substring begins.
              * sz:    optional out (may be NULL).  Receives the length of
              *        that substring; the \N shortcut for a directly
              *        following digit returns without storing it.
              *
              * Returns the escape class, or ESCAPE_ERROR for malformed
              * input.  Each case of the switch sets either "term" (a
              * closing delimiter to search for) or "lim" (a fixed argument
              * length); the code after the switch consumes whichever was
              * chosen.
              */
 103         char             c, term, numeric;
 104         int              i, lim, ssz, rlim;
 105         const char      *cp, *rstart;
 106         enum mandoc_esc  gly; 
 107 
 108         cp = *end;
 109         rstart = cp;
 110         if (start)
 111                 *start = rstart;
 112         i = lim = 0;
 113         gly = ESCAPE_ERROR;
 114         term = numeric = '\0';


 115 
 116         switch ((c = cp[i++])) {
 117         /*
 118          * First the glyphs.  There are several different forms of
 119          * these, but each eventually returns a substring of the glyph
 120          * name.
 121          */
 122         case ('('):
 123                 gly = ESCAPE_SPECIAL;
 124                 lim = 2;
 125                 break;
 126         case ('['):
 127                 gly = ESCAPE_SPECIAL;
 128                 /*
 129                  * Unicode escapes are defined in groff as \[uXXXX] to
 130                  * \[u10FFFF], where the contained value must be a valid
 131                  * Unicode codepoint.  Here, however, only check whether
 132                  * it's not a zero-width escape.
 133                  */
 134                 if ('u' == cp[i] && ']' != cp[i + 1])
 135                         gly = ESCAPE_UNICODE;
 136                 term = ']';
 137                 break;
 138         case ('C'):
                     /*
                      * Step over the opening quote, as every other quoted
                      * form below does; otherwise the terminator search
                      * would stop on that very quote and report an empty
                      * glyph name.
                      */
 139                 if ('\'' != cp[i++])
 140                         return(ESCAPE_ERROR);




 141                 gly = ESCAPE_SPECIAL;
 142                 term = '\'';
 143                 break;
 144 
 145         /*

















 146          * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
 147          * 'X' is the trigger.  These have opaque sub-strings.
 148          */
 149         case ('F'):
 150                 /* FALLTHROUGH */
 151         case ('g'):
 152                 /* FALLTHROUGH */
 153         case ('k'):
 154                 /* FALLTHROUGH */
 155         case ('M'):
 156                 /* FALLTHROUGH */
 157         case ('m'):
 158                 /* FALLTHROUGH */
 159         case ('n'):
 160                 /* FALLTHROUGH */
 161         case ('V'):
 162                 /* FALLTHROUGH */
 163         case ('Y'):
 164                 gly = ESCAPE_IGNORE;
 165                 /* FALLTHROUGH */
 166         case ('f'):
 167                 if (ESCAPE_ERROR == gly)
 168                         gly = ESCAPE_FONT;
 169 
 170                 rstart= &cp[i];
 171                 if (start) 
 172                         *start = rstart;
 173 
 174                 switch (cp[i++]) {
 175                 case ('('):
 176                         lim = 2;

 177                         break;
 178                 case ('['):

 179                         term = ']';
 180                         break;
 181                 default:
 182                         lim = 1;
 183                         i--;
 184                         break;
 185                 }
 186                 break;
 187 
 188         /*
 189          * These escapes are of the form \X'Y', where 'X' is the trigger
 190          * and 'Y' is any string.  These have opaque sub-strings.
 191          */
 192         case ('A'):
 193                 /* FALLTHROUGH */
 194         case ('b'):
 195                 /* FALLTHROUGH */


 196         case ('D'):
 197                 /* FALLTHROUGH */
 198         case ('o'):
 199                 /* FALLTHROUGH */
 200         case ('R'):
 201                 /* FALLTHROUGH */


 202         case ('X'):
 203                 /* FALLTHROUGH */
 204         case ('Z'):
 205                 if ('\'' != cp[i++])
 206                         return(ESCAPE_ERROR);
 207                 gly = ESCAPE_IGNORE;

 208                 term = '\'';
 209                 break;
 210 
 211         /*
 212          * These escapes are of the form \X'N', where 'X' is the trigger
 213          * and 'N' resolves to a numerical expression.
 214          */
 215         case ('B'):
 216                 /* FALLTHROUGH */
 217         case ('h'):
 218                 /* FALLTHROUGH */
 219         case ('H'):
 220                 /* FALLTHROUGH */
 221         case ('L'):
 222                 /* FALLTHROUGH */
 223         case ('l'):
 224                 gly = ESCAPE_NUMBERED;
 225                 /* FALLTHROUGH */
 226         case ('S'):
 227                 /* FALLTHROUGH */
 228         case ('v'):
 229                 /* FALLTHROUGH */
 230         case ('w'):
 231                 /* FALLTHROUGH */
 232         case ('x'):
 233                 if (ESCAPE_ERROR == gly)
 234                         gly = ESCAPE_IGNORE;
 235                 if ('\'' != cp[i++])
 236                         return(ESCAPE_ERROR);
 237                 term = numeric = '\'';


 238                 break;
 239 
 240         /*
 241          * Special handling for the numbered character escape.
 242          * XXX Do any other escapes need similar handling?
 243          */
 244         case ('N'):
 245                 if ('\0' == cp[i])
 246                         return(ESCAPE_ERROR);
                     /* Consume the character after 'N' (a digit or a delimiter). */
 247                 *end = &cp[++i];
 248                 if (isdigit((unsigned char)cp[i-1]))

 249                         return(ESCAPE_IGNORE);


                     /* Delimited form: collect the digits, then the closing delimiter. */
 250                 while (isdigit((unsigned char)**end))
 251                         (*end)++;
 252                 if (start)
 253                         *start = &cp[i];
 254                 if (sz)
 255                         *sz = *end - &cp[i];
 256                 if ('\0' != **end)
 257                         (*end)++;
 258                 return(ESCAPE_NUMBERED);
 259 
 260         /* 
 261          * Sizes get a special category of their own.
 262          */
 263         case ('s'):
 264                 gly = ESCAPE_IGNORE;
 265 
 266                 rstart = &cp[i];
 267                 if (start) 
 268                         *start = rstart;
 269 
 270                 /* See +/- counts as a sign. */
 271                 c = cp[i];
 272                 if ('+' == c || '-' == c || ASCII_HYPH == c)
 273                         ++i;
 274 
 275                 switch (cp[i++]) {
 276                 case ('('):
 277                         lim = 2;

 278                         break;
 279                 case ('['):
 280                         term = numeric = ']';

 281                         break;
 282                 case ('\''):
 283                         term = numeric = '\'';

 284                         break;
 285                 default:
 286                         lim = 1;
 287                         i--;
 288                         break;
 289                 }
 290 
 291                 /* See +/- counts as a sign. */
 292                 c = cp[i];
 293                 if ('+' == c || '-' == c || ASCII_HYPH == c)
 294                         ++i;
 295 
 296                 break;
 297 
 298         /*
 299          * Anything else is assumed to be a glyph.

 300          */
 301         default:
 302                 gly = ESCAPE_SPECIAL;
 303                 lim = 1;
 304                 i--;
 305                 break;
 306         }
 307 
 308         assert(ESCAPE_ERROR != gly);
 309 
             /* From here on the argument is taken to begin at cp[i]. */
 310         rstart = &cp[i];
 311         if (start)
 312                 *start = rstart;
 313 
 314         /*
 315          * If a terminating block has been specified, we need to
 316          * handle the case of recursion, which could have their
 317          * own terminating blocks that mess up our parse.  This, by the
 318          * way, means that the "start" and "size" values will be
 319          * effectively meaningless.
 320          */
 321 
 322         ssz = 0;
 323         if (numeric && -1 == (ssz = numescape(&cp[i])))
 324                 return(ESCAPE_ERROR);
 325 
 326         i += ssz;
 327         rlim = -1;
 328 
 329         /*
 330          * We have a character terminator.  Try to read up to that
 331          * character.  If we can't (i.e., we hit the nil), then return
 332          * an error; if we can, calculate our length, read past the
 333          * terminating character, and exit.
 334          */
 335 
 336         if ('\0' != term) {
 337                 *end = strchr(&cp[i], term);
                     /* strchr() returns a null pointer when the terminator is absent. */
 338                 if (NULL == *end)

 339                         return(ESCAPE_ERROR);
 340 
 341                 rlim = *end - &cp[i];
 342                 if (sz)
 343                         *sz = rlim;
 344                 (*end)++;
 345                 goto out;






 346         }
 347 
 348         assert(lim > 0);
 349 
 350         /*
 351          * We have a numeric limit.  If the string is shorter than that,
 352          * stop and return an error.  Else adjust our endpoint, length,
 353          * and return the current glyph.
 354          */
 355 
 356         if ((size_t)lim > strlen(&cp[i]))
 357                 return(ESCAPE_ERROR);


 358 
 359         rlim = lim;
 360         if (sz)
 361                 *sz = rlim;
 362 
 363         *end = &cp[i] + lim;
 364 
 365 out:
 366         assert(rlim >= 0 && rstart);
 367 
 368         /* Run post-processors. */
 369 
 370         switch (gly) {
 371         case (ESCAPE_FONT):


 372                 /*
 373                  * Pretend that the constant-width font modes are the
 374                  * same as the regular font modes.
 375                  */
 376                 if (2 == rlim && 'C' == *rstart)
 377                         rstart++;
 378                 else if (1 != rlim)


 379                         break;



 380 
 381                 switch (*rstart) {
 382                 case ('3'):
 383                         /* FALLTHROUGH */
 384                 case ('B'):
 385                         gly = ESCAPE_FONTBOLD;
 386                         break;
 387                 case ('2'):
 388                         /* FALLTHROUGH */
 389                 case ('I'):
 390                         gly = ESCAPE_FONTITALIC;
 391                         break;
 392                 case ('P'):
 393                         gly = ESCAPE_FONTPREV;
 394                         break;
 395                 case ('1'):
 396                         /* FALLTHROUGH */
 397                 case ('R'):
 398                         gly = ESCAPE_FONTROMAN;
 399                         break;
 400                 }
 401                 break;
 402         case (ESCAPE_SPECIAL):
 403                 if (1 != rlim)
 404                         break;
 405                 if ('c' == *rstart)
 406                         gly = ESCAPE_NOSPACE;
 407                 break;
 408         default:
 409                 break;
 410         }
 411 
 412         return(gly);
 413 }
 414 
 415 void *
 416 mandoc_calloc(size_t num, size_t size)
 417 {
 418         void            *ptr;
 419 
 420         ptr = calloc(num, size);
 421         if (NULL == ptr) {
 422                 perror(NULL);
 423                 exit((int)MANDOCLEVEL_SYSERR);
 424         }
 425 


 467 }
 468 
 469 char *
 470 mandoc_strdup(const char *ptr)
 471 {
             /*
              * strdup(3) wrapper that never returns NULL: when the copy
              * cannot be allocated, the error is reported with perror(3)
              * and the process exits with status MANDOCLEVEL_SYSERR.
              */
 472         char            *p;
 473 
 474         p = strdup(ptr);
 475         if (NULL == p) {
 476                 perror(NULL);
 477                 exit((int)MANDOCLEVEL_SYSERR);
 478         }
 479 
 480         return(p);
 481 }
 482 
 483 /*
 484  * Parse a quoted or unquoted roff-style request or macro argument.
 485  * Return a pointer to the parsed argument, which is either the original
 486  * pointer or advanced by one byte in case the argument is quoted.
 487  * Null-terminate the argument in place.
 488  * Collapse pairs of quotes inside quoted arguments.
 489  * Advance the argument pointer to the next argument,
 490  * or to the null byte terminating the argument line.
      *
      * Pairs of backslashes are collapsed in the same pass, whether or not
      * the argument is quoted.
      *
      * parse and ln are only forwarded to mandoc_msg() for diagnostics.
      * *cpp is the in/out argument pointer; the buffer it points into is
      * modified.  *pos is advanced by the number of bytes consumed.
 491  */
 492 char *
 493 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
 494 {
 495         char     *start, *cp;
 496         int       quoted, pairs, white;
 497 
 498         /* Quoting can only start with a new word. */
 499         start = *cpp;
 500         quoted = 0;
 501         if ('"' == *start) {
 502                 quoted = 1;
 503                 start++;
 504         } 
 505 
             /* pairs counts the bytes dropped so far by collapsing. */
 506         pairs = 0;
 507         white = 0;
 508         for (cp = start; '\0' != *cp; cp++) {
 509                 /* Move left after quoted quotes and escaped backslashes. */




 510                 if (pairs)
 511                         cp[-pairs] = cp[0];

 512                 if ('\\' == cp[0]) {
 513                         if ('\\' == cp[1]) {
 514                                 /* Poor man's copy mode. */







 515                                 pairs++;
 516                                 cp++;
 517                         } else if (0 == quoted && ' ' == cp[1])

 518                                 /* Skip escaped blanks. */

 519                                 cp++;




 520                 } else if (0 == quoted) {
 521                         if (' ' == cp[0]) {
 522                                 /* Unescaped blanks end unquoted args. */
 523                                 white = 1;
 524                                 break;
 525                         }
 526                 } else if ('"' == cp[0]) {
 527                         if ('"' == cp[1]) {
 528                                 /* Quoted quotes collapse. */
 529                                 pairs++;
 530                                 cp++;
 531                         } else {
 532                                 /* Unquoted quotes end quoted args. */
 533                                 quoted = 2;
 534                                 break;
 535                         }
 536                 }
 537         }
 538 
 539         /* Quoted argument without a closing quote. */
 540         if (1 == quoted)
 541                 mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
 542 
 543         /* Null-terminate this argument and move to the next one. */
 544         if (pairs)
 545                 cp[-pairs] = '\0';
 546         if ('\0' != *cp) {
 547                 *cp++ = '\0';
 548                 while (' ' == *cp)
 549                         cp++;
 550         }
             /* The extra column accounts for the opening quote skipped above. */
 551         *pos += (int)(cp - start) + (quoted ? 1 : 0);
 552         *cpp = cp;
 553 
             /*
              * NOTE(review): for an empty, unquoted argument cp still equals
              * the incoming *cpp, so cp[-1] reads the byte before it; this
              * presumably relies on callers never passing the very start of
              * a buffer -- confirm.
              */
 554         if ('\0' == *cp && (white || ' ' == cp[-1]))
 555                 mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
 556 
 557         return(start);
 558 }
 559 
 560 static int
 561 a2time(time_t *t, const char *fmt, const char *p)
 562 {
 563         struct tm        tm;


 660                         /* FALLTHROUGH */
 661                 case (')'):
 662                         if (0 == found)
 663                                 enclosed = 1;
 664                         break;
 665                 case ('.'):
 666                         /* FALLTHROUGH */
 667                 case ('!'):
 668                         /* FALLTHROUGH */
 669                 case ('?'):
 670                         found = 1;
 671                         break;
 672                 default:
 673                         return(found && (!enclosed || isalnum((unsigned char)*q)));
 674                 }
 675         }
 676 
 677         return(found && !enclosed);
 678 }
 679 
 680 /*
 681  * Find out whether a line is a macro line or not.  If it is, adjust the
 682  * current position and return one; if it isn't, return zero and don't
 683  * change the current position.
      *
      * cp is the line and *ppos the offset at which to look.  The control
      * character may be a dot, an apostrophe, or the two-character
      * sequence backslash-dot.  Blanks and tabs following it are skipped
      * too, so on success *ppos is the offset of the first character
      * after them.
 684  */
 685 int
 686 mandoc_getcontrol(const char *cp, int *ppos)
 687 {
 688         int             pos;
 689 
             /* Work on a copy so *ppos stays untouched on the failure path. */
 690         pos = *ppos;
 691 
 692         if ('\\' == cp[pos] && '.' == cp[pos + 1])
 693                 pos += 2;
 694         else if ('.' == cp[pos] || '\'' == cp[pos])
 695                 pos++;
 696         else
 697                 return(0);
 698 
 699         while (' ' == cp[pos] || '\t' == cp[pos])
 700                 pos++;
 701 
 702         *ppos = pos;
 703         return(1);
 704 }
 705 
 706 /*
 707  * Convert a string to a long that may not be <0.
 708  * If the string is invalid, or is less than 0, return -1.
 709  */
 710 int
 711 mandoc_strntoi(const char *p, size_t sz, int base)
 712 {
 713         char             buf[32];
 714         char            *ep;
 715         long             v;
 716 
 717         if (sz > 31)
 718                 return(-1);
 719 
 720         memcpy(buf, p, sz);
 721         buf[(int)sz] = '\0';
 722 
 723         errno = 0;
 724         v = strtol(buf, &ep, base);
 725 
   1 /*      $Id: mandoc.c,v 1.74 2013/12/30 18:30:32 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 #ifdef HAVE_CONFIG_H
  19 #include "config.h"
  20 #endif
  21 
  22 #include <sys/types.h>
  23 
  24 #include <assert.h>
  25 #include <ctype.h>
  26 #include <errno.h>
  27 #include <limits.h>
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <time.h>
  32 
  33 #include "mandoc.h"
  34 #include "libmandoc.h"
  35 
  36 #define DATESIZE 32
  37 
  38 static  int      a2time(time_t *, const char *, const char *);
  39 static  char    *time2a(time_t);

  40 
  41 
  42 enum mandoc_esc
  43 mandoc_escape(const char **end, const char **start, int *sz)







  44 {
             /*
              * Classify the roff escape sequence whose first character
              * (the one following the backslash) is at *end.
              *
              * end:   in/out.  Moved past the escape on success.  It is
              *        advanced by at least one character before the escape
              *        is examined, so it has also moved when ESCAPE_ERROR
              *        is returned.
              * start: optional out (may be NULL).  Receives the position
              *        where the escape's argument substring begins.
              * sz:    optional out (may be NULL).  Receives the length of
              *        that substring.
              *
              * Returns the escape class, or ESCAPE_ERROR for malformed
              * input.  Each case of the switch either sets "term" (a
              * closing delimiter to scan for) or stores a fixed argument
              * length in *sz; the code after the switch consumes whichever
              * was chosen.
              */
  45         const char      *local_start;
  46         int              local_sz;
  47         char             term;
  48         enum mandoc_esc  gly; 
  49 






  50         /*
  51          * When the caller doesn't provide return storage,
  52          * use local storage.
  53          */




  54 
  55         if (NULL == start)
  56                 start = &local_start;
  57         if (NULL == sz)
  58                 sz = &local_sz;
  59 
  60         /*
  61          * Beyond the backslash, at least one input character
  62          * is part of the escape sequence.  With one exception
  63          * (see below), that character won't be returned.
  64          */
  65 
































  66         gly = ESCAPE_ERROR;
  67         *start = ++*end;
  68         *sz = 0;
  69         term = '\0';
  70 
  71         switch ((*start)[-1]) {
  72         /*
  73          * First the glyphs.  There are several different forms of
  74          * these, but each eventually returns a substring of the glyph
  75          * name.
  76          */
  77         case ('('):
  78                 gly = ESCAPE_SPECIAL;
  79                 *sz = 2;
  80                 break;
  81         case ('['):
  82                 gly = ESCAPE_SPECIAL;
  83                 /*
  84                  * Unicode escapes are defined in groff as \[uXXXX] to
  85                  * \[u10FFFF], where the contained value must be a valid
  86                  * Unicode codepoint.  Here, however, only check whether
  87                  * it's not a zero-width escape.
  88                  */
  89                 if ('u' == (*start)[0] && ']' != (*start)[1])
  90                         gly = ESCAPE_UNICODE;
  91                 term = ']';
  92                 break;
  93         case ('C'):
  94                 if ('\'' != **start)
  95                         return(ESCAPE_ERROR);
  96                 *start = ++*end;
  97                 if ('u' == (*start)[0] && '\'' != (*start)[1])
  98                         gly = ESCAPE_UNICODE;
  99                 else
 100                         gly = ESCAPE_SPECIAL;
 101                 term = '\'';
 102                 break;
 103 
 104         /*
 105          * Escapes taking no arguments at all.
 106          */
 107         case ('d'):
 108                 /* FALLTHROUGH */
 109         case ('u'):
 110                 return(ESCAPE_IGNORE);
 111 
 112         /*
 113          * The \z escape is supposed to output the following
 114          * character without advancing the cursor position.  
 115          * Since we are mostly dealing with terminal mode,
 116          * let us just skip the next character.
 117          */
 118         case ('z'):
 119                 return(ESCAPE_SKIPCHAR);
 120 
 121         /*
 122          * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
 123          * 'X' is the trigger.  These have opaque sub-strings.
 124          */
 125         case ('F'):
 126                 /* FALLTHROUGH */
 127         case ('g'):
 128                 /* FALLTHROUGH */
 129         case ('k'):
 130                 /* FALLTHROUGH */
 131         case ('M'):
 132                 /* FALLTHROUGH */
 133         case ('m'):
 134                 /* FALLTHROUGH */
 135         case ('n'):
 136                 /* FALLTHROUGH */
 137         case ('V'):
 138                 /* FALLTHROUGH */
 139         case ('Y'):
 140                 gly = ESCAPE_IGNORE;
 141                 /* FALLTHROUGH */
 142         case ('f'):
 143                 if (ESCAPE_ERROR == gly)
 144                         gly = ESCAPE_FONT;
 145                 switch (**start) {





 146                 case ('('):
 147                         *start = ++*end;
 148                         *sz = 2;
 149                         break;
 150                 case ('['):
 151                         *start = ++*end;
 152                         term = ']';
 153                         break;
 154                 default:
 155                         *sz = 1;

 156                         break;
 157                 }
 158                 break;
 159 
 160         /*
 161          * These escapes are of the form \X'Y', where 'X' is the trigger
 162          * and 'Y' is any string.  These have opaque sub-strings.
 163          */
 164         case ('A'):
 165                 /* FALLTHROUGH */
 166         case ('b'):
 167                 /* FALLTHROUGH */
 168         case ('B'):
 169                 /* FALLTHROUGH */
 170         case ('D'):
 171                 /* FALLTHROUGH */
 172         case ('o'):
 173                 /* FALLTHROUGH */
 174         case ('R'):
 175                 /* FALLTHROUGH */
 176         case ('w'):
 177                 /* FALLTHROUGH */
 178         case ('X'):
 179                 /* FALLTHROUGH */
 180         case ('Z'):
 181                 if ('\'' != **start)
 182                         return(ESCAPE_ERROR);
 183                 gly = ESCAPE_IGNORE;
 184                 *start = ++*end;
 185                 term = '\'';
 186                 break;
 187 
 188         /*
 189          * These escapes are of the form \X'N', where 'X' is the trigger
 190          * and 'N' resolves to a numerical expression.
 191          */


 192         case ('h'):
 193                 /* FALLTHROUGH */
 194         case ('H'):
 195                 /* FALLTHROUGH */
 196         case ('L'):
 197                 /* FALLTHROUGH */
 198         case ('l'):

 199                 /* FALLTHROUGH */
 200         case ('S'):
 201                 /* FALLTHROUGH */
 202         case ('v'):
 203                 /* FALLTHROUGH */


 204         case ('x'):
 205                 if ('\'' != **start)


 206                         return(ESCAPE_ERROR);
 207                 gly = ESCAPE_IGNORE;
 208                 *start = ++*end;
 209                 term = '\'';
 210                 break;
 211 
 212         /*
 213          * Special handling for the numbered character escape.
 214          * XXX Do any other escapes need similar handling?
 215          */
 216         case ('N'):
 217                 if ('\0' == **start)
 218                         return(ESCAPE_ERROR);
                     /* Consume the character after 'N' (a digit or a delimiter). */
 219                 (*end)++;
 220                 if (isdigit((unsigned char)**start)) {
 221                         *sz = 1;
 222                         return(ESCAPE_IGNORE);
 223                 }
                     /* Delimited form: collect the digits, then the closing delimiter. */
 224                 (*start)++;
 225                 while (isdigit((unsigned char)**end))
 226                         (*end)++;
 227                 *sz = *end - *start;



 228                 if ('\0' != **end)
 229                         (*end)++;
 230                 return(ESCAPE_NUMBERED);
 231 
 232         /* 
 233          * Sizes get a special category of their own.
 234          */
 235         case ('s'):
 236                 gly = ESCAPE_IGNORE;
 237 




 238                 /* See +/- counts as a sign. */
                     /* Only *end moves here; *start stays on the sign. */
 239                 if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
 240                         (*end)++;

 241 
 242                 switch (**end) {
 243                 case ('('):
 244                         *start = ++*end;
 245                         *sz = 2;
 246                         break;
 247                 case ('['):
 248                         *start = ++*end;
 249                         term = ']';
 250                         break;
 251                 case ('\''):
 252                         *start = ++*end;
 253                         term = '\'';
 254                         break;
 255                 default:
 256                         *sz = 1;

 257                         break;
 258                 }
 259 





 260                 break;
 261 
 262         /*
 263          * Anything else is assumed to be a glyph.
 264          * In this case, pass back the character after the backslash.
 265          */
 266         default:
 267                 gly = ESCAPE_SPECIAL;
 268                 *start = --*end;
 269                 *sz = 1;
 270                 break;
 271         }
 272 
 273         assert(ESCAPE_ERROR != gly);
 274 




 275         /*
 276          * Read up to the terminating character,
 277          * paying attention to nested escapes.



 278          */
 279 














 280         if ('\0' != term) {
 281                 while (**end != term) {
 282                         switch (**end) {
 283                         case ('\0'):
 284                                 return(ESCAPE_ERROR);
 285                         case ('\\'):



 286                                 (*end)++;
 287                                 if (ESCAPE_ERROR ==
 288                                     mandoc_escape(end, NULL, NULL))
 289                                         return(ESCAPE_ERROR);
 290                                 break;
 291                         default:
 292                                 (*end)++;
 293                                 break;
 294                         }
 295                 }
                     /* Length excludes the terminator; *end then steps over it. */
 296                 *sz = (*end)++ - *start;
 297         } else {
 298                 assert(*sz > 0);
                     /*
                      * Measure what is left from *end, not from *start: after
                      * "\s" followed by a sign, *start still points at the sign
                      * while *end is one byte further, so a check against
                      * *start would let *end step past the terminating NUL.
                      * On every other path the two pointers are equal here.
                      */
 299                 if ((size_t)*sz > strlen(*end))





 300                         return(ESCAPE_ERROR);
 301                 *end += *sz;
 302         }
 303 









 304         /* Run post-processors. */
 305 
 306         switch (gly) {
 307         case (ESCAPE_FONT):
 308                 if (2 == *sz) {
 309                         if ('C' == **start) {
 310                                 /*
 311                                  * Treat constant-width font modes
 312                                  * just like regular font modes.
 313                                  */
 314                                 (*start)++;
 315                                 (*sz)--;
 316                         } else {
 317                                 if ('B' == (*start)[0] && 'I' == (*start)[1])
 318                                         gly = ESCAPE_FONTBI;
 319                                 break;
 320                         }
 321                 } else if (1 != *sz)
 322                         break;
 323 
 324                 switch (**start) {
 325                 case ('3'):
 326                         /* FALLTHROUGH */
 327                 case ('B'):
 328                         gly = ESCAPE_FONTBOLD;
 329                         break;
 330                 case ('2'):
 331                         /* FALLTHROUGH */
 332                 case ('I'):
 333                         gly = ESCAPE_FONTITALIC;
 334                         break;
 335                 case ('P'):
 336                         gly = ESCAPE_FONTPREV;
 337                         break;
 338                 case ('1'):
 339                         /* FALLTHROUGH */
 340                 case ('R'):
 341                         gly = ESCAPE_FONTROMAN;
 342                         break;
 343                 }
 344                 break;
 345         case (ESCAPE_SPECIAL):
 346                 if (1 == *sz && 'c' == **start)


 347                         gly = ESCAPE_NOSPACE;
 348                 break;
 349         default:
 350                 break;
 351         }
 352 
 353         return(gly);
 354 }
 355 
 356 void *
 357 mandoc_calloc(size_t num, size_t size)
 358 {
 359         void            *ptr;
 360 
 361         ptr = calloc(num, size);
 362         if (NULL == ptr) {
 363                 perror(NULL);
 364                 exit((int)MANDOCLEVEL_SYSERR);
 365         }
 366 


 408 }
 409 
 410 char *
 411 mandoc_strdup(const char *ptr)
 412 {
 413         char            *p;
 414 
 415         p = strdup(ptr);
 416         if (NULL == p) {
 417                 perror(NULL);
 418                 exit((int)MANDOCLEVEL_SYSERR);
 419         }
 420 
 421         return(p);
 422 }
 423 
 424 /*
 425  * Parse a quoted or unquoted roff-style request or macro argument.
 426  * Return a pointer to the parsed argument, which is either the original
 427  * pointer or advanced by one byte in case the argument is quoted.
 428  * NUL-terminate the argument in place.
 429  * Collapse pairs of quotes inside quoted arguments.
 430  * Advance the argument pointer to the next argument,
 431  * or to the NUL byte terminating the argument line.
 432  */
 433 char *
 434 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
 435 {
 436         char     *start, *cp;
 437         int       quoted, pairs, white;
 438 
 439         /* Quoting can only start with a new word. */
 440         start = *cpp;
 441         quoted = 0;
 442         if ('"' == *start) {
 443                 quoted = 1;
 444                 start++;
 445         } 
 446 
 447         pairs = 0;
 448         white = 0;
 449         for (cp = start; '\0' != *cp; cp++) {
 450 
 451                 /*
 452                  * Move the following text left
 453                  * after quoted quotes and after "\\" and "\t".
 454                  */
 455                 if (pairs)
 456                         cp[-pairs] = cp[0];
 457 
 458                 if ('\\' == cp[0]) {
 459                         /*
 460                          * In copy mode, translate double to single
 461                          * backslashes and backslash-t to literal tabs.
 462                          */
 463                         switch (cp[1]) {
 464                         case ('t'):
 465                                 cp[0] = '\t';
 466                                 /* FALLTHROUGH */
 467                         case ('\\'):
 468                                 pairs++;
 469                                 cp++;
 470                                 break;
 471                         case (' '):
 472                                 /* Skip escaped blanks. */
 473                                 if (0 == quoted)
 474                                         cp++;
 475                                 break;
 476                         default:
 477                                 break;
 478                         }
 479                 } else if (0 == quoted) {
 480                         if (' ' == cp[0]) {
 481                                 /* Unescaped blanks end unquoted args. */
 482                                 white = 1;
 483                                 break;
 484                         }
 485                 } else if ('"' == cp[0]) {
 486                         if ('"' == cp[1]) {
 487                                 /* Quoted quotes collapse. */
 488                                 pairs++;
 489                                 cp++;
 490                         } else {
 491                                 /* Unquoted quotes end quoted args. */
 492                                 quoted = 2;
 493                                 break;
 494                         }
 495                 }
 496         }
 497 
 498         /* Quoted argument without a closing quote. */
 499         if (1 == quoted)
 500                 mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
 501 
 502         /* NUL-terminate this argument and move to the next one. */
 503         if (pairs)
 504                 cp[-pairs] = '\0';
 505         if ('\0' != *cp) {
 506                 *cp++ = '\0';
 507                 while (' ' == *cp)
 508                         cp++;
 509         }
 510         *pos += (int)(cp - start) + (quoted ? 1 : 0);
 511         *cpp = cp;
 512 
 513         if ('\0' == *cp && (white || ' ' == cp[-1]))
 514                 mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
 515 
 516         return(start);
 517 }
 518 
 519 static int
 520 a2time(time_t *t, const char *fmt, const char *p)
 521 {
 522         struct tm        tm;


 619                         /* FALLTHROUGH */
 620                 case (')'):
 621                         if (0 == found)
 622                                 enclosed = 1;
 623                         break;
 624                 case ('.'):
 625                         /* FALLTHROUGH */
 626                 case ('!'):
 627                         /* FALLTHROUGH */
 628                 case ('?'):
 629                         found = 1;
 630                         break;
 631                 default:
 632                         return(found && (!enclosed || isalnum((unsigned char)*q)));
 633                 }
 634         }
 635 
 636         return(found && !enclosed);
 637 }
 638 


























 639 /*
 640  * Convert a string to a long that may not be <0.
 641  * If the string is invalid, or is less than 0, return -1.
 642  */
 643 int
 644 mandoc_strntoi(const char *p, size_t sz, int base)
 645 {
 646         char             buf[32];
 647         char            *ep;
 648         long             v;
 649 
 650         if (sz > 31)
 651                 return(-1);
 652 
 653         memcpy(buf, p, sz);
 654         buf[(int)sz] = '\0';
 655 
 656         errno = 0;
 657         v = strtol(buf, &ep, base);
 658