/*	$Id: roff.h,v 1.59 2018/04/11 17:11:13 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

  19 struct  ohash;
  20 struct  mdoc_arg;
  21 union   mdoc_data;
  22 
  23 enum    roff_macroset {
  24         MACROSET_NONE = 0,
  25         MACROSET_MDOC,
  26         MACROSET_MAN
  27 };
  28 
  29 enum    roff_sec {
  30         SEC_NONE = 0,
  31         SEC_NAME,
  32         SEC_LIBRARY,
  33         SEC_SYNOPSIS,
  34         SEC_DESCRIPTION,
  35         SEC_CONTEXT,
  36         SEC_IMPLEMENTATION,     /* IMPLEMENTATION NOTES */
  37         SEC_RETURN_VALUES,
  38         SEC_ENVIRONMENT,
  39         SEC_FILES,
  40         SEC_EXIT_STATUS,
  41         SEC_EXAMPLES,
  42         SEC_DIAGNOSTICS,
  43         SEC_COMPATIBILITY,
  44         SEC_ERRORS,
  45         SEC_SEE_ALSO,
  46         SEC_STANDARDS,
  47         SEC_HISTORY,
  48         SEC_AUTHORS,
  49         SEC_CAVEATS,
  50         SEC_BUGS,
  51         SEC_SECURITY,
  52         SEC_CUSTOM,
  53         SEC__MAX
  54 };
  55 
  56 enum    roff_type {
  57         ROFFT_ROOT,
  58         ROFFT_BLOCK,
  59         ROFFT_HEAD,
  60         ROFFT_BODY,
  61         ROFFT_TAIL,
  62         ROFFT_ELEM,
  63         ROFFT_TEXT,
  64         ROFFT_COMMENT,
  65         ROFFT_TBL,
  66         ROFFT_EQN
  67 };
  68 
  69 enum    roff_tok {
  70         ROFF_br = 0,
  71         ROFF_ce,
  72         ROFF_ft,
  73         ROFF_ll,
  74         ROFF_mc,
  75         ROFF_po,
  76         ROFF_rj,
  77         ROFF_sp,
  78         ROFF_ta,
  79         ROFF_ti,
  80         ROFF_MAX,
  81         ROFF_ab,
  82         ROFF_ad,
  83         ROFF_af,
  84         ROFF_aln,
  85         ROFF_als,
  86         ROFF_am,
  87         ROFF_am1,
  88         ROFF_ami,
  89         ROFF_ami1,
  90         ROFF_as,
  91         ROFF_as1,
  92         ROFF_asciify,
  93         ROFF_backtrace,
  94         ROFF_bd,
  95         ROFF_bleedat,
  96         ROFF_blm,
  97         ROFF_box,
  98         ROFF_boxa,
  99         ROFF_bp,
 100         ROFF_BP,
 101         ROFF_break,
 102         ROFF_breakchar,
 103         ROFF_brnl,
 104         ROFF_brp,
 105         ROFF_brpnl,
 106         ROFF_c2,
 107         ROFF_cc,
 108         ROFF_cf,
 109         ROFF_cflags,
 110         ROFF_ch,
 111         ROFF_char,
 112         ROFF_chop,
 113         ROFF_class,
 114         ROFF_close,
 115         ROFF_CL,
 116         ROFF_color,
 117         ROFF_composite,
 118         ROFF_continue,
 119         ROFF_cp,
 120         ROFF_cropat,
 121         ROFF_cs,
 122         ROFF_cu,
 123         ROFF_da,
 124         ROFF_dch,
 125         ROFF_Dd,
 126         ROFF_de,
 127         ROFF_de1,
 128         ROFF_defcolor,
 129         ROFF_dei,
 130         ROFF_dei1,
 131         ROFF_device,
 132         ROFF_devicem,
 133         ROFF_di,
 134         ROFF_do,
 135         ROFF_ds,
 136         ROFF_ds1,
 137         ROFF_dwh,
 138         ROFF_dt,
 139         ROFF_ec,
 140         ROFF_ecr,
 141         ROFF_ecs,
 142         ROFF_el,
 143         ROFF_em,
 144         ROFF_EN,
 145         ROFF_eo,
 146         ROFF_EP,
 147         ROFF_EQ,
 148         ROFF_errprint,
 149         ROFF_ev,
 150         ROFF_evc,
 151         ROFF_ex,
 152         ROFF_fallback,
 153         ROFF_fam,
 154         ROFF_fc,
 155         ROFF_fchar,
 156         ROFF_fcolor,
 157         ROFF_fdeferlig,
 158         ROFF_feature,
 159         /* MAN_fi; ignored in mdoc(7) */
 160         ROFF_fkern,
 161         ROFF_fl,
 162         ROFF_flig,
 163         ROFF_fp,
 164         ROFF_fps,
 165         ROFF_fschar,
 166         ROFF_fspacewidth,
 167         ROFF_fspecial,
 168         ROFF_ftr,
 169         ROFF_fzoom,
 170         ROFF_gcolor,
 171         ROFF_hc,
 172         ROFF_hcode,
 173         ROFF_hidechar,
 174         ROFF_hla,
 175         ROFF_hlm,
 176         ROFF_hpf,
 177         ROFF_hpfa,
 178         ROFF_hpfcode,
 179         ROFF_hw,
 180         ROFF_hy,
 181         ROFF_hylang,
 182         ROFF_hylen,
 183         ROFF_hym,
 184         ROFF_hypp,
 185         ROFF_hys,
 186         ROFF_ie,
 187         ROFF_if,
 188         ROFF_ig,
 189         /* MAN_in; ignored in mdoc(7) */
 190         ROFF_index,
 191         ROFF_it,
 192         ROFF_itc,
 193         ROFF_IX,
 194         ROFF_kern,
 195         ROFF_kernafter,
 196         ROFF_kernbefore,
 197         ROFF_kernpair,
 198         ROFF_lc,
 199         ROFF_lc_ctype,
 200         ROFF_lds,
 201         ROFF_length,
 202         ROFF_letadj,
 203         ROFF_lf,
 204         ROFF_lg,
 205         ROFF_lhang,
 206         ROFF_linetabs,
 207         ROFF_lnr,
 208         ROFF_lnrf,
 209         ROFF_lpfx,
 210         ROFF_ls,
 211         ROFF_lsm,
 212         ROFF_lt,
 213         ROFF_mediasize,
 214         ROFF_minss,
 215         ROFF_mk,
 216         ROFF_mso,
 217         ROFF_na,
 218         ROFF_ne,
 219         /* MAN_nf; ignored in mdoc(7) */
 220         ROFF_nh,
 221         ROFF_nhychar,
 222         ROFF_nm,
 223         ROFF_nn,
 224         ROFF_nop,
 225         ROFF_nr,
 226         ROFF_nrf,
 227         ROFF_nroff,
 228         ROFF_ns,
 229         ROFF_nx,
 230         ROFF_open,
 231         ROFF_opena,
 232         ROFF_os,
 233         ROFF_output,
 234         ROFF_padj,
 235         ROFF_papersize,
 236         ROFF_pc,
 237         ROFF_pev,
 238         ROFF_pi,
 239         ROFF_PI,
 240         ROFF_pl,
 241         ROFF_pm,
 242         ROFF_pn,
 243         ROFF_pnr,
 244         ROFF_ps,
 245         ROFF_psbb,
 246         ROFF_pshape,
 247         ROFF_pso,
 248         ROFF_ptr,
 249         ROFF_pvs,
 250         ROFF_rchar,
 251         ROFF_rd,
 252         ROFF_recursionlimit,
 253         ROFF_return,
 254         ROFF_rfschar,
 255         ROFF_rhang,
 256         ROFF_rm,
 257         ROFF_rn,
 258         ROFF_rnn,
 259         ROFF_rr,
 260         ROFF_rs,
 261         ROFF_rt,
 262         ROFF_schar,
 263         ROFF_sentchar,
 264         ROFF_shc,
 265         ROFF_shift,
 266         ROFF_sizes,
 267         ROFF_so,
 268         ROFF_spacewidth,
 269         ROFF_special,
 270         ROFF_spreadwarn,
 271         ROFF_ss,
 272         ROFF_sty,
 273         ROFF_substring,
 274         ROFF_sv,
 275         ROFF_sy,
 276         ROFF_T_,
 277         ROFF_tc,
 278         ROFF_TE,
 279         ROFF_TH,
 280         ROFF_tkf,
 281         ROFF_tl,
 282         ROFF_tm,
 283         ROFF_tm1,
 284         ROFF_tmc,
 285         ROFF_tr,
 286         ROFF_track,
 287         ROFF_transchar,
 288         ROFF_trf,
 289         ROFF_trimat,
 290         ROFF_trin,
 291         ROFF_trnt,
 292         ROFF_troff,
 293         ROFF_TS,
 294         ROFF_uf,
 295         ROFF_ul,
 296         ROFF_unformat,
 297         ROFF_unwatch,
 298         ROFF_unwatchn,
 299         ROFF_vpt,
 300         ROFF_vs,
 301         ROFF_warn,
 302         ROFF_warnscale,
 303         ROFF_watch,
 304         ROFF_watchlength,
 305         ROFF_watchn,
 306         ROFF_wh,
 307         ROFF_while,
 308         ROFF_write,
 309         ROFF_writec,
 310         ROFF_writem,
 311         ROFF_xflag,
 312         ROFF_cblock,
 313         ROFF_RENAMED,
 314         ROFF_USERDEF,
 315         TOKEN_NONE,
 316         MDOC_Dd,
 317         MDOC_Dt,
 318         MDOC_Os,
 319         MDOC_Sh,
 320         MDOC_Ss,
 321         MDOC_Pp,
 322         MDOC_D1,
 323         MDOC_Dl,
 324         MDOC_Bd,
 325         MDOC_Ed,
 326         MDOC_Bl,
 327         MDOC_El,
 328         MDOC_It,
 329         MDOC_Ad,
 330         MDOC_An,
 331         MDOC_Ap,
 332         MDOC_Ar,
 333         MDOC_Cd,
 334         MDOC_Cm,
 335         MDOC_Dv,
 336         MDOC_Er,
 337         MDOC_Ev,
 338         MDOC_Ex,
 339         MDOC_Fa,
 340         MDOC_Fd,
 341         MDOC_Fl,
 342         MDOC_Fn,
 343         MDOC_Ft,
 344         MDOC_Ic,
 345         MDOC_In,
 346         MDOC_Li,
 347         MDOC_Nd,
 348         MDOC_Nm,
 349         MDOC_Op,
 350         MDOC_Ot,
 351         MDOC_Pa,
 352         MDOC_Rv,
 353         MDOC_St,
 354         MDOC_Va,
 355         MDOC_Vt,
 356         MDOC_Xr,
 357         MDOC__A,
 358         MDOC__B,
 359         MDOC__D,
 360         MDOC__I,
 361         MDOC__J,
 362         MDOC__N,
 363         MDOC__O,
 364         MDOC__P,
 365         MDOC__R,
 366         MDOC__T,
 367         MDOC__V,
 368         MDOC_Ac,
 369         MDOC_Ao,
 370         MDOC_Aq,
 371         MDOC_At,
 372         MDOC_Bc,
 373         MDOC_Bf,
 374         MDOC_Bo,
 375         MDOC_Bq,
 376         MDOC_Bsx,
 377         MDOC_Bx,
 378         MDOC_Db,
 379         MDOC_Dc,
 380         MDOC_Do,
 381         MDOC_Dq,
 382         MDOC_Ec,
 383         MDOC_Ef,
 384         MDOC_Em,
 385         MDOC_Eo,
 386         MDOC_Fx,
 387         MDOC_Ms,
 388         MDOC_No,
 389         MDOC_Ns,
 390         MDOC_Nx,
 391         MDOC_Ox,
 392         MDOC_Pc,
 393         MDOC_Pf,
 394         MDOC_Po,
 395         MDOC_Pq,
 396         MDOC_Qc,
 397         MDOC_Ql,
 398         MDOC_Qo,
 399         MDOC_Qq,
 400         MDOC_Re,
 401         MDOC_Rs,
 402         MDOC_Sc,
 403         MDOC_So,
 404         MDOC_Sq,
 405         MDOC_Sm,
 406         MDOC_Sx,
 407         MDOC_Sy,
 408         MDOC_Tn,
 409         MDOC_Ux,
 410         MDOC_Xc,
 411         MDOC_Xo,
 412         MDOC_Fo,
 413         MDOC_Fc,
 414         MDOC_Oo,
 415         MDOC_Oc,
 416         MDOC_Bk,
 417         MDOC_Ek,
 418         MDOC_Bt,
 419         MDOC_Hf,
 420         MDOC_Fr,
 421         MDOC_Ud,
 422         MDOC_Lb,
 423         MDOC_Lp,
 424         MDOC_Lk,
 425         MDOC_Mt,
 426         MDOC_Brq,
 427         MDOC_Bro,
 428         MDOC_Brc,
 429         MDOC__C,
 430         MDOC_Es,
 431         MDOC_En,
 432         MDOC_Dx,
 433         MDOC__Q,
 434         MDOC__U,
 435         MDOC_Ta,
 436         MDOC_MAX,
 437         MAN_TH,
 438         MAN_SH,
 439         MAN_SS,
 440         MAN_TP,
 441         MAN_LP,
 442         MAN_PP,
 443         MAN_P,
 444         MAN_IP,
 445         MAN_HP,
 446         MAN_SM,
 447         MAN_SB,
 448         MAN_BI,
 449         MAN_IB,
 450         MAN_BR,
 451         MAN_RB,
 452         MAN_R,
 453         MAN_B,
 454         MAN_I,
 455         MAN_IR,
 456         MAN_RI,
 457         MAN_nf,
 458         MAN_fi,
 459         MAN_RE,
 460         MAN_RS,
 461         MAN_DT,
 462         MAN_UC,
 463         MAN_PD,
 464         MAN_AT,
 465         MAN_in,
 466         MAN_OP,
 467         MAN_EX,
 468         MAN_EE,
 469         MAN_UR,
 470         MAN_UE,
 471         MAN_MT,
 472         MAN_ME,
 473         MAN_MAX
 474 };
 475 
 476 enum    roff_next {
 477         ROFF_NEXT_SIBLING = 0,
 478         ROFF_NEXT_CHILD
 479 };
 480 
 481 /*
 482  * Indicates that a BODY's formatting has ended, but
 483  * the scope is still open.  Used for badly nested blocks.
 484  */
 485 enum    mdoc_endbody {
 486         ENDBODY_NOT = 0,
 487         ENDBODY_SPACE   /* Is broken: append a space. */
 488 };
 489 
 490 struct  roff_node {
 491         struct roff_node *parent;  /* Parent AST node. */
 492         struct roff_node *child;   /* First child AST node. */
 493         struct roff_node *last;    /* Last child AST node. */
 494         struct roff_node *next;    /* Sibling AST node. */
 495         struct roff_node *prev;    /* Prior sibling AST node. */
 496         struct roff_node *head;    /* BLOCK */
 497         struct roff_node *body;    /* BLOCK/ENDBODY */
 498         struct roff_node *tail;    /* BLOCK */
 499         struct mdoc_arg  *args;    /* BLOCK/ELEM */
 500         union mdoc_data  *norm;    /* Normalized arguments. */
 501         char             *string;  /* TEXT */
 502         const struct tbl_span *span; /* TBL */
 503         struct eqn_box   *eqn;     /* EQN */
 504         int               line;    /* Input file line number. */
 505         int               pos;     /* Input file column number. */
 506         int               flags;
 507 #define NODE_VALID       (1 << 0)  /* Has been validated. */
 508 #define NODE_ENDED       (1 << 1)  /* Gone past body end mark. */
 509 #define NODE_EOS         (1 << 2)  /* At sentence boundary. */
 510 #define NODE_LINE        (1 << 3)  /* First macro/text on line. */
 511 #define NODE_SYNPRETTY   (1 << 4)  /* SYNOPSIS-style formatting. */
 512 #define NODE_BROKEN      (1 << 5)  /* Must validate parent when ending. */
 513 #define NODE_DELIMO      (1 << 6)
 514 #define NODE_DELIMC      (1 << 7)
 515 #define NODE_NOSRC       (1 << 8)  /* Generated node, not in input file. */
 516 #define NODE_NOPRT       (1 << 9)  /* Shall not print anything. */
 517         int               prev_font; /* Before entering this node. */
 518         int               aux;     /* Decoded node data, type-dependent. */
 519         enum roff_tok     tok;     /* Request or macro ID. */
 520         enum roff_type    type;    /* AST node type. */
 521         enum roff_sec     sec;     /* Current named section. */
 522         enum mdoc_endbody end;     /* BODY */
 523 };
 524 
 525 struct  roff_meta {
 526         char             *msec;    /* Manual section, usually a digit. */
 527         char             *vol;     /* Manual volume title. */
 528         char             *os;      /* Operating system. */
 529         char             *arch;    /* Machine architecture. */
 530         char             *title;   /* Manual title, usually CAPS. */
 531         char             *name;    /* Leading manual name. */
 532         char             *date;    /* Normalized date. */
 533         int               hasbody; /* Document is not empty. */
 534         int               rcsids;  /* Bits indexed by enum mandoc_os. */
 535         enum mandoc_os    os_e;    /* Operating system. */
 536 };
 537 
 538 struct  roff_man {
 539         struct roff_meta  meta;    /* Document meta-data. */
 540         struct mparse    *parse;   /* Parse pointer. */
 541         struct roff      *roff;    /* Roff parser state data. */
 542         struct ohash     *mdocmac; /* Mdoc macro lookup table. */
 543         struct ohash     *manmac;  /* Man macro lookup table. */
 544         const char       *os_s;    /* Default operating system. */
 545         struct roff_node *first;   /* The first node parsed. */
 546         struct roff_node *last;    /* The last node parsed. */
 547         struct roff_node *last_es; /* The most recent Es node. */
 548         int               quick;   /* Abort parse early. */
 549         int               flags;   /* Parse flags. */
 550 #define MDOC_LITERAL     (1 << 1)  /* In a literal scope. */
 551 #define MDOC_PBODY       (1 << 2)  /* In the document body. */
 552 #define MDOC_NEWLINE     (1 << 3)  /* First macro/text in a line. */
 553 #define MDOC_PHRASE      (1 << 4)  /* In a Bl -column phrase. */
 554 #define MDOC_PHRASELIT   (1 << 5)  /* Literal within a phrase. */
 555 #define MDOC_FREECOL     (1 << 6)  /* `It' invocation should close. */
 556 #define MDOC_SYNOPSIS    (1 << 7)  /* SYNOPSIS-style formatting. */
 557 #define MDOC_KEEP        (1 << 8)  /* In a word keep. */
 558 #define MDOC_SMOFF       (1 << 9)  /* Spacing is off. */
 559 #define MDOC_NODELIMC    (1 << 10) /* Disable closing delimiter handling. */
 560 #define MAN_ELINE        (1 << 11) /* Next-line element scope. */
 561 #define MAN_BLINE        (1 << 12) /* Next-line block scope. */
 562 #define MDOC_PHRASEQF    (1 << 13) /* Quote first word encountered. */
 563 #define MDOC_PHRASEQL    (1 << 14) /* Quote last word of this phrase. */
 564 #define MDOC_PHRASEQN    (1 << 15) /* Quote first word of the next phrase. */
 565 #define MAN_LITERAL       MDOC_LITERAL
 566 #define MAN_NEWLINE       MDOC_NEWLINE
 567         enum roff_macroset macroset; /* Kind of high-level macros used. */
 568         enum roff_sec     lastsec; /* Last section seen. */
 569         enum roff_sec     lastnamed; /* Last standard section seen. */
 570         enum roff_next    next;    /* Where to put the next node. */
 571 };
 572 
 573 extern  const char *const *roff_name;
 574 
 575 
 576 void             deroff(char **, const struct roff_node *);
 577 struct ohash    *roffhash_alloc(enum roff_tok, enum roff_tok);
 578 enum roff_tok    roffhash_find(struct ohash *, const char *, size_t);
 579 void             roffhash_free(struct ohash *);
 580 void             roff_validate(struct roff_man *);