1 /*      $Id: roff.h,v 1.58 2017/07/08 14:51:05 schwarze Exp $   */
   2 /*
   3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
   5  *
   6  * Permission to use, copy, modify, and distribute this software for any
   7  * purpose with or without fee is hereby granted, provided that the above
   8  * copyright notice and this permission notice appear in all copies.
   9  *
  10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
  11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17  */
  18 
   19 struct  ohash;      /* Opaque here; only used via pointers (lookup tables). */
   20 struct  mdoc_arg;   /* Opaque here; only used via roff_node.args. */
   21 union   mdoc_data;  /* Opaque here; only used via roff_node.norm. */
  22 
   23 enum    roff_macroset {
              /* Kind of high-level macros used; stored in roff_man.macroset. */
   24         MACROSET_NONE = 0,  /* No macro set. */
   25         MACROSET_MDOC,      /* mdoc(7) */
   26         MACROSET_MAN        /* man(7) */
   27 };
  28 
   29 enum    roff_sec {
              /*
               * Manual page section identifiers.  Used for roff_node.sec
               * and for roff_man.lastsec and roff_man.lastnamed.
               */
   30         SEC_NONE = 0,
   31         SEC_NAME,
   32         SEC_LIBRARY,
   33         SEC_SYNOPSIS,
   34         SEC_DESCRIPTION,
   35         SEC_CONTEXT,
   36         SEC_IMPLEMENTATION,     /* IMPLEMENTATION NOTES */
   37         SEC_RETURN_VALUES,
   38         SEC_ENVIRONMENT,
   39         SEC_FILES,
   40         SEC_EXIT_STATUS,
   41         SEC_EXAMPLES,
   42         SEC_DIAGNOSTICS,
   43         SEC_COMPATIBILITY,
   44         SEC_ERRORS,
   45         SEC_SEE_ALSO,
   46         SEC_STANDARDS,
   47         SEC_HISTORY,
   48         SEC_AUTHORS,
   49         SEC_CAVEATS,
   50         SEC_BUGS,
   51         SEC_SECURITY,
   52         SEC_CUSTOM,     /* NOTE(review): presumably any non-standard section; confirm. */
   53         SEC__MAX        /* Last enumerator; equals the number of values above. */
   54 };
  55 
   56 enum    roff_type {
              /*
               * Kind of a syntax tree node; stored in roff_node.type.
               * The notes below repeat which roff_node members are
               * marked as belonging to which node type.
               */
   57         ROFFT_ROOT,
   58         ROFFT_BLOCK,    /* Members: head, body, tail, args. */
   59         ROFFT_HEAD,
   60         ROFFT_BODY,     /* Member: end. */
   61         ROFFT_TAIL,
   62         ROFFT_ELEM,     /* Member: args. */
   63         ROFFT_TEXT,     /* Member: string. */
   64         ROFFT_TBL,      /* Member: span. */
   65         ROFFT_EQN       /* Member: eqn. */
   66 };
  67 
   68 enum    roff_tok {
              /*
               * Request and macro identifiers; stored in roff_node.tok.
               * One enumeration holds three consecutive groups:
               * ROFF_* (roff requests), MDOC_* (mdoc(7) macros), and
               * MAN_* (man(7) macros).  The order fixes the numeric
               * values, so entries must not be reordered casually.
               * NOTE(review): roff_name is presumably indexed by these
               * values; confirm before inserting or moving entries.
               */
   69         ROFF_br = 0,
   70         ROFF_ce,
   71         ROFF_ft,
   72         ROFF_ll,
   73         ROFF_mc,
   74         ROFF_po,
   75         ROFF_rj,
   76         ROFF_sp,
   77         ROFF_ta,
   78         ROFF_ti,
   79         ROFF_MAX,       /* == 10.  Not the last ROFF_* entry: NOTE(review): it appears to end the first group of requests (br to ti); confirm. */
   80         ROFF_ab,
   81         ROFF_ad,
   82         ROFF_af,
   83         ROFF_aln,
   84         ROFF_als,
   85         ROFF_am,
   86         ROFF_am1,
   87         ROFF_ami,
   88         ROFF_ami1,
   89         ROFF_as,
   90         ROFF_as1,
   91         ROFF_asciify,
   92         ROFF_backtrace,
   93         ROFF_bd,
   94         ROFF_bleedat,
   95         ROFF_blm,
   96         ROFF_box,
   97         ROFF_boxa,
   98         ROFF_bp,
   99         ROFF_BP,
  100         ROFF_break,
  101         ROFF_breakchar,
  102         ROFF_brnl,
  103         ROFF_brp,
  104         ROFF_brpnl,
  105         ROFF_c2,
  106         ROFF_cc,
  107         ROFF_cf,
  108         ROFF_cflags,
  109         ROFF_ch,
  110         ROFF_char,
  111         ROFF_chop,
  112         ROFF_class,
  113         ROFF_close,
  114         ROFF_CL,
  115         ROFF_color,
  116         ROFF_composite,
  117         ROFF_continue,
  118         ROFF_cp,
  119         ROFF_cropat,
  120         ROFF_cs,
  121         ROFF_cu,
  122         ROFF_da,
  123         ROFF_dch,
  124         ROFF_Dd,
  125         ROFF_de,
  126         ROFF_de1,
  127         ROFF_defcolor,
  128         ROFF_dei,
  129         ROFF_dei1,
  130         ROFF_device,
  131         ROFF_devicem,
  132         ROFF_di,
  133         ROFF_do,
  134         ROFF_ds,
  135         ROFF_ds1,
  136         ROFF_dwh,
  137         ROFF_dt,
  138         ROFF_ec,
  139         ROFF_ecr,
  140         ROFF_ecs,
  141         ROFF_el,
  142         ROFF_em,
  143         ROFF_EN,
  144         ROFF_eo,
  145         ROFF_EP,
  146         ROFF_EQ,
  147         ROFF_errprint,
  148         ROFF_ev,
  149         ROFF_evc,
  150         ROFF_ex,
  151         ROFF_fallback,
  152         ROFF_fam,
  153         ROFF_fc,
  154         ROFF_fchar,
  155         ROFF_fcolor,
  156         ROFF_fdeferlig,
  157         ROFF_feature,
  158         /* MAN_fi; ignored in mdoc(7) */
  159         ROFF_fkern,
  160         ROFF_fl,
  161         ROFF_flig,
  162         ROFF_fp,
  163         ROFF_fps,
  164         ROFF_fschar,
  165         ROFF_fspacewidth,
  166         ROFF_fspecial,
  167         ROFF_ftr,
  168         ROFF_fzoom,
  169         ROFF_gcolor,
  170         ROFF_hc,
  171         ROFF_hcode,
  172         ROFF_hidechar,
  173         ROFF_hla,
  174         ROFF_hlm,
  175         ROFF_hpf,
  176         ROFF_hpfa,
  177         ROFF_hpfcode,
  178         ROFF_hw,
  179         ROFF_hy,
  180         ROFF_hylang,
  181         ROFF_hylen,
  182         ROFF_hym,
  183         ROFF_hypp,
  184         ROFF_hys,
  185         ROFF_ie,
  186         ROFF_if,
  187         ROFF_ig,
  188         /* MAN_in; ignored in mdoc(7) */
  189         ROFF_index,
  190         ROFF_it,
  191         ROFF_itc,
  192         ROFF_IX,
  193         ROFF_kern,
  194         ROFF_kernafter,
  195         ROFF_kernbefore,
  196         ROFF_kernpair,
  197         ROFF_lc,
  198         ROFF_lc_ctype,
  199         ROFF_lds,
  200         ROFF_length,
  201         ROFF_letadj,
  202         ROFF_lf,
  203         ROFF_lg,
  204         ROFF_lhang,
  205         ROFF_linetabs,
  206         ROFF_lnr,
  207         ROFF_lnrf,
  208         ROFF_lpfx,
  209         ROFF_ls,
  210         ROFF_lsm,
  211         ROFF_lt,
  212         ROFF_mediasize,
  213         ROFF_minss,
  214         ROFF_mk,
  215         ROFF_mso,
  216         ROFF_na,
  217         ROFF_ne,
  218         /* MAN_nf; ignored in mdoc(7) */
  219         ROFF_nh,
  220         ROFF_nhychar,
  221         ROFF_nm,
  222         ROFF_nn,
  223         ROFF_nop,
  224         ROFF_nr,
  225         ROFF_nrf,
  226         ROFF_nroff,
  227         ROFF_ns,
  228         ROFF_nx,
  229         ROFF_open,
  230         ROFF_opena,
  231         ROFF_os,
  232         ROFF_output,
  233         ROFF_padj,
  234         ROFF_papersize,
  235         ROFF_pc,
  236         ROFF_pev,
  237         ROFF_pi,
  238         ROFF_PI,
  239         ROFF_pl,
  240         ROFF_pm,
  241         ROFF_pn,
  242         ROFF_pnr,
  243         ROFF_ps,
  244         ROFF_psbb,
  245         ROFF_pshape,
  246         ROFF_pso,
  247         ROFF_ptr,
  248         ROFF_pvs,
  249         ROFF_rchar,
  250         ROFF_rd,
  251         ROFF_recursionlimit,
  252         ROFF_return,
  253         ROFF_rfschar,
  254         ROFF_rhang,
  255         ROFF_rm,
  256         ROFF_rn,
  257         ROFF_rnn,
  258         ROFF_rr,
  259         ROFF_rs,
  260         ROFF_rt,
  261         ROFF_schar,
  262         ROFF_sentchar,
  263         ROFF_shc,
  264         ROFF_shift,
  265         ROFF_sizes,
  266         ROFF_so,
  267         ROFF_spacewidth,
  268         ROFF_special,
  269         ROFF_spreadwarn,
  270         ROFF_ss,
  271         ROFF_sty,
  272         ROFF_substring,
  273         ROFF_sv,
  274         ROFF_sy,
  275         ROFF_T_,
  276         ROFF_tc,
  277         ROFF_TE,
  278         ROFF_TH,
  279         ROFF_tkf,
  280         ROFF_tl,
  281         ROFF_tm,
  282         ROFF_tm1,
  283         ROFF_tmc,
  284         ROFF_tr,
  285         ROFF_track,
  286         ROFF_transchar,
  287         ROFF_trf,
  288         ROFF_trimat,
  289         ROFF_trin,
  290         ROFF_trnt,
  291         ROFF_troff,
  292         ROFF_TS,
  293         ROFF_uf,
  294         ROFF_ul,
  295         ROFF_unformat,
  296         ROFF_unwatch,
  297         ROFF_unwatchn,
  298         ROFF_vpt,
  299         ROFF_vs,
  300         ROFF_warn,
  301         ROFF_warnscale,
  302         ROFF_watch,
  303         ROFF_watchlength,
  304         ROFF_watchn,
  305         ROFF_wh,
  306         ROFF_while,
  307         ROFF_write,
  308         ROFF_writec,
  309         ROFF_writem,
  310         ROFF_xflag,
              /*
               * NOTE(review): the next three entries break the
               * alphabetical run of request names above; presumably
               * internal pseudo-requests rather than roff(7) request
               * names.  Confirm against the roff parser.
               */
  311         ROFF_cblock,
  312         ROFF_RENAMED,
  313         ROFF_USERDEF,
  314         TOKEN_NONE,     /* Sits between the ROFF_* and MDOC_* groups.  NOTE(review): presumably "no token"; confirm. */
              /* mdoc(7) macro identifiers. */
  315         MDOC_Dd,
  316         MDOC_Dt,
  317         MDOC_Os,
  318         MDOC_Sh,
  319         MDOC_Ss,
  320         MDOC_Pp,
  321         MDOC_D1,
  322         MDOC_Dl,
  323         MDOC_Bd,
  324         MDOC_Ed,
  325         MDOC_Bl,
  326         MDOC_El,
  327         MDOC_It,
  328         MDOC_Ad,
  329         MDOC_An,
  330         MDOC_Ap,
  331         MDOC_Ar,
  332         MDOC_Cd,
  333         MDOC_Cm,
  334         MDOC_Dv,
  335         MDOC_Er,
  336         MDOC_Ev,
  337         MDOC_Ex,
  338         MDOC_Fa,
  339         MDOC_Fd,
  340         MDOC_Fl,
  341         MDOC_Fn,
  342         MDOC_Ft,
  343         MDOC_Ic,
  344         MDOC_In,
  345         MDOC_Li,
  346         MDOC_Nd,
  347         MDOC_Nm,
  348         MDOC_Op,
  349         MDOC_Ot,
  350         MDOC_Pa,
  351         MDOC_Rv,
  352         MDOC_St,
  353         MDOC_Va,
  354         MDOC_Vt,
  355         MDOC_Xr,
  356         MDOC__A,
  357         MDOC__B,
  358         MDOC__D,
  359         MDOC__I,
  360         MDOC__J,
  361         MDOC__N,
  362         MDOC__O,
  363         MDOC__P,
  364         MDOC__R,
  365         MDOC__T,
  366         MDOC__V,
  367         MDOC_Ac,
  368         MDOC_Ao,
  369         MDOC_Aq,
  370         MDOC_At,
  371         MDOC_Bc,
  372         MDOC_Bf,
  373         MDOC_Bo,
  374         MDOC_Bq,
  375         MDOC_Bsx,
  376         MDOC_Bx,
  377         MDOC_Db,
  378         MDOC_Dc,
  379         MDOC_Do,
  380         MDOC_Dq,
  381         MDOC_Ec,
  382         MDOC_Ef,
  383         MDOC_Em,
  384         MDOC_Eo,
  385         MDOC_Fx,
  386         MDOC_Ms,
  387         MDOC_No,
  388         MDOC_Ns,
  389         MDOC_Nx,
  390         MDOC_Ox,
  391         MDOC_Pc,
  392         MDOC_Pf,
  393         MDOC_Po,
  394         MDOC_Pq,
  395         MDOC_Qc,
  396         MDOC_Ql,
  397         MDOC_Qo,
  398         MDOC_Qq,
  399         MDOC_Re,
  400         MDOC_Rs,
  401         MDOC_Sc,
  402         MDOC_So,
  403         MDOC_Sq,
  404         MDOC_Sm,
  405         MDOC_Sx,
  406         MDOC_Sy,
  407         MDOC_Tn,
  408         MDOC_Ux,
  409         MDOC_Xc,
  410         MDOC_Xo,
  411         MDOC_Fo,
  412         MDOC_Fc,
  413         MDOC_Oo,
  414         MDOC_Oc,
  415         MDOC_Bk,
  416         MDOC_Ek,
  417         MDOC_Bt,
  418         MDOC_Hf,
  419         MDOC_Fr,
  420         MDOC_Ud,
  421         MDOC_Lb,
  422         MDOC_Lp,
  423         MDOC_Lk,
  424         MDOC_Mt,
  425         MDOC_Brq,
  426         MDOC_Bro,
  427         MDOC_Brc,
  428         MDOC__C,
  429         MDOC_Es,
  430         MDOC_En,
  431         MDOC_Dx,
  432         MDOC__Q,
  433         MDOC__U,
  434         MDOC_Ta,
  435         MDOC_MAX,       /* Closes the MDOC_* group; one past MDOC_Ta. */
              /* man(7) macro identifiers. */
  436         MAN_TH,
  437         MAN_SH,
  438         MAN_SS,
  439         MAN_TP,
  440         MAN_LP,
  441         MAN_PP,
  442         MAN_P,
  443         MAN_IP,
  444         MAN_HP,
  445         MAN_SM,
  446         MAN_SB,
  447         MAN_BI,
  448         MAN_IB,
  449         MAN_BR,
  450         MAN_RB,
  451         MAN_R,
  452         MAN_B,
  453         MAN_I,
  454         MAN_IR,
  455         MAN_RI,
  456         MAN_nf,
  457         MAN_fi,
  458         MAN_RE,
  459         MAN_RS,
  460         MAN_DT,
  461         MAN_UC,
  462         MAN_PD,
  463         MAN_AT,
  464         MAN_in,
  465         MAN_OP,
  466         MAN_EX,
  467         MAN_EE,
  468         MAN_UR,
  469         MAN_UE,
  470         MAN_MT,
  471         MAN_ME,
  472         MAN_MAX         /* Closes the MAN_* group; last enumerator, one past MAN_ME. */
  473 };
 474 
  475 enum    roff_next {
              /* Where to put the next node; stored in roff_man.next. */
  476         ROFF_NEXT_SIBLING = 0,  /* NOTE(review): presumably as a sibling of roff_man.last; confirm. */
  477         ROFF_NEXT_CHILD         /* NOTE(review): presumably as a child of roff_man.last; confirm. */
  478 };
 479 
  480 /*
  481  * Indicates that a BODY's formatting has ended, but
  482  * the scope is still open.  Used for badly nested blocks.
       * Stored in roff_node.end, which is marked as a BODY member.
  483  */
  484 enum    mdoc_endbody {
  485         ENDBODY_NOT = 0,  /* NOTE(review): presumably "no early end of body"; confirm. */
  486         ENDBODY_SPACE   /* Is broken: append a space. */
  487 };
 488 
  489 struct  roff_node {
              /*
               * One node of the syntax tree (AST).  Nodes are linked to
               * their parent, their first and last child, and their
               * next and previous sibling.  Several members are only
               * meaningful for the node types named in their comments
               * (see enum roff_type).
               */
  490         struct roff_node *parent;  /* Parent AST node. */
  491         struct roff_node *child;   /* First child AST node. */
  492         struct roff_node *last;    /* Last child AST node. */
  493         struct roff_node *next;    /* Sibling AST node. */
  494         struct roff_node *prev;    /* Prior sibling AST node. */
  495         struct roff_node *head;    /* BLOCK */
  496         struct roff_node *body;    /* BLOCK/ENDBODY */
  497         struct roff_node *tail;    /* BLOCK */
  498         struct mdoc_arg  *args;    /* BLOCK/ELEM */
  499         union mdoc_data  *norm;    /* Normalized arguments. */
  500         char             *string;  /* TEXT */
  501         const struct tbl_span *span; /* TBL */
  502         struct eqn_box   *eqn;     /* EQN */
  503         int               line;    /* Input file line number. */
  504         int               pos;     /* Input file column number. */
  505         int               flags;   /* Bitwise OR of the NODE_* values below. */
  506 #define NODE_VALID       (1 << 0)  /* Has been validated. */
  507 #define NODE_ENDED       (1 << 1)  /* Gone past body end mark. */
  508 #define NODE_EOS         (1 << 2)  /* At sentence boundary. */
  509 #define NODE_LINE        (1 << 3)  /* First macro/text on line. */
  510 #define NODE_SYNPRETTY   (1 << 4)  /* SYNOPSIS-style formatting. */
  511 #define NODE_BROKEN      (1 << 5)  /* Must validate parent when ending. */
  512 #define NODE_DELIMO      (1 << 6)  /* NOTE(review): presumably "opening delimiter"; confirm. */
  513 #define NODE_DELIMC      (1 << 7)  /* NOTE(review): presumably "closing delimiter"; confirm. */
  514 #define NODE_NOSRC       (1 << 8)  /* Generated node, not in input file. */
  515 #define NODE_NOPRT       (1 << 9)  /* Shall not print anything. */
  516         int               prev_font; /* Before entering this node. */
  517         int               aux;     /* Decoded node data, type-dependent. */
  518         enum roff_tok     tok;     /* Request or macro ID. */
  519         enum roff_type    type;    /* AST node type. */
  520         enum roff_sec     sec;     /* Current named section. */
  521         enum mdoc_endbody end;     /* BODY */
  522 };
 523 
  524 struct  roff_meta {
              /*
               * Document meta-data; embedded by value as roff_man.meta.
               * The operating system is kept twice: as a string (os)
               * and as an enum (os_e).  enum mandoc_os is not declared
               * in this header, so its declaration must already be
               * visible wherever this header is included.
               */
  525         char             *msec;    /* Manual section, usually a digit. */
  526         char             *vol;     /* Manual volume title. */
  527         char             *os;      /* Operating system. */
  528         char             *arch;    /* Machine architecture. */
  529         char             *title;   /* Manual title, usually CAPS. */
  530         char             *name;    /* Leading manual name. */
  531         char             *date;    /* Normalized date. */
  532         int               hasbody; /* Document is not empty. */
  533         int               rcsids;  /* Bits indexed by enum mandoc_os. */
  534         enum mandoc_os    os_e;    /* Operating system. */
  535 };
 536 
  537 struct  roff_man {
              /*
               * Parse state for one document.  It carries members for
               * both macro sets (mdocmac and manmac, MDOC_* and MAN_*
               * flags); macroset records which kind is in use.
               */
  538         struct roff_meta  meta;    /* Document meta-data. */
  539         struct mparse    *parse;   /* Parse pointer. */
  540         struct roff      *roff;    /* Roff parser state data. */
  541         struct ohash     *mdocmac; /* Mdoc macro lookup table. */
  542         struct ohash     *manmac;  /* Man macro lookup table. */
  543         const char       *os_s;    /* Default operating system. */
  544         struct roff_node *first;   /* The first node parsed. */
  545         struct roff_node *last;    /* The last node parsed. */
  546         struct roff_node *last_es; /* The most recent Es node. */
  547         int               quick;   /* Abort parse early. */
  548         int               flags;   /* Parse flags. */
              /*
               * The MDOC_* and MAN_* flags below share the bit space of
               * the one flags member above, so each value must stay
               * distinct.  Bit (1 << 0) is not assigned here.
               */
  549 #define MDOC_LITERAL     (1 << 1)  /* In a literal scope. */
  550 #define MDOC_PBODY       (1 << 2)  /* In the document body. */
  551 #define MDOC_NEWLINE     (1 << 3)  /* First macro/text in a line. */
  552 #define MDOC_PHRASE      (1 << 4)  /* In a Bl -column phrase. */
  553 #define MDOC_PHRASELIT   (1 << 5)  /* Literal within a phrase. */
  554 #define MDOC_FREECOL     (1 << 6)  /* `It' invocation should close. */
  555 #define MDOC_SYNOPSIS    (1 << 7)  /* SYNOPSIS-style formatting. */
  556 #define MDOC_KEEP        (1 << 8)  /* In a word keep. */
  557 #define MDOC_SMOFF       (1 << 9)  /* Spacing is off. */
  558 #define MDOC_NODELIMC    (1 << 10) /* Disable closing delimiter handling. */
  559 #define MAN_ELINE        (1 << 11) /* Next-line element scope. */
  560 #define MAN_BLINE        (1 << 12) /* Next-line block scope. */
  561 #define MDOC_PHRASEQF    (1 << 13) /* Quote first word encountered. */
  562 #define MDOC_PHRASEQL    (1 << 14) /* Quote last word of this phrase. */
  563 #define MDOC_PHRASEQN    (1 << 15) /* Quote first word of the next phrase. */
              /* Aliases: the MAN_* names below reuse the MDOC_* bits. */
  564 #define MAN_LITERAL       MDOC_LITERAL
  565 #define MAN_NEWLINE       MDOC_NEWLINE
  566         enum roff_macroset macroset; /* Kind of high-level macros used. */
  567         enum roff_sec     lastsec; /* Last section seen. */
  568         enum roff_sec     lastnamed; /* Last standard section seen. */
  569         enum roff_next    next;    /* Where to put the next node. */
  570 };
 571 
  572 extern  const char *const *roff_name;  /* Defined elsewhere.  NOTE(review): presumably name strings indexed by enum roff_tok; confirm. */
 573 
 574 
  575 void             deroff(char **, const struct roff_node *);
      /*
       * Notes on deroff() above and the prototypes below.  The
       * functions are defined elsewhere; only what the signatures
       * show is stated as fact.
       *
       * deroff: returns nothing and takes the node as const.
       *   NOTE(review): presumably hands its result back through the
       *   char ** argument; confirm ownership of that string.
       * roffhash_alloc: takes two enum roff_tok values and returns a
       *   table pointer.  NOTE(review): presumably the first and last
       *   token of the range to enter into the table; confirm whether
       *   the upper bound is inclusive.
       * roffhash_find: takes a table, a string, and a size_t, and
       *   returns an enum roff_tok.  NOTE(review): presumably the
       *   size_t is the name length and TOKEN_NONE means "not found";
       *   confirm.  size_t is not defined by this header, so an
       *   including file must provide it first.
       * roffhash_free: takes a table pointer.  NOTE(review):
       *   presumably releases a table from roffhash_alloc; confirm.
       * roff_validate: takes the non-const parse state, so it may
       *   modify it.
       */
  576 struct ohash    *roffhash_alloc(enum roff_tok, enum roff_tok);
  577 enum roff_tok    roffhash_find(struct ohash *, const char *, size_t);
  578 void             roffhash_free(struct ohash *);
  579 void             roff_validate(struct roff_man *);