1 /* $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <time.h>
32
33 #include "mandoc.h"
34 #include "libmandoc.h"
35
36 #define DATESIZE 32
37
38 static int a2time(time_t *, const char *, const char *);
39 static char *time2a(time_t);
40 static int numescape(const char *);
41
42 /*
43 * Pass over recursive numerical expressions. This context of this
44 * function is important: it's only called within character-terminating
45 * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial
46 * recursion: we don't care about what's in these blocks.
47 * This returns the number of characters skipped or -1 if an error
48 * occurs (the caller should bail).
49 */
50 static int
51 numescape(const char *start)
52 {
53 int i;
54 size_t sz;
55 const char *cp;
56
57 i = 0;
58
59 /* The expression consists of a subexpression. */
60
61 if ('\\' == start[i]) {
62 cp = &start[++i];
63 /*
64 * Read past the end of the subexpression.
65 * Bail immediately on errors.
66 */
67 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
68 return(-1);
69 return(i + cp - &start[i]);
70 }
71
72 if ('(' != start[i++])
73 return(0);
74
75 /*
76 * A parenthesised subexpression. Read until the closing
77 * parenthesis, making sure to handle any nested subexpressions
78 * that might ruin our parse.
79 */
80
81 while (')' != start[i]) {
82 sz = strcspn(&start[i], ")\\");
83 i += (int)sz;
84
85 if ('\0' == start[i])
86 return(-1);
87 else if ('\\' != start[i])
88 continue;
89
90 cp = &start[++i];
91 if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL))
92 return(-1);
93 i += cp - &start[i];
94 }
95
96 /* Read past the terminating ')'. */
97 return(++i);
98 }
99
100 enum mandoc_esc
101 mandoc_escape(const char **end, const char **start, int *sz)
102 {
103 char c, term, numeric;
104 int i, lim, ssz, rlim;
105 const char *cp, *rstart;
106 enum mandoc_esc gly;
107
108 cp = *end;
109 rstart = cp;
110 if (start)
111 *start = rstart;
112 i = lim = 0;
113 gly = ESCAPE_ERROR;
114 term = numeric = '\0';
115
116 switch ((c = cp[i++])) {
117 /*
118 * First the glyphs. There are several different forms of
119 * these, but each eventually returns a substring of the glyph
120 * name.
121 */
122 case ('('):
123 gly = ESCAPE_SPECIAL;
124 lim = 2;
125 break;
126 case ('['):
127 gly = ESCAPE_SPECIAL;
128 /*
129 * Unicode escapes are defined in groff as \[uXXXX] to
130 * \[u10FFFF], where the contained value must be a valid
131 * Unicode codepoint. Here, however, only check whether
132 * it's not a zero-width escape.
133 */
134 if ('u' == cp[i] && ']' != cp[i + 1])
135 gly = ESCAPE_UNICODE;
136 term = ']';
137 break;
138 case ('C'):
139 if ('\'' != cp[i])
140 return(ESCAPE_ERROR);
141 gly = ESCAPE_SPECIAL;
142 term = '\'';
143 break;
144
145 /*
146 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
147 * 'X' is the trigger. These have opaque sub-strings.
148 */
149 case ('F'):
150 /* FALLTHROUGH */
151 case ('g'):
152 /* FALLTHROUGH */
153 case ('k'):
154 /* FALLTHROUGH */
155 case ('M'):
156 /* FALLTHROUGH */
157 case ('m'):
158 /* FALLTHROUGH */
159 case ('n'):
160 /* FALLTHROUGH */
161 case ('V'):
162 /* FALLTHROUGH */
163 case ('Y'):
164 gly = ESCAPE_IGNORE;
165 /* FALLTHROUGH */
166 case ('f'):
167 if (ESCAPE_ERROR == gly)
168 gly = ESCAPE_FONT;
169
170 rstart= &cp[i];
171 if (start)
172 *start = rstart;
173
174 switch (cp[i++]) {
175 case ('('):
176 lim = 2;
177 break;
178 case ('['):
179 term = ']';
180 break;
181 default:
182 lim = 1;
183 i--;
184 break;
185 }
186 break;
187
188 /*
189 * These escapes are of the form \X'Y', where 'X' is the trigger
190 * and 'Y' is any string. These have opaque sub-strings.
191 */
192 case ('A'):
193 /* FALLTHROUGH */
194 case ('b'):
195 /* FALLTHROUGH */
196 case ('D'):
197 /* FALLTHROUGH */
198 case ('o'):
199 /* FALLTHROUGH */
200 case ('R'):
201 /* FALLTHROUGH */
202 case ('X'):
203 /* FALLTHROUGH */
204 case ('Z'):
205 if ('\'' != cp[i++])
206 return(ESCAPE_ERROR);
207 gly = ESCAPE_IGNORE;
208 term = '\'';
209 break;
210
211 /*
212 * These escapes are of the form \X'N', where 'X' is the trigger
213 * and 'N' resolves to a numerical expression.
214 */
215 case ('B'):
216 /* FALLTHROUGH */
217 case ('h'):
218 /* FALLTHROUGH */
219 case ('H'):
220 /* FALLTHROUGH */
221 case ('L'):
222 /* FALLTHROUGH */
223 case ('l'):
224 gly = ESCAPE_NUMBERED;
225 /* FALLTHROUGH */
226 case ('S'):
227 /* FALLTHROUGH */
228 case ('v'):
229 /* FALLTHROUGH */
230 case ('w'):
231 /* FALLTHROUGH */
232 case ('x'):
233 if (ESCAPE_ERROR == gly)
234 gly = ESCAPE_IGNORE;
235 if ('\'' != cp[i++])
236 return(ESCAPE_ERROR);
237 term = numeric = '\'';
238 break;
239
240 /*
241 * Special handling for the numbered character escape.
242 * XXX Do any other escapes need similar handling?
243 */
244 case ('N'):
245 if ('\0' == cp[i])
246 return(ESCAPE_ERROR);
247 *end = &cp[++i];
248 if (isdigit((unsigned char)cp[i-1]))
249 return(ESCAPE_IGNORE);
250 while (isdigit((unsigned char)**end))
251 (*end)++;
252 if (start)
253 *start = &cp[i];
254 if (sz)
255 *sz = *end - &cp[i];
256 if ('\0' != **end)
257 (*end)++;
258 return(ESCAPE_NUMBERED);
259
260 /*
261 * Sizes get a special category of their own.
262 */
263 case ('s'):
264 gly = ESCAPE_IGNORE;
265
266 rstart = &cp[i];
267 if (start)
268 *start = rstart;
269
270 /* See +/- counts as a sign. */
271 c = cp[i];
272 if ('+' == c || '-' == c || ASCII_HYPH == c)
273 ++i;
274
275 switch (cp[i++]) {
276 case ('('):
277 lim = 2;
278 break;
279 case ('['):
280 term = numeric = ']';
281 break;
282 case ('\''):
283 term = numeric = '\'';
284 break;
285 default:
286 lim = 1;
287 i--;
288 break;
289 }
290
291 /* See +/- counts as a sign. */
292 c = cp[i];
293 if ('+' == c || '-' == c || ASCII_HYPH == c)
294 ++i;
295
296 break;
297
298 /*
299 * Anything else is assumed to be a glyph.
300 */
301 default:
302 gly = ESCAPE_SPECIAL;
303 lim = 1;
304 i--;
305 break;
306 }
307
308 assert(ESCAPE_ERROR != gly);
309
310 rstart = &cp[i];
311 if (start)
312 *start = rstart;
313
314 /*
315 * If a terminating block has been specified, we need to
316 * handle the case of recursion, which could have their
317 * own terminating blocks that mess up our parse. This, by the
318 * way, means that the "start" and "size" values will be
319 * effectively meaningless.
320 */
321
322 ssz = 0;
323 if (numeric && -1 == (ssz = numescape(&cp[i])))
324 return(ESCAPE_ERROR);
325
326 i += ssz;
327 rlim = -1;
328
329 /*
330 * We have a character terminator. Try to read up to that
331 * character. If we can't (i.e., we hit the nil), then return
332 * an error; if we can, calculate our length, read past the
333 * terminating character, and exit.
334 */
335
336 if ('\0' != term) {
337 *end = strchr(&cp[i], term);
338 if ('\0' == *end)
339 return(ESCAPE_ERROR);
340
341 rlim = *end - &cp[i];
342 if (sz)
343 *sz = rlim;
344 (*end)++;
345 goto out;
346 }
347
348 assert(lim > 0);
349
350 /*
351 * We have a numeric limit. If the string is shorter than that,
352 * stop and return an error. Else adjust our endpoint, length,
353 * and return the current glyph.
354 */
355
356 if ((size_t)lim > strlen(&cp[i]))
357 return(ESCAPE_ERROR);
358
359 rlim = lim;
360 if (sz)
361 *sz = rlim;
362
363 *end = &cp[i] + lim;
364
365 out:
366 assert(rlim >= 0 && rstart);
367
368 /* Run post-processors. */
369
370 switch (gly) {
371 case (ESCAPE_FONT):
372 /*
373 * Pretend that the constant-width font modes are the
374 * same as the regular font modes.
375 */
376 if (2 == rlim && 'C' == *rstart)
377 rstart++;
378 else if (1 != rlim)
379 break;
380
381 switch (*rstart) {
382 case ('3'):
383 /* FALLTHROUGH */
384 case ('B'):
385 gly = ESCAPE_FONTBOLD;
386 break;
387 case ('2'):
388 /* FALLTHROUGH */
389 case ('I'):
390 gly = ESCAPE_FONTITALIC;
391 break;
392 case ('P'):
393 gly = ESCAPE_FONTPREV;
394 break;
395 case ('1'):
396 /* FALLTHROUGH */
397 case ('R'):
398 gly = ESCAPE_FONTROMAN;
399 break;
400 }
401 break;
402 case (ESCAPE_SPECIAL):
403 if (1 != rlim)
404 break;
405 if ('c' == *rstart)
406 gly = ESCAPE_NOSPACE;
407 break;
408 default:
409 break;
410 }
411
412 return(gly);
413 }
414
415 void *
416 mandoc_calloc(size_t num, size_t size)
417 {
418 void *ptr;
419
420 ptr = calloc(num, size);
421 if (NULL == ptr) {
422 perror(NULL);
423 exit((int)MANDOCLEVEL_SYSERR);
424 }
425
467 }
468
469 char *
470 mandoc_strdup(const char *ptr)
471 {
472 char *p;
473
474 p = strdup(ptr);
475 if (NULL == p) {
476 perror(NULL);
477 exit((int)MANDOCLEVEL_SYSERR);
478 }
479
480 return(p);
481 }
482
483 /*
484 * Parse a quoted or unquoted roff-style request or macro argument.
485 * Return a pointer to the parsed argument, which is either the original
486 * pointer or advanced by one byte in case the argument is quoted.
487 * Null-terminate the argument in place.
488 * Collapse pairs of quotes inside quoted arguments.
489 * Advance the argument pointer to the next argument,
490 * or to the null byte terminating the argument line.
491 */
492 char *
493 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
494 {
495 char *start, *cp;
496 int quoted, pairs, white;
497
498 /* Quoting can only start with a new word. */
499 start = *cpp;
500 quoted = 0;
501 if ('"' == *start) {
502 quoted = 1;
503 start++;
504 }
505
506 pairs = 0;
507 white = 0;
508 for (cp = start; '\0' != *cp; cp++) {
509 /* Move left after quoted quotes and escaped backslashes. */
510 if (pairs)
511 cp[-pairs] = cp[0];
512 if ('\\' == cp[0]) {
513 if ('\\' == cp[1]) {
514 /* Poor man's copy mode. */
515 pairs++;
516 cp++;
517 } else if (0 == quoted && ' ' == cp[1])
518 /* Skip escaped blanks. */
519 cp++;
520 } else if (0 == quoted) {
521 if (' ' == cp[0]) {
522 /* Unescaped blanks end unquoted args. */
523 white = 1;
524 break;
525 }
526 } else if ('"' == cp[0]) {
527 if ('"' == cp[1]) {
528 /* Quoted quotes collapse. */
529 pairs++;
530 cp++;
531 } else {
532 /* Unquoted quotes end quoted args. */
533 quoted = 2;
534 break;
535 }
536 }
537 }
538
539 /* Quoted argument without a closing quote. */
540 if (1 == quoted)
541 mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
542
543 /* Null-terminate this argument and move to the next one. */
544 if (pairs)
545 cp[-pairs] = '\0';
546 if ('\0' != *cp) {
547 *cp++ = '\0';
548 while (' ' == *cp)
549 cp++;
550 }
551 *pos += (int)(cp - start) + (quoted ? 1 : 0);
552 *cpp = cp;
553
554 if ('\0' == *cp && (white || ' ' == cp[-1]))
555 mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
556
557 return(start);
558 }
559
560 static int
561 a2time(time_t *t, const char *fmt, const char *p)
562 {
563 struct tm tm;
660 /* FALLTHROUGH */
661 case (')'):
662 if (0 == found)
663 enclosed = 1;
664 break;
665 case ('.'):
666 /* FALLTHROUGH */
667 case ('!'):
668 /* FALLTHROUGH */
669 case ('?'):
670 found = 1;
671 break;
672 default:
673 return(found && (!enclosed || isalnum((unsigned char)*q)));
674 }
675 }
676
677 return(found && !enclosed);
678 }
679
/*
 * Decide whether the text at cp[*ppos] begins a macro line.
 * A macro line starts with a dot, with an apostrophe, or with the
 * two-character sequence backslash-dot.  If it does, advance *ppos
 * past that control sequence and any blanks and tabs following it
 * and return one; otherwise return zero and leave *ppos alone.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	const char	*p;

	p = cp + *ppos;

	switch (*p) {
	case ('\\'):
		/* A backslash only counts when a dot follows. */
		if ('.' != p[1])
			return(0);
		p += 2;
		break;
	case ('.'):
		/* FALLTHROUGH */
	case ('\''):
		p++;
		break;
	default:
		return(0);
	}

	/* Swallow whitespace between control character and name. */
	while (' ' == *p || '\t' == *p)
		p++;

	*ppos = (int)(p - cp);
	return(1);
}
705
706 /*
707 * Convert a string to a long that may not be <0.
708 * If the string is invalid, or is less than 0, return -1.
709 */
710 int
711 mandoc_strntoi(const char *p, size_t sz, int base)
712 {
713 char buf[32];
714 char *ep;
715 long v;
716
717 if (sz > 31)
718 return(-1);
719
720 memcpy(buf, p, sz);
721 buf[(int)sz] = '\0';
722
723 errno = 0;
724 v = strtol(buf, &ep, base);
725
|
1 /* $Id: mandoc.c,v 1.74 2013/12/30 18:30:32 schwarze Exp $ */
2 /*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21
22 #include <sys/types.h>
23
24 #include <assert.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <limits.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <time.h>
32
33 #include "mandoc.h"
34 #include "libmandoc.h"
35
36 #define DATESIZE 32
37
38 static int a2time(time_t *, const char *, const char *);
39 static char *time2a(time_t);
40
41
42 enum mandoc_esc
43 mandoc_escape(const char **end, const char **start, int *sz)
44 {
45 const char *local_start;
46 int local_sz;
47 char term;
48 enum mandoc_esc gly;
49
50 /*
51 * When the caller doesn't provide return storage,
52 * use local storage.
53 */
54
55 if (NULL == start)
56 start = &local_start;
57 if (NULL == sz)
58 sz = &local_sz;
59
60 /*
61 * Beyond the backslash, at least one input character
62 * is part of the escape sequence. With one exception
63 * (see below), that character won't be returned.
64 */
65
66 gly = ESCAPE_ERROR;
67 *start = ++*end;
68 *sz = 0;
69 term = '\0';
70
71 switch ((*start)[-1]) {
72 /*
73 * First the glyphs. There are several different forms of
74 * these, but each eventually returns a substring of the glyph
75 * name.
76 */
77 case ('('):
78 gly = ESCAPE_SPECIAL;
79 *sz = 2;
80 break;
81 case ('['):
82 gly = ESCAPE_SPECIAL;
83 /*
84 * Unicode escapes are defined in groff as \[uXXXX] to
85 * \[u10FFFF], where the contained value must be a valid
86 * Unicode codepoint. Here, however, only check whether
87 * it's not a zero-width escape.
88 */
89 if ('u' == (*start)[0] && ']' != (*start)[1])
90 gly = ESCAPE_UNICODE;
91 term = ']';
92 break;
93 case ('C'):
94 if ('\'' != **start)
95 return(ESCAPE_ERROR);
96 *start = ++*end;
97 if ('u' == (*start)[0] && '\'' != (*start)[1])
98 gly = ESCAPE_UNICODE;
99 else
100 gly = ESCAPE_SPECIAL;
101 term = '\'';
102 break;
103
104 /*
105 * Escapes taking no arguments at all.
106 */
107 case ('d'):
108 /* FALLTHROUGH */
109 case ('u'):
110 return(ESCAPE_IGNORE);
111
112 /*
113 * The \z escape is supposed to output the following
114 * character without advancing the cursor position.
115 * Since we are mostly dealing with terminal mode,
116 * let us just skip the next character.
117 */
118 case ('z'):
119 return(ESCAPE_SKIPCHAR);
120
121 /*
122 * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
123 * 'X' is the trigger. These have opaque sub-strings.
124 */
125 case ('F'):
126 /* FALLTHROUGH */
127 case ('g'):
128 /* FALLTHROUGH */
129 case ('k'):
130 /* FALLTHROUGH */
131 case ('M'):
132 /* FALLTHROUGH */
133 case ('m'):
134 /* FALLTHROUGH */
135 case ('n'):
136 /* FALLTHROUGH */
137 case ('V'):
138 /* FALLTHROUGH */
139 case ('Y'):
140 gly = ESCAPE_IGNORE;
141 /* FALLTHROUGH */
142 case ('f'):
143 if (ESCAPE_ERROR == gly)
144 gly = ESCAPE_FONT;
145 switch (**start) {
146 case ('('):
147 *start = ++*end;
148 *sz = 2;
149 break;
150 case ('['):
151 *start = ++*end;
152 term = ']';
153 break;
154 default:
155 *sz = 1;
156 break;
157 }
158 break;
159
160 /*
161 * These escapes are of the form \X'Y', where 'X' is the trigger
162 * and 'Y' is any string. These have opaque sub-strings.
163 */
164 case ('A'):
165 /* FALLTHROUGH */
166 case ('b'):
167 /* FALLTHROUGH */
168 case ('B'):
169 /* FALLTHROUGH */
170 case ('D'):
171 /* FALLTHROUGH */
172 case ('o'):
173 /* FALLTHROUGH */
174 case ('R'):
175 /* FALLTHROUGH */
176 case ('w'):
177 /* FALLTHROUGH */
178 case ('X'):
179 /* FALLTHROUGH */
180 case ('Z'):
181 if ('\'' != **start)
182 return(ESCAPE_ERROR);
183 gly = ESCAPE_IGNORE;
184 *start = ++*end;
185 term = '\'';
186 break;
187
188 /*
189 * These escapes are of the form \X'N', where 'X' is the trigger
190 * and 'N' resolves to a numerical expression.
191 */
192 case ('h'):
193 /* FALLTHROUGH */
194 case ('H'):
195 /* FALLTHROUGH */
196 case ('L'):
197 /* FALLTHROUGH */
198 case ('l'):
199 /* FALLTHROUGH */
200 case ('S'):
201 /* FALLTHROUGH */
202 case ('v'):
203 /* FALLTHROUGH */
204 case ('x'):
205 if ('\'' != **start)
206 return(ESCAPE_ERROR);
207 gly = ESCAPE_IGNORE;
208 *start = ++*end;
209 term = '\'';
210 break;
211
212 /*
213 * Special handling for the numbered character escape.
214 * XXX Do any other escapes need similar handling?
215 */
216 case ('N'):
217 if ('\0' == **start)
218 return(ESCAPE_ERROR);
219 (*end)++;
220 if (isdigit((unsigned char)**start)) {
221 *sz = 1;
222 return(ESCAPE_IGNORE);
223 }
224 (*start)++;
225 while (isdigit((unsigned char)**end))
226 (*end)++;
227 *sz = *end - *start;
228 if ('\0' != **end)
229 (*end)++;
230 return(ESCAPE_NUMBERED);
231
232 /*
233 * Sizes get a special category of their own.
234 */
235 case ('s'):
236 gly = ESCAPE_IGNORE;
237
238 /* See +/- counts as a sign. */
239 if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
240 (*end)++;
241
242 switch (**end) {
243 case ('('):
244 *start = ++*end;
245 *sz = 2;
246 break;
247 case ('['):
248 *start = ++*end;
249 term = ']';
250 break;
251 case ('\''):
252 *start = ++*end;
253 term = '\'';
254 break;
255 default:
256 *sz = 1;
257 break;
258 }
259
260 break;
261
262 /*
263 * Anything else is assumed to be a glyph.
264 * In this case, pass back the character after the backslash.
265 */
266 default:
267 gly = ESCAPE_SPECIAL;
268 *start = --*end;
269 *sz = 1;
270 break;
271 }
272
273 assert(ESCAPE_ERROR != gly);
274
275 /*
276 * Read up to the terminating character,
277 * paying attention to nested escapes.
278 */
279
280 if ('\0' != term) {
281 while (**end != term) {
282 switch (**end) {
283 case ('\0'):
284 return(ESCAPE_ERROR);
285 case ('\\'):
286 (*end)++;
287 if (ESCAPE_ERROR ==
288 mandoc_escape(end, NULL, NULL))
289 return(ESCAPE_ERROR);
290 break;
291 default:
292 (*end)++;
293 break;
294 }
295 }
296 *sz = (*end)++ - *start;
297 } else {
298 assert(*sz > 0);
299 if ((size_t)*sz > strlen(*start))
300 return(ESCAPE_ERROR);
301 *end += *sz;
302 }
303
304 /* Run post-processors. */
305
306 switch (gly) {
307 case (ESCAPE_FONT):
308 if (2 == *sz) {
309 if ('C' == **start) {
310 /*
311 * Treat constant-width font modes
312 * just like regular font modes.
313 */
314 (*start)++;
315 (*sz)--;
316 } else {
317 if ('B' == (*start)[0] && 'I' == (*start)[1])
318 gly = ESCAPE_FONTBI;
319 break;
320 }
321 } else if (1 != *sz)
322 break;
323
324 switch (**start) {
325 case ('3'):
326 /* FALLTHROUGH */
327 case ('B'):
328 gly = ESCAPE_FONTBOLD;
329 break;
330 case ('2'):
331 /* FALLTHROUGH */
332 case ('I'):
333 gly = ESCAPE_FONTITALIC;
334 break;
335 case ('P'):
336 gly = ESCAPE_FONTPREV;
337 break;
338 case ('1'):
339 /* FALLTHROUGH */
340 case ('R'):
341 gly = ESCAPE_FONTROMAN;
342 break;
343 }
344 break;
345 case (ESCAPE_SPECIAL):
346 if (1 == *sz && 'c' == **start)
347 gly = ESCAPE_NOSPACE;
348 break;
349 default:
350 break;
351 }
352
353 return(gly);
354 }
355
356 void *
357 mandoc_calloc(size_t num, size_t size)
358 {
359 void *ptr;
360
361 ptr = calloc(num, size);
362 if (NULL == ptr) {
363 perror(NULL);
364 exit((int)MANDOCLEVEL_SYSERR);
365 }
366
408 }
409
410 char *
411 mandoc_strdup(const char *ptr)
412 {
413 char *p;
414
415 p = strdup(ptr);
416 if (NULL == p) {
417 perror(NULL);
418 exit((int)MANDOCLEVEL_SYSERR);
419 }
420
421 return(p);
422 }
423
424 /*
425 * Parse a quoted or unquoted roff-style request or macro argument.
426 * Return a pointer to the parsed argument, which is either the original
427 * pointer or advanced by one byte in case the argument is quoted.
428 * NUL-terminate the argument in place.
429 * Collapse pairs of quotes inside quoted arguments.
430 * Advance the argument pointer to the next argument,
431 * or to the NUL byte terminating the argument line.
432 */
433 char *
434 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
435 {
436 char *start, *cp;
437 int quoted, pairs, white;
438
439 /* Quoting can only start with a new word. */
440 start = *cpp;
441 quoted = 0;
442 if ('"' == *start) {
443 quoted = 1;
444 start++;
445 }
446
447 pairs = 0;
448 white = 0;
449 for (cp = start; '\0' != *cp; cp++) {
450
451 /*
452 * Move the following text left
453 * after quoted quotes and after "\\" and "\t".
454 */
455 if (pairs)
456 cp[-pairs] = cp[0];
457
458 if ('\\' == cp[0]) {
459 /*
460 * In copy mode, translate double to single
461 * backslashes and backslash-t to literal tabs.
462 */
463 switch (cp[1]) {
464 case ('t'):
465 cp[0] = '\t';
466 /* FALLTHROUGH */
467 case ('\\'):
468 pairs++;
469 cp++;
470 break;
471 case (' '):
472 /* Skip escaped blanks. */
473 if (0 == quoted)
474 cp++;
475 break;
476 default:
477 break;
478 }
479 } else if (0 == quoted) {
480 if (' ' == cp[0]) {
481 /* Unescaped blanks end unquoted args. */
482 white = 1;
483 break;
484 }
485 } else if ('"' == cp[0]) {
486 if ('"' == cp[1]) {
487 /* Quoted quotes collapse. */
488 pairs++;
489 cp++;
490 } else {
491 /* Unquoted quotes end quoted args. */
492 quoted = 2;
493 break;
494 }
495 }
496 }
497
498 /* Quoted argument without a closing quote. */
499 if (1 == quoted)
500 mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
501
502 /* NUL-terminate this argument and move to the next one. */
503 if (pairs)
504 cp[-pairs] = '\0';
505 if ('\0' != *cp) {
506 *cp++ = '\0';
507 while (' ' == *cp)
508 cp++;
509 }
510 *pos += (int)(cp - start) + (quoted ? 1 : 0);
511 *cpp = cp;
512
513 if ('\0' == *cp && (white || ' ' == cp[-1]))
514 mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
515
516 return(start);
517 }
518
519 static int
520 a2time(time_t *t, const char *fmt, const char *p)
521 {
522 struct tm tm;
619 /* FALLTHROUGH */
620 case (')'):
621 if (0 == found)
622 enclosed = 1;
623 break;
624 case ('.'):
625 /* FALLTHROUGH */
626 case ('!'):
627 /* FALLTHROUGH */
628 case ('?'):
629 found = 1;
630 break;
631 default:
632 return(found && (!enclosed || isalnum((unsigned char)*q)));
633 }
634 }
635
636 return(found && !enclosed);
637 }
638
639 /*
640 * Convert a string to a long that may not be <0.
641 * If the string is invalid, or is less than 0, return -1.
642 */
643 int
644 mandoc_strntoi(const char *p, size_t sz, int base)
645 {
646 char buf[32];
647 char *ep;
648 long v;
649
650 if (sz > 31)
651 return(-1);
652
653 memcpy(buf, p, sz);
654 buf[(int)sz] = '\0';
655
656 errno = 0;
657 v = strtol(buf, &ep, base);
658
|