24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
30 * using regcomp(3c), regexec(3c) interfaces. This is an XCU4
31 * porting aid. switches out to libgen compile/step if collation
32 * table not present.
33 *
34 * Goal is to work with vi and sed/ed.
35 * Returns expbuf in dhl format (encoding of first two bytes).
36 * Note also that this is profoundly single threaded. You
37 * cannot call compile twice with two separate search strings
38 * because the second call will wipe out the earlier stored string.
39 * This must be fixed, plus a general cleanup should be performed
40 * if this is to be integrated into libc.
41 *
42 */
43
44 #pragma ident "%Z%%M% %I% %E% SMI"
45
46 #include <stdio.h>
47 #include <widec.h>
48 #include <sys/types.h>
49 #include <regex.h>
50 #include <locale.h>
51 #include <stdlib.h>
52 #include <locale.h>
53 #include <string.h>
54 #include <unistd.h>
55 #include <regexpr.h>
56
57 /*
58 * pseudo compile/step/advance global variables
59 */
60 extern int nbra; /* presumably the \( \) subexpression count - TODO confirm in regexpr.h */
61 extern char *locs; /* for stopping excess recursion */
62 extern char *loc1; /* 1st character which matched RE */
63 extern char *loc2; /* char after last char in matched RE */
64 extern char *braslist[]; /* start of nbra subexp */
65 extern char *braelist[]; /* end of nbra subexp */
356
357 return (rv == 0);
358 }
359
360
361 /*
362 * regerrno to compile/step error mapping:
363 * This is really a big compromise. Some errors don't map at all
364 * like regcomp error 15 is generated by both compile() error types
365 * 44 & 46. So which one should we map to?
366 * Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
367 * To do your errors right use xregerr() to get the regcomp error
368 * string and print that.
369 *
370 * | regcomp/regexec | Compile/step/advance |
371 * +---------------------------------+--------------------------------------+
372 * 0 REG_OK Pattern matched 1 - Pattern matched
373 * 1 REG_NOMATCH No match 0 - Pattern didn't match
374 * 2 REG_ECOLLATE Bad collation elmnt. 67 - Returned by compile on mbtowc err
375 * 3 REG_EESCAPE trailing \ in patrn 45 - } expected after \.
376 * 4 REG_ENEWLINE \n before end pattrn 36 - Illegal or missing delimiter.
377 * 5 REG_ENSUB Over 9 \( \) pairs 43 - Too many \(
378 * 6 REG_ESUBREG Bad number in \[0-9] 25 - ``\digit'' out of range.
379 * 7 REG_EBRACK [ ] imbalance 49 - [ ] imbalance.
380 * 8 REG_EPAREN ( ) imbalance 42 - \(~\) imbalance.
381 * 9 REG_EBRACE \{ \} imbalance 45 - } expected after \.
382 * 10 REG_ERANGE bad range endpoint 11 - Range endpoint too large.
383 * 11 REG_ESPACE no memory for pattern 50 - Regular expression overflow.
384 * 12 REG_BADRPT invalid repetition 36 - Illegal or missing delimiter.
385 * 13 REG_ECTYPE invalid char-class 67 - illegal byte sequence
386 * 14 REG_BADPAT syntax error 50 - Regular expression overflow.
387 * 15 REG_BADBR \{ \} contents bad 46 - First number exceeds 2nd in \{~\}
388 * 16 REG_EFATAL internal error 50 - Regular expression overflow.
389 * 17 REG_ECHAR bad multibyte char 67 - illegal byte sequence
390 * 18 REG_STACK stack overflow 50 - Regular expression overflow.
391 * 19 REG_ENOSYS function not supported 50- Regular expression overflow.
392 *
393 * For reference here's the compile/step errno's. We don't generate
394 * 41 here - it's done earlier, nor 44 since we can't tell it from 46.
395 *
396 * 11 - Range endpoint too large.
397 * 16 - Bad number.
398 * 25 - ``\digit'' out of range.
399 * 36 - Illegal or missing delimiter.
400 * 41 - No remembered search string.
401 * 42 - \(~\) imbalance.
402 * 43 - Too many \(.
403 * 44 - More than 2 numbers given in "\{~\}"
404 * 45 - } expected after \.
405 * 46 - First number exceeds 2nd in "\{~\}"
406 * 49 - [ ] imbalance.
407 * 50 - Regular expression overflow.
408 */
409
410 static int
411 map_errnos(int Errno)
412 {
413 switch (Errno) {
414 case REG_ECOLLATE:
415 regerrno = 67;
416 break;
417 case REG_EESCAPE:
418 regerrno = 45;
419 break;
420 case REG_ENEWLINE:
421 regerrno = 36;
422 break;
423 case REG_ENSUB:
424 regerrno = 43;
425 break;
426 case REG_ESUBREG:
427 regerrno = 25;
428 break;
429 case REG_EBRACK:
430 regerrno = 49;
431 break;
432 case REG_EPAREN:
433 regerrno = 42;
434 break;
435 case REG_EBRACE:
436 regerrno = 45;
437 break;
438 case REG_ERANGE:
439 regerrno = 11;
440 break;
441 case REG_ESPACE:
442 regerrno = 50;
443 break;
444 case REG_BADRPT:
445 regerrno = 36;
446 break;
447 case REG_ECTYPE:
448 regerrno = 67;
449 break;
450 case REG_BADPAT:
451 regerrno = 50;
452 break;
453 case REG_BADBR:
454 regerrno = 46;
455 break;
456 case REG_EFATAL:
457 regerrno = 50;
458 break;
459 case REG_ECHAR:
460 regerrno = 67;
461 break;
462 case REG_STACK:
463 regerrno = 50;
464 break;
465 case REG_ENOSYS:
466 regerrno = 50;
467 break;
468 default:
469 regerrno = 50;
470 break;
471 }
472 return (regerrno);
473 }
474
475 /*
476 * This is a routine to clean up the subtle substructure of the struct
477 * regex_comp type for use by clients of this module. Since the struct
478 * type is private, we use a generic interface, and trust the
479 * application to be damn sure that this operation is valid for the
480 * named memory.
481 */
482
483 void
484 regex_comp_free(void * a)
485 {
486 /*
487 * Free any data being held for previous search strings
|
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 */
27
28 /*
29 * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
30 * using regcomp(3c), regexec(3c) interfaces. This is an XCU4
31 * porting aid. switches out to libgen compile/step if collation
32 * table not present.
33 *
34 * Goal is to work with vi and sed/ed.
35 * Returns expbuf in dhl format (encoding of first two bytes).
36 * Note also that this is profoundly single threaded. You
37 * cannot call compile twice with two separate search strings
38 * because the second call will wipe out the earlier stored string.
39 * This must be fixed, plus a general cleanup should be performed
40 * if this is to be integrated into libc.
41 *
42 */
43
44 #include <stdio.h>
45 #include <widec.h>
46 #include <sys/types.h>
47 #include <regex.h>
48 #include <locale.h>
49 #include <stdlib.h>
50 #include <locale.h>
51 #include <string.h>
52 #include <unistd.h>
53 #include <regexpr.h>
54
55 /*
56 * pseudo compile/step/advance global variables
57 */
58 extern int nbra; /* presumably the \( \) subexpression count - TODO confirm in regexpr.h */
59 extern char *locs; /* for stopping excess recursion */
60 extern char *loc1; /* 1st character which matched RE */
61 extern char *loc2; /* char after last char in matched RE */
62 extern char *braslist[]; /* start of nbra subexp */
63 extern char *braelist[]; /* end of nbra subexp */
354
355 return (rv == 0);
356 }
357
358
359 /*
360 * regerrno to compile/step error mapping:
361 * This is really a big compromise. Some errors don't map at all
362 * like regcomp error 15 is generated by both compile() error types
363 * 44 & 46. So which one should we map to?
364 * Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
365 * To do your errors right use xregerr() to get the regcomp error
366 * string and print that.
367 *
368 * | regcomp/regexec | Compile/step/advance |
369 * +---------------------------------+--------------------------------------+
370 * 0 REG_OK Pattern matched 1 - Pattern matched
371 * 1 REG_NOMATCH No match 0 - Pattern didn't match
372 * 2 REG_ECOLLATE Bad collation elmnt. 67 - Returned by compile on mbtowc err
373 * 3 REG_EESCAPE trailing \ in patrn 45 - } expected after \.
374 * 6 REG_ESUBREG Bad number in \[0-9] 25 - ``\digit'' out of range.
375 * 7 REG_EBRACK [ ] imbalance 49 - [ ] imbalance.
376 * 8 REG_EPAREN ( ) imbalance 42 - \(~\) imbalance.
377 * 9 REG_EBRACE \{ \} imbalance 45 - } expected after \.
378 * 10 REG_ERANGE bad range endpoint 11 - Range endpoint too large.
379 * 11 REG_ESPACE no memory for pattern 50 - Regular expression overflow.
380 * 12 REG_BADRPT invalid repetition 36 - Illegal or missing delimiter.
381 * 13 REG_ECTYPE invalid char-class 67 - illegal byte sequence
382 * 14 REG_BADPAT syntax error 50 - Regular expression overflow.
383 * 15 REG_BADBR \{ \} contents bad 46 - First number exceeds 2nd in \{~\}
384 * 17 REG_ECHAR bad multibyte char 67 - illegal byte sequence
385 *
386 * For reference here's the compile/step errno's. We don't generate
387 * 41 here - it's done earlier, nor 44 since we can't tell it from 46.
388 *
389 * 11 - Range endpoint too large.
390 * 16 - Bad number.
391 * 25 - ``\digit'' out of range.
392 * 36 - Illegal or missing delimiter.
393 * 41 - No remembered search string.
394 * 42 - \(~\) imbalance.
395 * 43 - Too many \(.
396 * 44 - More than 2 numbers given in "\{~\}"
397 * 45 - } expected after \.
398 * 46 - First number exceeds 2nd in "\{~\}"
399 * 49 - [ ] imbalance.
400 * 50 - Regular expression overflow.
401 */
402
403 static int
404 map_errnos(int Errno)
405 {
406 switch (Errno) {
407 case REG_ECOLLATE:
408 regerrno = 67;
409 break;
410 case REG_EESCAPE:
411 regerrno = 45;
412 break;
413 case REG_ESUBREG:
414 regerrno = 25;
415 break;
416 case REG_EBRACK:
417 regerrno = 49;
418 break;
419 case REG_EPAREN:
420 regerrno = 42;
421 break;
422 case REG_EBRACE:
423 regerrno = 45;
424 break;
425 case REG_ERANGE:
426 regerrno = 11;
427 break;
428 case REG_ESPACE:
429 regerrno = 50;
430 break;
431 case REG_BADRPT:
432 regerrno = 36;
433 break;
434 case REG_ECTYPE:
435 regerrno = 67;
436 break;
437 case REG_BADPAT:
438 regerrno = 50;
439 break;
440 case REG_BADBR:
441 regerrno = 46;
442 break;
443 case REG_ECHAR:
444 regerrno = 67;
445 break;
446 default:
447 regerrno = 50;
448 break;
449 }
450 return (regerrno);
451 }
452
453 /*
454 * This is a routine to clean up the subtle substructure of the struct
455 * regex_comp type for use by clients of this module. Since the struct
456 * type is private, we use a generic interface, and trust the
457 * application to be damn sure that this operation is valid for the
458 * named memory.
459 */
460
461 void
462 regex_comp_free(void * a)
463 {
464 /*
465 * Free any data being held for previous search strings
|