Print this page
9083 replace regex implementation with tre


  24  * Use is subject to license terms.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
  30  *      using regcomp(3c), regexec(3c) interfaces. This is an XCU4
  31  *      porting aid. switches out to libgen compile/step if collation
  32  *      table not present.
  33  *
  34  *      Goal is to work with vi and sed/ed.
  35  *      Returns expbuf in dhl format (encoding of first two bytes).
  36  *      Note also that this is profoundly single threaded.  You
  37  *      cannot call compile twice with two separate search strings
  38  *      because the second call will wipe out the earlier stored string.
  39  *      This must be fixed, plus a general cleanup should be performed
  40  *      if this is to be integrated into libc.
  41  *
  42  */
  43 
  44 #pragma ident   "%Z%%M% %I%     %E% SMI"
  45 
  46 #include <stdio.h>
  47 #include <widec.h>
  48 #include <sys/types.h>
  49 #include <regex.h>
  50 #include <locale.h>
  51 #include <stdlib.h>
  52 #include <locale.h>
  53 #include <string.h>
  54 #include <unistd.h>
  55 #include <regexpr.h>
  56 
  57 /*
  58  * pseudo compile/step/advance global variables
  59  */
  60 extern int nbra;
  61 extern char *locs;              /* for stopping excess recursion */
  62 extern char *loc1;              /* 1st character which matched RE */
  63 extern char *loc2;              /* char after last char in matched RE */
  64 extern char *braslist[];        /* start of nbra subexp  */
  65 extern char *braelist[];        /* end of nbra subexp    */


 356 
 357         return (rv == 0);
 358 }
 359 
 360 
 361 /*
 362  *      regerrno to compile/step error mapping:
 363  *      This is really a big compromise.  Some errors don't map at all
 364  *      like regcomp error 15 is generated by both compile() error types
 365  *      44 & 46.  So which one should we map to?
 366  *      Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
 367  *      To do your errors right use xregerr() to get the regcomp error
 368  *      string and print that.
 369  *
 370  * |    regcomp/regexec              |  Compile/step/advance                |
 371  * +---------------------------------+--------------------------------------+
 372  * 0 REG_OK       Pattern matched       1  - Pattern matched
 373  * 1 REG_NOMATCH  No match              0  - Pattern didn't match
 374  * 2 REG_ECOLLATE Bad collation elmnt.  67 - Returned by compile on mbtowc err
 375  * 3 REG_EESCAPE  trailing \ in patrn   45 - } expected after \.
 376  * 4 REG_ENEWLINE \n before end pattrn  36 - Illegal or missing delimiter.
 377  * 5 REG_ENSUB    Over 9 \( \) pairs    43 - Too many \(
 378  * 6 REG_ESUBREG  Bad number in \[0-9]  25 - ``\digit'' out of range.
 379  * 7 REG_EBRACK   [ ] imbalance         49 - [ ] imbalance.
 380  * 8 REG_EPAREN   ( ) imbalance         42 - \(~\) imbalance.
 381  * 9 REG_EBRACE   \{ \} imbalance       45 - } expected after \.
 382  * 10 REG_ERANGE  bad range endpoint    11 - Range endpoint too large.
 383  * 11 REG_ESPACE  no memory for pattern 50 - Regular expression overflow.
 384  * 12 REG_BADRPT  invalid repetition    36 - Illegal or missing delimiter.
 385  * 13 REG_ECTYPE  invalid char-class    67 - illegal byte sequence
 386  * 14 REG_BADPAT  syntax error          50 - Regular expression overflow.
 387  * 15 REG_BADBR   \{ \} contents bad    46 - First number exceeds 2nd in \{~\}
 388  * 16 REG_EFATAL  internal error        50 - Regular expression overflow.
 389  * 17 REG_ECHAR   bad multibyte char    67 - illegal byte sequence
 390  * 18 REG_STACK   stack overflow        50 - Regular expression overflow.
 391  * 19 REG_ENOSYS  function not supported 50- Regular expression overflow.
 392  *
 393  *      For reference here's the compile/step errno's. We don't generate
 394  *      41 here - it's done earlier, nor 44 since we can't tell it from 46.
 395  *
 396  *      11 - Range endpoint too large.
 397  *      16 - Bad number.
 398  *      25 - ``\digit'' out of range.
 399  *      36 - Illegal or missing delimiter.
 400  *      41 - No remembered search string.
 401  *      42 - \(~\) imbalance.
 402  *      43 - Too many \(.
 403  *      44 - More than 2 numbers given in "\{~\}"
 404  *      45 - } expected after \.
 405  *      46 - First number exceeds 2nd in "\{~\}"
 406  *      49 - [ ] imbalance.
 407  *      50 - Regular expression overflow.
 408  */
 409 
 410 static int
 411 map_errnos(int Errno)      /* Errno: a regcomp/regexec REG_* code */
 412 {
 413         switch (Errno) {   /* store the compile/step code in regerrno */
 414         case REG_ECOLLATE:
 415                 regerrno = 67;     /* reported by compile on mbtowc error */
 416                 break;
 417         case REG_EESCAPE:
 418                 regerrno = 45;     /* } expected after \. */
 419                 break;
 420         case REG_ENEWLINE:
 421                 regerrno = 36;     /* Illegal or missing delimiter */
 422                 break;
 423         case REG_ENSUB:
 424                 regerrno = 43;     /* Too many \( */
 425                 break;
 426         case REG_ESUBREG:
 427                 regerrno = 25;     /* ``\digit'' out of range */
 428                 break;
 429         case REG_EBRACK:
 430                 regerrno = 49;     /* [ ] imbalance */
 431                 break;
 432         case REG_EPAREN:
 433                 regerrno = 42;     /* \(~\) imbalance */
 434                 break;
 435         case REG_EBRACE:
 436                 regerrno = 45;     /* } expected after \. */
 437                 break;
 438         case REG_ERANGE:
 439                 regerrno = 11;     /* Range endpoint too large */
 440                 break;
 441         case REG_ESPACE:
 442                 regerrno = 50;     /* Regular expression overflow */
 443                 break;
 444         case REG_BADRPT:
 445                 regerrno = 36;     /* Illegal or missing delimiter */
 446                 break;
 447         case REG_ECTYPE:
 448                 regerrno = 67;     /* illegal byte sequence */
 449                 break;
 450         case REG_BADPAT:
 451                 regerrno = 50;     /* Regular expression overflow */
 452                 break;
 453         case REG_BADBR:
 454                 regerrno = 46;     /* First number exceeds 2nd; 44 is never produced */
 455                 break;
 456         case REG_EFATAL:
 457                 regerrno = 50;     /* Regular expression overflow */
 458                 break;
 459         case REG_ECHAR:
 460                 regerrno = 67;     /* illegal byte sequence */
 461                 break;
 462         case REG_STACK:
 463                 regerrno = 50;     /* Regular expression overflow */
 464                 break;
 465         case REG_ENOSYS:
 466                 regerrno = 50;     /* Regular expression overflow */
 467                 break;
 468         default:
 469                 regerrno = 50;     /* any code not listed above also maps to 50 */
 470                 break;
 471         }
 472         return (regerrno);         /* the value just stored is also the return value */
 473 }
 474 
 475 /*
 476  *  This is a routine to clean up the subtle substructure of the struct
 477  *  regex_comp type for use by clients of this module.  Since the struct
 478  *  type is private, we use a generic interface, and trust the
 479  *  application to be damn sure that this operation is valid for the
 480  *  named memory.
 481  */
 482 
 483 void
 484 regex_comp_free(void * a)
 485 {
 486         /*
 487          * Free any data being held for previous search strings


  24  * Use is subject to license terms.
  25  * Copyright (c) 2016 by Delphix. All rights reserved.
  26  */
  27 
  28 /*
  29  * xcompile, xstep, xadvance - simulate compile(3g), step(3g), advance(3g)
  30  *      using regcomp(3c), regexec(3c) interfaces. This is an XCU4
  31  *      porting aid. switches out to libgen compile/step if collation
  32  *      table not present.
  33  *
  34  *      Goal is to work with vi and sed/ed.
  35  *      Returns expbuf in dhl format (encoding of first two bytes).
  36  *      Note also that this is profoundly single threaded.  You
  37  *      cannot call compile twice with two separate search strings
  38  *      because the second call will wipe out the earlier stored string.
  39  *      This must be fixed, plus a general cleanup should be performed
  40  *      if this is to be integrated into libc.
  41  *
  42  */
  43 


  44 #include <stdio.h>
  45 #include <widec.h>
  46 #include <sys/types.h>
  47 #include <regex.h>
  48 #include <locale.h>
  49 #include <stdlib.h>
  50 #include <locale.h>
  51 #include <string.h>
  52 #include <unistd.h>
  53 #include <regexpr.h>
  54 
  55 /*
  56  * pseudo compile/step/advance global variables
  57  */
  58 extern int nbra;
  59 extern char *locs;              /* for stopping excess recursion */
  60 extern char *loc1;              /* 1st character which matched RE */
  61 extern char *loc2;              /* char after last char in matched RE */
  62 extern char *braslist[];        /* start of nbra subexp  */
  63 extern char *braelist[];        /* end of nbra subexp    */


 354 
 355         return (rv == 0);
 356 }
 357 
 358 
 359 /*
 360  *      regerrno to compile/step error mapping:
 361  *      This is really a big compromise.  Some errors don't map at all
 362  *      like regcomp error 15 is generated by both compile() error types
 363  *      44 & 46.  So which one should we map to?
 364  *      Note REG_ESUB Can't happen- 9 is no longer max num of subexpressions
 365  *      To do your errors right use xregerr() to get the regcomp error
 366  *      string and print that.
 367  *
 368  * |    regcomp/regexec              |  Compile/step/advance                |
 369  * +---------------------------------+--------------------------------------+
 370  * 0 REG_OK       Pattern matched       1  - Pattern matched
 371  * 1 REG_NOMATCH  No match              0  - Pattern didn't match
 372  * 2 REG_ECOLLATE Bad collation elmnt.  67 - Returned by compile on mbtowc err
 373  * 3 REG_EESCAPE  trailing \ in patrn   45 - } expected after \.


 374  * 6 REG_ESUBREG  Bad number in \[0-9]  25 - ``\digit'' out of range.
 375  * 7 REG_EBRACK   [ ] imbalance         49 - [ ] imbalance.
 376  * 8 REG_EPAREN   ( ) imbalance         42 - \(~\) imbalance.
 377  * 9 REG_EBRACE   \{ \} imbalance       45 - } expected after \.
 378  * 10 REG_ERANGE  bad range endpoint    11 - Range endpoint too large.
 379  * 11 REG_ESPACE  no memory for pattern 50 - Regular expression overflow.
 380  * 12 REG_BADRPT  invalid repetition    36 - Illegal or missing delimiter.
 381  * 13 REG_ECTYPE  invalid char-class    67 - illegal byte sequence
 382  * 14 REG_BADPAT  syntax error          50 - Regular expression overflow.
 383  * 15 REG_BADBR   \{ \} contents bad    46 - First number exceeds 2nd in \{~\}

 384  * 17 REG_ECHAR   bad multibyte char    67 - illegal byte sequence


 385  *
 386  *      For reference here's the compile/step errno's. We don't generate
 387  *      41 here - it's done earlier, nor 44 since we can't tell it from 46.
 388  *
 389  *      11 - Range endpoint too large.
 390  *      16 - Bad number.
 391  *      25 - ``\digit'' out of range.
 392  *      36 - Illegal or missing delimiter.
 393  *      41 - No remembered search string.
 394  *      42 - \(~\) imbalance.
 395  *      43 - Too many \(.
 396  *      44 - More than 2 numbers given in "\{~\}"
 397  *      45 - } expected after \.
 398  *      46 - First number exceeds 2nd in "\{~\}"
 399  *      49 - [ ] imbalance.
 400  *      50 - Regular expression overflow.
 401  */
 402 
 403 static int
 404 map_errnos(int Errno)      /* Errno: a regcomp/regexec REG_* code */
 405 {
 406         switch (Errno) {   /* store the compile/step code in regerrno */
 407         case REG_ECOLLATE:
 408                 regerrno = 67;     /* reported by compile on mbtowc error */
 409                 break;
 410         case REG_EESCAPE:
 411                 regerrno = 45;     /* } expected after \. */
 412                 break;






 413         case REG_ESUBREG:
 414                 regerrno = 25;     /* ``\digit'' out of range */
 415                 break;
 416         case REG_EBRACK:
 417                 regerrno = 49;     /* [ ] imbalance */
 418                 break;
 419         case REG_EPAREN:
 420                 regerrno = 42;     /* \(~\) imbalance */
 421                 break;
 422         case REG_EBRACE:
 423                 regerrno = 45;     /* } expected after \. */
 424                 break;
 425         case REG_ERANGE:
 426                 regerrno = 11;     /* Range endpoint too large */
 427                 break;
 428         case REG_ESPACE:
 429                 regerrno = 50;     /* Regular expression overflow */
 430                 break;
 431         case REG_BADRPT:
 432                 regerrno = 36;     /* Illegal or missing delimiter */
 433                 break;
 434         case REG_ECTYPE:
 435                 regerrno = 67;     /* illegal byte sequence */
 436                 break;
 437         case REG_BADPAT:
 438                 regerrno = 50;     /* Regular expression overflow */
 439                 break;
 440         case REG_BADBR:
 441                 regerrno = 46;     /* First number exceeds 2nd; 44 is never produced */
 442                 break;



 443         case REG_ECHAR:
 444                 regerrno = 67;     /* illegal byte sequence */
 445                 break;






 446         default:
 447                 regerrno = 50;     /* any code not listed above also maps to 50 */
 448                 break;
 449         }
 450         return (regerrno);         /* the value just stored is also the return value */
 451 }
 452 
 453 /*
 454  *  This is a routine to clean up the subtle substructure of the struct
 455  *  regex_comp type for use by clients of this module.  Since the struct
 456  *  type is private, we use a generic interface, and trust the
 457  *  application to be damn sure that this operation is valid for the
 458  *  named memory.
 459  */
 460 
 461 void
 462 regex_comp_free(void * a)
 463 {
 464         /*
 465          * Free any data being held for previous search strings