Print this page
9083 replace regex implementation with tre
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/man/man3c/regcomp.3c
+++ new/usr/src/man/man3c/regcomp.3c
1 1 .\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
2 2 .\" Copyright (c) 1992, 1993, 1994
3 3 .\" The Regents of the University of California. All rights reserved.
4 4 .\"
5 5 .\" This code is derived from software contributed to Berkeley by
6 6 .\" Henry Spencer.
7 7 .\"
8 8 .\" Redistribution and use in source and binary forms, with or without
9 9 .\" modification, are permitted provided that the following conditions
10 10 .\" are met:
11 11 .\" 1. Redistributions of source code must retain the above copyright
12 12 .\" notice, this list of conditions and the following disclaimer.
13 13 .\" 2. Redistributions in binary form must reproduce the above copyright
14 14 .\" notice, this list of conditions and the following disclaimer in the
15 15 .\" documentation and/or other materials provided with the distribution.
16 16 .\" 3. Neither the name of the University nor the names of its contributors
17 17 .\" may be used to endorse or promote products derived from this software
18 18 .\" without specific prior written permission.
19 19 .\"
20 20 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 21 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 22 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 23 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 24 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 25 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 26 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 27 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 28 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 29 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 30 .\" SUCH DAMAGE.
31 31 .\"
32 32 .\"
33 33 .\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission
34 34 .\" to reproduce portions of its copyrighted documentation.
35 35 .\"
36 36 .\" Original documentation from The Open Group can be obtained online at
37 37 .\" http://www.opengroup.org/bookstore/.
38 38 .\"
39 39 .\" The Institute of Electrical and Electronics Engineers and The Open Group,
40 40 .\" have given us permission to reprint portions of their documentation. In the
41 41 .\" following statement, the phrase "this text" refers to portions of the system
42 42 .\" documentation.
43 43 .\"
44 44 .\" Portions of this text are reprinted and reproduced in electronic form in the
45 45 .\" Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for
46 46 .\" Information Technology -- Portable Operating System Interface (POSIX),
47 47 .\" The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the
48 48 .\" Institute of Electrical and Electronics Engineers, Inc and The Open Group.
49 49 .\"
50 50 .\" In the event of any discrepancy between these versions and the original
51 51 .\" IEEE and The Open Group Standard, the original IEEE and The Open Group
52 52 .\" Standard is the referee document.
53 53 .\"
54 54 .\" The original Standard can be obtained online at
55 55 .\" http://www.opengroup.org/unix/online.html.
56 56 .\"
57 57 .\" This notice shall appear on any product containing this material.
58 58 .\"
59 59 .\" The contents of this file are subject to the terms of the
60 60 .\" Common Development and Distribution License (the "License").
61 61 .\" You may not use this file except in compliance with the License.
62 62 .\"
63 63 .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
64 64 .\" or http://www.opensolaris.org/os/licensing.
65 65 .\" See the License for the specific language governing permissions
66 66 .\" and limitations under the License.
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
67 67 .\"
68 68 .\" When distributing Covered Code, include this CDDL HEADER in each
69 69 .\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
70 70 .\" If applicable, add the following below this CDDL HEADER, with the
71 71 .\" fields enclosed by brackets "[]" replaced with your own identifying
72 72 .\" information: Portions Copyright [yyyy] [name of copyright owner]
73 73 .\"
74 74 .\"
75 75 .\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved.
76 76 .\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved.
77 -.\" Copyright 2017 Nexenta Systems, Inc.
77 +.\" Copyright 2018 Nexenta Systems, Inc.
78 78 .\"
79 -.Dd June 14, 2017
79 +.Dd February 3, 2018
80 80 .Dt REGCOMP 3C
81 81 .Os
82 82 .Sh NAME
83 83 .Nm regcomp ,
84 84 .Nm regexec ,
85 85 .Nm regerror ,
86 86 .Nm regfree
87 87 .Nd regular-expression library
88 88 .Sh LIBRARY
89 89 .Lb libc
90 90 .Sh SYNOPSIS
91 91 .In regex.h
92 92 .Ft int
93 93 .Fo regcomp
94 94 .Fa "regex_t *restrict preg" "const char *restrict pattern" "int cflags"
95 95 .Fc
96 96 .Ft int
97 97 .Fo regexec
98 98 .Fa "const regex_t *restrict preg" "const char *restrict string"
99 99 .Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags"
100 100 .Fc
101 101 .Ft size_t
102 102 .Fo regerror
103 103 .Fa "int errcode" "const regex_t *restrict preg"
104 104 .Fa "char *restrict errbuf" "size_t errbuf_size"
105 105 .Fc
106 106 .Ft void
107 107 .Fn regfree "regex_t *preg"
108 108 .Sh DESCRIPTION
109 109 These routines implement
110 110 .St -p1003.2
111 111 regular expressions; see
112 112 .Xr regex 5 .
113 113 The
114 114 .Fn regcomp
115 115 function compiles an RE written as a string into an internal form,
116 116 .Fn regexec
117 117 matches that internal form against a string and reports results,
118 118 .Fn regerror
119 119 transforms error codes from either into human-readable messages,
120 120 and
121 121 .Fn regfree
122 122 frees any dynamically-allocated storage used by the internal form
123 123 of an RE.
124 124 .Pp
125 125 The header
126 126 .In regex.h
127 127 declares two structure types,
128 128 .Ft regex_t
129 129 and
130 130 .Ft regmatch_t ,
131 131 the former for compiled internal forms and the latter for match reporting.
132 132 It also declares the four functions, a type
133 133 .Ft regoff_t ,
134 134 and a number of constants with names starting with
135 135 .Qq Dv REG_ .
136 136 .Ss Fn regcomp
137 137 The
138 138 .Fn regcomp
139 139 function compiles the regular expression contained in the
140 140 .Fa pattern
141 141 string, subject to the flags in
142 142 .Fa cflags ,
143 143 and places the results in the
144 144 .Ft regex_t
145 145 structure pointed to by
146 146 .Fa preg .
147 147 The
148 148 .Fa cflags
149 149 argument is the bitwise OR of zero or more of the following flags:
150 150 .Bl -tag -width REG_EXTENDED
151 151 .It Dv REG_EXTENDED
152 152 Compile extended regular expressions
153 153 .Pq EREs ,
154 154 rather than the basic regular expressions
155 155 .Pq BREs
156 156 that are the default.
157 157 .It Dv REG_BASIC
158 158 This is a synonym for 0, provided as a counterpart to
159 159 .Dv REG_EXTENDED
160 160 to improve readability.
161 161 .It Dv REG_NOSPEC
162 162 Compile with recognition of all special characters turned off.
↓ open down ↓ |
73 lines elided |
↑ open up ↑ |
163 163 All characters are thus considered ordinary, so the RE is a literal string.
164 164 This is an extension, compatible with but not specified by
165 165 .St -p1003.2 ,
166 166 and should be used with caution in software intended to be portable to other
167 167 systems.
168 168 .Dv REG_EXTENDED
169 169 and
170 170 .Dv REG_NOSPEC
171 171 may not be used in the same call to
172 172 .Fn regcomp .
173 +.It Dv REG_LITERAL
174 +An alias of
175 +.Dv REG_NOSPEC .
173 176 .It Dv REG_ICASE
174 177 Compile for matching that ignores upper/lower case distinctions.
175 178 See
176 179 .Xr regex 5 .
177 180 .It Dv REG_NOSUB
178 181 Compile for matching that need only report success or failure,
179 182 not what was matched.
180 183 .It Dv REG_NEWLINE
181 184 Compile for newline-sensitive matching.
182 185 By default, newline is a completely ordinary character with no special
183 186 meaning in either REs or strings.
184 187 With this flag,
185 188 .Qq [^
186 189 bracket expressions and
187 190 .Qq \&.
188 191 never match newline,
189 192 a
190 193 .Qq \&^
191 194 anchor matches the null string after any newline in the string in addition to
192 195 its normal function, and the
193 196 .Qq \&$
194 197 anchor matches the null string before any newline in the string in addition to
195 198 its normal function.
196 199 .It Dv REG_PEND
197 200 The regular expression ends, not at the first NUL, but just before the character
198 201 pointed to by the
199 202 .Va re_endp
200 203 member of the structure pointed to by
201 204 .Fa preg .
202 205 The
203 206 .Va re_endp
204 207 member is of type
205 208 .Vt "const char *" .
206 209 This flag permits inclusion of NULs in the RE; they are considered ordinary
207 210 characters.
208 211 This is an extension, compatible with but not specified by
209 212 .St -p1003.2 ,
210 213 and should be used with caution in software intended to be portable to other
211 214 systems.
212 215 .El
213 216 .Pp
214 217 When successful,
215 218 .Fn regcomp
216 219 returns 0 and fills in the structure pointed to by
217 220 .Fa preg .
218 221 One member of that structure
219 222 .Po other than
220 223 .Va re_endp
221 224 .Pc
222 225 is publicized:
223 226 .Va re_nsub ,
224 227 of type
225 228 .Ft size_t ,
226 229 contains the number of parenthesized subexpressions within the RE
227 230 .Po except that the value of this member is undefined if the
228 231 .Dv REG_NOSUB
229 232 flag was used
230 233 .Pc .
231 234 .Ss Fn regexec
232 235 The
233 236 .Fn regexec
234 237 function matches the compiled RE pointed to by
235 238 .Fa preg
236 239 against the
237 240 .Fa string ,
238 241 subject to the flags in
239 242 .Fa eflags ,
240 243 and reports results using
241 244 .Fa nmatch ,
242 245 .Fa pmatch ,
243 246 and the returned value.
244 247 The RE must have been compiled by a previous invocation of
245 248 .Fn regcomp .
246 249 The compiled form is not altered during execution of
247 250 .Fn regexec ,
248 251 so a single compiled RE can be used simultaneously by multiple threads.
249 252 .Pp
250 253 By default, the NUL-terminated string pointed to by
251 254 .Fa string
252 255 is considered to be the text of an entire line, minus any terminating
253 256 newline.
254 257 The
255 258 .Fa eflags
256 259 argument is the bitwise OR of zero or more of the following flags:
257 260 .Bl -tag -width REG_STARTEND
258 261 .It Dv REG_NOTBOL
259 262 The first character of the string is treated as the continuation
260 263 of a line.
261 264 This means that the anchors
262 265 .Qq \&^ ,
263 266 .Qq [[:<:]] ,
264 267 and
265 268 .Qq \e<
266 269 do not match before it; but see
267 270 .Dv REG_STARTEND
268 271 below.
269 272 This does not affect the behavior of newlines under
270 273 .Dv REG_NEWLINE .
271 274 .It Dv REG_NOTEOL
272 275 The NUL terminating the string does not end a line, so the
273 276 .Qq \&$
274 277 anchor does not match before it.
275 278 This does not affect the behavior of newlines under
276 279 .Dv REG_NEWLINE .
277 280 .It Dv REG_STARTEND
278 281 The string is considered to start at
279 282 .Fa string No +
280 283 .Fa pmatch Ns [0]. Ns Fa rm_so
281 284 and to end before the byte located at
282 285 .Fa string No +
283 286 .Fa pmatch Ns [0]. Ns Fa rm_eo ,
284 287 regardless of the value of
285 288 .Fa nmatch .
286 289 See below for the definition of
287 290 .Fa pmatch
288 291 and
289 292 .Fa nmatch .
290 293 This is an extension, compatible with but not specified by
291 294 .St -p1003.2 ,
292 295 and should be used with caution in software intended to be portable to other
293 296 systems.
294 297 .Pp
295 298 Without
296 299 .Dv REG_NOTBOL ,
297 300 the position
298 301 .Fa rm_so
299 302 is considered the beginning of a line, such that
300 303 .Qq \&^
301 304 matches before it, and the beginning of a word if there is a word character at
302 305 this position, such that
303 306 .Qq [[:<:]]
304 307 and
305 308 .Qq \e<
306 309 match before it.
307 310 .Pp
308 311 With
309 312 .Dv REG_NOTBOL ,
310 313 the character at position
311 314 .Fa rm_so
312 315 is treated as the continuation of a line, and if
313 316 .Fa rm_so
314 317 is greater than 0, the preceding character is taken into consideration.
315 318 If the preceding character is a newline and the regular expression was compiled
316 319 with
317 320 .Dv REG_NEWLINE ,
318 321 .Qq ^
319 322 matches before the string; if the preceding character is not a word character
320 323 but the string starts with a word character,
321 324 .Qq [[:<:]]
322 325 and
323 326 .Qq \e<
324 327 match before the string.
325 328 .El
326 329 .Pp
327 330 See
328 331 .Xr regex 5
329 332 for a discussion of what is matched in situations where an RE or a portion
330 333 thereof could match any of several substrings of
331 334 .Fa string .
332 335 .Pp
333 336 If
334 337 .Dv REG_NOSUB
335 338 was specified in the compilation of the RE, or if
336 339 .Fa nmatch
337 340 is 0,
338 341 .Fn regexec
339 342 ignores the
340 343 .Fa pmatch
341 344 argument
342 345 .Po but see below for the case where
343 346 .Dv REG_STARTEND
344 347 is specified
345 348 .Pc .
346 349 Otherwise,
347 350 .Fa pmatch
348 351 points to an array of
349 352 .Fa nmatch
350 353 structures of type
351 354 .Ft regmatch_t .
352 355 Such a structure has at least the members
353 356 .Va rm_so
354 357 and
355 358 .Va rm_eo ,
356 359 both of type
357 360 .Ft regoff_t
358 361 .Po a signed arithmetic type at least as large as an
359 362 .Ft off_t
360 363 and a
361 364 .Ft ssize_t
362 365 .Pc ,
363 366 containing respectively the offset of the first character of a substring
364 367 and the offset of the first character after the end of the substring.
365 368 Offsets are measured from the beginning of the
366 369 .Fa string
367 370 argument given to
368 371 .Fn regexec .
369 372 An empty substring is denoted by equal offsets, both indicating the character
370 373 following the empty substring.
371 374 .Pp
372 375 The 0th member of the
373 376 .Fa pmatch
374 377 array is filled in to indicate what substring of
375 378 .Fa string
376 379 was matched by the entire RE.
377 380 Remaining members report what substring was matched by parenthesized
378 381 subexpressions within the RE; member
379 382 .Va i
380 383 reports subexpression
381 384 .Va i ,
382 385 with subexpressions counted
383 386 .Pq starting at 1
384 387 by the order of their opening parentheses in the RE, left to right.
385 388 Unused entries in the array
386 389 .Po corresponding either to subexpressions that did not participate in the match
387 390 at all, or to subexpressions that do not exist in the RE
388 391 .Po that is,
389 392 .Va i
390 393 >
391 394 .Fa preg Ns -> Ns Va re_nsub
392 395 .Pc
393 396 .Pc
394 397 have both
395 398 .Va rm_so
396 399 and
397 400 .Va rm_eo
398 401 set to -1.
399 402 If a subexpression participated in the match several times,
400 403 the reported substring is the last one it matched.
401 404 .Po Note, as an example in particular, that when the RE
402 405 .Qq (b*)+
403 406 matches
404 407 .Qq bbb ,
405 408 the parenthesized subexpression matches each of the three
406 409 .So Li b Sc Ns s
407 410 and then an infinite number of empty strings following the last
408 411 .Qq b ,
409 412 so the reported substring is one of the empties.
410 413 .Pc
411 414 .Pp
412 415 If
413 416 .Dv REG_STARTEND
414 417 is specified,
415 418 .Fa pmatch
416 419 must point to at least one
417 420 .Ft regmatch_t
418 421 .Po even if
419 422 .Fa nmatch
420 423 is 0 or
421 424 .Dv REG_NOSUB
422 425 was specified
423 426 .Pc ,
424 427 to hold the input offsets for
425 428 .Dv REG_STARTEND .
426 429 Use for output is still entirely controlled by
427 430 .Fa nmatch ;
428 431 if
429 432 .Fa nmatch
430 433 is 0 or
431 434 .Dv REG_NOSUB
432 435 was specified,
433 436 the value of
434 437 .Fa pmatch Ns [0]
435 438 will not be changed by a successful
436 439 .Fn regexec .
437 440 .Ss Fn regerror
438 441 The
439 442 .Fn regerror
440 443 function maps a non-zero
441 444 .Fa errcode
442 445 from either
443 446 .Fn regcomp
444 447 or
445 448 .Fn regexec
446 449 to a human-readable, printable message.
447 450 If
448 451 .Fa preg
449 452 is non-NULL, the error code should have arisen from use of the
450 453 .Ft regex_t
451 454 pointed to by
452 455 .Fa preg ,
453 456 and if the error code came from
454 457 .Fn regcomp ,
455 458 it should have been the result from the most recent
456 459 .Fn regcomp
457 460 using that
458 461 .Ft regex_t .
459 462 .Po
460 463 The
461 464 .Fn regerror
462 465 may be able to supply a more detailed message using information
463 466 from the
464 467 .Ft regex_t .
465 468 .Pc
466 469 The
467 470 .Fn regerror
468 471 function places the NUL-terminated message into the buffer pointed to by
469 472 .Fa errbuf ,
470 473 limiting the length
471 474 .Pq including the NUL
472 475 to at most
473 476 .Fa errbuf_size
474 477 bytes.
↓ open down ↓ |
292 lines elided |
↑ open up ↑ |
475 478 If the whole message will not fit, as much of it as will fit before the
476 479 terminating NUL is supplied.
477 480 In any case, the returned value is the size of buffer needed to hold the whole
478 481 message
479 482 .Pq including terminating NUL .
480 483 If
481 484 .Fa errbuf_size
482 485 is 0,
483 486 .Fa errbuf
484 487 is ignored but the return value is still correct.
485 -.Pp
486 -If the
487 -.Fa errcode
488 -given to
489 -.Fn regerror
490 -is first ORed with
491 -.Dv REG_ITOA ,
492 -the
493 -.Qq message
494 -that results is the printable name of the error code, e.g.
495 -.Qq Dv REG_NOMATCH ,
496 -rather than an explanation thereof.
497 -If
498 -.Fa errcode
499 -is
500 -.Dv REG_ATOI ,
501 -then
502 -.Fa preg
503 -shall be non-NULL and the
504 -.Va re_endp
505 -member of the structure it points to must point to the printable name of an
506 -error code; in this case, the result in
507 -.Fa errbuf
508 -is the decimal digits of the numeric value of the error code
509 -.Pq 0 if the name is not recognized .
510 -.Dv REG_ITOA
511 -and
512 -.Dv REG_ATOI
513 -are intended primarily as debugging facilities; they are extensions,
514 -compatible with but not specified by
515 -.St -p1003.2 ,
516 -and should be used with caution in software intended to be portable to other
517 -systems.
518 488 .Ss Fn regfree
519 489 The
520 490 .Fn regfree
521 491 function frees any dynamically-allocated storage associated with the compiled RE
522 492 pointed to by
523 493 .Fa preg .
524 494 The remaining
525 495 .Ft regex_t
526 496 is no longer a valid compiled RE and the effect of supplying it to
527 497 .Fn regexec
528 498 or
529 499 .Fn regerror
530 500 is undefined.
531 -.Sh IMPLEMENTATION NOTES
532 -There are a number of decisions that
533 -.St -p1003.2
534 -leaves up to the implementor,
535 -either by explicitly saying
536 -.Qq undefined
537 -or by virtue of them being forbidden by the RE grammar.
538 -This implementation treats them as follows.
539 -.Pp
540 -There is no particular limit on the length of REs, except insofar as memory is
541 -limited.
542 -Memory usage is approximately linear in RE size, and largely insensitive
543 -to RE complexity, except for bounded repetitions.
544 -.Pp
545 -A backslashed character other than one specifically given a magic meaning by
546 -.St -p1003.2
547 -.Pq such magic meanings occur only in BREs
548 -is taken as an ordinary character.
549 -.Pp
550 -Any unmatched
551 -.Qq \&[
552 -is a
553 -.Dv REG_EBRACK
554 -error.
555 -.Pp
556 -Equivalence classes cannot begin or end bracket-expression ranges.
557 -The endpoint of one range cannot begin another.
558 -.Pp
559 -.Dv RE_DUP_MAX ,
560 -the limit on repetition counts in bounded repetitions, is 255.
561 -.Pp
562 -A repetition operator
563 -.Po
564 -.Qq \&? ,
565 -.Qq \&* ,
566 -.Qq \&+ ,
567 -or bounds
568 -.Pc
569 -cannot follow another repetition operator.
570 -A repetition operator cannot begin an expression or subexpression
571 -or follow
572 -.Qq \&^
573 -or
574 -.Qq \&| .
575 -.Pp
576 -.Qq \&|
577 -cannot appear first or last in a (sub)expression or after another
578 -.Qq \&| ,
579 -i.e., an operand of
580 -.Qq \&|
581 -cannot be an empty subexpression.
582 -An empty parenthesized subexpression,
583 -.Qq () ,
584 -is legal and matches an empty (sub)string.
585 -An empty string is not a legal RE.
586 -.Pp
587 -A
588 -.Qq \&{
589 -followed by a digit is considered the beginning of bounds for a bounded
590 -repetition, which must then follow the syntax for bounds.
591 -A
592 -.Qq \&{
593 -.Em not
594 -followed by a digit is considered an ordinary character.
595 -.Pp
596 -.Qq \&^
597 -and
598 -.Qq \&$
599 -beginning and ending subexpressions in BREs are anchors, not ordinary
600 -characters.
601 501 .Sh RETURN VALUES
602 502 On successful completion, the
603 503 .Fn regcomp
604 504 function returns 0.
605 505 Otherwise, it returns an integer value indicating an error as described in
606 506 .In regex.h ,
607 507 and the content of preg is undefined.
608 508 .Pp
609 509 On successful completion, the
610 510 .Fn regexec
611 511 function returns 0.
612 512 Otherwise, it returns
613 513 .Dv REG_NOMATCH
614 -to indicate no match, or
615 -.Dv REG_ENOSYS
616 -to indicate that the function is not supported.
514 +to indicate no match.
617 515 .Pp
618 516 Upon successful completion, the
619 517 .Fn regerror
620 518 function returns the number of bytes needed to hold the entire generated string.
621 -Otherwise, it returns 0 to indicate that the function is not implemented.
622 519 .Pp
623 520 The
624 521 .Fn regfree
625 522 function returns no value.
626 523 .Pp
627 524 The following constants are defined as error return values:
628 525 .Pp
629 526 .Bl -tag -width "REG_ECOLLATE" -compact
630 527 .It Dv REG_NOMATCH
631 528 The
632 529 .Fn regexec
633 530 function failed to match.
634 531 .It Dv REG_BADPAT
635 532 Invalid regular expression.
636 533 .It Dv REG_ECOLLATE
637 534 Invalid collating element referenced.
638 535 .It Dv REG_ECTYPE
639 536 Invalid character class type referenced.
640 537 .It Dv REG_EESCAPE
641 538 Trailing
642 539 .Qq \&\e
643 540 in pattern.
644 541 .It Dv REG_ESUBREG
645 542 Number in
646 543 .Qq \&\e Ns Em digit
647 544 invalid or in error.
648 545 .It Dv REG_EBRACK
649 546 .Qq []
650 547 imbalance.
651 548 .It Dv REG_ENOSYS
652 549 The function is not supported.
653 550 .It Dv REG_EPAREN
654 551 .Qq \e(\e)
655 552 or
656 553 .Qq ()
657 554 imbalance.
658 555 .It Dv REG_EBRACE
659 556 .Qq \e{\e}
660 557 imbalance.
661 558 .It Dv REG_BADBR
662 559 Content of
663 560 .Qq \e{\e}
664 561 invalid: not a number, number too large, more than two
665 562 numbers, first larger than second.
↓ open down ↓ |
34 lines elided |
↑ open up ↑ |
666 563 .It Dv REG_ERANGE
667 564 Invalid endpoint in range expression.
668 565 .It Dv REG_ESPACE
669 566 Out of memory.
670 567 .It Dv REG_BADRPT
671 568 .Qq \&? ,
672 569 .Qq *
673 570 or
674 571 .Qq +
675 572 not preceded by valid regular expression.
573 +.It Dv REG_EMPTY
574 +Empty (sub)expression.
575 +.It Dv REG_INVARG
576 +Invalid argument, e.g. negative-length string.
676 577 .El
677 578 .Sh USAGE
678 579 An application could use:
679 580 .Bd -literal -offset Ds
680 581 regerror(code, preg, (char *)NULL, (size_t)0)
681 582 .Ed
682 583 .Pp
683 584 to find out how big a buffer is needed for the generated string,
684 585 .Fn malloc
685 586 a buffer to hold the string, and then call
686 587 .Fn regerror
687 588 again to get the string
688 589 .Po see
689 590 .Xr malloc 3C
690 591 .Pc .
691 592 Alternately, it could allocate a fixed, static buffer that is big enough to hold
692 593 most strings, and then use
693 594 .Fn malloc
694 595 to allocate a larger buffer if it finds that this is too small.
695 596 .Sh EXAMPLES
696 597 Matching string against the extended regular expression in pattern.
697 598 .Bd -literal -offset Ds
698 599 #include <regex.h>
699 600
700 601 /*
701 602 * Match string against the extended regular expression in
702 603 * pattern, treating errors as no match.
703 604 *
704 605 * return 1 for match, 0 for no match
705 606 */
706 607 int
707 608 match(const char *string, char *pattern)
708 609 {
709 610 int status;
710 611 regex_t re;
711 612
712 613 if (regcomp(&re, pattern, REG_EXTENDED\||\|REG_NOSUB) != 0) {
713 614 return(0); /* report error */
714 615 }
715 616 status = regexec(&re, string, (size_t) 0, NULL, 0);
716 617 regfree(&re);
717 618 if (status != 0) {
718 619 return(0); /* report error */
719 620 }
720 621 return(1);
721 622 }
722 623 .Ed
723 624 .Pp
724 625 The following demonstrates how the
725 626 .Dv REG_NOTBOL
726 627 flag could be used with
727 628 .Fn regexec
728 629 to find all substrings in a line that match a pattern supplied by a user.
729 630 .Pq For simplicity of the example, very little error checking is done.
730 631 .Bd -literal -offset Ds
731 632 (void) regcomp(&re, pattern, 0);
732 633 /* this call to regexec() finds the first match on the line */
733 634 error = regexec(&re, &buffer[0], 1, &pm, 0);
734 635 while (error == 0) { /* while matches found */
735 636 /* substring found between pm.rm_so and pm.rm_eo */
736 637 /* This call to regexec() finds the next match */
737 638 error = regexec(&re, buffer + pm.rm_eo, 1, &pm, REG_NOTBOL);
738 639 }
739 640 .Ed
740 641 .Sh ERRORS
741 642 No errors are defined.
742 643 .Sh CODE SET INDEPENDENCE
743 644 .Sy Enabled
744 645 .Sh INTERFACE STABILITY
745 646 .Sy Standard
746 647 .Sh MT-LEVEL
747 648 .Sy MT-Safe with exceptions
748 649 .Pp
749 650 The
750 651 .Fn regcomp
751 652 function can be used safely in a multithreaded application as long as
752 653 .Xr setlocale 3C
753 654 is not being called to change the locale.
754 655 .Sh SEE ALSO
755 656 .Xr attributes 5 ,
756 657 .Xr regex 5 ,
757 658 .Xr standards 5
758 659 .Pp
759 660 .St -p1003.2 ,
760 661 sections 2.8
761 662 .Pq Regular Expression Notation
762 663 and
763 664 B.5
764 665 .Pq C Binding for Regular Expression Matching .
↓ open down ↓ |
79 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX