19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
30 */
31
32 /*
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 *
35 */
36
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 #include <ftw.h>
55 #include <sys/param.h>
56
/*
 * Sizing constants, followed by the state of the BMG literal matcher.
 */
57 #define BSIZE 512 /* Block size used to compute the block number for -b */
58 #define BUFSIZE 8192 /* Input buffer size */
59 #define MAX_DEPTH 1000 /* how deep to recurse */
60
61 #define M_CSETSIZE 256 /* Number of single-byte character values */
62 static int bmglen; /* length of BMG pattern */
63 static char *bmgpat; /* BMG pattern */
64 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table (M_CSETSIZE entries) */
65
/*
 * A pattern to search for, kept as a node in a singly linked list.
 */
66 typedef struct _PATTERN {
67 char *pattern; /* original pattern */
68 wchar_t *wpattern; /* wide, lowercased pattern */
69 struct _PATTERN *next; /* next pattern in the list */
70 regex_t re; /* compiled pattern */
71 } PATTERN;
72
/*
 * File-scope state: the pattern list, exit-status bookkeeping and one flag
 * per command-line option.
 */
73 static PATTERN *patterns; /* pattern list (presumably its head; confirm) */
74 static char errstr[128]; /* regerror string buffer */
75 static int regflags = 0; /* regcomp options */
76 static int matched = 0; /* result of grep(); nonzero gives exit status 0 */
77 static int errors = 0; /* count of errors; nonzero gives exit status 2 */
78 static uchar_t fgrep = 0; /* Invoked as fgrep */
79 static uchar_t egrep = 0; /* Invoked as egrep */
80 static uchar_t nvflag = 1; /* Print matching lines; cleared by -v */
81 static uchar_t cflag; /* Count of matches */
82 static uchar_t iflag; /* Case insensitive matching */
83 static uchar_t hflag; /* Suppress printing of filename */
84 static uchar_t lflag; /* Print file names of matches */
85 static uchar_t nflag; /* Precede lines by line number */
86 static uchar_t rflag; /* Search directories recursively */
87 static uchar_t bflag; /* Precede matches by block number */
88 static uchar_t sflag; /* Suppress file error messages */
89 static uchar_t qflag; /* Suppress standard output */
90 static uchar_t wflag; /* Search for expression as a word */
91 static uchar_t xflag; /* Anchoring: full line matches only (-x) */
92 static uchar_t Eflag; /* Egrep or -E flag */
93 static uchar_t Fflag; /* Fgrep or -F flag */
94 static uchar_t Rflag; /* Like rflag, but follow symlinks */
95 static uchar_t outfn; /* Prefix output lines with the file name */
96 static char *cmdname; /* argv[0] without its directory part */
97
/*
 * use_wchar: use the wide character APIs; set for -F in a multibyte locale
 * when -x is absent or -i is present.
 * use_bmg: presumably set when the BMG matcher can be used (confirm).
 * mblocale: presumably nonzero in a multibyte locale (confirm).
 */
98 static int use_wchar, use_bmg, mblocale;
99
100 static size_t outbuflen, prntbuflen;
101 static char *prntbuf;
102 static wchar_t *outline;
138 /*
139 * Strip any leading directory components from the command name
140 */
141 cmdname = argv[0];
142 if (ap = strrchr(cmdname, '/'))
143 cmdname = ap + 1;
144
145 ap = cmdname;
146 /*
147 * Detect egrep/fgrep via command name, map to -E and -F options.
148 */
149 if (*ap == 'e' || *ap == 'E') {
150 regflags |= REG_EXTENDED;
151 egrep++;
152 } else {
153 if (*ap == 'f' || *ap == 'F') {
154 fgrep++;
155 }
156 }
157
158 while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
159 switch (c) {
160 case 'v': /* POSIX: negate matches */
161 nvflag = 0;
162 break;
163
164 case 'c': /* POSIX: write count */
165 cflag++;
166 break;
167
168 case 'i': /* POSIX: ignore case */
169 iflag++;
170 regflags |= REG_ICASE;
171 break;
172
173 case 'l': /* POSIX: Write filenames only */
174 lflag++;
175 break;
176
177 case 'n': /* POSIX: Write line numbers */
178 nflag++;
199 gettext("%s: out of memory\n"),
200 cmdname);
201 exit(2);
202 }
203 *(pattern_list + n_pattern - 1) = optarg;
204 break;
205
206 case 'f': /* POSIX: pattern file */
207 fflag = 1;
208 n_file++;
209 file_list = realloc(file_list,
210 sizeof (char *) * n_file);
211 if (file_list == NULL) {
212 (void) fprintf(stderr,
213 gettext("%s: out of memory\n"),
214 cmdname);
215 exit(2);
216 }
217 *(file_list + n_file - 1) = optarg;
218 break;
219 case 'h': /* Solaris: supress printing of file name */
220 hflag = 1;
221 break;
222
223 case 'q': /* POSIX: quiet: status only */
224 qflag++;
225 break;
226
227 case 'w': /* Solaris: treat pattern as word */
228 wflag++;
229 break;
230
231 case 'x': /* POSIX: full line matches */
232 xflag++;
233 regflags |= REG_ANCHOR;
234 break;
235
236 case 'E': /* POSIX: Extended RE's */
237 regflags |= REG_EXTENDED;
238 Eflag++;
239 break;
240
241 case 'F': /* POSIX: strings, not RE's */
277 if (egrep)
278 Eflag++;
279 else /* Else fgrep */
280 Fflag++;
281 }
282
283 if (wflag && (Eflag || Fflag)) {
284 /*
285 * -w cannot be combined with -E or -F
286 */
287 usage();
288 }
289
290 /*
291 * -E and -F flags are mutually exclusive - check for this
292 */
293 if (Eflag && Fflag)
294 usage();
295
296 /*
297 * -c, -l and -q flags are mutually exclusive
298 * We have -c override -l like in Solaris.
299 * -q overrides -l & -c programmatically in grep() function.
300 */
301 if (cflag && lflag)
302 lflag = 0;
303
304 argv += optind - 1;
305 argc -= optind - 1;
306
307 /*
308 * Now handling -e and -f option
309 */
310 if (pattern_list) {
311 for (i = 0; i < n_pattern; i++) {
312 addpattern(pattern_list[i]);
313 }
314 free(pattern_list);
315 }
316 if (file_list) {
329 addpattern(argv[1]);
330 argc--;
331 argv++;
332 }
333
334 /*
335 * If -x flag is not specified or -i flag is specified
336 * with fgrep in a multibyte locale, need to use
337 * the wide character APIs. Otherwise, byte-oriented
338 * process will be done.
339 */
340 use_wchar = Fflag && mblocale && (!xflag || iflag);
341
342 /*
343 * Compile Patterns and also decide if BMG can be used
344 */
345 fixpatterns();
346
347 /* Process all files: stdin, or rest of arg list */
348 if (argc < 2) {
349 matched = grep(0, gettext("(standard input)"));
350 } else {
351 if (argc > 2 && hflag == 0)
352 outfn = 1; /* Print filename on match line */
353 for (argv++; *argv != NULL; argv++) {
354 process_path(*argv);
355 }
356 }
357 /*
358 * Return() here is used instead of exit
359 */
360
361 (void) fflush(stdout);
362
363 if (errors)
364 return (2);
365 return (matched ? 0 : 1);
366 }
367
368 static void
369 process_path(const char *path)
370 {
371 struct stat st;
1095 matches++;
1096 /*
1097 * Handle q, l, and c flags.
1098 */
1099 if (qflag) {
1100 /* no need to continue */
1101 /*
1102 * End of this line is ptrend.
1103 * We have read up to ptr + data_len.
1104 */
1105 off_t pos;
1106 pos = ptr + data_len - (ptrend + 1);
1107 (void) lseek(fd, -pos, SEEK_CUR);
1108 exit(0);
1109 }
1110 if (lflag) {
1111 (void) printf("%s\n", fn);
1112 break;
1113 }
1114 if (!cflag) {
1115 if (outfn) {
1116 (void) printf("%s:", fn);
1117 }
1118 if (bflag) {
1119 (void) printf("%lld:", (offset_t)
1120 (line_offset / BSIZE));
1121 }
1122 if (nflag) {
1123 (void) printf("%lld:", lineno);
1124 }
1125 *ptrend = '\n';
1126 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127 }
1128 if (ferror(stdout)) {
1129 return (0);
1130 }
1131 }
1132 L_skip_line:
1133 if (!newlinep)
1134 break;
1135
1136 data_len -= line_len + 1;
1137 line_offset += line_len + 1;
1138 ptr = ptrend + 1;
1139 }
1140
1141 if (cflag) {
1142 if (outfn) {
1143 (void) printf("%s:", fn);
1144 }
1145 if (!qflag) {
1146 (void) printf("%lld\n", matches);
1147 }
1148 }
1149 return (matches != 0);
1150 }
1151
1152 /*
1153 * usage message for grep
1154 */
1155 static void
1156 usage(void)
1157 {
1158 if (egrep || fgrep) {
1159 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160 (void) fprintf(stderr,
1161 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1162 "pattern_list [file ...]\n"));
1163
1164 (void) fprintf(stderr, "\t%s", cmdname);
1165 (void) fprintf(stderr,
1166 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1167 "[-e pattern_list]... "
1168 "[-f pattern_file]... [file...]\n"));
1169 } else {
1170 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171 (void) fprintf(stderr,
1172 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1173 "pattern_list [file ...]\n"));
1174
1175 (void) fprintf(stderr, "\t%s", cmdname);
1176 (void) fprintf(stderr,
1177 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1178 "[-e pattern_list]... "
1179 "[-f pattern_file]... [file...]\n"));
1180
1181 (void) fprintf(stderr, "\t%s", cmdname);
1182 (void) fprintf(stderr,
1183 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1184 "pattern_list [file ...]\n"));
1185
1186 (void) fprintf(stderr, "\t%s", cmdname);
1187 (void) fprintf(stderr,
1188 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1189 "[-e pattern_list]... "
1190 "[-f pattern_file]... [file...]\n"));
1191
1192 (void) fprintf(stderr, "\t%s", cmdname);
1193 (void) fprintf(stderr,
1194 gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1195 "pattern_list [file ...]\n"));
1196
1197 (void) fprintf(stderr, "\t%s", cmdname);
1198 (void) fprintf(stderr,
1199 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1200 "[-f pattern_file]... [file...]\n"));
1201 }
1202 exit(2);
1203 /* NOTREACHED */
1204 }
1205
1206 /*
1207 * Compile literal pattern into BMG tables
1208 */
1209 static void
1210 bmgcomp(char *pat, int len)
1211 {
1212 int i;
1213 int tlen;
1214 unsigned char *uc = (unsigned char *)pat;
1215
1216 bmglen = len;
1217 bmgpat = pat;
1218
1219 for (i = 0; i < M_CSETSIZE; i++) {
|
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * grep - pattern matching program - combined grep, egrep, and fgrep.
29 * Based on MKS grep command, with XCU & Solaris mods.
30 */
31
32 /*
33 * Copyright 1985, 1992 by Mortice Kern Systems Inc. All rights reserved.
34 *
35 */
36
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 /*
40 * Copyright 2013 Damian Bogel. All rights reserved.
41 */
42
43 #include <string.h>
44 #include <stdlib.h>
45 #include <ctype.h>
46 #include <stdarg.h>
47 #include <regex.h>
48 #include <limits.h>
49 #include <sys/types.h>
50 #include <sys/stat.h>
51 #include <fcntl.h>
52 #include <stdio.h>
53 #include <locale.h>
54 #include <wchar.h>
55 #include <errno.h>
56 #include <unistd.h>
57 #include <wctype.h>
58 #include <ftw.h>
59 #include <sys/param.h>
60
/*
 * Name reported for standard input, sizing constants, and the state of
 * the BMG literal matcher.
 */
61 #define STDIN_FILENAME gettext("(standard input)")
62
63 #define BSIZE 512 /* Block size used to compute the block number for -b */
64 #define BUFSIZE 8192 /* Input buffer size */
65 #define MAX_DEPTH 1000 /* how deep to recurse */
66
67 #define M_CSETSIZE 256 /* Number of single-byte character values */
68 static int bmglen; /* length of BMG pattern */
69 static char *bmgpat; /* BMG pattern */
70 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table (M_CSETSIZE entries) */
71
/*
 * A pattern to search for, kept as a node in a singly linked list.
 */
72 typedef struct _PATTERN {
73 char *pattern; /* original pattern */
74 wchar_t *wpattern; /* wide, lowercased pattern */
75 struct _PATTERN *next; /* next pattern in the list */
76 regex_t re; /* compiled pattern */
77 } PATTERN;
78
/*
 * File-scope state: the pattern list, exit-status bookkeeping and one flag
 * per command-line option.
 */
79 static PATTERN *patterns; /* pattern list (presumably its head; confirm) */
80 static char errstr[128]; /* regerror string buffer */
81 static int regflags = 0; /* regcomp options */
82 static int matched = 0; /* result of grep(); nonzero gives exit status 0 */
83 static int errors = 0; /* count of errors; nonzero gives exit status 2 */
84 static uchar_t fgrep = 0; /* Invoked as fgrep */
85 static uchar_t egrep = 0; /* Invoked as egrep */
86 static uchar_t nvflag = 1; /* Print matching lines; cleared by -v */
87 static uchar_t cflag; /* Count of matches */
88 static uchar_t iflag; /* Case insensitive matching */
89 static uchar_t Hflag; /* Precede lines by file name; cleared by -h and -l */
90 static uchar_t hflag; /* Suppress printing of filename; cleared by -H */
91 static uchar_t lflag; /* Print file names of matches */
92 static uchar_t nflag; /* Precede lines by line number */
93 static uchar_t rflag; /* Search directories recursively */
94 static uchar_t bflag; /* Precede matches by block number */
95 static uchar_t sflag; /* Suppress file error messages */
96 static uchar_t qflag; /* Suppress standard output */
97 static uchar_t wflag; /* Search for expression as a word */
98 static uchar_t xflag; /* Anchoring: full line matches only (-x) */
99 static uchar_t Eflag; /* Egrep or -E flag */
100 static uchar_t Fflag; /* Fgrep or -F flag */
101 static uchar_t Rflag; /* Like rflag, but follow symlinks */
102 static uchar_t outfn; /* Prefix output lines with the file name */
103 static char *cmdname; /* argv[0] without its directory part */
104
/*
 * use_wchar: use the wide character APIs; set for -F in a multibyte locale
 * when -x is absent or -i is present.
 * use_bmg: presumably set when the BMG matcher can be used (confirm).
 * mblocale: presumably nonzero in a multibyte locale (confirm).
 */
105 static int use_wchar, use_bmg, mblocale;
106
107 static size_t outbuflen, prntbuflen;
108 static char *prntbuf;
109 static wchar_t *outline;
145 /*
146 * Strip any leading directory components from the command name
147 */
148 cmdname = argv[0];
149 if (ap = strrchr(cmdname, '/'))
150 cmdname = ap + 1;
151
152 ap = cmdname;
153 /*
154 * Detect egrep/fgrep via command name, map to -E and -F options.
155 */
156 if (*ap == 'e' || *ap == 'E') {
157 regflags |= REG_EXTENDED;
158 egrep++;
159 } else {
160 if (*ap == 'f' || *ap == 'F') {
161 fgrep++;
162 }
163 }
164
165 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
166 switch (c) {
167 case 'v': /* POSIX: negate matches */
168 nvflag = 0;
169 break;
170
171 case 'c': /* POSIX: write count */
172 cflag++;
173 break;
174
175 case 'i': /* POSIX: ignore case */
176 iflag++;
177 regflags |= REG_ICASE;
178 break;
179
180 case 'l': /* POSIX: Write filenames only */
181 lflag++;
182 break;
183
184 case 'n': /* POSIX: Write line numbers */
185 nflag++;
206 gettext("%s: out of memory\n"),
207 cmdname);
208 exit(2);
209 }
210 *(pattern_list + n_pattern - 1) = optarg;
211 break;
212
213 case 'f': /* POSIX: pattern file */
214 fflag = 1;
215 n_file++;
216 file_list = realloc(file_list,
217 sizeof (char *) * n_file);
218 if (file_list == NULL) {
219 (void) fprintf(stderr,
220 gettext("%s: out of memory\n"),
221 cmdname);
222 exit(2);
223 }
224 *(file_list + n_file - 1) = optarg;
225 break;
226
227 /* based on options order h or H is set as in GNU grep */
228 case 'h': /* Solaris: supress printing of file name */
229 hflag = 1;
230 Hflag = 0;
231 break;
232 /* Solaris: precede every matching with file name */
233 case 'H':
234 Hflag = 1;
235 hflag = 0;
236 break;
237
238 case 'q': /* POSIX: quiet: status only */
239 qflag++;
240 break;
241
242 case 'w': /* Solaris: treat pattern as word */
243 wflag++;
244 break;
245
246 case 'x': /* POSIX: full line matches */
247 xflag++;
248 regflags |= REG_ANCHOR;
249 break;
250
251 case 'E': /* POSIX: Extended RE's */
252 regflags |= REG_EXTENDED;
253 Eflag++;
254 break;
255
256 case 'F': /* POSIX: strings, not RE's */
292 if (egrep)
293 Eflag++;
294 else /* Else fgrep */
295 Fflag++;
296 }
297
298 if (wflag && (Eflag || Fflag)) {
299 /*
300 * -w cannot be combined with -E or -F
301 */
302 usage();
303 }
304
305 /*
306 * -E and -F flags are mutually exclusive - check for this
307 */
308 if (Eflag && Fflag)
309 usage();
310
311 /*
312 * -l overrides -H like in GNU grep
313 */
314 if (lflag)
315 Hflag = 0;
316
317 /*
318 * -c, -l and -q flags are mutually exclusive
319 * We have -c override -l like in Solaris.
320 * -q overrides -l & -c programmatically in grep() function.
321 */
322 if (cflag && lflag)
323 lflag = 0;
324
325 argv += optind - 1;
326 argc -= optind - 1;
327
328 /*
329 * Now handling -e and -f option
330 */
331 if (pattern_list) {
332 for (i = 0; i < n_pattern; i++) {
333 addpattern(pattern_list[i]);
334 }
335 free(pattern_list);
336 }
337 if (file_list) {
350 addpattern(argv[1]);
351 argc--;
352 argv++;
353 }
354
355 /*
356 * If -x flag is not specified or -i flag is specified
357 * with fgrep in a multibyte locale, need to use
358 * the wide character APIs. Otherwise, byte-oriented
359 * process will be done.
360 */
361 use_wchar = Fflag && mblocale && (!xflag || iflag);
362
363 /*
364 * Compile Patterns and also decide if BMG can be used
365 */
366 fixpatterns();
367
368 /* Process all files: stdin, or rest of arg list */
369 if (argc < 2) {
370 matched = grep(0, STDIN_FILENAME);
371 } else {
372 if (Hflag || (argc > 2 && hflag == 0))
373 outfn = 1; /* Print filename on match line */
374 for (argv++; *argv != NULL; argv++) {
375 process_path(*argv);
376 }
377 }
378 /*
379 * Return() here is used instead of exit
380 */
381
382 (void) fflush(stdout);
383
384 if (errors)
385 return (2);
386 return (matched ? 0 : 1);
387 }
388
389 static void
390 process_path(const char *path)
391 {
392 struct stat st;
1116 matches++;
1117 /*
1118 * Handle q, l, and c flags.
1119 */
1120 if (qflag) {
1121 /* no need to continue */
1122 /*
1123 * End of this line is ptrend.
1124 * We have read up to ptr + data_len.
1125 */
1126 off_t pos;
1127 pos = ptr + data_len - (ptrend + 1);
1128 (void) lseek(fd, -pos, SEEK_CUR);
1129 exit(0);
1130 }
1131 if (lflag) {
1132 (void) printf("%s\n", fn);
1133 break;
1134 }
1135 if (!cflag) {
1136 if (Hflag || outfn) {
1137 (void) printf("%s:", fn);
1138 }
1139 if (bflag) {
1140 (void) printf("%lld:", (offset_t)
1141 (line_offset / BSIZE));
1142 }
1143 if (nflag) {
1144 (void) printf("%lld:", lineno);
1145 }
1146 *ptrend = '\n';
1147 (void) fwrite(ptr, 1, line_len + 1, stdout);
1148 }
1149 if (ferror(stdout)) {
1150 return (0);
1151 }
1152 }
1153 L_skip_line:
1154 if (!newlinep)
1155 break;
1156
1157 data_len -= line_len + 1;
1158 line_offset += line_len + 1;
1159 ptr = ptrend + 1;
1160 }
1161
1162 if (cflag) {
1163 if (Hflag || outfn) {
1164 (void) printf("%s:", fn);
1165 }
1166 if (!qflag) {
1167 (void) printf("%lld\n", matches);
1168 }
1169 }
1170 return (matches != 0);
1171 }
1172
1173 /*
1174 * usage message for grep
1175 */
1176 static void
1177 usage(void)
1178 {
1179 if (egrep || fgrep) {
1180 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1181 (void) fprintf(stderr,
1182 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1183 "pattern_list [file ...]\n"));
1184
1185 (void) fprintf(stderr, "\t%s", cmdname);
1186 (void) fprintf(stderr,
1187 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1188 "[-e pattern_list]... "
1189 "[-f pattern_file]... [file...]\n"));
1190 } else {
1191 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1192 (void) fprintf(stderr,
1193 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1194 "pattern_list [file ...]\n"));
1195
1196 (void) fprintf(stderr, "\t%s", cmdname);
1197 (void) fprintf(stderr,
1198 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1199 "[-e pattern_list]... "
1200 "[-f pattern_file]... [file...]\n"));
1201
1202 (void) fprintf(stderr, "\t%s", cmdname);
1203 (void) fprintf(stderr,
1204 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1205 "pattern_list [file ...]\n"));
1206
1207 (void) fprintf(stderr, "\t%s", cmdname);
1208 (void) fprintf(stderr,
1209 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1210 "[-e pattern_list]... "
1211 "[-f pattern_file]... [file...]\n"));
1212
1213 (void) fprintf(stderr, "\t%s", cmdname);
1214 (void) fprintf(stderr,
1215 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1216 "pattern_list [file ...]\n"));
1217
1218 (void) fprintf(stderr, "\t%s", cmdname);
1219 (void) fprintf(stderr,
1220 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1221 "[-f pattern_file]... [file...]\n"));
1222 }
1223 exit(2);
1224 /* NOTREACHED */
1225 }
1226
1227 /*
1228 * Compile literal pattern into BMG tables
1229 */
1230 static void
1231 bmgcomp(char *pat, int len)
1232 {
1233 int i;
1234 int tlen;
1235 unsigned char *uc = (unsigned char *)pat;
1236
1237 bmglen = len;
1238 bmgpat = pat;
1239
1240 for (i = 0; i < M_CSETSIZE; i++) {
|