37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 #include <ftw.h>
55 #include <sys/param.h>
56
57 #define BSIZE 512 /* Size of block for -b */
58 #define BUFSIZE 8192 /* Input buffer size */
59 #define MAX_DEPTH 1000 /* how deep to recurse */
60
61 #define M_CSETSIZE 256 /* singlebyte chars */
62 static int bmglen; /* length of BMG pattern */
63 static char *bmgpat; /* BMG pattern */
64 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
65
/*
 * One search pattern.  Entries are chained together through `next`.
 */
66 typedef struct _PATTERN {
67 char *pattern; /* original pattern */
68 wchar_t *wpattern; /* wide, lowercased pattern */
69 struct _PATTERN *next;
70 regex_t re; /* compiled pattern */
71 } PATTERN;
72
73 static PATTERN *patterns; /* pattern list -- presumably its head */
74 static char errstr[128]; /* regerror string buffer */
75 static int regflags = 0; /* regcomp options */
76 static int matched = 0; /* return of the grep() */
77 static int errors = 0; /* count of errors */
78 static uchar_t fgrep = 0; /* Invoked as fgrep */
79 static uchar_t egrep = 0; /* Invoked as egrep */
80 static uchar_t nvflag = 1; /* Print matching lines (cleared by -v) */
81 static uchar_t cflag; /* Count of matches */
82 static uchar_t iflag; /* Case insensitive matching */
83 static uchar_t hflag; /* Suppress printing of filename */
84 static uchar_t lflag; /* Print file names of matches */
85 static uchar_t nflag; /* Precede lines by line number */
86 static uchar_t rflag; /* Search directories recursively */
87 static uchar_t bflag; /* Precede matches by block number */
88 static uchar_t sflag; /* Suppress file error messages */
89 static uchar_t qflag; /* Suppress standard output */
90 static uchar_t wflag; /* Search for expression as a word */
91 static uchar_t xflag; /* Anchoring */
92 static uchar_t Eflag; /* Egrep or -E flag */
93 static uchar_t Fflag; /* Fgrep or -F flag */
94 static uchar_t Rflag; /* Like rflag, but follow symlinks */
95 static uchar_t outfn; /* Put out file name */
96 static char *cmdname; /* argv[0] with any directory part removed */
97
/*
 * use_wchar is set when -F is used in a multibyte locale and either -x is
 * absent or -i is present; matching then goes through the wide character
 * APIs.  use_bmg and mblocale are assigned outside the visible code --
 * presumably "BMG search usable" and "locale is multibyte".
 */
98 static int use_wchar, use_bmg, mblocale;
99
100 static size_t outbuflen, prntbuflen;
101 static char *prntbuf;
102 static wchar_t *outline;
138 /*
139 * Skip leading slashes
140 */
141 cmdname = argv[0];
142 if (ap = strrchr(cmdname, '/'))
143 cmdname = ap + 1;
144
145 ap = cmdname;
146 /*
147 * Detect egrep/fgrep via command name, map to -E and -F options.
148 */
149 if (*ap == 'e' || *ap == 'E') {
150 regflags |= REG_EXTENDED;
151 egrep++;
152 } else {
153 if (*ap == 'f' || *ap == 'F') {
154 fgrep++;
155 }
156 }
157
158 while ((c = getopt(argc, argv, "vwchilnrbse:f:qxEFIR")) != EOF) {
159 switch (c) {
160 case 'v': /* POSIX: negate matches */
161 nvflag = 0;
162 break;
163
164 case 'c': /* POSIX: write count */
165 cflag++;
166 break;
167
168 case 'i': /* POSIX: ignore case */
169 iflag++;
170 regflags |= REG_ICASE;
171 break;
172
173 case 'l': /* POSIX: Write filenames only */
174 lflag++;
175 break;
176
177 case 'n': /* POSIX: Write line numbers */
178 nflag++;
199 gettext("%s: out of memory\n"),
200 cmdname);
201 exit(2);
202 }
203 *(pattern_list + n_pattern - 1) = optarg;
204 break;
205
206 case 'f': /* POSIX: pattern file */
207 fflag = 1;
208 n_file++;
209 file_list = realloc(file_list,
210 sizeof (char *) * n_file);
211 if (file_list == NULL) {
212 (void) fprintf(stderr,
213 gettext("%s: out of memory\n"),
214 cmdname);
215 exit(2);
216 }
217 *(file_list + n_file - 1) = optarg;
218 break;
219 case 'h': /* Solaris: supress printing of file name */
220 hflag = 1;
221 break;
222
223 case 'q': /* POSIX: quiet: status only */
224 qflag++;
225 break;
226
227 case 'w': /* Solaris: treat pattern as word */
228 wflag++;
229 break;
230
231 case 'x': /* POSIX: full line matches */
232 xflag++;
233 regflags |= REG_ANCHOR;
234 break;
235
236 case 'E': /* POSIX: Extended RE's */
237 regflags |= REG_EXTENDED;
238 Eflag++;
239 break;
240
277 if (egrep)
278 Eflag++;
279 else /* Else fgrep */
280 Fflag++;
281 }
282
283 if (wflag && (Eflag || Fflag)) {
284 /*
285 * -w cannot be specified with grep -E or grep -F
286 */
287 usage();
288 }
289
290 /*
291 * -E and -F flags are mutually exclusive - check for this
292 */
293 if (Eflag && Fflag)
294 usage();
295
296 /*
297 * -c, -l and -q flags are mutually exclusive
298 * We have -c override -l like in Solaris.
299 * -q overrides -l & -c programmatically in grep() function.
300 */
301 if (cflag && lflag)
302 lflag = 0;
303
304 argv += optind - 1;
305 argc -= optind - 1;
306
307 /*
308 * Now handling -e and -f option
309 */
310 if (pattern_list) {
311 for (i = 0; i < n_pattern; i++) {
312 addpattern(pattern_list[i]);
313 }
314 free(pattern_list);
315 }
316 if (file_list) {
329 addpattern(argv[1]);
330 argc--;
331 argv++;
332 }
333
334 /*
335 * If -x flag is not specified or -i flag is specified
336 * with fgrep in a multibyte locale, need to use
337 * the wide character APIs. Otherwise, byte-oriented
338 * process will be done.
339 */
340 use_wchar = Fflag && mblocale && (!xflag || iflag);
341
342 /*
343 * Compile Patterns and also decide if BMG can be used
344 */
345 fixpatterns();
346
347 /* Process all files: stdin, or rest of arg list */
348 if (argc < 2) {
349 matched = grep(0, gettext("(standard input)"));
350 } else {
351 if (argc > 2 && hflag == 0)
352 outfn = 1; /* Print filename on match line */
353 for (argv++; *argv != NULL; argv++) {
354 process_path(*argv);
355 }
356 }
357 /*
358 * Return() here is used instead of exit
359 */
360
361 (void) fflush(stdout);
362
363 if (errors)
364 return (2);
365 return (matched ? 0 : 1);
366 }
367
368 static void
369 process_path(const char *path)
370 {
371 struct stat st;
1095 matches++;
1096 /*
1097 * Handle q, l, and c flags.
1098 */
1099 if (qflag) {
1100 /* no need to continue */
1101 /*
1102 * End of this line is ptrend.
1103 * We have read up to ptr + data_len.
1104 */
1105 off_t pos;
1106 pos = ptr + data_len - (ptrend + 1);
1107 (void) lseek(fd, -pos, SEEK_CUR);
1108 exit(0);
1109 }
1110 if (lflag) {
1111 (void) printf("%s\n", fn);
1112 break;
1113 }
1114 if (!cflag) {
1115 if (outfn) {
1116 (void) printf("%s:", fn);
1117 }
1118 if (bflag) {
1119 (void) printf("%lld:", (offset_t)
1120 (line_offset / BSIZE));
1121 }
1122 if (nflag) {
1123 (void) printf("%lld:", lineno);
1124 }
1125 *ptrend = '\n';
1126 (void) fwrite(ptr, 1, line_len + 1, stdout);
1127 }
1128 if (ferror(stdout)) {
1129 return (0);
1130 }
1131 }
1132 L_skip_line:
1133 if (!newlinep)
1134 break;
1135
1136 data_len -= line_len + 1;
1137 line_offset += line_len + 1;
1138 ptr = ptrend + 1;
1139 }
1140
1141 if (cflag) {
1142 if (outfn) {
1143 (void) printf("%s:", fn);
1144 }
1145 if (!qflag) {
1146 (void) printf("%lld\n", matches);
1147 }
1148 }
1149 return (matches != 0);
1150 }
1151
1152 /*
1153 * usage message for grep
1154 */
1155 static void
1156 usage(void)
1157 {
1158 if (egrep || fgrep) {
1159 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1160 (void) fprintf(stderr,
1161 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1162 "pattern_list [file ...]\n"));
1163
1164 (void) fprintf(stderr, "\t%s", cmdname);
1165 (void) fprintf(stderr,
1166 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvx] "
1167 "[-e pattern_list]... "
1168 "[-f pattern_file]... [file...]\n"));
1169 } else {
1170 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1171 (void) fprintf(stderr,
1172 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1173 "pattern_list [file ...]\n"));
1174
1175 (void) fprintf(stderr, "\t%s", cmdname);
1176 (void) fprintf(stderr,
1177 gettext(" [-c|-l|-q] [-r|-R] [-bhinsvwx] "
1178 "[-e pattern_list]... "
1179 "[-f pattern_file]... [file...]\n"));
1180
1181 (void) fprintf(stderr, "\t%s", cmdname);
1182 (void) fprintf(stderr,
1183 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1184 "pattern_list [file ...]\n"));
1185
1186 (void) fprintf(stderr, "\t%s", cmdname);
1187 (void) fprintf(stderr,
1188 gettext(" -E [-c|-l|-q] [-r|-R] [-bhinsvx] "
1189 "[-e pattern_list]... "
1190 "[-f pattern_file]... [file...]\n"));
1191
1192 (void) fprintf(stderr, "\t%s", cmdname);
1193 (void) fprintf(stderr,
1194 gettext(" -F [-c|-l|-q] [-r|-R] [-bhinsvx] "
1195 "pattern_list [file ...]\n"));
1196
1197 (void) fprintf(stderr, "\t%s", cmdname);
1198 (void) fprintf(stderr,
1199 gettext(" -F [-c|-l|-q] [-bhinsvx] [-e pattern_list]... "
1200 "[-f pattern_file]... [file...]\n"));
1201 }
1202 exit(2);
1203 /* NOTREACHED */
1204 }
1205
1206 /*
1207 * Compile literal pattern into BMG tables
1208 */
1209 static void
1210 bmgcomp(char *pat, int len)
1211 {
1212 int i;
1213 int tlen;
1214 unsigned char *uc = (unsigned char *)pat;
1215
1216 bmglen = len;
1217 bmgpat = pat;
1218
1219 for (i = 0; i < M_CSETSIZE; i++) {
|
37 /* Copyright 2012 Nexenta Systems, Inc. All rights reserved. */
38
39 #include <string.h>
40 #include <stdlib.h>
41 #include <ctype.h>
42 #include <stdarg.h>
43 #include <regex.h>
44 #include <limits.h>
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <fcntl.h>
48 #include <stdio.h>
49 #include <locale.h>
50 #include <wchar.h>
51 #include <errno.h>
52 #include <unistd.h>
53 #include <wctype.h>
54 #include <ftw.h>
55 #include <sys/param.h>
56
/* Name reported for standard input in place of a file name (localized). */
57 #define STDIN_FILENAME gettext("(standard input)")
58
59 #define BSIZE 512 /* Size of block for -b */
60 #define BUFSIZE 8192 /* Input buffer size */
61 #define MAX_DEPTH 1000 /* how deep to recurse */
62
63 #define M_CSETSIZE 256 /* singlebyte chars */
64 static int bmglen; /* length of BMG pattern */
65 static char *bmgpat; /* BMG pattern */
66 static int bmgtab[M_CSETSIZE]; /* BMG delta1 table */
67
/*
 * One search pattern.  Entries are chained together through `next`.
 */
68 typedef struct _PATTERN {
69 char *pattern; /* original pattern */
70 wchar_t *wpattern; /* wide, lowercased pattern */
71 struct _PATTERN *next;
72 regex_t re; /* compiled pattern */
73 } PATTERN;
74
75 static PATTERN *patterns; /* pattern list -- presumably its head */
76 static char errstr[128]; /* regerror string buffer */
77 static int regflags = 0; /* regcomp options */
78 static int matched = 0; /* return of the grep() */
79 static int errors = 0; /* count of errors */
80 static uchar_t fgrep = 0; /* Invoked as fgrep */
81 static uchar_t egrep = 0; /* Invoked as egrep */
82 static uchar_t nvflag = 1; /* Print matching lines (cleared by -v) */
83 static uchar_t cflag; /* Count of matches */
84 static uchar_t iflag; /* Case insensitive matching */
85 static uchar_t Hflag; /* Precede lines by file name */
86 static uchar_t hflag; /* Suppress printing of filename */
87 static uchar_t lflag; /* Print file names of matches */
88 static uchar_t nflag; /* Precede lines by line number */
89 static uchar_t rflag; /* Search directories recursively */
90 static uchar_t bflag; /* Precede matches by block number */
91 static uchar_t sflag; /* Suppress file error messages */
92 static uchar_t qflag; /* Suppress standard output */
93 static uchar_t wflag; /* Search for expression as a word */
94 static uchar_t xflag; /* Anchoring */
95 static uchar_t Eflag; /* Egrep or -E flag */
96 static uchar_t Fflag; /* Fgrep or -F flag */
97 static uchar_t Rflag; /* Like rflag, but follow symlinks */
98 static uchar_t outfn; /* Put out file name */
99 static char *cmdname; /* argv[0] with any directory part removed */
100
/*
 * use_wchar is set when -F is used in a multibyte locale and either -x is
 * absent or -i is present; matching then goes through the wide character
 * APIs.  use_bmg and mblocale are assigned outside the visible code --
 * presumably "BMG search usable" and "locale is multibyte".
 */
101 static int use_wchar, use_bmg, mblocale;
102
103 static size_t outbuflen, prntbuflen;
104 static char *prntbuf;
105 static wchar_t *outline;
141 /*
142 * Skip leading slashes
143 */
144 cmdname = argv[0];
145 if (ap = strrchr(cmdname, '/'))
146 cmdname = ap + 1;
147
148 ap = cmdname;
149 /*
150 * Detect egrep/fgrep via command name, map to -E and -F options.
151 */
152 if (*ap == 'e' || *ap == 'E') {
153 regflags |= REG_EXTENDED;
154 egrep++;
155 } else {
156 if (*ap == 'f' || *ap == 'F') {
157 fgrep++;
158 }
159 }
160
161 while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
162 switch (c) {
163 case 'v': /* POSIX: negate matches */
164 nvflag = 0;
165 break;
166
167 case 'c': /* POSIX: write count */
168 cflag++;
169 break;
170
171 case 'i': /* POSIX: ignore case */
172 iflag++;
173 regflags |= REG_ICASE;
174 break;
175
176 case 'l': /* POSIX: Write filenames only */
177 lflag++;
178 break;
179
180 case 'n': /* POSIX: Write line numbers */
181 nflag++;
202 gettext("%s: out of memory\n"),
203 cmdname);
204 exit(2);
205 }
206 *(pattern_list + n_pattern - 1) = optarg;
207 break;
208
209 case 'f': /* POSIX: pattern file */
210 fflag = 1;
211 n_file++;
212 file_list = realloc(file_list,
213 sizeof (char *) * n_file);
214 if (file_list == NULL) {
215 (void) fprintf(stderr,
216 gettext("%s: out of memory\n"),
217 cmdname);
218 exit(2);
219 }
220 *(file_list + n_file - 1) = optarg;
221 break;
222
223 /* based on options order h or H is set as in GNU grep */
224 case 'h': /* Solaris: supress printing of file name */
225 hflag = 1;
226 Hflag = 0;
227 break;
228 /* Solaris: precede every matching with file name */
229 case 'H':
230 Hflag = 1;
231 hflag = 0;
232 break;
233
234 case 'q': /* POSIX: quiet: status only */
235 qflag++;
236 break;
237
238 case 'w': /* Solaris: treat pattern as word */
239 wflag++;
240 break;
241
242 case 'x': /* POSIX: full line matches */
243 xflag++;
244 regflags |= REG_ANCHOR;
245 break;
246
247 case 'E': /* POSIX: Extended RE's */
248 regflags |= REG_EXTENDED;
249 Eflag++;
250 break;
251
288 if (egrep)
289 Eflag++;
290 else /* Else fgrep */
291 Fflag++;
292 }
293
294 if (wflag && (Eflag || Fflag)) {
295 /*
296 * -w cannot be specified with grep -E or grep -F
297 */
298 usage();
299 }
300
301 /*
302 * -E and -F flags are mutually exclusive - check for this
303 */
304 if (Eflag && Fflag)
305 usage();
306
307 /*
308 * -l overrides -H like in GNU grep
309 */
310 if (lflag)
311 Hflag = 0;
312
313 /*
314 * -c, -l and -q flags are mutually exclusive
315 * We have -c override -l like in Solaris.
316 * -q overrides -l & -c programmatically in grep() function.
317 */
318 if (cflag && lflag)
319 lflag = 0;
320
321 argv += optind - 1;
322 argc -= optind - 1;
323
324 /*
325 * Now handling -e and -f option
326 */
327 if (pattern_list) {
328 for (i = 0; i < n_pattern; i++) {
329 addpattern(pattern_list[i]);
330 }
331 free(pattern_list);
332 }
333 if (file_list) {
346 addpattern(argv[1]);
347 argc--;
348 argv++;
349 }
350
351 /*
352 * If -x flag is not specified or -i flag is specified
353 * with fgrep in a multibyte locale, need to use
354 * the wide character APIs. Otherwise, byte-oriented
355 * process will be done.
356 */
357 use_wchar = Fflag && mblocale && (!xflag || iflag);
358
359 /*
360 * Compile Patterns and also decide if BMG can be used
361 */
362 fixpatterns();
363
364 /* Process all files: stdin, or rest of arg list */
365 if (argc < 2) {
366 matched = grep(0, STDIN_FILENAME);
367 } else {
368 if (Hflag || (argc > 2 && hflag == 0))
369 outfn = 1; /* Print filename on match line */
370 for (argv++; *argv != NULL; argv++) {
371 process_path(*argv);
372 }
373 }
374 /*
375 * Return() here is used instead of exit
376 */
377
378 (void) fflush(stdout);
379
380 if (errors)
381 return (2);
382 return (matched ? 0 : 1);
383 }
384
385 static void
386 process_path(const char *path)
387 {
388 struct stat st;
1112 matches++;
1113 /*
1114 * Handle q, l, and c flags.
1115 */
1116 if (qflag) {
1117 /* no need to continue */
1118 /*
1119 * End of this line is ptrend.
1120 * We have read up to ptr + data_len.
1121 */
1122 off_t pos;
1123 pos = ptr + data_len - (ptrend + 1);
1124 (void) lseek(fd, -pos, SEEK_CUR);
1125 exit(0);
1126 }
1127 if (lflag) {
1128 (void) printf("%s\n", fn);
1129 break;
1130 }
1131 if (!cflag) {
1132 if (Hflag || outfn) {
1133 (void) printf("%s:", fn);
1134 }
1135 if (bflag) {
1136 (void) printf("%lld:", (offset_t)
1137 (line_offset / BSIZE));
1138 }
1139 if (nflag) {
1140 (void) printf("%lld:", lineno);
1141 }
1142 *ptrend = '\n';
1143 (void) fwrite(ptr, 1, line_len + 1, stdout);
1144 }
1145 if (ferror(stdout)) {
1146 return (0);
1147 }
1148 }
1149 L_skip_line:
1150 if (!newlinep)
1151 break;
1152
1153 data_len -= line_len + 1;
1154 line_offset += line_len + 1;
1155 ptr = ptrend + 1;
1156 }
1157
1158 if (cflag) {
1159 if (Hflag || outfn) {
1160 (void) printf("%s:", fn);
1161 }
1162 if (!qflag) {
1163 (void) printf("%lld\n", matches);
1164 }
1165 }
1166 return (matches != 0);
1167 }
1168
1169 /*
1170 * usage message for grep
1171 */
1172 static void
1173 usage(void)
1174 {
1175 if (egrep || fgrep) {
1176 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1177 (void) fprintf(stderr,
1178 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1179 "pattern_list [file ...]\n"));
1180
1181 (void) fprintf(stderr, "\t%s", cmdname);
1182 (void) fprintf(stderr,
1183 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1184 "[-e pattern_list]... "
1185 "[-f pattern_file]... [file...]\n"));
1186 } else {
1187 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
1188 (void) fprintf(stderr,
1189 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1190 "pattern_list [file ...]\n"));
1191
1192 (void) fprintf(stderr, "\t%s", cmdname);
1193 (void) fprintf(stderr,
1194 gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
1195 "[-e pattern_list]... "
1196 "[-f pattern_file]... [file...]\n"));
1197
1198 (void) fprintf(stderr, "\t%s", cmdname);
1199 (void) fprintf(stderr,
1200 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1201 "pattern_list [file ...]\n"));
1202
1203 (void) fprintf(stderr, "\t%s", cmdname);
1204 (void) fprintf(stderr,
1205 gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1206 "[-e pattern_list]... "
1207 "[-f pattern_file]... [file...]\n"));
1208
1209 (void) fprintf(stderr, "\t%s", cmdname);
1210 (void) fprintf(stderr,
1211 gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
1212 "pattern_list [file ...]\n"));
1213
1214 (void) fprintf(stderr, "\t%s", cmdname);
1215 (void) fprintf(stderr,
1216 gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
1217 "[-f pattern_file]... [file...]\n"));
1218 }
1219 exit(2);
1220 /* NOTREACHED */
1221 }
1222
1223 /*
1224 * Compile literal pattern into BMG tables
1225 */
1226 static void
1227 bmgcomp(char *pat, int len)
1228 {
1229 int i;
1230 int tlen;
1231 unsigned char *uc = (unsigned char *)pat;
1232
1233 bmglen = len;
1234 bmgpat = pat;
1235
1236 for (i = 0; i < M_CSETSIZE; i++) {
|