Print this page
4701 would like grep context options (-A, -B, -C)

@@ -62,10 +62,14 @@
 
 #define BSIZE           512             /* Size of block for -b */
 #define BUFSIZE         8192            /* Input buffer size */
 #define MAX_DEPTH       1000            /* how deep to recurse */
 
+#define AFTER   1                       /* 'After' Context */
+#define BEFORE  2                       /* 'Before' Context */
+#define CONTEXT (AFTER|BEFORE)          /* Full Context */
+
 #define M_CSETSIZE      256             /* singlebyte chars */
 static int      bmglen;                 /* length of BMG pattern */
 static char     *bmgpat;                /* BMG pattern */
 static int      bmgtab[M_CSETSIZE];     /* BMG delta1 table */
 

@@ -98,16 +102,18 @@
 static uchar_t  xflag;                  /* Anchoring */
 static uchar_t  Eflag;                  /* Egrep or -E flag */
 static uchar_t  Fflag;                  /* Fgrep or -F flag */
 static uchar_t  Rflag;                  /* Like rflag, but follow symlinks */
 static uchar_t  outfn;                  /* Put out file name */
+static uchar_t  conflag;                /* show context of matches */
 static char     *cmdname;
 
 static int      use_wchar, use_bmg, mblocale;
 
-static size_t   outbuflen, prntbuflen;
-static char     *prntbuf;
+static size_t   outbuflen, prntbuflen, conbuflen;
+static unsigned long    conalen, conblen, conmatches;
+static char     *prntbuf, *conbuf;
 static wchar_t  *outline;
 
 static void     addfile(const char *fn);
 static void     addpattern(char *s);
 static void     fixpatterns(void);

@@ -123,11 +129,11 @@
  * mainline for grep
  */
 int
 main(int argc, char **argv)
 {
-        char    *ap;
+        char    *ap, *test;
         int     c;
         int     fflag = 0;
         int     i, n_pattern = 0, n_file = 0;
         char    **pattern_list = NULL;
         char    **file_list = NULL;

@@ -160,11 +166,45 @@
                 if (*ap == 'f' || *ap == 'F') {
                         fgrep++;
                 }
         }
 
-        while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIR")) != EOF) {
+        /* check for non-standard "-line-count" option */
+        for (i = 1; i < argc; i++) {
+                if (strcmp(argv[i], "--") == 0)
+                        break;
+
+                if ((argv[i][0] == '-') && isdigit(argv[i][1])) {
+                        if (strlen(&argv[i][1]) !=
+                            strspn(&argv[i][1], "0123456789")) {
+                                (void) fprintf(stderr, gettext(
+                                    "%s: Bad number flag\n"), argv[0]);
+                                usage();
+                        }
+
+                        conalen = conblen = strtoul(&argv[i][1], (char **)NULL,
+                            10);
+
+                        /* isdigit() check prevents negative arguments */
+                        if (conalen >= ULONG_MAX) {
+                                (void) fprintf(stderr, gettext(
+                                    "%s: Bad context argument\n"), argv[0]);
+                        }
+
+                        if (conalen)
+                                conflag = CONTEXT;
+
+                        while (i < argc) {
+                                argv[i] = argv[i + 1];
+                                i++;
+                        }
+                        argc--;
+                }
+        }
+
+        while ((c = getopt(argc, argv, "vwchHilnrbse:f:qxEFIRA:B:C:")) != EOF) {
+                unsigned long tval;
                 switch (c) {
                 case 'v':       /* POSIX: negate matches */
                         nvflag = 0;
                         break;
 

@@ -260,10 +300,53 @@
                 case 'R':       /* Solaris: like rflag, but follow symlinks */
                         Rflag++;
                         rflag++;
                         break;
 
+                case 'A':       /* print N lines after each match */
+                        conalen = strtoul(optarg, &test, 10);
+                        /* *test is non-NUL if optarg has trailing junk */
+                        if (*test != '\0' || conalen >= ULONG_MAX) {
+                                (void) fprintf(stderr, gettext(
+                                    "%s: Bad context argument\n"), argv[0]);
+                                exit(2);
+                        }
+                        if (conalen)
+                                conflag |= AFTER;
+                        else
+                                conflag &= ~AFTER;
+                        break;
+                case 'B':       /* print N lines before each match */
+                        conblen = strtoul(optarg, &test, 10);
+                        /* *test is non-NUL if optarg has trailing junk */
+                        if (*test != '\0' || conblen >= ULONG_MAX) {
+                                (void) fprintf(stderr, gettext(
+                                    "%s: Bad context argument\n"), argv[0]);
+                                exit(2);
+                        }
+                        if (conblen)
+                                conflag |= BEFORE;
+                        else
+                                conflag &= ~BEFORE;
+                        break;
+                case 'C':       /* print N lines around each match */
+                        tval = strtoul(optarg, &test, 10);
+                        /* *test is non-NUL if optarg has trailing junk */
+                        if (*test != '\0' || tval >= ULONG_MAX) {
+                                (void) fprintf(stderr, gettext(
+                                    "%s: Bad context argument\n"), argv[0]);
+                                exit(2);
+                        }
+                        if (tval) {
+                                if (!(conflag & BEFORE))
+                                        conblen = tval;
+                                if (!(conflag & AFTER))
+                                        conalen = tval;
+                                conflag = CONTEXT;
+                        }
+                        break;
+
                 default:
                         usage();
                 }
         }
         /*

@@ -540,10 +623,12 @@
 
                 bufp = inbuf;
                 bufused = 0;
         }
         free(inbuf);
+        free(prntbuf);
+        free(conbuf);
         (void) fclose(fp);
 }
 
 /*
  * Add a string to the pattern list.

@@ -716,16 +801,17 @@
          * - no ignoring case           (!iflag)
          * - no printing line numbers   (!nflag)
          * - no negating the output     (nvflag)
          * - only one pattern           (npatterns == 1)
          * - non zero length pattern    (strlen(patterns->pattern) != 0)
+         * - no context required        (!conflag)
          *
          * It's guaranteed patterns->pattern is still alive
          * when Fflag && !mblocale.
          */
         use_bmg = Fflag && !mblocale && !iflag && !nflag && nvflag &&
-            (npatterns == 1) && (strlen(patterns->pattern) != 0);
+            (npatterns == 1) && (strlen(patterns->pattern) != 0) && !conflag;
 }
 
 /*
  * Search a newline from the beginning of the string
  */

@@ -802,16 +888,23 @@
 {
         PATTERN *pp;
         off_t   data_len;       /* length of the data chunk */
         off_t   line_len;       /* length of the current line */
         off_t   line_offset;    /* current line's offset from the beginning */
-        long long       lineno;
+        off_t   blkoffset;      /* line_offset but context-compatible */
+        long long       lineno, linenum;
         long long       matches = 0;    /* Number of matching lines */
+        long long       conacnt = 0, conbcnt = 0;       /* context line count */
         int     newlinep;       /* 0 if the last line of file has no newline */
-        char    *ptr, *ptrend;
+        char    *ptr, *ptrend, *prntptr, *prntptrend;
+        char    *nextptr = NULL, *nextend = NULL;
+        char    *conptr = NULL, *conptrend = NULL;
+        char    *matchptr = NULL;
+        int     conaprnt = 0, conbprnt = 0, lastmatch = 0;
+        int     nearmatch = conmatches ? 1 : 0; /* w/in N+1 of last match */
+        size_t  prntlen;
 
-
         if (patterns == NULL)
                 return (0);     /* no patterns to match -- just return */
 
         pp = patterns;
 

@@ -836,23 +929,37 @@
                             cmdname);
                         exit(2);
                 }
         }
 
-        line_offset = 0;
+        if (conflag && (conbuf == NULL)) {
+                conbuflen = BUFSIZE;
+                if ((conbuf = malloc(BUFSIZE+1)) == NULL) {
+                        (void) fprintf(stderr, gettext("%s: out of memory\n"),
+                            cmdname);
+                        exit(2);
+                }
+        }
+
+        blkoffset = line_offset = 0;
         lineno = 0;
+        linenum = 1;
         newlinep = 1;
         data_len = 0;
         for (; ; ) {
                 long    count;
                 off_t   offset = 0;
+                int     eof = 0, rv = REG_NOMATCH;
+                char    separate;
 
                 if (data_len == 0) {
                         /*
                          * If no data in the buffer, reset ptr
                          */
                         ptr = prntbuf;
+                        if (conptr == NULL)
+                                conptrend = conptr = conbuf;
                 }
                 if (ptr == prntbuf) {
                         /*
                          * The current data chunk starts from prntbuf.
                          * This means either the buffer has no data

@@ -873,14 +980,24 @@
                                         }
                                 }
                                 return (0);
                         } else if (count == 0) {
                                 /* no new data */
+                                eof = 1;
+
+                                /* we never want to match EOF */
+                                pp = (PATTERN *) !nvflag;
+
                                 if (data_len == 0) {
                                         /* end of file already reached */
-                                        break;
+                                        if (conflag) {
+                                                *conptrend = '\n';
+                                                goto L_next_line;
+                                        } else {
+                                                goto out;
                                 }
+                                }
                                 /* last line of file has no newline */
                                 ptrend = ptr + data_len;
                                 newlinep = 0;
                                 goto L_start_process;
                         }

@@ -904,11 +1021,11 @@
                                  */
                                 (void) memmove(prntbuf, ptr, data_len);
                         }
                         if (data_len == prntbuflen) {
                                 /*
-                                 * No enough room in the buffer
+                                 * Not enough room in the buffer
                                  */
                                 prntbuflen += BUFSIZE;
                                 prntbuf = realloc(prntbuf, prntbuflen + 1);
                                 if (prntbuf == NULL) {
                                         (void) fprintf(stderr,

@@ -1038,18 +1155,20 @@
                                 for (pp = patterns; pp; pp = pp->next) {
                                         if (outline[0] == pp->wpattern[0] &&
                                             wcscmp(outline,
                                             pp->wpattern) == 0) {
                                                 /* matched */
+                                                rv = REG_OK;
                                                 break;
                                         }
                                 }
                         } else {
                                 for (pp = patterns; pp; pp = pp->next) {
                                         if (wcswcs(outline, pp->wpattern)
                                             != NULL) {
                                                 /* matched */
+                                                rv = REG_OK;
                                                 break;
                                         }
                                 }
                         }
                 } else if (Fflag) {

@@ -1064,26 +1183,26 @@
                                 /* fgrep -x */
                                 for (pp = patterns; pp; pp = pp->next) {
                                         if (fptr[0] == pp->pattern[0] &&
                                             strcmp(fptr, pp->pattern) == 0) {
                                                 /* matched */
+                                                rv = REG_OK;
                                                 break;
                                         }
                                 }
                         } else {
                                 for (pp = patterns; pp; pp = pp->next) {
                                         if (strstr(fptr, pp->pattern) != NULL) {
                                                 /* matched */
+                                                rv = REG_OK;
                                                 break;
                                         }
                                 }
                         }
                 } else {
                         /* grep or egrep */
                         for (pp = patterns; pp; pp = pp->next) {
-                                int     rv;
-
                                 rv = regexec(&pp->re, ptr, 0, NULL, 0);
                                 if (rv == REG_OK) {
                                         /* matched */
                                         break;
                                 }

@@ -1105,18 +1224,183 @@
                                         exit(2);
                                 }
                         }
                 }
 
+                /*
+                 * Context is set up as follows:
+                 * For a 'Before' context, we maintain a pair of pointers
+                 * delimiting up to 'N' lines of context. If N lines are
+                 * already held and the current line isn't a match, the
+                 * start pointer is moved forward past the next newline.
+                 *
+                 * If we ever find a match, we print out immediately.
+                 * 'nearmatch' is cleared after each print and set once a line
+                 * gets rotated out of the context. If it is set when a later
+                 * block is printed, that block is not adjacent to the previous
+                 * one, so (unless -c is given) a "--" separator precedes it.
+                 *
+                 * For an 'After' context, we simply wait until we've found a
+                 * match, then create a context N+1 lines big. If we don't find
+                 * a match within the context, we print out the current context.
+                 * Otherwise, we save a reference to the new matching line,
+                 * print out the other context, and reset our context pointers
+                 * to the new matching line.
+                 *
+                 * 'nearmatch' becomes true when we find a non-matching line
+                 * that isn't a part of any context.
+                 *
+                 * A full-context is implemented as a combination of the
+                 * 'Before' and 'After' context logic. Before we find a match,
+                 * we follow the Before logic. When we find a match, we
+                 * follow the After logic. 'nearmatch' is handled by the Before
+                 * logic.
+                 */
+
+                if (!conflag)
+                        goto L_next_line;
+
+                if (line_len + (conptrend - conbuf) > conbuflen) {
+                        char *oldconbuf = conbuf;
+                        char *oldconptr = conptr;
+                        long tmp = matchptr - conptr;
+
+                        conbuflen += BUFSIZE;
+                        conbuf = realloc(conbuf, conbuflen + 1);
+                        if (conbuf == NULL) {
+                                (void) fprintf(stderr,
+                                    gettext("%s: out of memory\n"),
+                                    cmdname);
+                                exit(2);
+                        }
+
+                        conptr = conbuf + (conptr - oldconbuf);
+                        conptrend = conptr + (conptrend - oldconptr);
+                        if (matchptr)
+                                matchptr = conptr + tmp;
+                }
+                (void) memcpy((conptrend > conptr) ?
+                    conptrend + 1 : conptrend, ptr, line_len);
+                conptrend += line_len + (conptrend > conptr);
+                *conptrend = '\n';
+
+                if (!nvflag == rv) {
+                        /* matched */
+                        if (lastmatch) {
+                                if (conflag & AFTER) {
+                                        conaprnt = 1;
+                                        nextend = conptrend;
+                                        conptrend = conptr + lastmatch;
+                                        nextptr = conptrend + 1;
+                                        *nextend = '\n';
+                                }
+                        } else {
+                                if (conflag == AFTER) {
+                                        conptr = conptrend - (line_len);
+                                        linenum = lineno;
+                                        blkoffset = line_offset;
+                                }
+                                blkoffset = line_offset -
+                                    (conptrend - conptr - line_len);
+                        }
+
+                        if (conflag == BEFORE)
+                                conbprnt = 1;
+
+                        lastmatch = conptrend - conptr;
+                        goto L_next_line;
+                }
+
+                if (!lastmatch) {
+                        if (conflag & BEFORE) {
+                                if (conbcnt >= conblen) {
+                                        char *tmp = conptr;
+                                        conptr = find_nl(conptr,
+                                            conptrend - conptr) + 1;
+                                        if (bflag)
+                                                blkoffset += conptr - tmp;
+                                        linenum++;
+                                        nearmatch = 1;
+                                } else {
+                                        conbcnt++;
+                                }
+                        }
+                        if (conflag == AFTER)
+                                nearmatch = 1;
+                } else  {
+                        if (++conacnt >= conalen && !conaprnt && conalen)
+                                conaprnt = 1;
+                        else
+                                lastmatch = conptrend - conptr;
+                }
+
 L_next_line:
                 /*
                  * Here, if pp points to non-NULL, something has been matched
                  * to the pattern.
                  */
                 if (nvflag == (pp != NULL)) {
                         matches++;
+                        if (!nextend)
+                                matchptr = conflag ? conptrend : ptrend;
+                }
+
                         /*
+                 * Set up some print context so that we can treat
+                 * single-line matches as a zero-N context.
+                 * Apply CLI flags to each line of the context.
+                 *
+                 * For context, we only print if we both have a match and are
+                 * either at the end of the data stream, or we've previously
+                 * declared that we want to print for a particular context.
+                 */
+                if (lastmatch && (eof || conaprnt || conbprnt)) {
+
+                        /*
+                         * We'd normally do this earlier, but we had to
+                         * escape early because we reached the end of the data.
+                         */
+                        if (eof && nextptr)
+                                conptrend = nextend;
+
+                        prntlen = conptrend - conptr + 1;
+                        prntptrend = prntptr = conptr;
+                        if (conmatches++ && nearmatch && !cflag)
+                                (void) fwrite("--\n", 1, 3, stdout);
+                } else if (!conflag && nvflag == (pp != NULL)) {
+                        *ptrend = '\n';
+                        prntlen = line_len + 1;
+                        prntptrend = prntptr = ptr;
+                        linenum = lineno;
+                        blkoffset = line_offset;
+                } else if (eof) {
+                        /* No match and no more data */
+                        goto out;
+                } else {
+                        /* No match, or we're not done building context */
+                        goto L_skip_line;
+                }
+
+                while ((prntptrend = find_nl(prntptrend+1, prntlen)) != NULL) {
+
+                        /*
+                         * GNU grep uses '-' for context lines and ':' for
+                         * matching lines, so replicate that here.
+                         */
+                        if (prntptrend == matchptr) {
+                                if (eof && nextptr) {
+                                        matchptr = nextend;
+                                        nextptr = NULL;
+                                } else {
+                                        matchptr = NULL;
+                                }
+                                separate = ':';
+                        } else {
+                                separate = '-';
+                        }
+
+                        /*
                          * Handle q, l, and c flags.
                          */
                         if (qflag) {
                                 /* no need to continue */
                                 /*

@@ -1128,39 +1412,74 @@
                                 (void) lseek(fd, -pos, SEEK_CUR);
                                 exit(0);
                         }
                         if (lflag) {
                                 (void) printf("%s\n", fn);
-                                break;
+                                goto out;
                         }
                         if (!cflag) {
                                 if (Hflag || outfn) {
-                                        (void) printf("%s:", fn);
+                                        (void) printf("%s%c", fn, separate);
                                 }
                                 if (bflag) {
-                                        (void) printf("%lld:", (offset_t)
-                                            (line_offset / BSIZE));
+                                        (void) printf("%lld%c", (offset_t)
+                                            (blkoffset / BSIZE), separate);
                                 }
                                 if (nflag) {
-                                        (void) printf("%lld:", lineno);
+                                        (void) printf("%lld%c", linenum,
+                                            separate);
                                 }
-                                *ptrend = '\n';
-                                (void) fwrite(ptr, 1, line_len + 1, stdout);
+                                (void) fwrite(prntptr, 1,
+                                    prntptrend - prntptr + 1, stdout);
                         }
                         if (ferror(stdout)) {
                                 return (0);
                         }
+                        linenum++;
+                        prntlen -= prntptrend - prntptr + 1;
+                        blkoffset += prntptrend - prntptr + 1;
+                        prntptr = prntptrend + 1;
                 }
+
+                if (eof)
+                        goto out;
+
+                /*
+                 * Update context buffer and variables post-print
+                 */
+                if (conflag) {
+                        conptr = conbuf;
+                        conaprnt = conbprnt = 0;
+                        nearmatch = 0;
+                        conacnt = conbcnt = 0;
+
+                        if (nextptr) {
+                                (void) memmove(conbuf, nextptr,
+                                    nextend - nextptr + 1);
+                                blkoffset += nextptr - conptrend - 1;
+                                conptrend = conptr + (nextend - nextptr);
+                                matchptr = conptrend;
+                                linenum = lineno;
+                                lastmatch = conptrend - conptr;
+                        } else {
+                                conptrend = conptr;
+                                conacnt = 0;
+                                lastmatch = 0;
+                        }
+                        nextptr = nextend = NULL;
+                }
+
 L_skip_line:
                 if (!newlinep)
                         break;
 
                 data_len -= line_len + 1;
                 line_offset += line_len + 1;
                 ptr = ptrend + 1;
         }
 
+out:
         if (cflag) {
                 if (Hflag || outfn) {
                         (void) printf("%s:", fn);
                 }
                 if (!qflag) {

@@ -1177,49 +1496,50 @@
 usage(void)
 {
         if (egrep || fgrep) {
                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
                 (void) fprintf(stderr,
-                    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
-                    "pattern_list [file ...]\n"));
+                    gettext(" [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] pattern_list [file ...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvx] "
-                    "[-e pattern_list]... "
+                    gettext(" [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] [-e pattern_list]... "
                     "[-f pattern_file]... [file...]\n"));
         } else {
                 (void) fprintf(stderr, gettext("Usage:\t%s"), cmdname);
                 (void) fprintf(stderr,
-                    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
-                    "pattern_list [file ...]\n"));
+                    gettext(" [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] pattern_list [file ...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" [-c|-l|-q] [-r|-R] [-bhHinsvwx] "
-                    "[-e pattern_list]... "
+                    gettext(" [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] [-e pattern_list]... "
                     "[-f pattern_file]... [file...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
-                    "pattern_list [file ...]\n"));
+                    gettext(" -E [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] pattern_list [file ...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" -E [-c|-l|-q] [-r|-R] [-bhHinsvx] "
-                    "[-e pattern_list]... "
+                    gettext(" -E [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] [-e pattern_list]... "
                     "[-f pattern_file]... [file...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" -F [-c|-l|-q] [-r|-R] [-bhHinsvx] "
-                    "pattern_list [file ...]\n"));
+                    gettext(" -F [-c|-l|-q] [-r|-R] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] pattern_list [file ...]\n"));
 
                 (void) fprintf(stderr, "\t%s", cmdname);
                 (void) fprintf(stderr,
-                    gettext(" -F [-c|-l|-q] [-bhHinsvx] [-e pattern_list]... "
+                    gettext(" -F [-c|-l|-q] [-A #|-B #|-C #|-#] "
+                    "[-bhHinsvx] [-e pattern_list]... "
                     "[-f pattern_file]... [file...]\n"));
         }
         exit(2);
         /* NOTREACHED */
 }