1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * Copyright (c) 2018, Joyent, Inc.
  31  */
  32 
  33 /*
  34  * csplit - Context or line file splitter
  35  * Compile: cc -O -s -o csplit csplit.c
  36  */
  37 
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <unistd.h>
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <errno.h>
  44 #include <limits.h>
  45 #include <regexpr.h>
  46 #include <signal.h>
  47 #include <locale.h>
  48 #include <libintl.h>
  49 
/* Line-number argument to to_line() meaning "copy through end of input". */
#define LAST    0LL
/* Failure return of asc_to_ll(). */
#define ERR     -1
/* Boolean values; main() also stores FALSE in "mode" for "nothing to repeat". */
#define FALSE   0
#define TRUE    1
/* Kinds of operand a following {n} repeat count can apply to (see num_arg). */
#define EXPMODE 2               /* previous operand was /re/ or %re% */
#define LINMODE 3               /* previous operand was a line number */
#define LINSIZ  LINE_MAX        /* POSIX.2 - read lines LINE_MAX long */
  57 
        /* Globals */

char linbuf[LINSIZ];            /* Input line buffer */
char *expbuf;                   /* Compiled R.E. returned by compile() */
char tmpbuf[BUFSIZ];            /* Temporary buffer for stdin */
char file[8192] = "xx";         /* File name buffer */
char *targ;                     /* Arg ptr for error messages */
char *sptr;                     /* Scratch ptr; findline() uses it to strip '\n' */
FILE *infile, *outfile;         /* I/O file streams */
int silent, keep, create;       /* Flags: -s(ilent), -k(eep), (create) */
int errflg;                     /* Bumped by main() when getopt() returns '?' */
int fiwidth = 2;                /* file index width (output file names) */
extern int optind;
extern char *optarg;
offset_t offset;                /* Regular expression offset value */
offset_t curline;               /* Current line in input file */
  74 
/*
 * PERROR reports a regular expression that failed to compile by calling
 * fatal() with the current operand (targ).  The macro argument is not
 * used, and the expansion already ends in a semicolon.
 */
#define PERROR(x)       fatal("%s: Illegal Regular Expression\n", targ);
  79 
/* Forward declarations of the file-local routines defined below. */
static int asc_to_ll(char *, long long *);
static void closefile(void);
static void fatal(char *, char *);
static offset_t findline(char *, offset_t);
static void flush(void);
static FILE *getfile(void);
static char *getaline(int);
static void line_arg(char *);
static void num_arg(char *, int);
static void re_arg(char *);
static void sig(int);
static void to_line(offset_t);
static void usage(void);
  93 
  94 int
  95 main(int argc, char **argv)
  96 {
  97         int ch, mode;
  98         char *ptr;
  99 
 100         (void) setlocale(LC_ALL, "");
 101 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
 102 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
 103 #endif
 104         (void) textdomain(TEXT_DOMAIN);
 105 
 106         while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
 107                 switch (ch) {
 108                         case 'f':
 109                                 (void) strcpy(file, optarg);
 110                                 if ((ptr = strrchr(optarg, '/')) == NULL)
 111                                         ptr = optarg;
 112                                 else
 113                                         ptr++;
 114 
 115                                 break;
 116                         case 'n':               /* POSIX.2 */
 117                                 for (ptr = optarg; *ptr != NULL; ptr++)
 118                                         if (!isdigit((int)*ptr))
 119                                                 fatal("-n num\n", NULL);
 120                                 fiwidth = atoi(optarg);
 121                                 break;
 122                         case 'k':
 123                                 keep++;
 124                                 break;
 125                         case 's':
 126                                 silent++;
 127                                 break;
 128                         case '?':
 129                                 errflg++;
 130                 }
 131         }
 132 
 133         argv = &argv[optind];
 134         argc -= optind;
 135         if (argc <= 1 || errflg)
 136                 usage();
 137 
 138         if (strcmp(*argv, "-") == 0) {
 139                 infile = tmpfile();
 140 
 141                 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
 142                         if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
 143                                 if (errno == ENOSPC) {
 144                                         (void) fprintf(stderr, "csplit: ");
 145                                         (void) fprintf(stderr, gettext(
 146                                             "No space left on device\n"));
 147                                         exit(1);
 148                                 } else {
 149                                         (void) fprintf(stderr, "csplit: ");
 150                                         (void) fprintf(stderr, gettext(
 151                                             "Bad write to temporary "
 152                                             "file\n"));
 153                                         exit(1);
 154                                 }
 155 
 156         /* clear the buffer to get correct size when writing buffer */
 157 
 158                         (void) memset(tmpbuf, '\0', sizeof (tmpbuf));
 159                 }
 160                 rewind(infile);
 161         } else if ((infile = fopen(*argv, "r")) == NULL)
 162                 fatal("Cannot open %s\n", *argv);
 163         ++argv;
 164         curline = (offset_t)1;
 165         (void) signal(SIGINT, sig);
 166 
 167         /*
 168          * The following for loop handles the different argument types.
 169          * A switch is performed on the first character of the argument
 170          * and each case calls the appropriate argument handling routine.
 171          */
 172 
 173         for (; *argv; ++argv) {
 174                 targ = *argv;
 175                 switch (**argv) {
 176                 case '/':
 177                         mode = EXPMODE;
 178                         create = TRUE;
 179                         re_arg(*argv);
 180                         break;
 181                 case '%':
 182                         mode = EXPMODE;
 183                         create = FALSE;
 184                         re_arg(*argv);
 185                         break;
 186                 case '{':
 187                         num_arg(*argv, mode);
 188                         mode = FALSE;
 189                         break;
 190                 default:
 191                         mode = LINMODE;
 192                         create = TRUE;
 193                         line_arg(*argv);
 194                         break;
 195                 }
 196         }
 197         create = TRUE;
 198         to_line(LAST);
 199         return (0);
 200 }
 201 
 202 /*
 203  * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc)
 204  * It returns ERR if an illegal character.  The reason that asc_to_ll
 205  * does not return an answer(long long) is that any value for the long
 206  * long is legal, and this version of asc_to_ll detects error strings.
 207  */
 208 
 209 static int
 210 asc_to_ll(char *str, long long *plc)
 211 {
 212         int f;
 213         *plc = 0;
 214         f = 0;
 215         for (; ; str++) {
 216                 switch (*str) {
 217                 case ' ':
 218                 case '\t':
 219                         continue;
 220                 case '-':
 221                         f++;
 222                         /* FALLTHROUGH */
 223                 case '+':
 224                         str++;
 225                 }
 226                 break;
 227         }
 228         for (; *str != NULL; str++)
 229                 if (*str >= '0' && *str <= '9')
 230                         *plc = *plc * 10 + *str - '0';
 231                 else
 232                         return (ERR);
 233         if (f)
 234                 *plc = -(*plc);
 235         return (TRUE);  /* not error */
 236 }
 237 
 238 /*
 239  * Closefile prints the byte count of the file created,(via fseeko
 240  * and ftello), if the create flag is on and the silent flag is not on.
 241  * If the create flag is on closefile then closes the file(fclose).
 242  */
 243 
 244 static void
 245 closefile()
 246 {
 247         if (!silent && create) {
 248                 (void) fseeko(outfile, (offset_t)0, SEEK_END);
 249                 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
 250         }
 251         if (create)
 252                 (void) fclose(outfile);
 253 }
 254 
 255 /*
 256  * Fatal handles error messages and cleanup.
 257  * Because "arg" can be the global file, and the cleanup processing
 258  * uses the global file, the error message is printed first.  If the
 259  * "keep" flag is not set, fatal unlinks all created files.  If the
 260  * "keep" flag is set, fatal closes the current file(if there is one).
 261  * Fatal exits with a value of 1.
 262  */
 263 
 264 static void
 265 fatal(char *string, char *arg)
 266 {
 267         char *fls;
 268         int num;
 269 
 270         (void) fprintf(stderr, "csplit: ");
 271 
 272         /* gettext dynamically replaces string */
 273 
 274         (void) fprintf(stderr, gettext(string), arg);
 275         if (!keep) {
 276                 if (outfile) {
 277                         (void) fclose(outfile);
 278                         for (fls = file; *fls != '\0'; fls++)
 279                                 continue;
 280                         fls -= fiwidth;
 281                         for (num = atoi(fls); num >= 0; num--) {
 282                                 (void) sprintf(fls, "%.*d", fiwidth, num);
 283                                 (void) unlink(file);
 284                         }
 285                 }
 286         } else
 287                 if (outfile)
 288                         closefile();
 289         exit(1);
 290 }
 291 
 292 /*
 293  * Findline returns the line number referenced by the current argument.
 294  * Its arguments are a pointer to the compiled regular expression(expr),
 295  * and an offset(oset).  The variable lncnt is used to count the number
 296  * of lines searched.  First the current stream location is saved via
 297  * ftello(), and getaline is called so that R.E. searching starts at the
 298  * line after the previously referenced line.  The while loop checks
 299  * that there are more lines(error if none), bumps the line count, and
 300  * checks for the R.E. on each line.  If the R.E. matches on one of the
 301  * lines the old stream location is restored, and the line number
 302  * referenced by the R.E. and the offset is returned.
 303  */
 304 
 305 static offset_t
 306 findline(char *expr, offset_t oset)
 307 {
 308         static int benhere = 0;
 309         offset_t lncnt = 0, saveloc;
 310 
 311         saveloc = ftello(infile);
 312         if (curline != (offset_t)1 || benhere)  /* If first line, first time, */
 313                 (void) getaline(FALSE);         /* then don't skip */
 314         else
 315                 lncnt--;
 316         benhere = 1;
 317         while (getaline(FALSE) != NULL) {
 318                 lncnt++;
 319                 if ((sptr = strrchr(linbuf, '\n')) != NULL)
 320                         *sptr = '\0';
 321                 if (step(linbuf, expr)) {
 322                         (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
 323                         return (curline+lncnt+oset);
 324                 }
 325         }
 326         (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
 327         return (curline+lncnt+oset+2);
 328 }
 329 
 330 /*
 331  * Flush uses fputs to put lines on the output file stream(outfile)
 332  * Since fputs does its own buffering, flush doesn't need to.
 333  * Flush does nothing if the create flag is not set.
 334  */
 335 
 336 static void
 337 flush()
 338 {
 339         if (create)
 340                 (void) fputs(linbuf, outfile);
 341 }
 342 
 343 /*
 344  * Getfile does nothing if the create flag is not set.  If the create
 345  * flag is set, getfile positions the file pointer(fptr) at the end of
 346  * the file name prefix on the first call(fptr=0).  The file counter is
 347  * stored in the file name and incremented.  If the subsequent fopen
 348  * fails, the file name is copied to tfile for the error message, the
 349  * previous file name is restored for cleanup, and fatal is called.  If
 350  * the fopen succeeds, the stream(opfil) is returned.
 351  */
 352 
 353 FILE *
 354 getfile()
 355 {
 356         static char *fptr;
 357         static int ctr;
 358         FILE *opfil;
 359         char tfile[15];
 360         char *delim;
 361         char savedelim;
 362 
 363         if (create) {
 364                 if (fptr == 0)
 365                         for (fptr = file; *fptr != NULL; fptr++)
 366                                 continue;
 367                 (void) sprintf(fptr, "%.*d", fiwidth, ctr++);
 368 
 369                 /* check for suffix length overflow */
 370                 if (strlen(fptr) > fiwidth) {
 371                         fatal("Suffix longer than %ld chars; increase -n\n",
 372                             (char *)fiwidth);
 373                 }
 374 
 375                 /* check for filename length overflow */
 376 
 377                 delim = strrchr(file, '/');
 378                 if (delim == (char *)NULL) {
 379                         if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
 380                                 fatal("Name too long: %s\n", file);
 381                         }
 382                 } else {
 383                         /* truncate file at pathname delim to do pathconf */
 384                         savedelim = *delim;
 385                         *delim = '\0';
 386                         /*
 387                          * file: pppppppp\0fffff\0
 388                          * ..... ^ file
 389                          * ............. ^ delim
 390                          */
 391                         if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
 392                                 fatal("Name too long: %s\n", delim + 1);
 393                         }
 394                         *delim = savedelim;
 395                 }
 396 
 397                 if ((opfil = fopen(file, "w")) == NULL) {
 398                         (void) strlcpy(tfile, file, sizeof (tfile));
 399                         (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
 400                         fatal("Cannot create %s\n", tfile);
 401                 }
 402                 return (opfil);
 403         }
 404         return (NULL);
 405 }
 406 
 407 /*
 408  * Getline gets a line via fgets from the input stream "infile".
 409  * The line is put into linbuf and may not be larger than LINSIZ.
 410  * If getaline is called with a non-zero value, the current line
 411  * is bumped, otherwise it is not(for R.E. searching).
 412  */
 413 
 414 static char *
 415 getaline(int bumpcur)
 416 {
 417         char *ret;
 418         if (bumpcur)
 419                 curline++;
 420         ret = fgets(linbuf, LINSIZ, infile);
 421         return (ret);
 422 }
 423 
 424 /*
 425  * Line_arg handles line number arguments.
 426  * line_arg takes as its argument a pointer to a character string
 427  * (assumed to be a line number).  If that character string can be
 428  * converted to a number(long long), to_line is called with that number,
 429  * otherwise error.
 430  */
 431 
 432 static void
 433 line_arg(char *line)
 434 {
 435         long long to;
 436 
 437         if (asc_to_ll(line, &to) == ERR)
 438                 fatal("%s: bad line number\n", line);
 439         to_line(to);
 440 }
 441 
 442 /*
 443  * Num_arg handles repeat arguments.
 444  * Num_arg copies the numeric argument to "rep" (error if number is
 445  * larger than 20 characters or } is left off).  Num_arg then converts
 446  * the number and checks for validity.  Next num_arg checks the mode
 447  * of the previous argument, and applys the argument the correct number
 448  * of times. If the mode is not set properly its an error.
 449  */
 450 
 451 static void
 452 num_arg(char *arg, int md)
 453 {
 454         offset_t repeat, toline;
 455         char rep[21];
 456         char *ptr;
 457         int             len;
 458 
 459         ptr = rep;
 460         for (++arg; *arg != '}'; arg += len) {
 461                 if (*arg == NULL)
 462                         fatal("%s: missing '}'\n", targ);
 463                 if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
 464                         len = 1;
 465                 if ((ptr + len) >= &rep[20])
 466                         fatal("%s: Repeat count too large\n", targ);
 467                 (void) memcpy(ptr, arg, len);
 468                 ptr += len;
 469         }
 470         *ptr = NULL;
 471         if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
 472                 fatal("Illegal repeat count: %s\n", targ);
 473         if (md == LINMODE) {
 474                 toline = offset = curline;
 475                 for (; repeat > 0LL; repeat--) {
 476                         toline += offset;
 477                         to_line(toline);
 478                 }
 479         } else  if (md == EXPMODE)
 480                         for (; repeat > 0LL; repeat--)
 481                                 to_line(findline(expbuf, offset));
 482                 else
 483                         fatal("No operation for %s\n", targ);
 484 }
 485 
 486 /*
 487  * Re_arg handles regular expression arguments.
 488  * Re_arg takes a csplit regular expression argument.  It checks for
 489  * delimiter balance, computes any offset, and compiles the regular
 490  * expression.  Findline is called with the compiled expression and
 491  * offset, and returns the corresponding line number, which is used
 492  * as input to the to_line function.
 493  */
 494 
 495 static void
 496 re_arg(char *string)
 497 {
 498         char *ptr;
 499         char ch;
 500         int             len;
 501 
 502         ch = *string;
 503         ptr = string;
 504         ptr++;
 505         while (*ptr != ch) {
 506                 if (*ptr == '\\')
 507                         ++ptr;
 508 
 509                 if (*ptr == NULL)
 510                         fatal("%s: missing delimiter\n", targ);
 511 
 512                 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
 513                         len = 1;
 514                 ptr += len;
 515         }
 516 
 517         /*
 518          * The line below was added because compile no longer supports
 519          * the fourth argument being passed.  The fourth argument used
 520          * to be '/' or '%'.
 521          */
 522 
 523         *ptr = NULL;
 524         if (asc_to_ll(++ptr, &offset) == ERR)
 525                 fatal("%s: illegal offset\n", string);
 526 
 527         /*
 528          * The line below was added because INIT which did this for us
 529          * was removed from compile in regexp.h
 530          */
 531 
 532         string++;
 533         expbuf = compile(string, (char *)0, (char *)0);
 534         if (regerrno)
 535                 PERROR(regerrno);
 536         to_line(findline(expbuf, offset));
 537 }
 538 
 539 /*
 540  * Sig handles breaks.  When a break occurs the signal is reset,
 541  * and fatal is called to clean up and print the argument which
 542  * was being processed at the time the interrupt occured.
 543  */
 544 
 545 /* ARGSUSED */
 546 static void
 547 sig(int s)
 548 {
 549         (void) signal(SIGINT, sig);
 550         fatal("Interrupt - program aborted at arg '%s'\n", targ);
 551 }
 552 
 553 /*
 554  * To_line creates split files.
 555  * To_line gets as its argument the line which the current argument
 556  * referenced.  To_line calls getfile for a new output stream, which
 557  * does nothing if create is False.  If to_line's argument is not LAST
 558  * it checks that the current line is not greater than its argument.
 559  * While the current line is less than the desired line to_line gets
 560  * lines and flushes(error if EOF is reached).
 561  * If to_line's argument is LAST, it checks for more lines, and gets
 562  * and flushes lines till the end of file.
 563  * Finally, to_line calls closefile to close the output stream.
 564  */
 565 
 566 static void
 567 to_line(offset_t ln)
 568 {
 569         outfile = getfile();
 570         if (ln != LAST) {
 571                 if (curline > ln)
 572                         fatal("%s - out of range\n", targ);
 573                 while (curline < ln) {
 574                         if (getaline(TRUE) == NULL)
 575                                 fatal("%s - out of range\n", targ);
 576                         flush();
 577                 }
 578         } else          /* last file */
 579                 if (getaline(TRUE) != NULL) {
 580                         flush();
 581                         for (;;) {
 582                                 if (getaline(TRUE) == NULL)
 583                                         break;
 584                                 flush();
 585                         }
 586                 } else
 587                         fatal("%s - out of range\n", targ);
 588         closefile();
 589 }
 590 
 591 static void
 592 usage()
 593 {
 594         (void) fprintf(stderr, gettext(
 595             "usage: csplit [-ks] [-f prefix] [-n number] "
 596             "file arg1 ...argn\n"));
 597         exit(1);
 598 }