1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  27 /*        All Rights Reserved   */
  28 
  29 /*
  30  * csplit - Context or line file splitter
  31  * Compile: cc -O -s -o csplit csplit.c
  32  */
  33 
  34 #include <stdio.h>
  35 #include <stdlib.h>
  36 #include <unistd.h>
  37 #include <string.h>
  38 #include <ctype.h>
  39 #include <errno.h>
  40 #include <limits.h>
  41 #include <regexpr.h>
  42 #include <signal.h>
  43 #include <locale.h>
  44 #include <libintl.h>
  45 
  46 #define LAST    0LL
  47 #define ERR     -1
  48 #define FALSE   0
  49 #define TRUE    1
  50 #define EXPMODE 2
  51 #define LINMODE 3
  52 #define LINSIZ  LINE_MAX        /* POSIX.2 - read lines LINE_MAX long */
  53 
  54         /* Globals */
  55 
  56 char linbuf[LINSIZ];            /* Input line buffer */
  57 char *expbuf;
  58 char tmpbuf[BUFSIZ];            /* Temporary buffer for stdin */
  59 char file[8192] = "xx";         /* File name buffer */
  60 char *targ;                     /* Arg ptr for error messages */
  61 char *sptr;
  62 FILE *infile, *outfile;         /* I/O file streams */
  63 int silent, keep, create;       /* Flags: -s(ilent), -k(eep), (create) */
  64 int errflg;
  65 int fiwidth = 2;                /* file index width (output file names) */
  66 extern int optind;
  67 extern char *optarg;
  68 offset_t offset;                /* Regular expression offset value */
  69 offset_t curline;               /* Current line in input file */
  70 
  71 /*
  72  * These defines are needed for regexp handling(see regexp(7))
  73  */
  74 #define PERROR(x)       fatal("%s: Illegal Regular Expression\n", targ);
  75 
  76 static int asc_to_ll(char *, long long *);
  77 static void closefile(void);
  78 static void fatal(char *, char *);
  79 static offset_t findline(char *, offset_t);
  80 static void flush(void);
  81 static FILE *getfile(void);
  82 static char *getaline(int);
  83 static void line_arg(char *);
  84 static void num_arg(char *, int);
  85 static void re_arg(char *);
  86 static void sig(int);
  87 static void to_line(offset_t);
  88 static void usage(void);
  89 
  90 int
  91 main(int argc, char **argv)
  92 {
  93         int ch, mode;
  94         char *ptr;
  95 
  96         (void) setlocale(LC_ALL, "");
  97 #if !defined(TEXT_DOMAIN)               /* Should be defined by cc -D */
  98 #define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it weren't */
  99 #endif
 100         (void) textdomain(TEXT_DOMAIN);
 101 
 102         while ((ch = getopt(argc, argv, "skf:n:")) != EOF) {
 103                 switch (ch) {
 104                         case 'f':
 105                                 (void) strcpy(file, optarg);
 106                                 if ((ptr = strrchr(optarg, '/')) == NULL)
 107                                         ptr = optarg;
 108                                 else
 109                                         ptr++;
 110 
 111                                 break;
 112                         case 'n':               /* POSIX.2 */
 113                                 for (ptr = optarg; *ptr != NULL; ptr++)
 114                                         if (!isdigit((int)*ptr))
 115                                                 fatal("-n num\n", NULL);
 116                                 fiwidth = atoi(optarg);
 117                                 break;
 118                         case 'k':
 119                                 keep++;
 120                                 break;
 121                         case 's':
 122                                 silent++;
 123                                 break;
 124                         case '?':
 125                                 errflg++;
 126                 }
 127         }
 128 
 129         argv = &argv[optind];
 130         argc -= optind;
 131         if (argc <= 1 || errflg)
 132                 usage();
 133 
 134         if (strcmp(*argv, "-") == 0) {
 135                 infile = tmpfile();
 136 
 137                 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) {
 138                         if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0)
 139                                 if (errno == ENOSPC) {
 140                                         (void) fprintf(stderr, "csplit: ");
 141                                         (void) fprintf(stderr, gettext(
 142                                             "No space left on device\n"));
 143                                         exit(1);
 144                                 } else {
 145                                         (void) fprintf(stderr, "csplit: ");
 146                                         (void) fprintf(stderr, gettext(
 147                                             "Bad write to temporary "
 148                                             "file\n"));
 149                                         exit(1);
 150                                 }
 151 
 152         /* clear the buffer to get correct size when writing buffer */
 153 
 154                         (void) memset(tmpbuf, '\0', sizeof (tmpbuf));
 155                 }
 156                 rewind(infile);
 157         } else if ((infile = fopen(*argv, "r")) == NULL)
 158                 fatal("Cannot open %s\n", *argv);
 159         ++argv;
 160         curline = (offset_t)1;
 161         (void) signal(SIGINT, sig);
 162 
 163         /*
 164          * The following for loop handles the different argument types.
 165          * A switch is performed on the first character of the argument
 166          * and each case calls the appropriate argument handling routine.
 167          */
 168 
 169         for (; *argv; ++argv) {
 170                 targ = *argv;
 171                 switch (**argv) {
 172                 case '/':
 173                         mode = EXPMODE;
 174                         create = TRUE;
 175                         re_arg(*argv);
 176                         break;
 177                 case '%':
 178                         mode = EXPMODE;
 179                         create = FALSE;
 180                         re_arg(*argv);
 181                         break;
 182                 case '{':
 183                         num_arg(*argv, mode);
 184                         mode = FALSE;
 185                         break;
 186                 default:
 187                         mode = LINMODE;
 188                         create = TRUE;
 189                         line_arg(*argv);
 190                         break;
 191                 }
 192         }
 193         create = TRUE;
 194         to_line(LAST);
 195         return (0);
 196 }
 197 
 198 /*
 199  * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc)
 200  * It returns ERR if an illegal character.  The reason that asc_to_ll
 201  * does not return an answer(long long) is that any value for the long
 202  * long is legal, and this version of asc_to_ll detects error strings.
 203  */
 204 
 205 static int
 206 asc_to_ll(char *str, long long *plc)
 207 {
 208         int f;
 209         *plc = 0;
 210         f = 0;
 211         for (; ; str++) {
 212                 switch (*str) {
 213                 case ' ':
 214                 case '\t':
 215                         continue;
 216                 case '-':
 217                         f++;
 218                         /* FALLTHROUGH */
 219                 case '+':
 220                         str++;
 221                 }
 222                 break;
 223         }
 224         for (; *str != NULL; str++)
 225                 if (*str >= '0' && *str <= '9')
 226                         *plc = *plc * 10 + *str - '0';
 227                 else
 228                         return (ERR);
 229         if (f)
 230                 *plc = -(*plc);
 231         return (TRUE);  /* not error */
 232 }
 233 
 234 /*
 235  * Closefile prints the byte count of the file created,(via fseeko
 236  * and ftello), if the create flag is on and the silent flag is not on.
 237  * If the create flag is on closefile then closes the file(fclose).
 238  */
 239 
 240 static void
 241 closefile()
 242 {
 243         if (!silent && create) {
 244                 (void) fseeko(outfile, (offset_t)0, SEEK_END);
 245                 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile));
 246         }
 247         if (create)
 248                 (void) fclose(outfile);
 249 }
 250 
 251 /*
 252  * Fatal handles error messages and cleanup.
 253  * Because "arg" can be the global file, and the cleanup processing
 254  * uses the global file, the error message is printed first.  If the
 255  * "keep" flag is not set, fatal unlinks all created files.  If the
 256  * "keep" flag is set, fatal closes the current file(if there is one).
 257  * Fatal exits with a value of 1.
 258  */
 259 
 260 static void
 261 fatal(char *string, char *arg)
 262 {
 263         char *fls;
 264         int num;
 265 
 266         (void) fprintf(stderr, "csplit: ");
 267 
 268         /* gettext dynamically replaces string */
 269 
 270         (void) fprintf(stderr, gettext(string), arg);
 271         if (!keep) {
 272                 if (outfile) {
 273                         (void) fclose(outfile);
 274                         for (fls = file; *fls != '\0'; fls++)
 275                                 continue;
 276                         fls -= fiwidth;
 277                         for (num = atoi(fls); num >= 0; num--) {
 278                                 (void) sprintf(fls, "%.*d", fiwidth, num);
 279                                 (void) unlink(file);
 280                         }
 281                 }
 282         } else
 283                 if (outfile)
 284                         closefile();
 285         exit(1);
 286 }
 287 
 288 /*
 289  * Findline returns the line number referenced by the current argument.
 290  * Its arguments are a pointer to the compiled regular expression(expr),
 291  * and an offset(oset).  The variable lncnt is used to count the number
 292  * of lines searched.  First the current stream location is saved via
 293  * ftello(), and getaline is called so that R.E. searching starts at the
 294  * line after the previously referenced line.  The while loop checks
 295  * that there are more lines(error if none), bumps the line count, and
 296  * checks for the R.E. on each line.  If the R.E. matches on one of the
 297  * lines the old stream location is restored, and the line number
 298  * referenced by the R.E. and the offset is returned.
 299  */
 300 
 301 static offset_t
 302 findline(char *expr, offset_t oset)
 303 {
 304         static int benhere = 0;
 305         offset_t lncnt = 0, saveloc;
 306 
 307         saveloc = ftello(infile);
 308         if (curline != (offset_t)1 || benhere)  /* If first line, first time, */
 309                 (void) getaline(FALSE);         /* then don't skip */
 310         else
 311                 lncnt--;
 312         benhere = 1;
 313         while (getaline(FALSE) != NULL) {
 314                 lncnt++;
 315                 if ((sptr = strrchr(linbuf, '\n')) != NULL)
 316                         *sptr = '\0';
 317                 if (step(linbuf, expr)) {
 318                         (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
 319                         return (curline+lncnt+oset);
 320                 }
 321         }
 322         (void) fseeko(infile, (offset_t)saveloc, SEEK_SET);
 323         return (curline+lncnt+oset+2);
 324 }
 325 
 326 /*
 327  * Flush uses fputs to put lines on the output file stream(outfile)
 328  * Since fputs does its own buffering, flush doesn't need to.
 329  * Flush does nothing if the create flag is not set.
 330  */
 331 
 332 static void
 333 flush()
 334 {
 335         if (create)
 336                 (void) fputs(linbuf, outfile);
 337 }
 338 
 339 /*
 340  * Getfile does nothing if the create flag is not set.  If the create
 341  * flag is set, getfile positions the file pointer(fptr) at the end of
 342  * the file name prefix on the first call(fptr=0).  The file counter is
 343  * stored in the file name and incremented.  If the subsequent fopen
 344  * fails, the file name is copied to tfile for the error message, the
 345  * previous file name is restored for cleanup, and fatal is called.  If
 346  * the fopen succeeds, the stream(opfil) is returned.
 347  */
 348 
 349 FILE *
 350 getfile()
 351 {
 352         static char *fptr;
 353         static int ctr;
 354         FILE *opfil;
 355         char tfile[15];
 356         char *delim;
 357         char savedelim;
 358 
 359         if (create) {
 360                 if (fptr == 0)
 361                         for (fptr = file; *fptr != NULL; fptr++)
 362                                 continue;
 363                 (void) sprintf(fptr, "%.*d", fiwidth, ctr++);
 364 
 365                 /* check for suffix length overflow */
 366                 if (strlen(fptr) > fiwidth) {
 367                         fatal("Suffix longer than %ld chars; increase -n\n",
 368                             (char *)fiwidth);
 369                 }
 370 
 371                 /* check for filename length overflow */
 372 
 373                 delim = strrchr(file, '/');
 374                 if (delim == (char *)NULL) {
 375                         if (strlen(file) > pathconf(".", _PC_NAME_MAX)) {
 376                                 fatal("Name too long: %s\n", file);
 377                         }
 378                 } else {
 379                         /* truncate file at pathname delim to do pathconf */
 380                         savedelim = *delim;
 381                         *delim = '\0';
 382                         /*
 383                          * file: pppppppp\0fffff\0
 384                          * ..... ^ file
 385                          * ............. ^ delim
 386                          */
 387                         if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) {
 388                                 fatal("Name too long: %s\n", delim + 1);
 389                         }
 390                         *delim = savedelim;
 391                 }
 392 
 393                 if ((opfil = fopen(file, "w")) == NULL) {
 394                         (void) strcpy(tfile, file);
 395                         (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2));
 396                         fatal("Cannot create %s\n", tfile);
 397                 }
 398                 return (opfil);
 399         }
 400         return (NULL);
 401 }
 402 
 403 /*
 404  * Getline gets a line via fgets from the input stream "infile".
 405  * The line is put into linbuf and may not be larger than LINSIZ.
 406  * If getaline is called with a non-zero value, the current line
 407  * is bumped, otherwise it is not(for R.E. searching).
 408  */
 409 
 410 static char *
 411 getaline(int bumpcur)
 412 {
 413         char *ret;
 414         if (bumpcur)
 415                 curline++;
 416         ret = fgets(linbuf, LINSIZ, infile);
 417         return (ret);
 418 }
 419 
 420 /*
 421  * Line_arg handles line number arguments.
 422  * line_arg takes as its argument a pointer to a character string
 423  * (assumed to be a line number).  If that character string can be
 424  * converted to a number(long long), to_line is called with that number,
 425  * otherwise error.
 426  */
 427 
 428 static void
 429 line_arg(char *line)
 430 {
 431         long long to;
 432 
 433         if (asc_to_ll(line, &to) == ERR)
 434                 fatal("%s: bad line number\n", line);
 435         to_line(to);
 436 }
 437 
 438 /*
 439  * Num_arg handles repeat arguments.
 440  * Num_arg copies the numeric argument to "rep" (error if number is
 441  * larger than 20 characters or } is left off).  Num_arg then converts
 442  * the number and checks for validity.  Next num_arg checks the mode
 443  * of the previous argument, and applys the argument the correct number
 444  * of times. If the mode is not set properly its an error.
 445  */
 446 
 447 static void
 448 num_arg(char *arg, int md)
 449 {
 450         offset_t repeat, toline;
 451         char rep[21];
 452         char *ptr;
 453         int             len;
 454 
 455         ptr = rep;
 456         for (++arg; *arg != '}'; arg += len) {
 457                 if (*arg == NULL)
 458                         fatal("%s: missing '}'\n", targ);
 459                 if ((len = mblen(arg, MB_LEN_MAX)) <= 0)
 460                         len = 1;
 461                 if ((ptr + len) >= &rep[20])
 462                         fatal("%s: Repeat count too large\n", targ);
 463                 (void) memcpy(ptr, arg, len);
 464                 ptr += len;
 465         }
 466         *ptr = NULL;
 467         if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L)
 468                 fatal("Illegal repeat count: %s\n", targ);
 469         if (md == LINMODE) {
 470                 toline = offset = curline;
 471                 for (; repeat > 0LL; repeat--) {
 472                         toline += offset;
 473                         to_line(toline);
 474                 }
 475         } else  if (md == EXPMODE)
 476                         for (; repeat > 0LL; repeat--)
 477                                 to_line(findline(expbuf, offset));
 478                 else
 479                         fatal("No operation for %s\n", targ);
 480 }
 481 
 482 /*
 483  * Re_arg handles regular expression arguments.
 484  * Re_arg takes a csplit regular expression argument.  It checks for
 485  * delimiter balance, computes any offset, and compiles the regular
 486  * expression.  Findline is called with the compiled expression and
 487  * offset, and returns the corresponding line number, which is used
 488  * as input to the to_line function.
 489  */
 490 
 491 static void
 492 re_arg(char *string)
 493 {
 494         char *ptr;
 495         char ch;
 496         int             len;
 497 
 498         ch = *string;
 499         ptr = string;
 500         ptr++;
 501         while (*ptr != ch) {
 502                 if (*ptr == '\\')
 503                         ++ptr;
 504 
 505                 if (*ptr == NULL)
 506                         fatal("%s: missing delimiter\n", targ);
 507 
 508                 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0)
 509                         len = 1;
 510                 ptr += len;
 511         }
 512 
 513         /*
 514          * The line below was added because compile no longer supports
 515          * the fourth argument being passed.  The fourth argument used
 516          * to be '/' or '%'.
 517          */
 518 
 519         *ptr = NULL;
 520         if (asc_to_ll(++ptr, &offset) == ERR)
 521                 fatal("%s: illegal offset\n", string);
 522 
 523         /*
 524          * The line below was added because INIT which did this for us
 525          * was removed from compile in regexp.h
 526          */
 527 
 528         string++;
 529         expbuf = compile(string, (char *)0, (char *)0);
 530         if (regerrno)
 531                 PERROR(regerrno);
 532         to_line(findline(expbuf, offset));
 533 }
 534 
 535 /*
 536  * Sig handles breaks.  When a break occurs the signal is reset,
 537  * and fatal is called to clean up and print the argument which
 538  * was being processed at the time the interrupt occured.
 539  */
 540 
 541 /* ARGSUSED */
 542 static void
 543 sig(int s)
 544 {
 545         (void) signal(SIGINT, sig);
 546         fatal("Interrupt - program aborted at arg '%s'\n", targ);
 547 }
 548 
 549 /*
 550  * To_line creates split files.
 551  * To_line gets as its argument the line which the current argument
 552  * referenced.  To_line calls getfile for a new output stream, which
 553  * does nothing if create is False.  If to_line's argument is not LAST
 554  * it checks that the current line is not greater than its argument.
 555  * While the current line is less than the desired line to_line gets
 556  * lines and flushes(error if EOF is reached).
 557  * If to_line's argument is LAST, it checks for more lines, and gets
 558  * and flushes lines till the end of file.
 559  * Finally, to_line calls closefile to close the output stream.
 560  */
 561 
 562 static void
 563 to_line(offset_t ln)
 564 {
 565         outfile = getfile();
 566         if (ln != LAST) {
 567                 if (curline > ln)
 568                         fatal("%s - out of range\n", targ);
 569                 while (curline < ln) {
 570                         if (getaline(TRUE) == NULL)
 571                                 fatal("%s - out of range\n", targ);
 572                         flush();
 573                 }
 574         } else          /* last file */
 575                 if (getaline(TRUE) != NULL) {
 576                         flush();
 577                         for (;;) {
 578                                 if (getaline(TRUE) == NULL)
 579                                         break;
 580                                 flush();
 581                         }
 582                 } else
 583                         fatal("%s - out of range\n", targ);
 584         closefile();
 585 }
 586 
 587 static void
 588 usage()
 589 {
 590         (void) fprintf(stderr, gettext(
 591             "usage: csplit [-ks] [-f prefix] [-n number] "
 592             "file arg1 ...argn\n"));
 593         exit(1);
 594 }