1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * Copyright (c) 2018, Joyent, Inc. 31 */ 32 33 /* 34 * csplit - Context or line file splitter 35 * Compile: cc -O -s -o csplit csplit.c 36 */ 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <unistd.h> 41 #include <string.h> 42 #include <ctype.h> 43 #include <errno.h> 44 #include <limits.h> 45 #include <regexpr.h> 46 #include <signal.h> 47 #include <locale.h> 48 #include <libintl.h> 49 50 #define LAST 0LL 51 #define ERR -1 52 #define FALSE 0 53 #define TRUE 1 54 #define EXPMODE 2 55 #define LINMODE 3 56 #define LINSIZ LINE_MAX /* POSIX.2 - read lines LINE_MAX long */ 57 58 /* Globals */ 59 60 char linbuf[LINSIZ]; /* Input line buffer */ 61 char *expbuf; 62 char tmpbuf[BUFSIZ]; /* Temporary buffer for stdin */ 63 char file[8192] = "xx"; /* File name buffer */ 64 char *targ; /* Arg ptr for error messages */ 65 char *sptr; 66 FILE *infile, *outfile; /* I/O file streams */ 67 int silent, keep, create; /* Flags: -s(ilent), -k(eep), (create) */ 68 int errflg; 69 int fiwidth = 2; /* file index width (output file names) */ 70 extern int optind; 71 extern char *optarg; 72 offset_t offset; /* Regular expression offset value */ 73 offset_t curline; /* Current line in input file */ 74 75 /* 76 * These defines are needed for regexp handling(see regexp(7)) 77 */ 78 #define PERROR(x) fatal("%s: Illegal Regular Expression\n", targ); 79 80 static int asc_to_ll(char *, long long *); 81 static void closefile(void); 82 static void fatal(char *, char *); 83 static offset_t findline(char *, offset_t); 84 static void flush(void); 85 static FILE *getfile(void); 86 static char *getaline(int); 87 static void line_arg(char *); 88 static void num_arg(char *, int); 89 static void re_arg(char *); 90 static void sig(int); 91 static void to_line(offset_t); 92 static void usage(void); 93 94 int 95 main(int argc, char **argv) 96 { 97 int ch, mode; 98 char *ptr; 99 100 (void) setlocale(LC_ALL, ""); 101 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */ 102 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it weren't */ 103 #endif 104 (void) textdomain(TEXT_DOMAIN); 105 106 while ((ch = getopt(argc, argv, "skf:n:")) != EOF) { 107 switch (ch) { 108 case 'f': 109 (void) strcpy(file, optarg); 110 if ((ptr = strrchr(optarg, '/')) == NULL) 111 ptr = optarg; 112 else 113 ptr++; 114 115 break; 116 case 'n': /* POSIX.2 */ 117 for (ptr = optarg; *ptr != NULL; ptr++) 118 if (!isdigit((int)*ptr)) 119 fatal("-n num\n", NULL); 120 fiwidth = atoi(optarg); 121 break; 122 case 'k': 123 keep++; 124 break; 125 case 's': 126 silent++; 127 break; 128 case '?': 129 errflg++; 130 } 131 } 132 133 argv = &argv[optind]; 134 argc -= optind; 135 if (argc <= 1 || errflg) 136 usage(); 137 138 if (strcmp(*argv, "-") == 0) { 139 infile = tmpfile(); 140 141 while (fread(tmpbuf, 1, BUFSIZ, stdin) != 0) { 142 if (fwrite(tmpbuf, 1, BUFSIZ, infile) == 0) 143 if (errno == ENOSPC) { 144 (void) fprintf(stderr, "csplit: "); 145 (void) fprintf(stderr, gettext( 146 "No space left on device\n")); 147 exit(1); 148 } else { 149 (void) fprintf(stderr, "csplit: "); 150 (void) fprintf(stderr, gettext( 151 "Bad write to temporary " 152 "file\n")); 153 exit(1); 154 } 155 156 /* clear the buffer to get correct size when writing buffer */ 157 158 (void) memset(tmpbuf, '\0', sizeof (tmpbuf)); 159 } 160 rewind(infile); 161 } else if ((infile = fopen(*argv, "r")) == NULL) 162 fatal("Cannot open %s\n", *argv); 163 ++argv; 164 curline = (offset_t)1; 165 (void) signal(SIGINT, sig); 166 167 /* 168 * The following for loop handles the different argument types. 169 * A switch is performed on the first character of the argument 170 * and each case calls the appropriate argument handling routine. 171 */ 172 173 for (; *argv; ++argv) { 174 targ = *argv; 175 switch (**argv) { 176 case '/': 177 mode = EXPMODE; 178 create = TRUE; 179 re_arg(*argv); 180 break; 181 case '%': 182 mode = EXPMODE; 183 create = FALSE; 184 re_arg(*argv); 185 break; 186 case '{': 187 num_arg(*argv, mode); 188 mode = FALSE; 189 break; 190 default: 191 mode = LINMODE; 192 create = TRUE; 193 line_arg(*argv); 194 break; 195 } 196 } 197 create = TRUE; 198 to_line(LAST); 199 return (0); 200 } 201 202 /* 203 * asc_to_ll takes an ascii argument(str) and converts it to a long long(plc) 204 * It returns ERR if an illegal character. The reason that asc_to_ll 205 * does not return an answer(long long) is that any value for the long 206 * long is legal, and this version of asc_to_ll detects error strings. 207 */ 208 209 static int 210 asc_to_ll(char *str, long long *plc) 211 { 212 int f; 213 *plc = 0; 214 f = 0; 215 for (; ; str++) { 216 switch (*str) { 217 case ' ': 218 case '\t': 219 continue; 220 case '-': 221 f++; 222 /* FALLTHROUGH */ 223 case '+': 224 str++; 225 } 226 break; 227 } 228 for (; *str != NULL; str++) 229 if (*str >= '0' && *str <= '9') 230 *plc = *plc * 10 + *str - '0'; 231 else 232 return (ERR); 233 if (f) 234 *plc = -(*plc); 235 return (TRUE); /* not error */ 236 } 237 238 /* 239 * Closefile prints the byte count of the file created,(via fseeko 240 * and ftello), if the create flag is on and the silent flag is not on. 241 * If the create flag is on closefile then closes the file(fclose). 242 */ 243 244 static void 245 closefile() 246 { 247 if (!silent && create) { 248 (void) fseeko(outfile, (offset_t)0, SEEK_END); 249 (void) fprintf(stdout, "%lld\n", (offset_t)ftello(outfile)); 250 } 251 if (create) 252 (void) fclose(outfile); 253 } 254 255 /* 256 * Fatal handles error messages and cleanup. 257 * Because "arg" can be the global file, and the cleanup processing 258 * uses the global file, the error message is printed first. If the 259 * "keep" flag is not set, fatal unlinks all created files. If the 260 * "keep" flag is set, fatal closes the current file(if there is one). 261 * Fatal exits with a value of 1. 262 */ 263 264 static void 265 fatal(char *string, char *arg) 266 { 267 char *fls; 268 int num; 269 270 (void) fprintf(stderr, "csplit: "); 271 272 /* gettext dynamically replaces string */ 273 274 (void) fprintf(stderr, gettext(string), arg); 275 if (!keep) { 276 if (outfile) { 277 (void) fclose(outfile); 278 for (fls = file; *fls != '\0'; fls++) 279 continue; 280 fls -= fiwidth; 281 for (num = atoi(fls); num >= 0; num--) { 282 (void) sprintf(fls, "%.*d", fiwidth, num); 283 (void) unlink(file); 284 } 285 } 286 } else 287 if (outfile) 288 closefile(); 289 exit(1); 290 } 291 292 /* 293 * Findline returns the line number referenced by the current argument. 294 * Its arguments are a pointer to the compiled regular expression(expr), 295 * and an offset(oset). The variable lncnt is used to count the number 296 * of lines searched. First the current stream location is saved via 297 * ftello(), and getaline is called so that R.E. searching starts at the 298 * line after the previously referenced line. The while loop checks 299 * that there are more lines(error if none), bumps the line count, and 300 * checks for the R.E. on each line. If the R.E. matches on one of the 301 * lines the old stream location is restored, and the line number 302 * referenced by the R.E. and the offset is returned. 303 */ 304 305 static offset_t 306 findline(char *expr, offset_t oset) 307 { 308 static int benhere = 0; 309 offset_t lncnt = 0, saveloc; 310 311 saveloc = ftello(infile); 312 if (curline != (offset_t)1 || benhere) /* If first line, first time, */ 313 (void) getaline(FALSE); /* then don't skip */ 314 else 315 lncnt--; 316 benhere = 1; 317 while (getaline(FALSE) != NULL) { 318 lncnt++; 319 if ((sptr = strrchr(linbuf, '\n')) != NULL) 320 *sptr = '\0'; 321 if (step(linbuf, expr)) { 322 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 323 return (curline+lncnt+oset); 324 } 325 } 326 (void) fseeko(infile, (offset_t)saveloc, SEEK_SET); 327 return (curline+lncnt+oset+2); 328 } 329 330 /* 331 * Flush uses fputs to put lines on the output file stream(outfile) 332 * Since fputs does its own buffering, flush doesn't need to. 333 * Flush does nothing if the create flag is not set. 334 */ 335 336 static void 337 flush() 338 { 339 if (create) 340 (void) fputs(linbuf, outfile); 341 } 342 343 /* 344 * Getfile does nothing if the create flag is not set. If the create 345 * flag is set, getfile positions the file pointer(fptr) at the end of 346 * the file name prefix on the first call(fptr=0). The file counter is 347 * stored in the file name and incremented. If the subsequent fopen 348 * fails, the file name is copied to tfile for the error message, the 349 * previous file name is restored for cleanup, and fatal is called. If 350 * the fopen succeeds, the stream(opfil) is returned. 351 */ 352 353 FILE * 354 getfile() 355 { 356 static char *fptr; 357 static int ctr; 358 FILE *opfil; 359 char tfile[15]; 360 char *delim; 361 char savedelim; 362 363 if (create) { 364 if (fptr == 0) 365 for (fptr = file; *fptr != NULL; fptr++) 366 continue; 367 (void) sprintf(fptr, "%.*d", fiwidth, ctr++); 368 369 /* check for suffix length overflow */ 370 if (strlen(fptr) > fiwidth) { 371 fatal("Suffix longer than %ld chars; increase -n\n", 372 (char *)fiwidth); 373 } 374 375 /* check for filename length overflow */ 376 377 delim = strrchr(file, '/'); 378 if (delim == (char *)NULL) { 379 if (strlen(file) > pathconf(".", _PC_NAME_MAX)) { 380 fatal("Name too long: %s\n", file); 381 } 382 } else { 383 /* truncate file at pathname delim to do pathconf */ 384 savedelim = *delim; 385 *delim = '\0'; 386 /* 387 * file: pppppppp\0fffff\0 388 * ..... ^ file 389 * ............. ^ delim 390 */ 391 if (strlen(delim + 1) > pathconf(file, _PC_NAME_MAX)) { 392 fatal("Name too long: %s\n", delim + 1); 393 } 394 *delim = savedelim; 395 } 396 397 if ((opfil = fopen(file, "w")) == NULL) { 398 (void) strlcpy(tfile, file, sizeof (tfile)); 399 (void) sprintf(fptr, "%.*d", fiwidth, (ctr-2)); 400 fatal("Cannot create %s\n", tfile); 401 } 402 return (opfil); 403 } 404 return (NULL); 405 } 406 407 /* 408 * Getline gets a line via fgets from the input stream "infile". 409 * The line is put into linbuf and may not be larger than LINSIZ. 410 * If getaline is called with a non-zero value, the current line 411 * is bumped, otherwise it is not(for R.E. searching). 412 */ 413 414 static char * 415 getaline(int bumpcur) 416 { 417 char *ret; 418 if (bumpcur) 419 curline++; 420 ret = fgets(linbuf, LINSIZ, infile); 421 return (ret); 422 } 423 424 /* 425 * Line_arg handles line number arguments. 426 * line_arg takes as its argument a pointer to a character string 427 * (assumed to be a line number). If that character string can be 428 * converted to a number(long long), to_line is called with that number, 429 * otherwise error. 430 */ 431 432 static void 433 line_arg(char *line) 434 { 435 long long to; 436 437 if (asc_to_ll(line, &to) == ERR) 438 fatal("%s: bad line number\n", line); 439 to_line(to); 440 } 441 442 /* 443 * Num_arg handles repeat arguments. 444 * Num_arg copies the numeric argument to "rep" (error if number is 445 * larger than 20 characters or } is left off). Num_arg then converts 446 * the number and checks for validity. Next num_arg checks the mode 447 * of the previous argument, and applys the argument the correct number 448 * of times. If the mode is not set properly its an error. 449 */ 450 451 static void 452 num_arg(char *arg, int md) 453 { 454 offset_t repeat, toline; 455 char rep[21]; 456 char *ptr; 457 int len; 458 459 ptr = rep; 460 for (++arg; *arg != '}'; arg += len) { 461 if (*arg == NULL) 462 fatal("%s: missing '}'\n", targ); 463 if ((len = mblen(arg, MB_LEN_MAX)) <= 0) 464 len = 1; 465 if ((ptr + len) >= &rep[20]) 466 fatal("%s: Repeat count too large\n", targ); 467 (void) memcpy(ptr, arg, len); 468 ptr += len; 469 } 470 *ptr = NULL; 471 if ((asc_to_ll(rep, &repeat) == ERR) || repeat < 0L) 472 fatal("Illegal repeat count: %s\n", targ); 473 if (md == LINMODE) { 474 toline = offset = curline; 475 for (; repeat > 0LL; repeat--) { 476 toline += offset; 477 to_line(toline); 478 } 479 } else if (md == EXPMODE) 480 for (; repeat > 0LL; repeat--) 481 to_line(findline(expbuf, offset)); 482 else 483 fatal("No operation for %s\n", targ); 484 } 485 486 /* 487 * Re_arg handles regular expression arguments. 488 * Re_arg takes a csplit regular expression argument. It checks for 489 * delimiter balance, computes any offset, and compiles the regular 490 * expression. Findline is called with the compiled expression and 491 * offset, and returns the corresponding line number, which is used 492 * as input to the to_line function. 493 */ 494 495 static void 496 re_arg(char *string) 497 { 498 char *ptr; 499 char ch; 500 int len; 501 502 ch = *string; 503 ptr = string; 504 ptr++; 505 while (*ptr != ch) { 506 if (*ptr == '\\') 507 ++ptr; 508 509 if (*ptr == NULL) 510 fatal("%s: missing delimiter\n", targ); 511 512 if ((len = mblen(ptr, MB_LEN_MAX)) <= 0) 513 len = 1; 514 ptr += len; 515 } 516 517 /* 518 * The line below was added because compile no longer supports 519 * the fourth argument being passed. The fourth argument used 520 * to be '/' or '%'. 521 */ 522 523 *ptr = NULL; 524 if (asc_to_ll(++ptr, &offset) == ERR) 525 fatal("%s: illegal offset\n", string); 526 527 /* 528 * The line below was added because INIT which did this for us 529 * was removed from compile in regexp.h 530 */ 531 532 string++; 533 expbuf = compile(string, (char *)0, (char *)0); 534 if (regerrno) 535 PERROR(regerrno); 536 to_line(findline(expbuf, offset)); 537 } 538 539 /* 540 * Sig handles breaks. When a break occurs the signal is reset, 541 * and fatal is called to clean up and print the argument which 542 * was being processed at the time the interrupt occured. 543 */ 544 545 /* ARGSUSED */ 546 static void 547 sig(int s) 548 { 549 (void) signal(SIGINT, sig); 550 fatal("Interrupt - program aborted at arg '%s'\n", targ); 551 } 552 553 /* 554 * To_line creates split files. 555 * To_line gets as its argument the line which the current argument 556 * referenced. To_line calls getfile for a new output stream, which 557 * does nothing if create is False. If to_line's argument is not LAST 558 * it checks that the current line is not greater than its argument. 559 * While the current line is less than the desired line to_line gets 560 * lines and flushes(error if EOF is reached). 561 * If to_line's argument is LAST, it checks for more lines, and gets 562 * and flushes lines till the end of file. 563 * Finally, to_line calls closefile to close the output stream. 564 */ 565 566 static void 567 to_line(offset_t ln) 568 { 569 outfile = getfile(); 570 if (ln != LAST) { 571 if (curline > ln) 572 fatal("%s - out of range\n", targ); 573 while (curline < ln) { 574 if (getaline(TRUE) == NULL) 575 fatal("%s - out of range\n", targ); 576 flush(); 577 } 578 } else /* last file */ 579 if (getaline(TRUE) != NULL) { 580 flush(); 581 for (;;) { 582 if (getaline(TRUE) == NULL) 583 break; 584 flush(); 585 } 586 } else 587 fatal("%s - out of range\n", targ); 588 closefile(); 589 } 590 591 static void 592 usage() 593 { 594 (void) fprintf(stderr, gettext( 595 "usage: csplit [-ks] [-f prefix] [-n number] " 596 "file arg1 ...argn\n")); 597 exit(1); 598 }