Print this page
5051 import mdocml-1.12.3
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Approved by: TBD
   1 /*      $Id: preconv.c,v 1.5 2011/07/24 18:15:14 kristaps Exp $ */
   2 /*
   3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #ifdef HAVE_MMAP
  22 #include <sys/stat.h>
  23 #include <sys/mman.h>
  24 #endif
  25 
  26 #include <assert.h>
  27 #include <fcntl.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <unistd.h>
  32 
  33 /* 
  34  * The read_whole_file() and resize_buf() functions are copied from
  35  * read.c, including all dependency code (MAP_FILE, etc.).
  36  */
  37 
  38 #ifndef MAP_FILE /* only supply a definition when the system headers did not */
  39 #define MAP_FILE        0 /* fallback value: OR-ing 0 into the mmap() flags leaves them unchanged */
  40 #endif
  41 
  42 enum    enc { /* character encoding identifiers */
  43         ENC_UTF_8, /* UTF-8 */
  44         ENC_US_ASCII, /* US-ASCII */
  45         ENC_LATIN_1, /* Latin-1 */
  46         ENC__MAX /* not an encoding: as the last enumerator its value is the number of encodings above */
  47 };
  48 
  49 struct  buf { /* a block of input bytes together with its size and a starting offset */
  50         char             *buf; /* binary input buffer */
  51         size_t            sz; /* size of binary buffer */
  52         size_t            offs; /* starting buffer offset */
  53 };
  54 
  55 struct  encode { /* pairs an encoding name with a conversion function */
  56         const char       *name; /* encoding name; NOTE(review): where it is matched is not visible here */
  57         int             (*conv)(const struct buf *); /* converter taking a read-only struct buf; NOTE(review): meaning of the int result is not visible here - confirm */
  58 };
  59 
  60 static  int      cue_enc(const struct buf *, size_t *, enum enc *);
  61 static  int      conv_latin_1(const struct buf *);


 254         if (-1 == fstat(fd, &st)) {
 255                 perror(f);
 256                 return(0);
 257         }
 258 
 259         /*
 260          * If we're a regular file, try just reading in the whole entry
 261          * via mmap().  This is faster than reading it into blocks, and
 262          * since each file is only a few bytes to begin with, I'm not
 263          * concerned that this is going to tank any machines.
 264          */
 265 
 266         if (S_ISREG(st.st_mode) && st.st_size >= (1U << 31)) {
 267                 fprintf(stderr, "%s: input too large\n", f);
 268                 return(0);
 269         } 
 270         
 271         if (S_ISREG(st.st_mode)) {
 272                 *with_mmap = 1;
 273                 fb->sz = (size_t)st.st_size;
 274                 fb->buf = mmap(NULL, fb->sz, PROT_READ, 
 275                                 MAP_FILE|MAP_SHARED, fd, 0);
 276                 if (fb->buf != MAP_FAILED)
 277                         return(1);
 278         }
 279 #endif
 280 
 281         /*
 282          * If this isn't a regular file (like, say, stdin), then we must
 283          * go the old way and just read things in bit by bit.
 284          */
 285 
 286         *with_mmap = 0;
 287         off = 0;
 288         fb->sz = 0;
 289         fb->buf = NULL;
 290         for (;;) {
 291                 if (off == fb->sz && fb->sz == (1U << 31)) {
 292                         fprintf(stderr, "%s: input too large\n", f);
 293                         break;
 294                 } 
 295                 


   1 /*      $Id: preconv.c,v 1.6 2013/06/02 03:52:21 schwarze Exp $ */
   2 /*
   3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
   4  *
   5  * Permission to use, copy, modify, and distribute this software for any
   6  * purpose with or without fee is hereby granted, provided that the above
   7  * copyright notice and this permission notice appear in all copies.
   8  *
   9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16  */
  17 #ifdef HAVE_CONFIG_H
  18 #include "config.h"
  19 #endif
  20 
  21 #ifdef HAVE_MMAP
  22 #include <sys/stat.h>
  23 #include <sys/mman.h>
  24 #endif
  25 
  26 #include <assert.h>
  27 #include <fcntl.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <unistd.h>
  32 
  33 /* 
  34  * The read_whole_file() and resize_buf() functions are copied from
  35  * read.c, including all dependency code.
  36  */
  37 




  38 enum    enc { /* character encoding identifiers */
  39         ENC_UTF_8, /* UTF-8 */
  40         ENC_US_ASCII, /* US-ASCII */
  41         ENC_LATIN_1, /* Latin-1 */
  42         ENC__MAX /* not an encoding: as the last enumerator its value is the number of encodings above */
  43 };
  44 
  45 struct  buf { /* a block of input bytes together with its size and a starting offset */
  46         char             *buf; /* binary input buffer */
  47         size_t            sz; /* size of binary buffer */
  48         size_t            offs; /* starting buffer offset */
  49 };
  50 
  51 struct  encode { /* pairs an encoding name with a conversion function */
  52         const char       *name; /* encoding name; NOTE(review): where it is matched is not visible here */
  53         int             (*conv)(const struct buf *); /* converter taking a read-only struct buf; NOTE(review): meaning of the int result is not visible here - confirm */
  54 };
  55 
  56 static  int      cue_enc(const struct buf *, size_t *, enum enc *);
  57 static  int      conv_latin_1(const struct buf *);


 250         if (-1 == fstat(fd, &st)) {
 251                 perror(f);
 252                 return(0);
 253         }
 254 
 255         /*
 256          * If we're a regular file, try just reading in the whole entry
 257          * via mmap().  This is faster than reading it into blocks, and
 258          * since each file is only a few bytes to begin with, I'm not
 259          * concerned that this is going to tank any machines.
 260          */
 261 
 262         if (S_ISREG(st.st_mode) && st.st_size >= (1U << 31)) {
 263                 fprintf(stderr, "%s: input too large\n", f);
 264                 return(0);
 265         } 
 266         
 267         if (S_ISREG(st.st_mode)) {
 268                 *with_mmap = 1;
 269                 fb->sz = (size_t)st.st_size;
 270                 fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);

 271                 if (fb->buf != MAP_FAILED)
 272                         return(1);
 273         }
 274 #endif
 275 
 276         /*
 277          * If this isn't a regular file (like, say, stdin), then we must
 278          * go the old way and just read things in bit by bit.
 279          */
 280 
 281         *with_mmap = 0;
 282         off = 0;
 283         fb->sz = 0;
 284         fb->buf = NULL;
 285         for (;;) {
 286                 if (off == fb->sz && fb->sz == (1U << 31)) {
 287                         fprintf(stderr, "%s: input too large\n", f);
 288                         break;
 289                 } 
 290