Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/udfs/udf_vnops.c
+++ new/usr/src/uts/common/fs/udfs/udf_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * Copyright 2015, Joyent, Inc.
28 28 */
29 29
30 30 #include <sys/types.h>
31 31 #include <sys/t_lock.h>
32 32 #include <sys/param.h>
33 33 #include <sys/time.h>
34 34 #include <sys/systm.h>
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/resource.h>
37 37 #include <sys/signal.h>
38 38 #include <sys/cred.h>
39 39 #include <sys/user.h>
40 40 #include <sys/buf.h>
41 41 #include <sys/vfs.h>
42 42 #include <sys/vfs_opreg.h>
43 43 #include <sys/stat.h>
44 44 #include <sys/vnode.h>
45 45 #include <sys/mode.h>
46 46 #include <sys/proc.h>
47 47 #include <sys/disp.h>
48 48 #include <sys/file.h>
49 49 #include <sys/fcntl.h>
50 50 #include <sys/flock.h>
51 51 #include <sys/kmem.h>
52 52 #include <sys/uio.h>
53 53 #include <sys/dnlc.h>
54 54 #include <sys/conf.h>
55 55 #include <sys/errno.h>
56 56 #include <sys/mman.h>
57 57 #include <sys/fbuf.h>
58 58 #include <sys/pathname.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vmsystm.h>
61 61 #include <sys/cmn_err.h>
62 62 #include <sys/dirent.h>
63 63 #include <sys/errno.h>
64 64 #include <sys/modctl.h>
65 65 #include <sys/statvfs.h>
66 66 #include <sys/mount.h>
67 67 #include <sys/sunddi.h>
68 68 #include <sys/bootconf.h>
69 69 #include <sys/policy.h>
70 70
71 71 #include <vm/hat.h>
72 72 #include <vm/page.h>
73 73 #include <vm/pvn.h>
74 74 #include <vm/as.h>
75 75 #include <vm/seg.h>
76 76 #include <vm/seg_map.h>
77 77 #include <vm/seg_kmem.h>
78 78 #include <vm/seg_vn.h>
79 79 #include <vm/rm.h>
80 80 #include <vm/page.h>
81 81 #include <sys/swap.h>
82 82
83 83 #include <fs/fs_subr.h>
84 84
85 85 #include <sys/fs/udf_volume.h>
86 86 #include <sys/fs/udf_inode.h>
87 87
88 88 static int32_t udf_open(struct vnode **,
89 89 int32_t, struct cred *, caller_context_t *);
90 90 static int32_t udf_close(struct vnode *,
91 91 int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
92 92 static int32_t udf_read(struct vnode *,
93 93 struct uio *, int32_t, struct cred *, caller_context_t *);
94 94 static int32_t udf_write(struct vnode *,
95 95 struct uio *, int32_t, struct cred *, caller_context_t *);
96 96 static int32_t udf_ioctl(struct vnode *,
97 97 int32_t, intptr_t, int32_t, struct cred *, int32_t *,
98 98 caller_context_t *);
99 99 static int32_t udf_getattr(struct vnode *,
100 100 struct vattr *, int32_t, struct cred *, caller_context_t *);
101 101 static int32_t udf_setattr(struct vnode *,
102 102 struct vattr *, int32_t, struct cred *, caller_context_t *);
103 103 static int32_t udf_access(struct vnode *,
104 104 int32_t, int32_t, struct cred *, caller_context_t *);
105 105 static int32_t udf_lookup(struct vnode *,
106 106 char *, struct vnode **, struct pathname *,
107 107 int32_t, struct vnode *, struct cred *,
108 108 caller_context_t *, int *, pathname_t *);
109 109 static int32_t udf_create(struct vnode *,
110 110 char *, struct vattr *, enum vcexcl,
111 111 int32_t, struct vnode **, struct cred *, int32_t,
112 112 caller_context_t *, vsecattr_t *);
113 113 static int32_t udf_remove(struct vnode *,
114 114 char *, struct cred *, caller_context_t *, int);
115 115 static int32_t udf_link(struct vnode *,
116 116 struct vnode *, char *, struct cred *, caller_context_t *, int);
117 117 static int32_t udf_rename(struct vnode *,
118 118 char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
119 119 static int32_t udf_mkdir(struct vnode *,
120 120 char *, struct vattr *, struct vnode **, struct cred *,
121 121 caller_context_t *, int, vsecattr_t *);
122 122 static int32_t udf_rmdir(struct vnode *,
123 123 char *, struct vnode *, struct cred *, caller_context_t *, int);
124 124 static int32_t udf_readdir(struct vnode *,
125 125 struct uio *, struct cred *, int32_t *, caller_context_t *, int);
126 126 static int32_t udf_symlink(struct vnode *,
127 127 char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
128 128 static int32_t udf_readlink(struct vnode *,
129 129 struct uio *, struct cred *, caller_context_t *);
130 130 static int32_t udf_fsync(struct vnode *,
131 131 int32_t, struct cred *, caller_context_t *);
132 132 static void udf_inactive(struct vnode *,
133 133 struct cred *, caller_context_t *);
134 134 static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
135 135 static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
136 136 static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
137 137 static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
138 138 caller_context_t *);
139 139 static int32_t udf_frlock(struct vnode *, int32_t,
140 140 struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
141 141 caller_context_t *);
142 142 static int32_t udf_space(struct vnode *, int32_t,
143 143 struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
144 144 static int32_t udf_getpage(struct vnode *, offset_t,
145 145 size_t, uint32_t *, struct page **, size_t,
146 146 struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
147 147 static int32_t udf_putpage(struct vnode *, offset_t,
148 148 size_t, int32_t, struct cred *, caller_context_t *);
149 149 static int32_t udf_map(struct vnode *, offset_t, struct as *,
150 150 caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
151 151 caller_context_t *);
152 152 static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
153 153 caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
154 154 caller_context_t *);
155 155 static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
156 156 caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
157 157 caller_context_t *);
158 158 static int32_t udf_l_pathconf(struct vnode *, int32_t,
159 159 ulong_t *, struct cred *, caller_context_t *);
160 160 static int32_t udf_pageio(struct vnode *, struct page *,
161 161 u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
162 162
163 163 int32_t ud_getpage_miss(struct vnode *, u_offset_t,
164 164 size_t, struct seg *, caddr_t, page_t *pl[],
165 165 size_t, enum seg_rw, int32_t);
166 166 void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
167 167 int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
168 168 int32_t ud_page_fill(struct ud_inode *, page_t *,
169 169 u_offset_t, uint32_t, u_offset_t *);
170 170 int32_t ud_iodone(struct buf *);
171 171 int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
172 172 int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
173 173 int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
174 174 int32_t ud_slave_done(struct buf *);
175 175
/*
 * Structures to control multiple IO operations to get or put pages
 * that are backed by discontiguous blocks. The master struct is
 * a dummy that holds the original bp from pageio_setup. The
 * slave struct holds the working bp's to do the actual IO. Once
 * all the slave IOs complete. The master is processed as if a single
 * IO op has completed.
 */
uint32_t master_index = 0;	/* presumably feeds mm_index below — TODO confirm */
typedef struct mio_master {
	kmutex_t	mm_mutex;	/* protect the fields below */
	int32_t		mm_size;	/* total bytes covered by this master */
	buf_t		*mm_bp;		/* original bp */
	int32_t		mm_resid;	/* bytes remaining to transfer */
	int32_t		mm_error;	/* accumulated error from slaves */
	int32_t		mm_index;	/* XXX debugging */
} mio_master_t;

typedef struct mio_slave {
	buf_t		ms_buf;		/* working buffer for this IO chunk */
	mio_master_t	*ms_ptr;	/* pointer to master */
} mio_slave_t;
198 198
struct vnodeops *udf_vnodeops;

/*
 * Template pairing each vnode operation name with its udfs
 * implementation; used to construct udf_vnodeops.
 */
const fs_operation_def_t udf_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = udf_open },
	VOPNAME_CLOSE,		{ .vop_close = udf_close },
	VOPNAME_READ,		{ .vop_read = udf_read },
	VOPNAME_WRITE,		{ .vop_write = udf_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = udf_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = udf_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = udf_setattr },
	VOPNAME_ACCESS,		{ .vop_access = udf_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = udf_lookup },
	VOPNAME_CREATE,		{ .vop_create = udf_create },
	VOPNAME_REMOVE,		{ .vop_remove = udf_remove },
	VOPNAME_LINK,		{ .vop_link = udf_link },
	VOPNAME_RENAME,		{ .vop_rename = udf_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = udf_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = udf_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = udf_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = udf_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = udf_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = udf_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = udf_inactive },
	VOPNAME_FID,		{ .vop_fid = udf_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = udf_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = udf_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = udf_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = udf_frlock },
	VOPNAME_SPACE,		{ .vop_space = udf_space },
	VOPNAME_GETPAGE,	{ .vop_getpage = udf_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = udf_putpage },
	VOPNAME_MAP,		{ .vop_map = udf_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = udf_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = udf_delmap },
	VOPNAME_PATHCONF,	{ .vop_pathconf = udf_l_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = udf_pageio },
	VOPNAME_VNEVENT,	{ .vop_vnevent = fs_vnevent_support },
	NULL,			NULL
};
238 238
239 239 /* ARGSUSED */
240 240 static int32_t
241 241 udf_open(
242 242 struct vnode **vpp,
243 243 int32_t flag,
244 244 struct cred *cr,
245 245 caller_context_t *ct)
246 246 {
247 247 ud_printf("udf_open\n");
248 248
249 249 return (0);
250 250 }
251 251
252 252 /* ARGSUSED */
253 253 static int32_t
254 254 udf_close(
255 255 struct vnode *vp,
256 256 int32_t flag,
257 257 int32_t count,
258 258 offset_t offset,
259 259 struct cred *cr,
260 260 caller_context_t *ct)
261 261 {
262 262 struct ud_inode *ip = VTOI(vp);
263 263
264 264 ud_printf("udf_close\n");
265 265
266 266 ITIMES(ip);
267 267
268 268 cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
269 269 cleanshares(vp, ttoproc(curthread)->p_pid);
270 270
271 271 /*
272 272 * Push partially filled cluster at last close.
273 273 * ``last close'' is approximated because the dnlc
274 274 * may have a hold on the vnode.
275 275 */
276 276 if (vp->v_count <= 2 && vp->v_type != VBAD) {
277 277 struct ud_inode *ip = VTOI(vp);
278 278 if (ip->i_delaylen) {
279 279 (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
280 280 B_ASYNC | B_FREE, cr);
281 281 ip->i_delaylen = 0;
282 282 }
283 283 }
284 284
285 285 return (0);
286 286 }
287 287
/*
 * VOP_READ for udfs.  The caller (the VOP layer) has already taken
 * i_rwlock as reader via VOP_RWLOCK; mandatory-locking files are
 * checked via chklock() before the data is copied out under
 * i_contents (reader).
 */
/* ARGSUSED */
static int32_t
udf_read(
	struct vnode *vp,
	struct uio *uiop,
	int32_t ioflag,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error;

	ud_printf("udf_read\n");

	/* The VOP layer must already hold i_rwlock as reader. */
	ASSERT(RW_READ_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * udf_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}

	/* i_contents serializes against writers while ud_rdip copies out. */
	rw_enter(&ip->i_contents, RW_READER);
	error = ud_rdip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
	return (error);
}
330 322
331 323
/*
 * Write-throttling tunables used by udf_write(): when ud_WRITES is
 * non-zero, a writer blocks while the inode's outstanding write bytes
 * (i_writes) exceed the high-water mark ud_HW; ud_throttles counts how
 * often that happened.  ud_LW is the low-water mark — presumably the
 * wakeup threshold used by the write-completion path (not visible
 * here) — TODO confirm.
 */
int32_t ud_WRITES = 1;
int32_t ud_HW = 96 * 1024;
int32_t ud_LW = 64 * 1024;
int32_t ud_throttles = 0;
336 328
/*
 * VOP_WRITE for udfs.  The caller has already taken i_rwlock as writer
 * via VOP_RWLOCK.  After the mandatory-locking check, writers are
 * throttled against the ud_HW high-water mark, then the data is copied
 * in via ud_wrip() under i_contents (writer).
 */
/* ARGSUSED */
static int32_t
udf_write(
	struct vnode *vp,
	struct uio *uiop,
	int32_t ioflag,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error = 0;

	ud_printf("udf_write\n");

	/* The VOP layer must already hold i_rwlock as writer. */
	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));

	if (MANDLOCK(vp, ip->i_char)) {
		/*
		 * ud_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE, uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error) {
			goto end;
		}
	}
	/*
	 * Throttle writes.
	 */
	mutex_enter(&ip->i_tlock);
	if (ud_WRITES && (ip->i_writes > ud_HW)) {
		while (ip->i_writes > ud_HW) {
			/* Sleep until outstanding writes drain below ud_HW. */
			ud_throttles++;
			cv_wait(&ip->i_wrcv, &ip->i_tlock);
		}
	}
	mutex_exit(&ip->i_tlock);

	/*
	 * Write to the file
	 */
	rw_enter(&ip->i_contents, RW_WRITER);
	if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = ip->i_size;
	}
	error = ud_wrip(ip, uiop, ioflag, cr);
	rw_exit(&ip->i_contents);

end:
	return (error);
}
399 383
400 384 /* ARGSUSED */
401 385 static int32_t
402 386 udf_ioctl(
403 387 struct vnode *vp,
404 388 int32_t cmd,
405 389 intptr_t arg,
406 390 int32_t flag,
407 391 struct cred *cr,
408 392 int32_t *rvalp,
409 393 caller_context_t *ct)
410 394 {
411 395 return (ENOTTY);
412 396 }
413 397
414 398 /* ARGSUSED */
415 399 static int32_t
416 400 udf_getattr(
417 401 struct vnode *vp,
418 402 struct vattr *vap,
419 403 int32_t flags,
420 404 struct cred *cr,
421 405 caller_context_t *ct)
422 406 {
423 407 struct ud_inode *ip = VTOI(vp);
424 408
425 409 ud_printf("udf_getattr\n");
426 410
427 411 if (vap->va_mask == AT_SIZE) {
428 412 /*
429 413 * for performance, if only the size is requested don't bother
430 414 * with anything else.
431 415 */
432 416 vap->va_size = ip->i_size;
433 417 return (0);
434 418 }
435 419
436 420 rw_enter(&ip->i_contents, RW_READER);
437 421
438 422 vap->va_type = vp->v_type;
439 423 vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
440 424
441 425 vap->va_uid = ip->i_uid;
442 426 vap->va_gid = ip->i_gid;
443 427 vap->va_fsid = ip->i_dev;
444 428 vap->va_nodeid = ip->i_icb_lbano;
445 429 vap->va_nlink = ip->i_nlink;
446 430 vap->va_size = ip->i_size;
447 431 vap->va_seq = ip->i_seq;
448 432 if (vp->v_type == VCHR || vp->v_type == VBLK) {
449 433 vap->va_rdev = ip->i_rdev;
450 434 } else {
451 435 vap->va_rdev = 0;
452 436 }
453 437
454 438 mutex_enter(&ip->i_tlock);
455 439 ITIMES_NOLOCK(ip); /* mark correct time in inode */
456 440 vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
457 441 vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
458 442 vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
459 443 vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
460 444 vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
461 445 vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
462 446 mutex_exit(&ip->i_tlock);
463 447
464 448 switch (ip->i_type) {
465 449 case VBLK:
466 450 vap->va_blksize = MAXBSIZE;
467 451 break;
468 452 case VCHR:
469 453 vap->va_blksize = MAXBSIZE;
470 454 break;
471 455 default:
472 456 vap->va_blksize = ip->i_udf->udf_lbsize;
473 457 break;
474 458 }
475 459 vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
476 460
477 461 rw_exit(&ip->i_contents);
478 462
479 463 return (0);
480 464 }
481 465
/*
 * Access-check callback handed to secpolicy_vnode_setattr(): translate
 * the vnode-style mode bits to on-disk permission bits and defer to
 * ud_iaccess().
 */
static int
ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
{
	int dperm = UD_UPERM2DPERM(mode);

	return (ud_iaccess(ip, dperm, cr, 0));
}
487 471
/*
 * VOP_SETATTR for udfs: apply the attribute changes requested in
 * vap->va_mask (mode, uid/gid, size, atime/mtime) to the inode, after
 * the policy check via secpolicy_vnode_setattr().  All changes are made
 * with both i_rwlock and i_contents held as writer; the inode is
 * flushed (or just time-stamped) on the way out regardless of error.
 */
/*ARGSUSED4*/
static int32_t
udf_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	int32_t error = 0;
	uint32_t mask = vap->va_mask;
	struct ud_inode *ip;
	timestruc_t now;
	struct vattr ovap;

	ud_printf("udf_setattr\n");

	ip = VTOI(vp);

	/*
	 * no updates allowed to 4096 files
	 */
	if (ip->i_astrat == STRAT_TYPE4096) {
		return (EINVAL);
	}

	/*
	 * Cannot set these attributes
	 */
	if (mask & AT_NOSET) {
		return (EINVAL);
	}

	rw_enter(&ip->i_rwlock, RW_WRITER);
	rw_enter(&ip->i_contents, RW_WRITER);

	/* Current owner/mode, for the policy check below. */
	ovap.va_uid = ip->i_uid;
	ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
	error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
	    ud_iaccess_vmode, ip);
	if (error)
		goto update_inode;

	/* The policy routine may have trimmed va_mask; re-read it. */
	mask = vap->va_mask;
	/*
	 * Change file access modes.
	 */
	if (mask & AT_MODE) {
		ip->i_perm = VA2UD_PERM(vap->va_mode);
		ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	if (mask & (AT_UID|AT_GID)) {
		if (mask & AT_UID) {
			ip->i_uid = vap->va_uid;
		}
		if (mask & AT_GID) {
			ip->i_gid = vap->va_gid;
		}
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= ICHG;
		mutex_exit(&ip->i_tlock);
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto update_inode;
		}
		if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
			goto update_inode;
		}
		if (vap->va_size > MAXOFFSET_T) {
			error = EFBIG;
			goto update_inode;
		}
		if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
			goto update_inode;
		}

		if (vap->va_size == 0)
			vnevent_truncate(vp, ct);
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		mutex_enter(&ip->i_tlock);
		if (mask & AT_ATIME) {
			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
			ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
			ip->i_flag &= ~IACC;
		}
		if (mask & AT_MTIME) {
			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
			ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
			/* An explicit mtime set also refreshes ctime. */
			gethrestime(&now);
			ip->i_ctime.tv_sec = now.tv_sec;
			ip->i_ctime.tv_nsec = now.tv_nsec;
			ip->i_flag &= ~(IUPD|ICHG);
			ip->i_flag |= IMODTIME;
		}
		ip->i_flag |= IMOD;
		mutex_exit(&ip->i_tlock);
	}

update_inode:
	/* Synchronous update when the thread must not defer writes. */
	if (curthread->t_flag & T_DONTPEND) {
		ud_iupdat(ip, 1);
	} else {
		ITIMES_NOLOCK(ip);
	}
	rw_exit(&ip->i_contents);
	rw_exit(&ip->i_rwlock);

	return (error);
}
609 593
610 594 /* ARGSUSED */
611 595 static int32_t
612 596 udf_access(
613 597 struct vnode *vp,
614 598 int32_t mode,
615 599 int32_t flags,
616 600 struct cred *cr,
617 601 caller_context_t *ct)
618 602 {
619 603 struct ud_inode *ip = VTOI(vp);
620 604
621 605 ud_printf("udf_access\n");
622 606
623 607 if (ip->i_udf == NULL) {
624 608 return (EIO);
625 609 }
626 610
627 611 return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
628 612 }
629 613
/*
 * When non-zero, udf_lookup() flags non-directory, sticky (ISVTX),
 * non-executable files with VISSWAP on their vnode.
 */
int32_t udfs_stickyhack = 1;
631 615
/*
 * VOP_LOOKUP for udfs: resolve name nm in directory dvp and return a
 * held vnode in *vpp.  Tries the directory name lookup cache first,
 * falling back to a directory scan via ud_dirlook().  Device nodes are
 * replaced by their specfs shadow vnode before returning.
 */
/* ARGSUSED */
static int32_t
udf_lookup(
	struct vnode *dvp,
	char *nm,
	struct vnode **vpp,
	struct pathname *pnp,
	int32_t flags,
	struct vnode *rdir,
	struct cred *cr,
	caller_context_t *ct,
	int *direntflags,
	pathname_t *realpnp)
{
	int32_t error;
	struct vnode *vp;
	struct ud_inode *ip, *xip;

	ud_printf("udf_lookup\n");
	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		error = 0;
		goto out;
	}

	/*
	 * Fast path: Check the directory name lookup cache.
	 */
	ip = VTOI(dvp);
	if (vp = dnlc_lookup(dvp, nm)) {
		/*
		 * Check accessibility of directory.
		 */
		if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
			/* Drop the hold dnlc_lookup() gave us on failure. */
			VN_RELE(vp);
		}
		xip = VTOI(vp);
	} else {
		error = ud_dirlook(ip, nm, &xip, cr, 1);
		ITIMES(ip);
	}

	if (error == 0) {
		ip = xip;
		*vpp = ITOV(ip);
		/*
		 * Sticky hack: mark sticky, non-executable regular files
		 * with VISSWAP (see udfs_stickyhack above).
		 */
		if ((ip->i_type != VDIR) &&
		    (ip->i_char & ISVTX) &&
		    ((ip->i_perm & IEXEC) == 0) &&
		    udfs_stickyhack) {
			mutex_enter(&(*vpp)->v_lock);
			(*vpp)->v_flag |= VISSWAP;
			mutex_exit(&(*vpp)->v_lock);
		}
		ITIMES(ip);
		/*
		 * If vnode is a device return special vnode instead.
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;
			newvp = specvp(*vpp, (*vpp)->v_rdev,
			    (*vpp)->v_type, cr);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				error = ENOSYS;
			} else {
				*vpp = newvp;
			}
		}
	}
out:
	return (error);
}
708 692
/*
 * VOP_CREATE for udfs: create (or, for non-exclusive creates, reuse)
 * the entry `name' in directory dvp and return its held vnode in *vpp.
 * Handles the EEXIST cases — permission re-check, and truncation of an
 * existing regular file when AT_SIZE 0 is requested — and swaps in a
 * specfs vnode for device nodes.
 */
/* ARGSUSED */
static int32_t
udf_create(
	struct vnode *dvp,
	char *name,
	struct vattr *vap,
	enum vcexcl excl,
	int32_t mode,
	struct vnode **vpp,
	struct cred *cr,
	int32_t flag,
	caller_context_t *ct,
	vsecattr_t *vsecp)
{
	int32_t error;
	struct ud_inode *ip = VTOI(dvp), *xip;

	ud_printf("udf_create\n");

	/* Only privileged callers may create sticky files. */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	if (*name == '\0') {
		/*
		 * Null component name refers to the directory itself.
		 */
		VN_HOLD(dvp);
		ITIMES(ip);
		error = EEXIST;
	} else {
		xip = NULL;
		rw_enter(&ip->i_rwlock, RW_WRITER);
		error = ud_direnter(ip, name, DE_CREATE,
		    (struct ud_inode *)0, (struct ud_inode *)0,
		    vap, &xip, cr, ct);
		rw_exit(&ip->i_rwlock);
		ITIMES(ip);
		/* ip now refers to the new/existing child (or NULL). */
		ip = xip;
	}
	if (ip != NULL) {
		rw_enter(&ip->i_contents, RW_WRITER);
	}

	/*
	 * If the file already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directories.
	 * Read-only create of an existing directory is also allowed.
	 * We fail an exclusive create of anything which already exists.
	 */
	if (error == EEXIST) {
		if (excl == NONEXCL) {
			if ((ip->i_type == VDIR) && (mode & VWRITE)) {
				error = EISDIR;
			} else if (mode) {
				error = ud_iaccess(ip,
				    UD_UPERM2DPERM(mode), cr, 0);
			} else {
				error = 0;
			}
		}
		if (error) {
			rw_exit(&ip->i_contents);
			VN_RELE(ITOV(ip));
			goto out;
		} else if ((ip->i_type == VREG) &&
		    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
			/*
			 * Truncate regular files, if requested by caller.
			 * Grab i_rwlock to make sure no one else is
			 * currently writing to the file (we promised
			 * bmap we would do this).
			 * Must get the locks in the correct order.
			 */
			if (ip->i_size == 0) {
				ip->i_flag |= ICHG | IUPD;
			} else {
				rw_exit(&ip->i_contents);
				rw_enter(&ip->i_rwlock, RW_WRITER);
				rw_enter(&ip->i_contents, RW_WRITER);
				(void) ud_itrunc(ip, 0, 0, cr);
				rw_exit(&ip->i_rwlock);
			}
			vnevent_create(ITOV(ip), ct);
		}
	}

	if (error == 0) {
		*vpp = ITOV(ip);
		ITIMES(ip);
	}
	if (ip != NULL) {
		rw_exit(&ip->i_contents);
	}
	if (error) {
		goto out;
	}

	/*
	 * If vnode is a device return special vnode instead.
	 */
	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
			goto out;
		}
		*vpp = newvp;
	}
out:
	return (error);
}
831 807
832 808 /* ARGSUSED */
833 809 static int32_t
834 810 udf_remove(
835 811 struct vnode *vp,
836 812 char *nm,
837 813 struct cred *cr,
838 814 caller_context_t *ct,
839 815 int flags)
840 816 {
841 817 int32_t error;
842 818 struct ud_inode *ip = VTOI(vp);
843 819
844 820 ud_printf("udf_remove\n");
845 821
846 822 rw_enter(&ip->i_rwlock, RW_WRITER);
847 823 error = ud_dirremove(ip, nm,
848 824 (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
849 825 rw_exit(&ip->i_rwlock);
850 826 ITIMES(ip);
851 827
852 828 return (error);
853 829 }
854 830
/*
 * VOP_LINK for udfs: create a hard link named tnm in directory tdvp to
 * the file svp.  Links to directories are refused, and linking a file
 * you do not own requires privilege.
 */
/* ARGSUSED */
static int32_t
udf_link(
	struct vnode *tdvp,
	struct vnode *svp,
	char *tnm,
	struct cred *cr,
	caller_context_t *ct,
	int flags)
{
	int32_t error;
	struct vnode *realvp;
	struct ud_inode *sip;
	struct ud_inode *tdp;

	ud_printf("udf_link\n");
	/* Link to the underlying vnode if svp is a shadow (e.g. specfs). */
	if (VOP_REALVP(svp, &realvp, ct) == 0) {
		svp = realvp;
	}

	/*
	 * Do not allow links to directories
	 */
	if (svp->v_type == VDIR) {
		return (EPERM);
	}

	sip = VTOI(svp);

	if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
		return (EPERM);

	tdp = VTOI(tdvp);

	rw_enter(&tdp->i_rwlock, RW_WRITER);
	error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
	    sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
	rw_exit(&tdp->i_rwlock);
	ITIMES(sip);
	ITIMES(tdp);

	if (error == 0) {
		vnevent_link(svp, ct);
	}

	return (error);
}
902 878
/*
 * VOP_RENAME for udfs: rename sdvp/snm to tdvp/tnm.  The whole
 * operation is serialized file-system-wide by udf_rename_lck; the
 * source is looked up and permission-checked, linked into the target
 * directory via ud_direnter(DE_RENAME), and then removed from the
 * source directory.  vnevent notifications are raised for watchers of
 * the source, target, and target directory.
 */
/* ARGSUSED */
static int32_t
udf_rename(
	struct vnode *sdvp,
	char *snm,
	struct vnode *tdvp,
	char *tnm,
	struct cred *cr,
	caller_context_t *ct,
	int flags)
{
	int32_t error = 0;
	struct udf_vfs *udf_vfsp;
	struct ud_inode *sip;		/* source inode */
	struct ud_inode *tip;		/* target inode */
	struct ud_inode *sdp, *tdp;	/* source and target parent inode */
	struct vnode *realvp;

	ud_printf("udf_rename\n");

	/* Rename into the underlying vnode if tdvp is a shadow. */
	if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
		tdvp = realvp;
	}

	sdp = VTOI(sdvp);
	tdp = VTOI(tdvp);

	udf_vfsp = sdp->i_udf;

	/* One rename at a time per file system. */
	mutex_enter(&udf_vfsp->udf_rename_lck);
	/*
	 * Look up inode of file we're supposed to rename.
	 */
	if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
		mutex_exit(&udf_vfsp->udf_rename_lck);
		return (error);
	}
	/*
	 * be sure this is not a directory with another file system mounted
	 * over it. If it is just give up the locks, and return with
	 * EBUSY
	 */
	if (vn_mountedvfs(ITOV(sip)) != NULL) {
		error = EBUSY;
		goto errout;
	}
	/*
	 * Make sure we can delete the source entry. This requires
	 * write permission on the containing directory. If that
	 * directory is "sticky" it further requires (except for
	 * privileged users) that the user own the directory or the
	 * source entry, or else have permission to write the source
	 * entry.
	 */
	rw_enter(&sdp->i_contents, RW_READER);
	rw_enter(&sip->i_contents, RW_READER);
	if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
	    (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		ITIMES(sip);
		goto errout;
	}

	/*
	 * Check for renaming '.' or '..' or alias of '.'
	 */
	if ((strcmp(snm, ".") == 0) ||
	    (strcmp(snm, "..") == 0) ||
	    (sdp == sip)) {
		error = EINVAL;
		rw_exit(&sip->i_contents);
		rw_exit(&sdp->i_contents);
		goto errout;
	}

	rw_exit(&sip->i_contents);
	rw_exit(&sdp->i_contents);

	/* Warn watchers of an existing target that it may be replaced. */
	if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
		vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
		VN_RELE(ITOV(tip));
	}

	/* Notify the target dir. if not the same as the source dir. */
	if (sdvp != tdvp)
		vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);

	vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);

	/*
	 * Link source to the target.
	 */
	rw_enter(&tdp->i_rwlock, RW_WRITER);
	if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
	    (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME) {
			error = 0;
		}
		rw_exit(&tdp->i_rwlock);
		goto errout;
	}
	rw_exit(&tdp->i_rwlock);

	rw_enter(&sdp->i_rwlock, RW_WRITER);
	/*
	 * Unlink the source.
	 * Remove the source entry.  ud_dirremove() checks that the entry
	 * still reflects sip, and returns an error if it doesn't.
	 * If the entry has changed just forget about it.  Release
	 * the source inode.
	 */
	if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
	    DR_RENAME, cr, ct)) == ENOENT) {
		error = 0;
	}
	rw_exit(&sdp->i_rwlock);

	if (error == 0) {
		vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
		/*
		 * vnevent_rename_dest and vnevent_rename_dest_dir are called
		 * in ud_direnter().
		 */
	}

errout:
	ITIMES(sdp);
	ITIMES(tdp);
	VN_RELE(ITOV(sip));
	mutex_exit(&udf_vfsp->udf_rename_lck);

	return (error);
}
1042 1018
1043 1019 /* ARGSUSED */
1044 1020 static int32_t
1045 1021 udf_mkdir(
1046 1022 struct vnode *dvp,
1047 1023 char *dirname,
1048 1024 struct vattr *vap,
1049 1025 struct vnode **vpp,
1050 1026 struct cred *cr,
1051 1027 caller_context_t *ct,
1052 1028 int flags,
1053 1029 vsecattr_t *vsecp)
1054 1030 {
1055 1031 int32_t error;
1056 1032 struct ud_inode *ip;
1057 1033 struct ud_inode *xip;
1058 1034
1059 1035 ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1060 1036
1061 1037 ud_printf("udf_mkdir\n");
1062 1038
1063 1039 ip = VTOI(dvp);
1064 1040 rw_enter(&ip->i_rwlock, RW_WRITER);
1065 1041 error = ud_direnter(ip, dirname, DE_MKDIR,
1066 1042 (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1067 1043 rw_exit(&ip->i_rwlock);
1068 1044 ITIMES(ip);
1069 1045 if (error == 0) {
1070 1046 ip = xip;
1071 1047 *vpp = ITOV(ip);
1072 1048 ITIMES(ip);
1073 1049 } else if (error == EEXIST) {
1074 1050 ITIMES(xip);
1075 1051 VN_RELE(ITOV(xip));
1076 1052 }
1077 1053
1078 1054 return (error);
1079 1055 }
1080 1056
1081 1057 /* ARGSUSED */
1082 1058 static int32_t
1083 1059 udf_rmdir(
1084 1060 struct vnode *vp,
1085 1061 char *nm,
1086 1062 struct vnode *cdir,
1087 1063 struct cred *cr,
1088 1064 caller_context_t *ct,
1089 1065 int flags)
1090 1066 {
1091 1067 int32_t error;
1092 1068 struct ud_inode *ip = VTOI(vp);
1093 1069
1094 1070 ud_printf("udf_rmdir\n");
1095 1071
1096 1072 rw_enter(&ip->i_rwlock, RW_WRITER);
1097 1073 error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1098 1074 cr, ct);
1099 1075 rw_exit(&ip->i_rwlock);
1100 1076 ITIMES(ip);
1101 1077
1102 1078 return (error);
1103 1079 }
1104 1080
/*
 * VOP_READDIR for udfs.
 *
 * Translate the directory's on-media file identifier descriptors (FIDs)
 * into dirent64 records in a temporary buffer sized by the caller's
 * first iovec, then copy them out with uiomove().
 *
 * Offset convention: a "." entry is synthesized at offset 0 with
 * d_off 0x10, and an incoming offset of 0x10 is mapped back to 0 so
 * the scan of the real FID stream starts at its beginning.  A FID with
 * FID_PARENT set becomes "..".  *eofp (if non-NULL) is set when the
 * whole directory has been consumed.
 */
/* ARGSUSED */
static int32_t
udf_readdir(
	struct vnode *vp,
	struct uio *uiop,
	struct cred *cr,
	int32_t *eofp,
	caller_context_t *ct,
	int flags)
{
	struct ud_inode *ip;
	struct dirent64 *nd;
	struct udf_vfs *udf_vfsp;
	int32_t error = 0, len, outcount = 0;
	uint32_t dirsiz, offset;
	uint32_t bufsize, ndlen, dummy;
	caddr_t outbuf;
	caddr_t outb, end_outb;
	struct iovec *iovp;

	uint8_t *dname;
	int32_t length;

	uint8_t *buf = NULL;

	struct fbuf *fbp = NULL;
	struct file_id *fid;
	uint8_t *name;


	ud_printf("udf_readdir\n");

	ip = VTOI(vp);
	udf_vfsp = ip->i_udf;

	/* Past EOF, or a directory with no remaining links: report EOF. */
	dirsiz = ip->i_size;
	if ((uiop->uio_offset >= dirsiz) ||
	    (ip->i_nlink <= 0)) {
		if (eofp) {
			*eofp = 1;
		}
		return (0);
	}

	offset = uiop->uio_offset;
	iovp = uiop->uio_iov;
	bufsize = iovp->iov_len;

	/* Staging buffer for dirent64 records, filled up to end_outb. */
	outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
	end_outb = outb + bufsize;
	nd = (struct dirent64 *)outbuf;

	/* dname: scratch for uncompressed names; buf: one logical block. */
	dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
	buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

	if (offset == 0) {
		/* Synthesize "." for the very first read. */
		len = DIRENT64_RECLEN(1);
		if (((caddr_t)nd + len) >= end_outb) {
			error = EINVAL;
			goto end;
		}
		nd->d_ino = ip->i_icb_lbano;
		nd->d_reclen = (uint16_t)len;
		nd->d_off = 0x10;
		nd->d_name[0] = '.';
		bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
		nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
		outcount++;
	} else if (offset == 0x10) {
		/* 0x10 is the synthetic "." cookie; resume at stream start. */
		offset = 0;
	}

	while (offset < dirsiz) {
		error = ud_get_next_fid(ip, &fbp,
		    offset, &fid, &name, buf);
		if (error != 0) {
			break;
		}

		/* Deleted entries are skipped entirely. */
		if ((fid->fid_flags & FID_DELETED) == 0) {
			if (fid->fid_flags & FID_PARENT) {

				/* Parent FID becomes "..". */
				len = DIRENT64_RECLEN(2);
				if (((caddr_t)nd + len) >= end_outb) {
					error = EINVAL;
					break;
				}

				nd->d_ino = ip->i_icb_lbano;
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd->d_name[0] = '.';
				nd->d_name[1] = '.';
				bzero(&nd->d_name[2],
				    DIRENT64_NAMELEN(len) - 2);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			} else {
				if ((error = ud_uncompress(fid->fid_idlen,
				    &length, name, dname)) != 0) {
					break;
				}
				/* Nothing usable in this FID's name field. */
				if (length == 0) {
					offset += FID_LEN(fid);
					continue;
				}
				len = DIRENT64_RECLEN(length);
				if (((caddr_t)nd + len) >= end_outb) {
					/*
					 * Buffer full.  Only an error if we
					 * could not emit even one entry.
					 */
					if (!outcount) {
						error = EINVAL;
					}
					break;
				}
				(void) strncpy(nd->d_name,
				    (caddr_t)dname, length);
				bzero(&nd->d_name[length],
				    DIRENT64_NAMELEN(len) - length);
				nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
				    SWAP_16(fid->fid_icb.lad_ext_prn),
				    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
				    &dummy);
				nd->d_reclen = (uint16_t)len;
				nd->d_off = offset + FID_LEN(fid);
				nd = (struct dirent64 *)
				    ((char *)nd + nd->d_reclen);
			}
			outcount++;
		}

		offset += FID_LEN(fid);
	}

end:
	if (fbp != NULL) {
		fbrelse(fbp, S_OTHER);
	}
	ndlen = ((char *)nd - outbuf);
	/*
	 * In case of error do not call uiomove.
	 * Return the error to the caller.
	 */
	if ((error == 0) && (ndlen != 0)) {
		error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
		uiop->uio_offset = offset;
	}
	kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
	kmem_free((caddr_t)dname, 1024);
	kmem_free(outbuf, (uint32_t)bufsize);
	if (eofp && error == 0) {
		*eofp = (uiop->uio_offset >= dirsiz);
	}
	return (error);
}
1258 1234
/*
 * VOP_SYMLINK for udfs.
 *
 * Create "linkname" in directory "dvp", then encode "target" as a
 * sequence of UDF path components (struct path_comp, each with a
 * 4-byte header) and write that encoding as the symlink body via
 * ud_rdwri().  Component types used here: 2 = root ("/"),
 * 3 = parent (".."), 4 = self ("."), 5 = compressed identifier.
 * On any failure after the entry was created, the entry is removed
 * again with ud_dirremove().
 */
/* ARGSUSED */
static int32_t
udf_symlink(
	struct vnode *dvp,
	char *linkname,
	struct vattr *vap,
	char *target,
	struct cred *cr,
	caller_context_t *ct,
	int flags)
{
	int32_t error = 0, outlen;
	uint32_t ioflag = 0;
	struct ud_inode *ip, *dip = VTOI(dvp);

	struct path_comp *pc;
	/* dname: compressed-name scratch; uname: the encoded link body. */
	int8_t *dname = NULL, *uname = NULL, *sp;

	ud_printf("udf_symlink\n");

	ip = (struct ud_inode *)0;
	vap->va_type = VLNK;
	vap->va_rdev = 0;

	rw_enter(&dip->i_rwlock, RW_WRITER);
	error = ud_direnter(dip, linkname, DE_CREATE,
	    (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
	rw_exit(&dip->i_rwlock);
	if (error == 0) {
		dname = kmem_zalloc(1024, KM_SLEEP);
		uname = kmem_zalloc(PAGESIZE, KM_SLEEP);

		pc = (struct path_comp *)uname;
		/*
		 * If the first character in target is "/"
		 * then skip it and create entry for it
		 */
		if (*target == '/') {
			pc->pc_type = 2;
			pc->pc_len = 0;
			pc = (struct path_comp *)(((char *)pc) + 4);
			while (*target == '/') {
				target++;
			}
		}

		/*
		 * NOTE(review): "*target != NULL" compares a char against
		 * the null-pointer constant; it behaves as "!= 0" but
		 * should read "!= '\0'" (same below at the loop bottom).
		 */
		while (*target != NULL) {
			/* sp..target will delimit the next component. */
			sp = target;
			while ((*target != '/') && (*target != '\0')) {
				target ++;
			}
			/*
			 * We got the next component of the
			 * path name. Create path_comp of
			 * appropriate type
			 */
			if (((target - sp) == 1) && (*sp == '.')) {
				/*
				 * Dot entry.
				 */
				pc->pc_type = 4;
				pc = (struct path_comp *)(((char *)pc) + 4);
			} else if (((target - sp) == 2) &&
			    (*sp == '.') && ((*(sp + 1)) == '.')) {
				/*
				 * DotDot entry.
				 */
				pc->pc_type = 3;
				pc = (struct path_comp *)(((char *)pc) + 4);
			} else {
				/*
				 * convert the user given name
				 * into appropriate form to be put
				 * on the media
				 */
				outlen = 1024;	/* set to size of dname */
				if (error = ud_compress(target - sp, &outlen,
				    (uint8_t *)sp, (uint8_t *)dname)) {
					break;
				}
				pc->pc_type = 5;
				/* LINTED */
				pc->pc_len = outlen;
				dname[outlen] = '\0';
				(void) strcpy((char *)pc->pc_id, dname);
				pc = (struct path_comp *)
				    (((char *)pc) + 4 + outlen);
			}
			/* Skip separator run; stop at end of target. */
			while (*target == '/') {
				target++;
			}
			if (*target == NULL) {
				break;
			}
		}

		rw_enter(&ip->i_contents, RW_WRITER);
		if (error == 0) {
			/* Honor forced-synchronous threads with FDSYNC. */
			ioflag = FWRITE;
			if (curthread->t_flag & T_DONTPEND) {
				ioflag |= FDSYNC;
			}
			error = ud_rdwri(UIO_WRITE, ioflag, ip,
			    uname, ((int8_t *)pc) - uname,
			    (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
		}
		if (error) {
			/* Undo the directory entry we created above. */
			ud_idrop(ip);
			rw_exit(&ip->i_contents);
			rw_enter(&dip->i_rwlock, RW_WRITER);
			(void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
			    (struct vnode *)0, DR_REMOVE, cr, ct);
			rw_exit(&dip->i_rwlock);
			goto update_inode;
		}
		rw_exit(&ip->i_contents);
	}

	if ((error == 0) || (error == EEXIST)) {
		VN_RELE(ITOV(ip));
	}

update_inode:
	ITIMES(VTOI(dvp));
	if (uname != NULL) {
		kmem_free(uname, PAGESIZE);
	}
	if (dname != NULL) {
		kmem_free(dname, 1024);
	}

	return (error);
}
1392 1368
1393 1369 /* ARGSUSED */
1394 1370 static int32_t
1395 1371 udf_readlink(
1396 1372 struct vnode *vp,
1397 1373 struct uio *uiop,
1398 1374 struct cred *cr,
1399 1375 caller_context_t *ct)
1400 1376 {
1401 1377 int32_t error = 0, off, id_len, size, len;
1402 1378 int8_t *dname = NULL, *uname = NULL;
1403 1379 struct ud_inode *ip;
1404 1380 struct fbuf *fbp = NULL;
1405 1381 struct path_comp *pc;
1406 1382
1407 1383 ud_printf("udf_readlink\n");
1408 1384
1409 1385 if (vp->v_type != VLNK) {
1410 1386 return (EINVAL);
1411 1387 }
1412 1388
1413 1389 ip = VTOI(vp);
1414 1390 size = ip->i_size;
1415 1391 if (size > PAGESIZE) {
1416 1392 return (EIO);
1417 1393 }
1418 1394
1419 1395 if (size == 0) {
1420 1396 return (0);
1421 1397 }
1422 1398
1423 1399 dname = kmem_zalloc(1024, KM_SLEEP);
1424 1400 uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1425 1401
1426 1402 rw_enter(&ip->i_contents, RW_READER);
1427 1403
1428 1404 if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1429 1405 goto end;
1430 1406 }
1431 1407
1432 1408 off = 0;
1433 1409
1434 1410 while (off < size) {
1435 1411 pc = (struct path_comp *)(fbp->fb_addr + off);
1436 1412 switch (pc->pc_type) {
1437 1413 case 1 :
1438 1414 (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1439 1415 (void) strcat(uname, "/");
1440 1416 break;
1441 1417 case 2 :
1442 1418 if (pc->pc_len != 0) {
1443 1419 goto end;
1444 1420 }
1445 1421 uname[0] = '/';
1446 1422 uname[1] = '\0';
1447 1423 break;
1448 1424 case 3 :
1449 1425 (void) strcat(uname, "../");
1450 1426 break;
1451 1427 case 4 :
1452 1428 (void) strcat(uname, "./");
1453 1429 break;
1454 1430 case 5 :
1455 1431 if ((error = ud_uncompress(pc->pc_len, &id_len,
1456 1432 pc->pc_id, (uint8_t *)dname)) != 0) {
1457 1433 break;
1458 1434 }
1459 1435 dname[id_len] = '\0';
1460 1436 (void) strcat(uname, dname);
1461 1437 (void) strcat(uname, "/");
1462 1438 break;
1463 1439 default :
1464 1440 error = EINVAL;
1465 1441 goto end;
1466 1442 }
1467 1443 off += 4 + pc->pc_len;
1468 1444 }
1469 1445 len = strlen(uname) - 1;
1470 1446 if (uname[len] == '/') {
1471 1447 if (len == 0) {
1472 1448 /*
1473 1449 * special case link to /
1474 1450 */
1475 1451 len = 1;
1476 1452 } else {
1477 1453 uname[len] = '\0';
1478 1454 }
1479 1455 }
1480 1456
1481 1457 error = uiomove(uname, len, UIO_READ, uiop);
1482 1458
1483 1459 ITIMES(ip);
1484 1460
1485 1461 end:
1486 1462 if (fbp != NULL) {
1487 1463 fbrelse(fbp, S_OTHER);
1488 1464 }
1489 1465 rw_exit(&ip->i_contents);
1490 1466 if (uname != NULL) {
1491 1467 kmem_free(uname, PAGESIZE);
1492 1468 }
1493 1469 if (dname != NULL) {
1494 1470 kmem_free(dname, 1024);
1495 1471 }
1496 1472 return (error);
1497 1473 }
1498 1474
1499 1475 /* ARGSUSED */
1500 1476 static int32_t
1501 1477 udf_fsync(
1502 1478 struct vnode *vp,
1503 1479 int32_t syncflag,
1504 1480 struct cred *cr,
1505 1481 caller_context_t *ct)
1506 1482 {
1507 1483 int32_t error = 0;
1508 1484 struct ud_inode *ip = VTOI(vp);
1509 1485
1510 1486 ud_printf("udf_fsync\n");
1511 1487
1512 1488 rw_enter(&ip->i_contents, RW_WRITER);
1513 1489 if (!(IS_SWAPVP(vp))) {
1514 1490 error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1515 1491 }
1516 1492 if (error == 0) {
1517 1493 error = ud_sync_indir(ip);
1518 1494 }
1519 1495 ITIMES(ip); /* XXX: is this necessary ??? */
1520 1496 rw_exit(&ip->i_contents);
1521 1497
1522 1498 return (error);
1523 1499 }
1524 1500
/*
 * VOP_INACTIVE for udfs: delegate final inode teardown to
 * ud_iinactive() when the last vnode reference is released.
 */
/* ARGSUSED */
static void
udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
{
	ud_printf("udf_iinactive\n");

	ud_iinactive(VTOI(vp), cr);
}
1533 1509
/*
 * VOP_FID for udfs: encode a compact file identifier from the inode's
 * unique id and on-media ICB location (partition ref + block).
 * If the caller's buffer is too small, set fid_len to the required
 * size and return ENOSPC.
 */
/* ARGSUSED */
static int32_t
udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
{
	struct udf_fid *udfidp;
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_fid\n");

	if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
		fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
		return (ENOSPC);
	}

	udfidp = (struct udf_fid *)fidp;
	bzero((char *)udfidp, sizeof (struct udf_fid));
	/* Read the identifying fields under i_contents. */
	rw_enter(&ip->i_contents, RW_READER);
	udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
	udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
	udfidp->udfid_prn = ip->i_icb_prn;
	udfidp->udfid_icb_lbn = ip->i_icb_block;
	rw_exit(&ip->i_contents);

	return (0);
}
1559 1535
1560 1536 /* ARGSUSED2 */
1561 1537 static int
1562 1538 udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
↓ open down ↓ |
743 lines elided |
↑ open up ↑ |
1563 1539 {
1564 1540 struct ud_inode *ip = VTOI(vp);
1565 1541
1566 1542 ud_printf("udf_rwlock\n");
1567 1543
1568 1544 if (write_lock) {
1569 1545 rw_enter(&ip->i_rwlock, RW_WRITER);
1570 1546 } else {
1571 1547 rw_enter(&ip->i_rwlock, RW_READER);
1572 1548 }
1573 -#ifdef __lock_lint
1574 - rw_exit(&ip->i_rwlock);
1575 -#endif
1576 1549 return (write_lock);
1577 1550 }
1578 1551
/*
 * VOP_RWUNLOCK for udfs: drop i_rwlock taken by udf_rwlock();
 * rw_exit() releases either mode, so write_lock is unused.
 */
/* ARGSUSED */
static void
udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_rwunlock\n");

	rw_exit(&ip->i_rwlock);

}
1594 1563
1595 1564 /* ARGSUSED */
1596 1565 static int32_t
1597 1566 udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1598 1567 {
1599 1568 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1600 1569 }
1601 1570
1602 1571 static int32_t
1603 1572 udf_frlock(
1604 1573 struct vnode *vp,
1605 1574 int32_t cmd,
1606 1575 struct flock64 *bfp,
1607 1576 int32_t flag,
1608 1577 offset_t offset,
1609 1578 struct flk_callback *flk_cbp,
1610 1579 cred_t *cr,
1611 1580 caller_context_t *ct)
1612 1581 {
1613 1582 struct ud_inode *ip = VTOI(vp);
1614 1583
1615 1584 ud_printf("udf_frlock\n");
1616 1585
1617 1586 /*
1618 1587 * If file is being mapped, disallow frlock.
1619 1588 * XXX I am not holding tlock while checking i_mapcnt because the
1620 1589 * current locking strategy drops all locks before calling fs_frlock.
1621 1590 * So, mapcnt could change before we enter fs_frlock making is
1622 1591 * meaningless to have held tlock in the first place.
1623 1592 */
1624 1593 if ((ip->i_mapcnt > 0) &&
1625 1594 (MANDLOCK(vp, ip->i_char))) {
1626 1595 return (EAGAIN);
1627 1596 }
1628 1597
1629 1598 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1630 1599 }
1631 1600
1632 1601 /*ARGSUSED6*/
1633 1602 static int32_t
1634 1603 udf_space(
1635 1604 struct vnode *vp,
1636 1605 int32_t cmd,
1637 1606 struct flock64 *bfp,
1638 1607 int32_t flag,
1639 1608 offset_t offset,
1640 1609 cred_t *cr,
1641 1610 caller_context_t *ct)
1642 1611 {
1643 1612 int32_t error = 0;
1644 1613
1645 1614 ud_printf("udf_space\n");
1646 1615
1647 1616 if (cmd != F_FREESP) {
1648 1617 error = EINVAL;
1649 1618 } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1650 1619 error = ud_freesp(vp, bfp, flag, cr);
1651 1620
1652 1621 if (error == 0 && bfp->l_start == 0)
1653 1622 vnevent_truncate(vp, ct);
1654 1623 }
1655 1624
1656 1625 return (error);
1657 1626 }
1658 1627
/*
 * VOP_GETPAGE for udfs.
 *
 * Return the pages backing the range <off, off + len) of "vp" in
 * "plarr" (NULL plarr means fault-ahead: just kick off asynchronous
 * reads).  i_contents is taken here only if this thread does not
 * already own it ("dolock"); it is upgraded to RW_WRITER — retrying
 * from scratch if the upgrade fails — when holes must be filled via
 * ud_bmap_write() for S_WRITE/S_CREATE faults, and downgraded again
 * afterwards.  Sequential access ("seqmode") triggers cluster
 * read-ahead through ud_getpage_ra().
 */
/* ARGSUSED */
static int32_t
udf_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint32_t *protp,
	struct page **plarr,
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);
	int32_t error, has_holes, beyond_eof, seqmode, dolock;
	int32_t pgsize = PAGESIZE;
	struct udf_vfs *udf_vfsp = ip->i_udf;
	page_t **pl;
	u_offset_t pgoff, eoff, uoff;
	krw_t rwtype;
	caddr_t pgaddr;

	ud_printf("udf_getpage\n");

	uoff = (u_offset_t)off; /* type conversion */
	if (protp) {
		*protp = PROT_ALL;
	}
	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}
	/* Sequential if this fault continues where the last one stopped. */
	seqmode = ip->i_nextr == uoff && rw != S_CREATE;

	rwtype = RW_READER;
	/* Only lock i_contents if the caller does not already hold it. */
	dolock = (rw_owner(&ip->i_contents) != curthread);
retrylock:
	if (dolock) {
		rw_enter(&ip->i_contents, rwtype);
	}

	/*
	 * We may be getting called as a side effect of a bmap using
	 * fbread() when the blocks might be being allocated and the
	 * size has not yet been up'ed. In this case we want to be
	 * able to return zero pages if we get back UDF_HOLE from
	 * calling bmap for a non write case here. We also might have
	 * to read some frags from the disk into a page if we are
	 * extending the number of frags for a given lbn in bmap().
	 */
	beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
	if (beyond_eof && seg != segkmap) {
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
		return (EFAULT);
	}

	/*
	 * Must hold i_contents lock throughout the call to pvn_getpages
	 * since locked pages are returned from each call to ud_getapage.
	 * Must *not* return locked pages and then try for contents lock
	 * due to lock ordering requirements (inode > page)
	 */

	has_holes = ud_bmap_has_holes(ip);

	if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
		int32_t	blk_size, count;
		u_offset_t offset;

		/*
		 * We must acquire the RW_WRITER lock in order to
		 * call bmap_write().
		 */
		if (dolock && rwtype == RW_READER) {
			rwtype = RW_WRITER;

			/*
			 * Upgrade may fail if another reader raced in;
			 * drop everything and retry with the writer lock.
			 */
			if (!rw_tryupgrade(&ip->i_contents)) {

				rw_exit(&ip->i_contents);

				goto retrylock;
			}
		}

		/*
		 * May be allocating disk blocks for holes here as
		 * a result of mmap faults. write(2) does the bmap_write
		 * in rdip/wrip, not here. We are not dealing with frags
		 * in this case.
		 */
		offset = uoff;
		while ((offset < uoff + len) &&
		    (offset < ip->i_size)) {
			/*
			 * the variable "bnp" is to simplify the expression for
			 * the compiler; * just passing in &bn to bmap_write
			 * causes a compiler "loop"
			 */

			blk_size = udf_vfsp->udf_lbsize;
			if ((offset + blk_size) > ip->i_size) {
				count = ip->i_size - offset;
			} else {
				count = blk_size;
			}
			error = ud_bmap_write(ip, offset, count, 0, cr);
			if (error) {
				goto update_inode;
			}
			offset += count; /* XXX - make this contig */
		}
	}

	/*
	 * Can be a reader from now on.
	 */
	if (dolock && rwtype == RW_WRITER) {
		rw_downgrade(&ip->i_contents);
	}

	/*
	 * We remove PROT_WRITE in cases when the file has UDF holes
	 * because we don't want to call bmap_read() to check each
	 * page if it is backed with a disk block.
	 */
	if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
		*protp &= ~PROT_WRITE;
	}

	error = 0;

	/*
	 * The loop looks up pages in the range <off, off + len).
	 * For each page, we first check if we should initiate an asynchronous
	 * read ahead before we call page_lookup (we may sleep in page_lookup
	 * for a previously initiated disk read).
	 */
	eoff = (uoff + len);
	for (pgoff = uoff, pgaddr = addr, pl = plarr;
	    pgoff < eoff; /* empty */) {
		page_t	*pp;
		u_offset_t	nextrio;
		se_t	se;

		se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);

		/*
		 * Handle async getpage (faultahead)
		 */
		if (plarr == NULL) {
			ip->i_nextrio = pgoff;
			ud_getpage_ra(vp, pgoff, seg, pgaddr);
			pgoff += pgsize;
			pgaddr += pgsize;
			continue;
		}

		/*
		 * Check if we should initiate read ahead of next cluster.
		 * We call page_exists only when we need to confirm that
		 * we have the current page before we initiate the read ahead.
		 */
		nextrio = ip->i_nextrio;
		if (seqmode &&
		    pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
		    nextrio < ip->i_size && page_exists(vp, pgoff))
			ud_getpage_ra(vp, pgoff, seg, pgaddr);

		if ((pp = page_lookup(vp, pgoff, se)) != NULL) {

			/*
			 * We found the page in the page cache.
			 */
			*pl++ = pp;
			pgoff += pgsize;
			pgaddr += pgsize;
			len -= pgsize;
			plsz -= pgsize;
		} else {

			/*
			 * We have to create the page, or read it from disk.
			 */
			if (error = ud_getpage_miss(vp, pgoff, len,
			    seg, pgaddr, pl, plsz, rw, seqmode)) {
				goto error_out;
			}

			/* Advance past every page the miss handler filled. */
			while (*pl != NULL) {
				pl++;
				pgoff += pgsize;
				pgaddr += pgsize;
				len -= pgsize;
				plsz -= pgsize;
			}
		}
	}

	/*
	 * Return pages up to plsz if they are in the page cache.
	 * We cannot return pages if there is a chance that they are
	 * backed with a UDF hole and rw is S_WRITE or S_CREATE.
	 */
	if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {

		ASSERT((protp == NULL) ||
		    !(has_holes && (*protp & PROT_WRITE)));

		eoff = pgoff + plsz;
		while (pgoff < eoff) {
			page_t *pp;

			if ((pp = page_lookup_nowait(vp, pgoff,
			    SE_SHARED)) == NULL)
				break;

			*pl++ = pp;
			pgoff += pgsize;
			plsz -= pgsize;
		}
	}

	if (plarr)
		*pl = NULL;			/* Terminate page list */
	ip->i_nextr = pgoff;

error_out:
	if (error && plarr) {
		/*
		 * Release any pages we have locked.
		 */
		while (pl > &plarr[0])
			page_unlock(*--pl);

		plarr[0] = NULL;
	}

update_inode:
	if (dolock) {
		rw_exit(&ip->i_contents);
	}

	/*
	 * If the inode is not already marked for IACC (in rwip() for read)
	 * and the inode is not marked for no access time update (in rwip()
	 * for write) then update the inode access time and mod time now.
	 */
	mutex_enter(&ip->i_tlock);
	if ((ip->i_flag & (IACC | INOACC)) == 0) {
		if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
			ip->i_flag |= IACC;
		}
		if (rw == S_WRITE) {
			ip->i_flag |= IUPD;
		}
		ITIMES_NOLOCK(ip);
	}
	mutex_exit(&ip->i_tlock);

	return (error);
}
1941 1892
/* Tunable: non-zero enables clustering of delayed (B_ASYNC) writes. */
int32_t ud_delay = 1;

/*
 * VOP_PUTPAGE for udfs.
 *
 * For plain B_ASYNC requests (and ud_delay set), writes are clustered
 * per-inode: a contiguous run is accumulated in i_delayoff/i_delaylen
 * under i_tlock and only pushed via ud_putpages() once it reaches
 * WR_CLUSTSZ or a non-contiguous request arrives.  Everything else
 * goes straight to ud_putpages().
 */
/* ARGSUSED */
static int32_t
udf_putpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	int32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip;
	int32_t error = 0;

	ud_printf("udf_putpage\n");

	ip = VTOI(vp);

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpage : bad v_count");
		error = EINVAL;
		goto out;
	}

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto out;
	}

	if (flags & B_ASYNC) {
		if (ud_delay && len &&
		    (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
			mutex_enter(&ip->i_tlock);

			/*
			 * If nobody stalled, start a new cluster.
			 */
			if (ip->i_delaylen == 0) {
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				goto out;
			}

			/*
			 * If we have a full cluster or they are not contig,
			 * then push last cluster and start over.
			 */
			if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
			    ip->i_delayoff + ip->i_delaylen != off) {
				u_offset_t doff;
				size_t dlen;

				doff = ip->i_delayoff;
				dlen = ip->i_delaylen;
				ip->i_delayoff = off;
				ip->i_delaylen = len;
				mutex_exit(&ip->i_tlock);
				error = ud_putpages(vp, doff, dlen, flags, cr);
				/* LMXXX - flags are new val, not old */
				goto out;
			}

			/*
			 * There is something there, it's not full, and
			 * it is contig.
			 */
			ip->i_delaylen += len;
			mutex_exit(&ip->i_tlock);
			goto out;
		}

		/*
		 * Must have weird flags or we are not clustering.
		 */
	}

	error = ud_putpages(vp, off, len, flags, cr);

out:
	return (error);
}
2031 1976
/*
 * VOP_MAP for udfs: validate the request and create a segvn mapping
 * for a regular file.  Fails with ENOSYS for VNOMAP vnodes, EINVAL
 * for negative/overflowing offsets, ENODEV for non-regular files and
 * EAGAIN when mandatory locks are present.
 */
/* ARGSUSED */
static int32_t
udf_map(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uint8_t prot,
	uint8_t maxprot,
	uint32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct segvn_crargs vn_a;
	int32_t error = 0;

	ud_printf("udf_map\n");

	if (vp->v_flag & VNOMAP) {
		error = ENOSYS;
		goto end;
	}

	if ((off < (offset_t)0) ||
	    ((off + len) < (offset_t)0)) {
		error = EINVAL;
		goto end;
	}

	if (vp->v_type != VREG) {
		error = ENODEV;
		goto end;
	}

	/*
	 * If file is being locked, disallow mapping.
	 */
	if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
		error = EAGAIN;
		goto end;
	}

	/* Pick (or validate) the user address under the as range lock. */
	as_rangelock(as);
	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
	if (error != 0) {
		as_rangeunlock(as);
		goto end;
	}

	vn_a.vp = vp;
	vn_a.offset = off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.cred = cr;
	vn_a.amp = NULL;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
	as_rangeunlock(as);

end:
	return (error);
}
2099 2044
/*
 * VOP_ADDMAP for udfs: account for new mapped pages by bumping
 * i_mapcnt (checked by udf_frlock()/mandatory-lock logic).
 */
/* ARGSUSED */
static int32_t
udf_addmap(struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint8_t prot,
	uint8_t maxprot,
	uint32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_addmap\n");

	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_mapcnt += btopr(len);
	mutex_exit(&ip->i_tlock);

	return (0);
}
2127 2072
/*
 * VOP_DELMAP for udfs: release the mapping accounting added by
 * udf_addmap(); i_mapcnt must never go negative.
 */
/* ARGSUSED */
static int32_t
udf_delmap(
	struct vnode *vp, offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint32_t prot,
	uint32_t maxprot,
	uint32_t flags,
	struct cred *cr,
	caller_context_t *ct)
{
	struct ud_inode *ip = VTOI(vp);

	ud_printf("udf_delmap\n");

	if (vp->v_flag & VNOMAP) {
		return (ENOSYS);
	}

	mutex_enter(&ip->i_tlock);
	ip->i_mapcnt -= btopr(len); 	/* Count released mappings */
	ASSERT(ip->i_mapcnt >= 0);
	mutex_exit(&ip->i_tlock);

	return (0);
}
2156 2101
2157 2102 /* ARGSUSED */
2158 2103 static int32_t
2159 2104 udf_l_pathconf(
2160 2105 struct vnode *vp,
2161 2106 int32_t cmd,
2162 2107 ulong_t *valp,
2163 2108 struct cred *cr,
2164 2109 caller_context_t *ct)
2165 2110 {
2166 2111 int32_t error = 0;
2167 2112
2168 2113 ud_printf("udf_l_pathconf\n");
2169 2114
2170 2115 if (cmd == _PC_FILESIZEBITS) {
2171 2116 /*
2172 2117 * udf supports 64 bits as file size
2173 2118 * but there are several other restrictions
2174 2119 * it only supports 32-bit block numbers and
2175 2120 * daddr32_t is only and int32_t so taking these
2176 2121 * into account we can stay just as where ufs is
2177 2122 */
2178 2123 *valp = 41;
2179 2124 } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
↓ open down ↓ |
141 lines elided |
↑ open up ↑ |
2180 2125 /* nanosecond timestamp resolution */
2181 2126 *valp = 1L;
2182 2127 } else {
2183 2128 error = fs_pathconf(vp, cmd, valp, cr, ct);
2184 2129 }
2185 2130
2186 2131 return (error);
2187 2132 }
2188 2133
/*
 * Statistics: counts of pageio read and write requests issued by
 * udf_pageio().  Updated without a lock — the removed warlock
 * annotations declared them "safe sharing" (statistics only).
 */
uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;

2194 2136 /*
2195 2137 * Assumption is that there will not be a pageio request
2196 2138 * to a enbedded file
2197 2139 */
2198 2140 /* ARGSUSED */
2199 2141 static int32_t
2200 2142 udf_pageio(
2201 2143 struct vnode *vp,
2202 2144 struct page *pp,
2203 2145 u_offset_t io_off,
2204 2146 size_t io_len,
2205 2147 int32_t flags,
2206 2148 struct cred *cr,
2207 2149 caller_context_t *ct)
2208 2150 {
2209 2151 daddr_t bn;
2210 2152 struct buf *bp;
2211 2153 struct ud_inode *ip = VTOI(vp);
2212 2154 int32_t dolock, error = 0, contig, multi_io;
2213 2155 size_t done_len = 0, cur_len = 0;
2214 2156 page_t *npp = NULL, *opp = NULL, *cpp = pp;
2215 2157
2216 2158 if (pp == NULL) {
2217 2159 return (EINVAL);
2218 2160 }
2219 2161
2220 2162 dolock = (rw_owner(&ip->i_contents) != curthread);
2221 2163
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
2222 2164 /*
2223 2165 * We need a better check. Ideally, we would use another
2224 2166 * vnodeops so that hlocked and forcibly unmounted file
2225 2167 * systems would return EIO where appropriate and w/o the
2226 2168 * need for these checks.
2227 2169 */
2228 2170 if (ip->i_udf == NULL) {
2229 2171 return (EIO);
2230 2172 }
2231 2173
2232 -#ifdef __lock_lint
2233 - rw_enter(&ip->i_contents, RW_READER);
2234 -#else
2235 2174 if (dolock) {
2236 2175 rw_enter(&ip->i_contents, RW_READER);
2237 2176 }
2238 -#endif
2239 2177
2240 2178 /*
2241 2179 * Break the io request into chunks, one for each contiguous
2242 2180 * stretch of disk blocks in the target file.
2243 2181 */
2244 2182 while (done_len < io_len) {
2245 2183 ASSERT(cpp);
2246 2184 bp = NULL;
2247 2185 contig = 0;
2248 2186 if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2249 2187 &bn, &contig)) {
2250 2188 break;
2251 2189 }
2252 2190
2253 2191 if (bn == UDF_HOLE) { /* No holey swapfiles */
2254 2192 cmn_err(CE_WARN, "SWAP file has HOLES");
2255 2193 error = EINVAL;
2256 2194 break;
2257 2195 }
2258 2196
2259 2197 cur_len = MIN(io_len - done_len, contig);
2260 2198
2261 2199 /*
2262 2200 * Check if more than one I/O is
2263 2201 * required to complete the given
2264 2202 * I/O operation
2265 2203 */
2266 2204 if (ip->i_udf->udf_lbsize < PAGESIZE) {
2267 2205 if (cur_len >= PAGESIZE) {
2268 2206 multi_io = 0;
2269 2207 cur_len &= PAGEMASK;
2270 2208 } else {
2271 2209 multi_io = 1;
2272 2210 cur_len = MIN(io_len - done_len, PAGESIZE);
2273 2211 }
2274 2212 }
2275 2213 page_list_break(&cpp, &npp, btop(cur_len));
2276 2214
2277 2215 bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2278 2216 ASSERT(bp != NULL);
2279 2217
2280 2218 bp->b_edev = ip->i_dev;
2281 2219 bp->b_dev = cmpdev(ip->i_dev);
2282 2220 bp->b_blkno = bn;
2283 2221 bp->b_un.b_addr = (caddr_t)0;
2284 2222 bp->b_file = vp;
2285 2223 bp->b_offset = (offset_t)(io_off + done_len);
2286 2224
2287 2225 /*
2288 2226 * ub.ub_pageios.value.ul++;
2289 2227 */
2290 2228 if (multi_io == 0) {
2291 2229 (void) bdev_strategy(bp);
2292 2230 } else {
2293 2231 error = ud_multi_strat(ip, cpp, bp,
2294 2232 (u_offset_t)(io_off + done_len));
2295 2233 if (error != 0) {
2296 2234 pageio_done(bp);
2297 2235 break;
2298 2236 }
2299 2237 }
2300 2238 if (flags & B_READ) {
2301 2239 ud_pageio_reads++;
2302 2240 } else {
2303 2241 ud_pageio_writes++;
2304 2242 }
2305 2243
2306 2244 /*
2307 2245 * If the request is not B_ASYNC, wait for i/o to complete
2308 2246 * and re-assemble the page list to return to the caller.
2309 2247 * If it is B_ASYNC we leave the page list in pieces and
2310 2248 * cleanup() will dispose of them.
2311 2249 */
2312 2250 if ((flags & B_ASYNC) == 0) {
2313 2251 error = biowait(bp);
2314 2252 pageio_done(bp);
2315 2253 if (error) {
2316 2254 break;
2317 2255 }
2318 2256 page_list_concat(&opp, &cpp);
2319 2257 }
2320 2258 cpp = npp;
2321 2259 npp = NULL;
2322 2260 done_len += cur_len;
2323 2261 }
2324 2262
2325 2263 ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2326 2264 if (error) {
2327 2265 if (flags & B_ASYNC) {
2328 2266 /* Cleanup unprocessed parts of list */
2329 2267 page_list_concat(&cpp, &npp);
2330 2268 if (flags & B_READ) {
2331 2269 pvn_read_done(cpp, B_ERROR);
↓ open down ↓ |
83 lines elided |
↑ open up ↑ |
2332 2270 } else {
2333 2271 pvn_write_done(cpp, B_ERROR);
2334 2272 }
2335 2273 } else {
2336 2274 /* Re-assemble list and let caller clean up */
2337 2275 page_list_concat(&opp, &cpp);
2338 2276 page_list_concat(&opp, &npp);
2339 2277 }
2340 2278 }
2341 2279
2342 -#ifdef __lock_lint
2343 - rw_exit(&ip->i_contents);
2344 -#else
2345 2280 if (dolock) {
2346 2281 rw_exit(&ip->i_contents);
2347 2282 }
2348 -#endif
2283 +
2349 2284 return (error);
2350 2285 }
2351 2286
2352 2287
2353 2288
2354 2289
2355 2290 /* -------------------- local functions --------------------------- */
2356 2291
2357 2292
2358 2293
2359 2294 int32_t
2360 2295 ud_rdwri(enum uio_rw rw, int32_t ioflag,
2361 2296 struct ud_inode *ip, caddr_t base, int32_t len,
2362 2297 offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2363 2298 {
2364 2299 int32_t error;
2365 2300 struct uio auio;
2366 2301 struct iovec aiov;
2367 2302
2368 2303 ud_printf("ud_rdwri\n");
2369 2304
2370 2305 bzero((caddr_t)&auio, sizeof (uio_t));
2371 2306 bzero((caddr_t)&aiov, sizeof (iovec_t));
2372 2307
2373 2308 aiov.iov_base = base;
2374 2309 aiov.iov_len = len;
2375 2310 auio.uio_iov = &aiov;
2376 2311 auio.uio_iovcnt = 1;
2377 2312 auio.uio_loffset = offset;
2378 2313 auio.uio_segflg = (int16_t)seg;
2379 2314 auio.uio_resid = len;
2380 2315
2381 2316 if (rw == UIO_WRITE) {
2382 2317 auio.uio_fmode = FWRITE;
2383 2318 auio.uio_extflg = UIO_COPY_DEFAULT;
2384 2319 auio.uio_llimit = curproc->p_fsz_ctl;
2385 2320 error = ud_wrip(ip, &auio, ioflag, cr);
2386 2321 } else {
2387 2322 auio.uio_fmode = FREAD;
2388 2323 auio.uio_extflg = UIO_COPY_CACHED;
2389 2324 auio.uio_llimit = MAXOFFSET_T;
2390 2325 error = ud_rdip(ip, &auio, ioflag, cr);
2391 2326 }
2392 2327
2393 2328 if (aresid) {
2394 2329 *aresid = auio.uio_resid;
2395 2330 } else if (auio.uio_resid) {
2396 2331 error = EIO;
2397 2332 }
2398 2333 return (error);
2399 2334 }
2400 2335
/*
 * Free behind hacks. The pager is busted.
 * XXX - need to pass the information down to writedone() in a flag like B_SEQ
 * or B_FREE_IF_TIGHT_ON_MEMORY.
 */
int32_t ud_freebehind = 1;	/* enable free-behind on sequential reads */
int32_t ud_smallfile = 32 * 1024; /* no free-behind until offset passes this */
2408 2343
2409 2344 /* ARGSUSED */
2410 2345 int32_t
2411 2346 ud_getpage_miss(struct vnode *vp, u_offset_t off,
2412 2347 size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2413 2348 size_t plsz, enum seg_rw rw, int32_t seq)
2414 2349 {
2415 2350 struct ud_inode *ip = VTOI(vp);
2416 2351 int32_t err = 0;
2417 2352 size_t io_len;
2418 2353 u_offset_t io_off;
2419 2354 u_offset_t pgoff;
2420 2355 page_t *pp;
2421 2356
2422 2357 pl[0] = NULL;
2423 2358
2424 2359 /*
2425 2360 * Figure out whether the page can be created, or must be
2426 2361 * read from the disk
2427 2362 */
2428 2363 if (rw == S_CREATE) {
2429 2364 if ((pp = page_create_va(vp, off,
2430 2365 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2431 2366 cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2432 2367 return (EINVAL);
2433 2368 }
2434 2369 io_len = PAGESIZE;
2435 2370 } else {
2436 2371 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2437 2372 &io_len, off, PAGESIZE, 0);
2438 2373
2439 2374 /*
2440 2375 * Some other thread has entered the page.
2441 2376 * ud_getpage will retry page_lookup.
2442 2377 */
2443 2378 if (pp == NULL) {
2444 2379 return (0);
2445 2380 }
2446 2381
2447 2382 /*
2448 2383 * Fill the page with as much data as we can from the file.
2449 2384 */
2450 2385 err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2451 2386 if (err) {
2452 2387 pvn_read_done(pp, B_ERROR);
2453 2388 return (err);
2454 2389 }
2455 2390
2456 2391 /*
2457 2392 * XXX ??? ufs has io_len instead of pgoff below
2458 2393 */
2459 2394 ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2460 2395
2461 2396 /*
2462 2397 * If the file access is sequential, initiate read ahead
2463 2398 * of the next cluster.
2464 2399 */
2465 2400 if (seq && ip->i_nextrio < ip->i_size) {
2466 2401 ud_getpage_ra(vp, off, seg, addr);
2467 2402 }
2468 2403 }
2469 2404
2470 2405 outmiss:
2471 2406 pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2472 2407 return (err);
2473 2408 }
2474 2409
2475 2410 /* ARGSUSED */
2476 2411 void
2477 2412 ud_getpage_ra(struct vnode *vp,
2478 2413 u_offset_t off, struct seg *seg, caddr_t addr)
2479 2414 {
2480 2415 page_t *pp;
2481 2416 size_t io_len;
2482 2417 struct ud_inode *ip = VTOI(vp);
2483 2418 u_offset_t io_off = ip->i_nextrio, pgoff;
2484 2419 caddr_t addr2 = addr + (io_off - off);
2485 2420 daddr_t bn;
2486 2421 int32_t contig = 0;
2487 2422
2488 2423 /*
2489 2424 * Is this test needed?
2490 2425 */
2491 2426
2492 2427 if (addr2 >= seg->s_base + seg->s_size) {
2493 2428 return;
2494 2429 }
2495 2430
2496 2431 contig = 0;
2497 2432 if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2498 2433 return;
2499 2434 }
2500 2435
2501 2436 pp = pvn_read_kluster(vp, io_off, seg, addr2,
2502 2437 &io_off, &io_len, io_off, PAGESIZE, 1);
2503 2438
2504 2439 /*
2505 2440 * Some other thread has entered the page.
2506 2441 * So no read head done here (ie we will have to and wait
2507 2442 * for the read when needed).
2508 2443 */
2509 2444
2510 2445 if (pp == NULL) {
2511 2446 return;
2512 2447 }
2513 2448
2514 2449 (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2515 2450 ip->i_nextrio = io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2516 2451 }
2517 2452
/*
 * Fill page(s) "pp" starting at file offset "off" with file data.
 * For embedded files (ICB_FLAG_ONE_AD) the data is copied out of the
 * cached file_entry block; otherwise it is read from disk, with
 * ud_multi_strat() used when the extent is not contiguous enough for a
 * single strategy call.  On return *pg_off holds the number of bytes
 * made valid.  With B_ASYNC in "bflgs" the caller must not assume the
 * I/O has completed.
 */
int
ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
	uint32_t bflgs, u_offset_t *pg_off)
{
	daddr_t bn;
	struct buf *bp;
	caddr_t kaddr, caddr;
	int32_t error = 0, contig = 0, multi_io = 0;
	int32_t lbsize = ip->i_udf->udf_lbsize;
	int32_t lbmask = ip->i_udf->udf_lbmask;
	uint64_t isize;

	/* File size rounded up to a logical-block boundary. */
	isize = (ip->i_size + lbmask) & (~lbmask);
	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {

		/*
		 * Embedded file: read the file_entry
		 * from the buffer cache and copy the required
		 * portions into the page.
		 */
		bp = ud_bread(ip->i_dev,
		    ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
		if ((bp->b_error == 0) &&
		    (bp->b_resid == 0)) {

			caddr = bp->b_un.b_addr + ip->i_data_off;

			/*
			 * mapin to kvm
			 */
			kaddr = (caddr_t)ppmapin(pp,
			    PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(caddr, kaddr, ip->i_size);

			/*
			 * mapout of kvm
			 */
			ppmapout(kaddr);
		}
		brelse(bp);
		contig = ip->i_size;
	} else {

		/*
		 * Get the continuous size and block number
		 * at offset "off"
		 */
		if (error = ud_bmap_read(ip, off, &bn, &contig))
			goto out;
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		/*
		 * Zero part of the page which we are not
		 * going to read from the disk.
		 */

		if (bn == UDF_HOLE) {

			/*
			 * This is a HOLE. Just zero out
			 * the page
			 */
			if (((off + contig) == isize) ||
			    (contig == PAGESIZE)) {
				pagezero(pp->p_prev, 0, PAGESIZE);
				goto out;
			}
		}

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				/* extent not contiguous: I/O in pieces */
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			} else {
				/* zero the tail beyond the valid data */
				pagezero(pp->p_prev, contig, PAGESIZE - contig);
			}
		}

		/*
		 * Get a bp and initialize it
		 */
		bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = ip->i_vnode;

		/*
		 * Start I/O
		 */
		if (multi_io == 0) {

			/*
			 * Single I/O is sufficient for this page
			 */
			(void) bdev_strategy(bp);
		} else {

			/*
			 * We need to do the I/O in
			 * piece's
			 */
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}
		if ((bflgs & B_ASYNC) == 0) {

			/*
			 * Wait for i/o to complete.
			 */

			error = biowait(bp);
			pageio_done(bp);
			if (error) {
				goto out;
			}
		}
	}
	/* Clamp the valid-byte count to the real end of file. */
	if ((off + contig) >= ip->i_size) {
		contig = ip->i_size - off;
	}

out:
	*pg_off = contig;
	return (error);
}
2653 2588
/*
 * Flush dirty pages of the vnode in the range [off, off+len).
 * len == 0 means the whole file (handled via pvn_vplist_dirty);
 * otherwise each page offset in the range is looked up and, if dirty,
 * pushed through ud_putapage().  Takes i_contents as reader unless the
 * calling thread already owns it.
 */
int32_t
ud_putpages(struct vnode *vp, offset_t off,
	size_t len, int32_t flags, struct cred *cr)
{
	struct ud_inode *ip;
	page_t *pp;
	u_offset_t io_off;
	size_t io_len;
	u_offset_t eoff;
	int32_t err = 0;
	int32_t dolock;

	ud_printf("ud_putpages\n");

	if (vp->v_count == 0) {
		cmn_err(CE_WARN, "ud_putpages: bad v_count");
		return (EINVAL);
	}

	ip = VTOI(vp);

	/*
	 * Acquire the readers/write inode lock before locking
	 * any pages in this inode.
	 * The inode lock is held during i/o.
	 */
	if (len == 0) {
		/* whole-file flush cancels any pending delayed-write range */
		mutex_enter(&ip->i_tlock);
		ip->i_delayoff = ip->i_delaylen = 0;
		mutex_exit(&ip->i_tlock);
	}
	dolock = (rw_owner(&ip->i_contents) != curthread);
	if (dolock) {
		rw_enter(&ip->i_contents, RW_READER);
	}

	if (!vn_has_cached_data(vp)) {
		/* nothing cached, nothing to push */
		if (dolock) {
			rw_exit(&ip->i_contents);
		}
		return (0);
	}

	if (len == 0) {
		/*
		 * Search the entire vp list for pages >= off.
		 */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
		    flags, cr);
	} else {
		/*
		 * Loop over all offsets in the range looking for
		 * pages to deal with.
		 */
		if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
			eoff = MIN(off + len, eoff);
		} else {
			eoff = off + len;
		}

		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages, use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
				io_len = PAGESIZE;
			} else {

				err = ud_putapage(vp, pp,
				    &io_off, &io_len, flags, cr);
				if (err != 0) {
					break;
				}
				/*
				 * "io_off" and "io_len" are returned as
				 * the range of pages we actually wrote.
				 * This allows us to skip ahead more quickly
				 * since several pages may've been dealt
				 * with by this iteration of the loop.
				 */
			}
		}
	}
	if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
		/*
		 * We have just sync'ed back all the pages on
		 * the inode, turn off the IMODTIME flag.
		 */
		mutex_enter(&ip->i_tlock);
		ip->i_flag &= ~IMODTIME;
		mutex_exit(&ip->i_tlock);
	}
	if (dolock) {
		rw_exit(&ip->i_contents);
	}
	return (err);
}
2775 2698
/*
 * Write out the page "pp" plus any dirty neighbours klustered with it.
 * Embedded files (ICB_FLAG_ONE_AD) get their data copied back into the
 * file_entry block, which is re-tagged and rewritten with bwrite();
 * regular extents are written with one strategy call, or through
 * ud_multi_strat() when the on-disk extent is not contiguous.
 * *offp/*lenp (if non-NULL) return the range actually written.
 * Caller must hold i_contents.
 */
/* ARGSUSED */
int32_t
ud_putapage(struct vnode *vp,
	page_t *pp, u_offset_t *offp,
	size_t *lenp, int32_t flags, struct cred *cr)
{
	daddr_t bn;
	size_t io_len;
	struct ud_inode *ip;
	int32_t error = 0, contig, multi_io = 0;
	struct udf_vfs *udf_vfsp;
	u_offset_t off, io_off;
	caddr_t kaddr, caddr;
	struct buf *bp = NULL;
	int32_t lbmask;
	uint64_t isize;
	uint16_t crc_len;
	struct file_entry *fe;

	ud_printf("ud_putapage\n");

	ip = VTOI(vp);
	ASSERT(ip);
	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	lbmask = ip->i_udf->udf_lbmask;
	isize = (ip->i_size + lbmask) & (~lbmask);

	udf_vfsp = ip->i_udf;
	ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);

	/*
	 * If the modified time on the inode has not already been
	 * set elsewhere (e.g. for write/setattr) we set the time now.
	 * This gives us approximate modified times for mmap'ed files
	 * which are modified via stores in the user address space.
	 */
	if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
		mutex_enter(&ip->i_tlock);
		ip->i_flag |= IUPD;
		ITIMES_NOLOCK(ip);
		mutex_exit(&ip->i_tlock);
	}


	/*
	 * Align the request to a block boundary (for old file systems),
	 * and go ask bmap() how contiguous things are for this file.
	 */
	off = pp->p_offset & ~(offset_t)lbmask;
	/* block align it */


	if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
		ASSERT(ip->i_size <= ip->i_max_emb);

		pp = pvn_write_kluster(vp, pp, &io_off,
		    &io_len, off, PAGESIZE, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		/* Read back the file_entry block holding the embedded data. */
		bp = ud_bread(ip->i_dev,
		    ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
		    udf_vfsp->udf_lbsize);
		fe = (struct file_entry *)bp->b_un.b_addr;
		if ((bp->b_flags & B_ERROR) ||
		    (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
		    ip->i_icb_block,
		    1, udf_vfsp->udf_lbsize) != 0)) {
			/* bad read or corrupt descriptor: fail the pages */
			if (pp != NULL)
				pvn_write_done(pp, B_ERROR | B_WRITE | flags);
			if (bp->b_flags & B_ERROR) {
				error = EIO;
			} else {
				error = EINVAL;
			}
			brelse(bp);
			return (error);
		}
		if ((bp->b_error == 0) &&
		    (bp->b_resid == 0)) {

			/* copy page contents into the embedded-data area */
			caddr = bp->b_un.b_addr + ip->i_data_off;
			kaddr = (caddr_t)ppmapin(pp,
			    PROT_READ | PROT_WRITE, (caddr_t)-1);
			(void) kcopy(kaddr, caddr, ip->i_size);
			ppmapout(kaddr);
		}
		/* recompute the descriptor tag/CRC over header + data */
		crc_len = offsetof(struct file_entry, fe_spec) +
		    SWAP_32(fe->fe_len_ear);
		crc_len += ip->i_size;
		ud_make_tag(ip->i_udf, &fe->fe_tag,
		    UD_FILE_ENTRY, ip->i_icb_block, crc_len);

		bwrite(bp);

		if (flags & B_ASYNC) {
			pvn_write_done(pp, flags);
		}
		contig = ip->i_size;
	} else {

		if (error = ud_bmap_read(ip, off, &bn, &contig)) {
			goto out;
		}
		contig = MIN(contig, PAGESIZE);
		contig = (contig + lbmask) & (~lbmask);

		if (contig < PAGESIZE) {
			uint64_t count;

			count = isize - off;
			if (contig != count) {
				/* non-contiguous extent: write in pieces */
				multi_io = 1;
				contig = (int32_t)(MIN(count, PAGESIZE));
			}
		}

		if ((off + contig) > isize) {
			contig = isize - off;
		}

		if (contig > PAGESIZE) {
			if (contig & PAGEOFFSET) {
				contig &= PAGEMASK;
			}
		}

		pp = pvn_write_kluster(vp, pp, &io_off,
		    &io_len, off, contig, flags);
		if (io_len == 0) {
			io_len = PAGESIZE;
		}

		bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
		ASSERT(bp != NULL);

		bp->b_edev = ip->i_dev;
		bp->b_dev = cmpdev(ip->i_dev);
		bp->b_blkno = bn;
		bp->b_un.b_addr = 0;
		bp->b_file = vp;
		bp->b_offset = (offset_t)off;


		/*
		 * write throttle: account the outstanding bytes so that
		 * ud_iodone() can wake throttled writers as I/O completes
		 */
		ASSERT(bp->b_iodone == NULL);
		bp->b_iodone = ud_iodone;
		mutex_enter(&ip->i_tlock);
		ip->i_writes += bp->b_bcount;
		mutex_exit(&ip->i_tlock);

		if (multi_io == 0) {

			(void) bdev_strategy(bp);
		} else {
			error = ud_multi_strat(ip, pp, bp, off);
			if (error != 0) {
				goto out;
			}
		}

		if ((flags & B_ASYNC) == 0) {
			/*
			 * Wait for i/o to complete.
			 */
			error = biowait(bp);
			pageio_done(bp);
		}
	}

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
	}

	pp = NULL;

out:
	/* error path: pages not yet handed to pvn_write_done above */
	if (error != 0 && pp != NULL) {
		pvn_write_done(pp, B_ERROR | B_WRITE | flags);
	}

	if (offp) {
		*offp = io_off;
	}
	if (lenp) {
		*lenp = io_len;
	}

	return (error);
}
2969 2892
2970 2893
2971 2894 int32_t
2972 2895 ud_iodone(struct buf *bp)
2973 2896 {
2974 2897 struct ud_inode *ip;
2975 2898
2976 2899 ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2977 2900
2978 2901 bp->b_iodone = NULL;
2979 2902
2980 2903 ip = VTOI(bp->b_pages->p_vnode);
2981 2904
2982 2905 mutex_enter(&ip->i_tlock);
2983 2906 if (ip->i_writes >= ud_LW) {
2984 2907 if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2985 2908 if (ud_WRITES) {
2986 2909 cv_broadcast(&ip->i_wrcv); /* wake all up */
2987 2910 }
2988 2911 }
2989 2912 } else {
2990 2913 ip->i_writes -= bp->b_bcount;
2991 2914 }
2992 2915 mutex_exit(&ip->i_tlock);
2993 2916 iodone(bp);
2994 2917 return (0);
2995 2918 }
2996 2919
/*
 * Internal file read: copy up to uio_resid bytes into the caller's uio,
 * one logical-block-sized chunk at a time through segkmap.  Caller must
 * hold i_contents (reader or writer); when only held as reader it is
 * dropped around the segmap fault/copy and re-taken afterwards.
 * Implements free-behind for large sequential reads and the FRSYNC
 * inode-update semantics; a partial read terminates without error.
 */
/* ARGSUSED3 */
int32_t
ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
{
	struct vnode *vp;
	struct udf_vfs *udf_vfsp;
	krw_t rwtype;
	caddr_t base;
	uint32_t flags;
	int32_t error, n, on, mapon, dofree;
	u_offset_t off;
	long oresid = uio->uio_resid;

	ASSERT(RW_LOCK_HELD(&ip->i_contents));
	if ((ip->i_type != VREG) &&
	    (ip->i_type != VDIR) &&
	    (ip->i_type != VLNK)) {
		return (EIO);
	}

	if (uio->uio_loffset > MAXOFFSET_T) {
		return (0);
	}

	if ((uio->uio_loffset < (offset_t)0) ||
	    ((uio->uio_loffset + uio->uio_resid) < 0)) {
		return (EINVAL);
	}
	if (uio->uio_resid == 0) {
		return (0);
	}

	vp = ITOV(ip);
	udf_vfsp = ip->i_udf;
	mutex_enter(&ip->i_tlock);
	ip->i_flag |= IACC;	/* mark access time update pending */
	mutex_exit(&ip->i_tlock);

	rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);

	do {
		offset_t diff;
		u_offset_t uoff = uio->uio_loffset;
		/* MAXBSIZE-aligned segmap window and offsets within it */
		off = uoff & (offset_t)MAXBMASK;
		mapon = (int)(uoff & (offset_t)MAXBOFFSET);
		on = (int)blkoff(udf_vfsp, uoff);
		n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);

		diff = ip->i_size - uoff;

		if (diff <= (offset_t)0) {
			/* at or past EOF */
			error = 0;
			goto out;
		}
		if (diff < (offset_t)n) {
			n = (int)diff;
		}
		/* sequential read far enough into the file? */
		dofree = ud_freebehind &&
		    ip->i_nextr == (off & PAGEMASK) &&
		    off > ud_smallfile;

		/* drop the reader lock around the segmap fault/copy */
		if (rwtype == RW_READER) {
			rw_exit(&ip->i_contents);
		}

		base = segmap_getmapflt(segkmap, vp, (off + mapon),
		    (uint32_t)n, 1, S_READ);
		error = uiomove(base + mapon, (long)n, UIO_READ, uio);

		flags = 0;
		if (!error) {
			/*
			 * If read a whole block, or read to eof,
			 * won't need this buffer again soon.
			 */
			if (n + on == MAXBSIZE && ud_freebehind && dofree &&
			    freemem < lotsfree + pages_before_pager) {
				flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
			}
			/*
			 * In POSIX SYNC (FSYNC and FDSYNC) read mode,
			 * we want to make sure that the page which has
			 * been read, is written on disk if it is dirty.
			 * And corresponding indirect blocks should also
			 * be flushed out.
			 */
			if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
				flags &= ~SM_ASYNC;
				flags |= SM_WRITE;
			}
			error = segmap_release(segkmap, base, flags);
		} else {
			(void) segmap_release(segkmap, base, flags);
		}

		if (rwtype == RW_READER) {
			rw_enter(&ip->i_contents, rwtype);
		}
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
out:
	/*
	 * Inode is updated according to this table if FRSYNC is set.
	 *
	 *   FSYNC	  FDSYNC(posix.4)
	 *   --------------------------
	 *   always	  IATTCHG|IBDWRITE
	 */
	if (ioflag & FRSYNC) {
		if ((ioflag & FSYNC) ||
		    ((ioflag & FDSYNC) &&
		    (ip->i_flag & (IATTCHG|IBDWRITE)))) {
			rw_exit(&ip->i_contents);
			rw_enter(&ip->i_contents, RW_WRITER);
			ud_iupdat(ip, 1);
		}
	}
	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
	 */
	if (oresid != uio->uio_resid) {
		error = 0;
	}
	ITIMES(ip);

	return (error);
}
3128 3047
3129 3048 int32_t
3130 3049 ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3131 3050 {
3132 3051 caddr_t base;
3133 3052 struct vnode *vp;
3134 3053 struct udf_vfs *udf_vfsp;
3135 3054 uint32_t flags;
3136 3055 int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3137 3056 int32_t pagecreate, newpage;
3138 3057 uint64_t old_i_size;
3139 3058 u_offset_t off;
3140 3059 long start_resid = uio->uio_resid, premove_resid;
3141 3060 rlim64_t limit = uio->uio_limit;
3142 3061
3143 3062
3144 3063 ASSERT(RW_WRITE_HELD(&ip->i_contents));
3145 3064 if ((ip->i_type != VREG) &&
3146 3065 (ip->i_type != VDIR) &&
3147 3066 (ip->i_type != VLNK)) {
3148 3067 return (EIO);
3149 3068 }
3150 3069
3151 3070 if (uio->uio_loffset >= MAXOFFSET_T) {
3152 3071 return (EFBIG);
3153 3072 }
3154 3073 /*
3155 3074 * see udf_l_pathconf
3156 3075 */
3157 3076 if (limit > (((uint64_t)1 << 40) - 1)) {
3158 3077 limit = ((uint64_t)1 << 40) - 1;
3159 3078 }
3160 3079 if (uio->uio_loffset >= limit) {
3161 3080 proc_t *p = ttoproc(curthread);
3162 3081
3163 3082 mutex_enter(&p->p_lock);
3164 3083 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3165 3084 p, RCA_UNSAFE_SIGINFO);
3166 3085 mutex_exit(&p->p_lock);
3167 3086 return (EFBIG);
3168 3087 }
3169 3088 if ((uio->uio_loffset < (offset_t)0) ||
3170 3089 ((uio->uio_loffset + uio->uio_resid) < 0)) {
3171 3090 return (EINVAL);
3172 3091 }
3173 3092 if (uio->uio_resid == 0) {
3174 3093 return (0);
3175 3094 }
3176 3095
3177 3096 mutex_enter(&ip->i_tlock);
3178 3097 ip->i_flag |= INOACC;
3179 3098
3180 3099 if (ioflag & (FSYNC | FDSYNC)) {
3181 3100 ip->i_flag |= ISYNC;
3182 3101 iupdat_flag = 1;
3183 3102 }
3184 3103 mutex_exit(&ip->i_tlock);
3185 3104
3186 3105 udf_vfsp = ip->i_udf;
3187 3106 vp = ITOV(ip);
3188 3107
3189 3108 do {
3190 3109 u_offset_t uoff = uio->uio_loffset;
3191 3110 off = uoff & (offset_t)MAXBMASK;
3192 3111 mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3193 3112 on = (int)blkoff(udf_vfsp, uoff);
3194 3113 n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3195 3114
3196 3115 if (ip->i_type == VREG && uoff + n >= limit) {
3197 3116 if (uoff >= limit) {
3198 3117 error = EFBIG;
3199 3118 goto out;
3200 3119 }
3201 3120 n = (int)(limit - (rlim64_t)uoff);
3202 3121 }
3203 3122 if (uoff + n > ip->i_size) {
3204 3123 /*
3205 3124 * We are extending the length of the file.
3206 3125 * bmap is used so that we are sure that
3207 3126 * if we need to allocate new blocks, that it
3208 3127 * is done here before we up the file size.
3209 3128 */
3210 3129 error = ud_bmap_write(ip, uoff,
3211 3130 (int)(on + n), mapon == 0, cr);
3212 3131 if (error) {
3213 3132 break;
3214 3133 }
3215 3134 i_size_changed = 1;
3216 3135 old_i_size = ip->i_size;
3217 3136 ip->i_size = uoff + n;
3218 3137 /*
3219 3138 * If we are writing from the beginning of
3220 3139 * the mapping, we can just create the
3221 3140 * pages without having to read them.
3222 3141 */
3223 3142 pagecreate = (mapon == 0);
3224 3143 } else if (n == MAXBSIZE) {
3225 3144 /*
3226 3145 * Going to do a whole mappings worth,
3227 3146 * so we can just create the pages w/o
3228 3147 * having to read them in. But before
3229 3148 * we do that, we need to make sure any
3230 3149 * needed blocks are allocated first.
3231 3150 */
3232 3151 error = ud_bmap_write(ip, uoff,
3233 3152 (int)(on + n), 1, cr);
3234 3153 if (error) {
3235 3154 break;
3236 3155 }
3237 3156 pagecreate = 1;
3238 3157 } else {
3239 3158 pagecreate = 0;
3240 3159 }
3241 3160
3242 3161 rw_exit(&ip->i_contents);
3243 3162
3244 3163 /*
3245 3164 * Touch the page and fault it in if it is not in
3246 3165 * core before segmap_getmapflt can lock it. This
3247 3166 * is to avoid the deadlock if the buffer is mapped
3248 3167 * to the same file through mmap which we want to
3249 3168 * write to.
3250 3169 */
3251 3170 uio_prefaultpages((long)n, uio);
3252 3171
3253 3172 base = segmap_getmapflt(segkmap, vp, (off + mapon),
3254 3173 (uint32_t)n, !pagecreate, S_WRITE);
3255 3174
3256 3175 /*
3257 3176 * segmap_pagecreate() returns 1 if it calls
3258 3177 * page_create_va() to allocate any pages.
3259 3178 */
3260 3179 newpage = 0;
3261 3180 if (pagecreate) {
3262 3181 newpage = segmap_pagecreate(segkmap, base,
3263 3182 (size_t)n, 0);
3264 3183 }
3265 3184
3266 3185 premove_resid = uio->uio_resid;
3267 3186 error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3268 3187
3269 3188 if (pagecreate &&
3270 3189 uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3271 3190 /*
3272 3191 * We created pages w/o initializing them completely,
3273 3192 * thus we need to zero the part that wasn't set up.
3274 3193 * This happens on most EOF write cases and if
3275 3194 * we had some sort of error during the uiomove.
3276 3195 */
3277 3196 int nzero, nmoved;
3278 3197
3279 3198 nmoved = (int)(uio->uio_loffset - (off + mapon));
3280 3199 ASSERT(nmoved >= 0 && nmoved <= n);
3281 3200 nzero = roundup(on + n, PAGESIZE) - nmoved;
3282 3201 ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3283 3202 (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3284 3203 }
3285 3204
3286 3205 /*
3287 3206 * Unlock the pages allocated by page_create_va()
3288 3207 * in segmap_pagecreate()
3289 3208 */
3290 3209 if (newpage) {
3291 3210 segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3292 3211 }
3293 3212
3294 3213 if (error) {
3295 3214 /*
3296 3215 * If we failed on a write, we may have already
3297 3216 * allocated file blocks as well as pages. It's
3298 3217 * hard to undo the block allocation, but we must
3299 3218 * be sure to invalidate any pages that may have
3300 3219 * been allocated.
3301 3220 */
3302 3221 (void) segmap_release(segkmap, base, SM_INVAL);
3303 3222 } else {
3304 3223 flags = 0;
3305 3224 /*
3306 3225 * Force write back for synchronous write cases.
3307 3226 */
3308 3227 if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3309 3228 /*
3310 3229 * If the sticky bit is set but the
3311 3230 * execute bit is not set, we do a
3312 3231 * synchronous write back and free
3313 3232 * the page when done. We set up swap
3314 3233 * files to be handled this way to
3315 3234 * prevent servers from keeping around
3316 3235 * the client's swap pages too long.
3317 3236 * XXX - there ought to be a better way.
3318 3237 */
3319 3238 if (IS_SWAPVP(vp)) {
3320 3239 flags = SM_WRITE | SM_FREE |
3321 3240 SM_DONTNEED;
3322 3241 iupdat_flag = 0;
3323 3242 } else {
3324 3243 flags = SM_WRITE;
3325 3244 }
3326 3245 } else if (((mapon + n) == MAXBSIZE) ||
3327 3246 IS_SWAPVP(vp)) {
3328 3247 /*
3329 3248 * Have written a whole block.
3330 3249 * Start an asynchronous write and
3331 3250 * mark the buffer to indicate that
3332 3251 * it won't be needed again soon.
3333 3252 */
3334 3253 flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3335 3254 }
3336 3255 error = segmap_release(segkmap, base, flags);
3337 3256
3338 3257 /*
3339 3258 * If the operation failed and is synchronous,
3340 3259 * then we need to unwind what uiomove() last
3341 3260 * did so we can potentially return an error to
3342 3261 * the caller. If this write operation was
3343 3262 * done in two pieces and the first succeeded,
3344 3263 * then we won't return an error for the second
3345 3264 * piece that failed. However, we only want to
3346 3265 * return a resid value that reflects what was
3347 3266 * really done.
3348 3267 *
3349 3268 * Failures for non-synchronous operations can
3350 3269 * be ignored since the page subsystem will
3351 3270 * retry the operation until it succeeds or the
3352 3271 * file system is unmounted.
3353 3272 */
3354 3273 if (error) {
3355 3274 if ((ioflag & (FSYNC | FDSYNC)) ||
3356 3275 ip->i_type == VDIR) {
3357 3276 uio->uio_resid = premove_resid;
3358 3277 } else {
3359 3278 error = 0;
3360 3279 }
3361 3280 }
3362 3281 }
3363 3282
3364 3283 /*
3365 3284 * Re-acquire contents lock.
3366 3285 */
3367 3286 rw_enter(&ip->i_contents, RW_WRITER);
3368 3287 /*
3369 3288 * If the uiomove() failed or if a synchronous
3370 3289 * page push failed, fix up i_size.
3371 3290 */
3372 3291 if (error) {
3373 3292 if (i_size_changed) {
3374 3293 /*
3375 3294 * The uiomove failed, and we
3376 3295 * allocated blocks,so get rid
3377 3296 * of them.
3378 3297 */
3379 3298 (void) ud_itrunc(ip, old_i_size, 0, cr);
3380 3299 }
3381 3300 } else {
3382 3301 /*
3383 3302 * XXX - Can this be out of the loop?
3384 3303 */
3385 3304 ip->i_flag |= IUPD | ICHG;
3386 3305 if (i_size_changed) {
3387 3306 ip->i_flag |= IATTCHG;
3388 3307 }
3389 3308 if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3390 3309 (IEXEC >> 10))) != 0 &&
3391 3310 (ip->i_char & (ISUID | ISGID)) != 0 &&
3392 3311 secpolicy_vnode_setid_retain(cr,
3393 3312 (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3394 3313 /*
3395 3314 * Clear Set-UID & Set-GID bits on
3396 3315 * successful write if not privileged
3397 3316 * and at least one of the execute bits
3398 3317 * is set. If we always clear Set-GID,
3399 3318 * mandatory file and record locking is
3400 3319 * unuseable.
3401 3320 */
3402 3321 ip->i_char &= ~(ISUID | ISGID);
3403 3322 }
3404 3323 }
3405 3324 } while (error == 0 && uio->uio_resid > 0 && n != 0);
3406 3325
3407 3326 out:
3408 3327 /*
3409 3328 * Inode is updated according to this table -
3410 3329 *
3411 3330 * FSYNC FDSYNC(posix.4)
3412 3331 * --------------------------
3413 3332 * always@ IATTCHG|IBDWRITE
3414 3333 *
3415 3334 * @ - If we are doing synchronous write the only time we should
3416 3335 * not be sync'ing the ip here is if we have the stickyhack
3417 3336 * activated, the file is marked with the sticky bit and
3418 3337 * no exec bit, the file length has not been changed and
3419 3338 * no new blocks have been allocated during this write.
3420 3339 */
3421 3340 if ((ip->i_flag & ISYNC) != 0) {
3422 3341 /*
3423 3342 * we have eliminated nosync
3424 3343 */
3425 3344 if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3426 3345 ((ioflag & FSYNC) && iupdat_flag)) {
3427 3346 ud_iupdat(ip, 1);
3428 3347 }
3429 3348 }
3430 3349
3431 3350 /*
3432 3351 * If we've already done a partial-write, terminate
3433 3352 * the write but return no error.
3434 3353 */
3435 3354 if (start_resid != uio->uio_resid) {
3436 3355 error = 0;
3437 3356 }
3438 3357 ip->i_flag &= ~(INOACC | ISYNC);
3439 3358 ITIMES_NOLOCK(ip);
3440 3359
3441 3360 return (error);
3442 3361 }
3443 3362
3444 3363 int32_t
3445 3364 ud_multi_strat(struct ud_inode *ip,
3446 3365 page_t *pp, struct buf *bp, u_offset_t start)
3447 3366 {
3448 3367 daddr_t bn;
3449 3368 int32_t error = 0, io_count, contig, alloc_sz, i;
3450 3369 uint32_t io_off;
3451 3370 mio_master_t *mm = NULL;
3452 3371 mio_slave_t *ms = NULL;
3453 3372 struct buf *rbp;
3454 3373
3455 3374 ASSERT(!(start & PAGEOFFSET));
3456 3375
3457 3376 /*
3458 3377 * Figure out how many buffers to allocate
3459 3378 */
3460 3379 io_count = 0;
3461 3380 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3462 3381 contig = 0;
3463 3382 if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3464 3383 &bn, &contig)) {
3465 3384 goto end;
3466 3385 }
3467 3386 if (contig == 0) {
3468 3387 goto end;
3469 3388 }
3470 3389 contig = MIN(contig, PAGESIZE - io_off);
3471 3390 if (bn != UDF_HOLE) {
3472 3391 io_count ++;
3473 3392 } else {
3474 3393 /*
3475 3394 * HOLE
3476 3395 */
3477 3396 if (bp->b_flags & B_READ) {
3478 3397
3479 3398 /*
3480 3399 * This is a hole and is read
3481 3400 * it should be filled with 0's
3482 3401 */
3483 3402 pagezero(pp, io_off, contig);
3484 3403 }
3485 3404 }
3486 3405 }
3487 3406
3488 3407
3489 3408 if (io_count != 0) {
3490 3409
3491 3410 /*
3492 3411 * Allocate memory for all the
3493 3412 * required number of buffers
3494 3413 */
3495 3414 alloc_sz = sizeof (mio_master_t) +
3496 3415 (sizeof (mio_slave_t) * io_count);
3497 3416 mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3498 3417 if (mm == NULL) {
3499 3418 error = ENOMEM;
3500 3419 goto end;
3501 3420 }
3502 3421
3503 3422 /*
3504 3423 * initialize master
3505 3424 */
3506 3425 mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3507 3426 mm->mm_size = alloc_sz;
3508 3427 mm->mm_bp = bp;
3509 3428 mm->mm_resid = 0;
3510 3429 mm->mm_error = 0;
3511 3430 mm->mm_index = master_index++;
3512 3431
3513 3432 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3514 3433
3515 3434 /*
3516 3435 * Initialize buffers
3517 3436 */
3518 3437 io_count = 0;
3519 3438 for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3520 3439 contig = 0;
3521 3440 if (error = ud_bmap_read(ip,
3522 3441 (u_offset_t)(start + io_off),
3523 3442 &bn, &contig)) {
3524 3443 goto end;
3525 3444 }
3526 3445 ASSERT(contig);
3527 3446 if ((io_off + contig) > bp->b_bcount) {
3528 3447 contig = bp->b_bcount - io_off;
3529 3448 }
3530 3449 if (bn != UDF_HOLE) {
3531 3450 /*
3532 3451 * Clone the buffer
3533 3452 * and prepare to start I/O
3534 3453 */
3535 3454 ms->ms_ptr = mm;
3536 3455 bioinit(&ms->ms_buf);
3537 3456 rbp = bioclone(bp, io_off, (size_t)contig,
3538 3457 bp->b_edev, bn, ud_slave_done,
3539 3458 &ms->ms_buf, KM_NOSLEEP);
3540 3459 ASSERT(rbp == &ms->ms_buf);
3541 3460 mm->mm_resid += contig;
3542 3461 io_count++;
3543 3462 ms ++;
3544 3463 }
3545 3464 }
3546 3465
3547 3466 /*
3548 3467 * Start I/O's
3549 3468 */
3550 3469 ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3551 3470 for (i = 0; i < io_count; i++) {
3552 3471 (void) bdev_strategy(&ms->ms_buf);
3553 3472 ms ++;
3554 3473 }
3555 3474 }
3556 3475
3557 3476 end:
3558 3477 if (error != 0) {
3559 3478 bp->b_flags |= B_ERROR;
3560 3479 bp->b_error = error;
3561 3480 if (mm != NULL) {
3562 3481 mutex_destroy(&mm->mm_mutex);
3563 3482 kmem_free(mm, mm->mm_size);
3564 3483 }
3565 3484 }
3566 3485 return (error);
3567 3486 }
3568 3487
/*
 * I/O completion routine (b_iodone callback) for the slave buffers
 * cloned by ud_multi_strat().  Folds this slave's error and byte count
 * into the shared master structure; the slave that brings the
 * outstanding byte count (mm_resid) to zero completes the original
 * buffer and frees the master.
 *
 * Always returns 0.
 */
int32_t
ud_slave_done(struct buf *bp)
{
	mio_master_t *mm;
	int32_t resid;

	ASSERT(SEMA_HELD(&bp->b_sem));
	ASSERT((bp->b_flags & B_DONE) == 0);

	/*
	 * Recover the slave (and through it, the master) from the
	 * buffer pointer.  NOTE(review): this cast assumes ms_buf is
	 * the first member of mio_slave_t — confirm against the
	 * structure definition.
	 */
	mm = ((mio_slave_t *)bp)->ms_ptr;

	/*
	 * Propagate error and byte count info from slave struct to
	 * the master struct
	 */
	mutex_enter(&mm->mm_mutex);
	if (bp->b_flags & B_ERROR) {

		/*
		 * If multiple slave buffers get
		 * error we forget the old errors
		 * this is ok because we any way
		 * cannot return multiple errors
		 */
		mm->mm_error = bp->b_error;
	}
	mm->mm_resid -= bp->b_bcount;
	resid = mm->mm_resid;
	mutex_exit(&mm->mm_mutex);

	/*
	 * free up the resources allocated to cloned buffers.
	 */
	bp_mapout(bp);
	biofini(bp);

	if (resid == 0) {

		/*
		 * This is the last I/O operation
		 * clean up and return the original buffer
		 *
		 * mm_resid reached zero under the mutex above, so no
		 * other slave still references mm; it is therefore
		 * safe to read mm_error without the lock and to tear
		 * the master down here.
		 */
		if (mm->mm_error) {
			mm->mm_bp->b_flags |= B_ERROR;
			mm->mm_bp->b_error = mm->mm_error;
		}
		biodone(mm->mm_bp);
		mutex_destroy(&mm->mm_mutex);
		kmem_free(mm, mm->mm_size);
	}
	return (0);
}
↓ open down ↓ |
512 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX