illumos-gate Wdiff usr/src/uts/common/fs/udfs/udf_vnops.c

Print this page

7127  remove -Wno-missing-braces from Makefile.uts

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/udfs/udf_vnops.c
          +++ new/usr/src/uts/common/fs/udfs/udf_vnops.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   */
  25   25  
  26   26  /*
  27   27   * Copyright 2015, Joyent, Inc.
  28   28   */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/t_lock.h>
  32   32  #include <sys/param.h>
  33   33  #include <sys/time.h>
  34   34  #include <sys/systm.h>
  35   35  #include <sys/sysmacros.h>
  36   36  #include <sys/resource.h>
  37   37  #include <sys/signal.h>
  38   38  #include <sys/cred.h>
  39   39  #include <sys/user.h>
  40   40  #include <sys/buf.h>
  41   41  #include <sys/vfs.h>
  42   42  #include <sys/vfs_opreg.h>
  43   43  #include <sys/stat.h>
  44   44  #include <sys/vnode.h>
  45   45  #include <sys/mode.h>
  46   46  #include <sys/proc.h>
  47   47  #include <sys/disp.h>
  48   48  #include <sys/file.h>
  49   49  #include <sys/fcntl.h>
  50   50  #include <sys/flock.h>
  51   51  #include <sys/kmem.h>
  52   52  #include <sys/uio.h>
  53   53  #include <sys/dnlc.h>
  54   54  #include <sys/conf.h>
  55   55  #include <sys/errno.h>
  56   56  #include <sys/mman.h>
  57   57  #include <sys/fbuf.h>
  58   58  #include <sys/pathname.h>
  59   59  #include <sys/debug.h>
  60   60  #include <sys/vmsystm.h>
  61   61  #include <sys/cmn_err.h>
  62   62  #include <sys/dirent.h>
  63   63  #include <sys/errno.h>
  64   64  #include <sys/modctl.h>
  65   65  #include <sys/statvfs.h>
  66   66  #include <sys/mount.h>
  67   67  #include <sys/sunddi.h>
  68   68  #include <sys/bootconf.h>
  69   69  #include <sys/policy.h>
  70   70  
  71   71  #include <vm/hat.h>
  72   72  #include <vm/page.h>
  73   73  #include <vm/pvn.h>
  74   74  #include <vm/as.h>
  75   75  #include <vm/seg.h>
  76   76  #include <vm/seg_map.h>
  77   77  #include <vm/seg_kmem.h>
  78   78  #include <vm/seg_vn.h>
  79   79  #include <vm/rm.h>
  80   80  #include <vm/page.h>
  81   81  #include <sys/swap.h>
  82   82  
  83   83  #include <fs/fs_subr.h>
  84   84  
  85   85  #include <sys/fs/udf_volume.h>
  86   86  #include <sys/fs/udf_inode.h>
  87   87  
  88   88  static int32_t udf_open(struct vnode **,
  89   89          int32_t, struct cred *, caller_context_t *);
  90   90  static int32_t udf_close(struct vnode *,
  91   91          int32_t, int32_t, offset_t, struct cred *, caller_context_t *);
  92   92  static int32_t udf_read(struct vnode *,
  93   93          struct uio *, int32_t, struct cred *, caller_context_t *);
  94   94  static int32_t udf_write(struct vnode *,
  95   95          struct uio *, int32_t, struct cred *, caller_context_t *);
  96   96  static int32_t udf_ioctl(struct vnode *,
  97   97          int32_t, intptr_t, int32_t, struct cred *, int32_t *,
  98   98          caller_context_t *);
  99   99  static int32_t udf_getattr(struct vnode *,
 100  100          struct vattr *, int32_t, struct cred *, caller_context_t *);
 101  101  static int32_t udf_setattr(struct vnode *,
 102  102          struct vattr *, int32_t, struct cred *, caller_context_t *);
 103  103  static int32_t udf_access(struct vnode *,
 104  104          int32_t, int32_t, struct cred *, caller_context_t *);
 105  105  static int32_t udf_lookup(struct vnode *,
 106  106          char *, struct vnode **, struct pathname *,
 107  107          int32_t, struct vnode *, struct cred *,
 108  108          caller_context_t *, int *, pathname_t *);
 109  109  static int32_t udf_create(struct vnode *,
 110  110          char *, struct vattr *, enum vcexcl,
 111  111          int32_t, struct vnode **, struct cred *, int32_t,
 112  112          caller_context_t *, vsecattr_t *);
 113  113  static int32_t udf_remove(struct vnode *,
 114  114          char *, struct cred *, caller_context_t *, int);
 115  115  static int32_t udf_link(struct vnode *,
 116  116          struct vnode *, char *, struct cred *, caller_context_t *, int);
 117  117  static int32_t udf_rename(struct vnode *,
 118  118          char *, struct vnode *, char *, struct cred *, caller_context_t *, int);
 119  119  static int32_t udf_mkdir(struct vnode *,
 120  120          char *, struct vattr *, struct vnode **, struct cred *,
 121  121          caller_context_t *, int, vsecattr_t *);
 122  122  static int32_t udf_rmdir(struct vnode *,
 123  123          char *, struct vnode *, struct cred *, caller_context_t *, int);
 124  124  static int32_t udf_readdir(struct vnode *,
 125  125          struct uio *, struct cred *, int32_t *, caller_context_t *, int);
 126  126  static int32_t udf_symlink(struct vnode *,
 127  127          char *, struct vattr *, char *, struct cred *, caller_context_t *, int);
 128  128  static int32_t udf_readlink(struct vnode *,
 129  129          struct uio *, struct cred *, caller_context_t *);
 130  130  static int32_t udf_fsync(struct vnode *,
 131  131          int32_t, struct cred *, caller_context_t *);
 132  132  static void udf_inactive(struct vnode *,
 133  133          struct cred *, caller_context_t *);
 134  134  static int32_t udf_fid(struct vnode *, struct fid *, caller_context_t *);
 135  135  static int udf_rwlock(struct vnode *, int32_t, caller_context_t *);
 136  136  static void udf_rwunlock(struct vnode *, int32_t, caller_context_t *);
 137  137  static int32_t udf_seek(struct vnode *, offset_t, offset_t *,
 138  138          caller_context_t *);
 139  139  static int32_t udf_frlock(struct vnode *, int32_t,
 140  140          struct flock64 *, int32_t, offset_t, struct flk_callback *, cred_t *,
 141  141          caller_context_t *);
 142  142  static int32_t udf_space(struct vnode *, int32_t,
 143  143          struct flock64 *, int32_t, offset_t, cred_t *, caller_context_t *);
 144  144  static int32_t udf_getpage(struct vnode *, offset_t,
 145  145          size_t, uint32_t *, struct page **, size_t,
 146  146          struct seg *, caddr_t, enum seg_rw, struct cred *, caller_context_t *);
 147  147  static int32_t udf_putpage(struct vnode *, offset_t,
 148  148          size_t, int32_t, struct cred *, caller_context_t *);
 149  149  static int32_t udf_map(struct vnode *, offset_t, struct as *,
 150  150          caddr_t *, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 151  151          caller_context_t *);
 152  152  static int32_t udf_addmap(struct vnode *, offset_t, struct as *,
 153  153          caddr_t, size_t, uint8_t, uint8_t, uint32_t, struct cred *,
 154  154          caller_context_t *);
 155  155  static int32_t udf_delmap(struct vnode *, offset_t, struct as *,
 156  156          caddr_t, size_t, uint32_t, uint32_t, uint32_t, struct cred *,
 157  157          caller_context_t *);
 158  158  static int32_t udf_l_pathconf(struct vnode *, int32_t,
 159  159          ulong_t *, struct cred *, caller_context_t *);
 160  160  static int32_t udf_pageio(struct vnode *, struct page *,
 161  161          u_offset_t, size_t, int32_t, struct cred *, caller_context_t *);
 162  162  
 163  163  int32_t ud_getpage_miss(struct vnode *, u_offset_t,
 164  164          size_t, struct seg *, caddr_t, page_t *pl[],
 165  165          size_t, enum seg_rw, int32_t);
 166  166  void ud_getpage_ra(struct vnode *, u_offset_t, struct seg *, caddr_t);
 167  167  int32_t ud_putpages(struct vnode *, offset_t, size_t, int32_t, struct cred *);
 168  168  int32_t ud_page_fill(struct ud_inode *, page_t *,
 169  169          u_offset_t, uint32_t, u_offset_t *);
 170  170  int32_t ud_iodone(struct buf *);
 171  171  int32_t ud_rdip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 172  172  int32_t ud_wrip(struct ud_inode *, struct uio *, int32_t, cred_t *);
 173  173  int32_t ud_multi_strat(struct ud_inode *, page_t *, struct buf *, u_offset_t);
 174  174  int32_t ud_slave_done(struct buf *);
 175  175  
 176  176  /*
 177  177   * Structures to control multiple IO operations to get or put pages
 178  178   * that are backed by discontiguous blocks. The master struct is
 179  179   * a dummy that holds the original bp from pageio_setup. The
 180  180   * slave struct holds the working bp's to do the actual IO. Once
 181  181   * all the slave IOs complete, the master is processed as if a single
 182  182   * IO op has completed.
 183  183   */
 184  184  uint32_t master_index = 0;
 185  185  typedef struct mio_master {
 186  186          kmutex_t        mm_mutex;       /* protect the fields below */
 187  187          int32_t         mm_size;
 188  188          buf_t           *mm_bp;         /* original bp */
 189  189          int32_t         mm_resid;       /* bytes remaining to transfer */
 190  190          int32_t         mm_error;       /* accumulated error from slaves */
 191  191          int32_t         mm_index;       /* XXX debugging */

↓ open down ↓

191 lines elided

↑ open up ↑

 192  192  } mio_master_t;
 193  193  
 194  194  typedef struct mio_slave {
                   /*
                    * Per-chunk record for a multi-part IO.  The buf_t is embedded
                    * by value; ms_ptr refers back to the mio_master_t that this
                    * chunk belongs to.
                    */
 195  195          buf_t           ms_buf;         /* working buffer for this IO chunk */
 196  196          mio_master_t    *ms_ptr;        /* pointer to master */
 197  197  } mio_slave_t;
 198  198  
 199  199  struct vnodeops *udf_vnodeops;
 200  200  
 201  201  const fs_operation_def_t udf_vnodeops_template[] = {
 202      -        VOPNAME_OPEN,           { .vop_open = udf_open },
 203      -        VOPNAME_CLOSE,          { .vop_close = udf_close },
 204      -        VOPNAME_READ,           { .vop_read = udf_read },
 205      -        VOPNAME_WRITE,          { .vop_write = udf_write },
 206      -        VOPNAME_IOCTL,          { .vop_ioctl = udf_ioctl },
 207      -        VOPNAME_GETATTR,        { .vop_getattr = udf_getattr },
 208      -        VOPNAME_SETATTR,        { .vop_setattr = udf_setattr },
 209      -        VOPNAME_ACCESS,         { .vop_access = udf_access },
 210      -        VOPNAME_LOOKUP,         { .vop_lookup = udf_lookup },
 211      -        VOPNAME_CREATE,         { .vop_create = udf_create },
 212      -        VOPNAME_REMOVE,         { .vop_remove = udf_remove },
 213      -        VOPNAME_LINK,           { .vop_link = udf_link },
 214      -        VOPNAME_RENAME,         { .vop_rename = udf_rename },
 215      -        VOPNAME_MKDIR,          { .vop_mkdir = udf_mkdir },
 216      -        VOPNAME_RMDIR,          { .vop_rmdir = udf_rmdir },
 217      -        VOPNAME_READDIR,        { .vop_readdir = udf_readdir },
 218      -        VOPNAME_SYMLINK,        { .vop_symlink = udf_symlink },
 219      -        VOPNAME_READLINK,       { .vop_readlink = udf_readlink },
 220      -        VOPNAME_FSYNC,          { .vop_fsync = udf_fsync },
 221      -        VOPNAME_INACTIVE,       { .vop_inactive = udf_inactive },
 222      -        VOPNAME_FID,            { .vop_fid = udf_fid },
 223      -        VOPNAME_RWLOCK,         { .vop_rwlock = udf_rwlock },
 224      -        VOPNAME_RWUNLOCK,       { .vop_rwunlock = udf_rwunlock },
 225      -        VOPNAME_SEEK,           { .vop_seek = udf_seek },
 226      -        VOPNAME_FRLOCK,         { .vop_frlock = udf_frlock },
 227      -        VOPNAME_SPACE,          { .vop_space = udf_space },
 228      -        VOPNAME_GETPAGE,        { .vop_getpage = udf_getpage },
 229      -        VOPNAME_PUTPAGE,        { .vop_putpage = udf_putpage },
 230      -        VOPNAME_MAP,            { .vop_map = udf_map },
 231      -        VOPNAME_ADDMAP,         { .vop_addmap = udf_addmap },
 232      -        VOPNAME_DELMAP,         { .vop_delmap = udf_delmap },
 233      -        VOPNAME_PATHCONF,       { .vop_pathconf = udf_l_pathconf },
 234      -        VOPNAME_PAGEIO,         { .vop_pageio = udf_pageio },
 235      -        VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 236      -        NULL,                   NULL
      202 +        { VOPNAME_OPEN,         { .vop_open = udf_open } },
      203 +        { VOPNAME_CLOSE,        { .vop_close = udf_close } },
      204 +        { VOPNAME_READ,         { .vop_read = udf_read } },
      205 +        { VOPNAME_WRITE,        { .vop_write = udf_write } },
      206 +        { VOPNAME_IOCTL,        { .vop_ioctl = udf_ioctl } },
      207 +        { VOPNAME_GETATTR,      { .vop_getattr = udf_getattr } },
      208 +        { VOPNAME_SETATTR,      { .vop_setattr = udf_setattr } },
      209 +        { VOPNAME_ACCESS,       { .vop_access = udf_access } },
      210 +        { VOPNAME_LOOKUP,       { .vop_lookup = udf_lookup } },
      211 +        { VOPNAME_CREATE,       { .vop_create = udf_create } },
      212 +        { VOPNAME_REMOVE,       { .vop_remove = udf_remove } },
      213 +        { VOPNAME_LINK,         { .vop_link = udf_link } },
      214 +        { VOPNAME_RENAME,       { .vop_rename = udf_rename } },
      215 +        { VOPNAME_MKDIR,        { .vop_mkdir = udf_mkdir } },
      216 +        { VOPNAME_RMDIR,        { .vop_rmdir = udf_rmdir } },
      217 +        { VOPNAME_READDIR,      { .vop_readdir = udf_readdir } },
      218 +        { VOPNAME_SYMLINK,      { .vop_symlink = udf_symlink } },
      219 +        { VOPNAME_READLINK,     { .vop_readlink = udf_readlink } },
      220 +        { VOPNAME_FSYNC,        { .vop_fsync = udf_fsync } },
      221 +        { VOPNAME_INACTIVE,     { .vop_inactive = udf_inactive } },
      222 +        { VOPNAME_FID,          { .vop_fid = udf_fid } },
      223 +        { VOPNAME_RWLOCK,       { .vop_rwlock = udf_rwlock } },
      224 +        { VOPNAME_RWUNLOCK,     { .vop_rwunlock = udf_rwunlock } },
      225 +        { VOPNAME_SEEK,         { .vop_seek = udf_seek } },
      226 +        { VOPNAME_FRLOCK,       { .vop_frlock = udf_frlock } },
      227 +        { VOPNAME_SPACE,        { .vop_space = udf_space } },
      228 +        { VOPNAME_GETPAGE,      { .vop_getpage = udf_getpage } },
      229 +        { VOPNAME_PUTPAGE,      { .vop_putpage = udf_putpage } },
      230 +        { VOPNAME_MAP,          { .vop_map = udf_map } },
      231 +        { VOPNAME_ADDMAP,       { .vop_addmap = udf_addmap } },
      232 +        { VOPNAME_DELMAP,       { .vop_delmap = udf_delmap } },
      233 +        { VOPNAME_PATHCONF,     { .vop_pathconf = udf_l_pathconf } },
      234 +        { VOPNAME_PAGEIO,       { .vop_pageio = udf_pageio } },
      235 +        { VOPNAME_VNEVENT,      { .vop_vnevent = fs_vnevent_support } },
      236 +        { NULL,                 { NULL } }
 237  237  };
 238  238  
 239  239  /* ARGSUSED */
           /*
            * Open vnode operation (installed as .vop_open in
            * udf_vnodeops_template).  Nothing is done at open time beyond the
            * ud_printf() call; every argument is ignored and the function
            * always returns 0.
            */
 240  240  static int32_t
 241  241  udf_open(
 242  242          struct vnode **vpp,
 243  243          int32_t flag,
 244  244          struct cred *cr,
 245  245          caller_context_t *ct)
 246  246  {

 247  247          ud_printf("udf_open\n");
 248  248  
 249  249          return (0);
 250  250  }
 251  251  
 252  252  /* ARGSUSED */
           /*
            * Close vnode operation (installed as .vop_close).
            *
            * Applies ITIMES() to the inode, calls cleanlocks() and cleanshares()
            * with the pid of the current thread's process, and, when the vnode
            * appears to be at its last close, pushes the delayed-write range
            * recorded in i_delayoff/i_delaylen.
            *
            * flag, count, offset and ct are not used.  Always returns 0; the
            * ud_putpages() result is explicitly discarded.
            */
 253  253  static int32_t
 254  254  udf_close(
 255  255          struct vnode *vp,
 256  256          int32_t flag,
 257  257          int32_t count,
 258  258          offset_t offset,
 259  259          struct cred *cr,
 260  260          caller_context_t *ct)
 261  261  {
 262  262          struct ud_inode *ip = VTOI(vp);
 263  263  
 264  264          ud_printf("udf_close\n");
 265  265  
 266  266          ITIMES(ip);
 267  267  
 268  268          cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
 269  269          cleanshares(vp, ttoproc(curthread)->p_pid);
 270  270  
 271  271          /*
 272  272           * Push partially filled cluster at last close.
 273  273           * ``last close'' is approximated because the dnlc
 274  274           * may have a hold on the vnode.
 275  275           */
 276  276          if (vp->v_count <= 2 && vp->v_type != VBAD) {
                           /* Shadows the outer ip; both are VTOI(vp). */
 277  277                  struct ud_inode *ip = VTOI(vp);
 278  278                  if (ip->i_delaylen) {
                                   /* Asynchronous push; pages are freed (B_FREE). */
 279  279                          (void) ud_putpages(vp, ip->i_delayoff, ip->i_delaylen,
 280  280                              B_ASYNC | B_FREE, cr);
                                   /* Cleared regardless of the ud_putpages() result. */
 281  281                          ip->i_delaylen = 0;
 282  282                  }
 283  283          }
 284  284  
 285  285          return (0);
 286  286  }
 287  287  
 288  288  /* ARGSUSED */
           /*
            * Read vnode operation (installed as .vop_read).
            *
            * The caller is expected to hold ip->i_rwlock as reader already; this
            * is asserted below, and the rw_enter()/rw_exit() calls on i_rwlock
            * are compiled only under __lock_lint.  If MANDLOCK() is true for the
            * file, chklock() is consulted first and a nonzero result is returned
            * without reading.  The transfer itself is done by ud_rdip() with
            * i_contents held as reader.
            *
            * Returns the nonzero chklock() error, otherwise the ud_rdip() result.
            */
 289  289  static int32_t
 290  290  udf_read(
 291  291          struct vnode *vp,
 292  292          struct uio *uiop,
 293  293          int32_t ioflag,
 294  294          struct cred *cr,
 295  295          caller_context_t *ct)
 296  296  {
 297  297          struct ud_inode *ip = VTOI(vp);
 298  298          int32_t error;
 299  299  
 300  300          ud_printf("udf_read\n");
 301  301  
 302  302  #ifdef  __lock_lint
 303  303          rw_enter(&ip->i_rwlock, RW_READER);
 304  304  #endif
 305  305  
 306  306          ASSERT(RW_READ_HELD(&ip->i_rwlock));
 307  307  
 308  308          if (MANDLOCK(vp, ip->i_char)) {
 309  309                  /*
 310  310                   * udf_getattr ends up being called by chklock
 311  311                   */
 312  312                  error = chklock(vp, FREAD, uiop->uio_loffset,
 313  313                      uiop->uio_resid, uiop->uio_fmode, ct);
 314  314                  if (error) {
 315  315                          goto end;
 316  316                  }
 317  317          }
 318  318  
                   /* error is always assigned here before reaching "end". */
 319  319          rw_enter(&ip->i_contents, RW_READER);
 320  320          error = ud_rdip(ip, uiop, ioflag, cr);
 321  321          rw_exit(&ip->i_contents);
 322  322  
 323  323  end:
 324  324  #ifdef  __lock_lint
 325  325          rw_exit(&ip->i_rwlock);
 326  326  #endif
 327  327  
 328  328          return (error);
 329  329  }
 330  330  
 331  331  
 332  332  int32_t ud_WRITES = 1;          /* nonzero: udf_write() throttles writers */
 333  333  int32_t ud_HW = 96 * 1024;      /* udf_write() waits while i_writes > ud_HW */
 334  334  int32_t ud_LW = 64 * 1024;      /* not used in udf_write(); presumably the low-water mark -- confirm */
 335  335  int32_t ud_throttles = 0;       /* incremented before each cv_wait() in udf_write() */
 336  336  
 337  337  /* ARGSUSED */
           /*
            * Write vnode operation (installed as .vop_write).
            *
            * The caller is expected to hold ip->i_rwlock as writer already; this
            * is asserted below, and the rw_enter()/rw_exit() calls on i_rwlock
            * are compiled only under __lock_lint.  Steps, in order:
            *   1. If MANDLOCK() is true, chklock() is called; a nonzero result
            *      is returned without writing.
            *   2. If ud_WRITES is nonzero, the caller waits on i_wrcv (under
            *      i_tlock) for as long as i_writes exceeds ud_HW.
            *   3. With i_contents held as writer, ud_wrip() performs the write;
            *      for FAPPEND on a VREG inode the offset is first set to i_size.
            *
            * Returns the nonzero chklock() error, otherwise the ud_wrip() result.
            */
 338  338  static int32_t
 339  339  udf_write(
 340  340          struct vnode *vp,
 341  341          struct uio *uiop,
 342  342          int32_t ioflag,
 343  343          struct cred *cr,
 344  344          caller_context_t *ct)
 345  345  {
 346  346          struct ud_inode *ip = VTOI(vp);
 347  347          int32_t error = 0;
 348  348  
 349  349          ud_printf("udf_write\n");
 350  350  
 351  351  #ifdef  __lock_lint
 352  352          rw_enter(&ip->i_rwlock, RW_WRITER);
 353  353  #endif
 354  354  
 355  355          ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
 356  356  
 357  357          if (MANDLOCK(vp, ip->i_char)) {
 358  358                  /*
 359  359                   * udf_getattr ends up being called by chklock
 360  360                   */
 361  361                  error = chklock(vp, FWRITE, uiop->uio_loffset,
 362  362                      uiop->uio_resid, uiop->uio_fmode, ct);
 363  363                  if (error) {
 364  364                          goto end;
 365  365                  }
 366  366          }
 367  367          /*
 368  368           * Throttle writes.
 369  369           */
 370  370          mutex_enter(&ip->i_tlock);
                   /*
                    * ud_WRITES is tested once here; the wait loop itself only
                    * re-checks i_writes against ud_HW.
                    */
 371  371          if (ud_WRITES && (ip->i_writes > ud_HW)) {
 372  372                  while (ip->i_writes > ud_HW) {
 373  373                          ud_throttles++;
 374  374                          cv_wait(&ip->i_wrcv, &ip->i_tlock);
 375  375                  }
 376  376          }
 377  377          mutex_exit(&ip->i_tlock);
 378  378  
 379  379          /*
 380  380           * Write to the file
 381  381           */
 382  382          rw_enter(&ip->i_contents, RW_WRITER);
 383  383          if ((ioflag & FAPPEND) != 0 && (ip->i_type == VREG)) {
 384  384                  /*
 385  385                   * In append mode start at end of file.
 386  386                   */
 387  387                  uiop->uio_loffset = ip->i_size;
 388  388          }
 389  389          error = ud_wrip(ip, uiop, ioflag, cr);
 390  390          rw_exit(&ip->i_contents);
 391  391  
 392  392  end:
 393  393  #ifdef  __lock_lint
 394  394          rw_exit(&ip->i_rwlock);
 395  395  #endif
 396  396  
 397  397          return (error);
 398  398  }
 399  399  
 400  400  /* ARGSUSED */
           /*
            * Ioctl vnode operation (installed as .vop_ioctl).  No commands are
            * handled: every argument is ignored and ENOTTY is always returned.
            */
 401  401  static int32_t
 402  402  udf_ioctl(
 403  403          struct vnode *vp,
 404  404          int32_t cmd,
 405  405          intptr_t arg,
 406  406          int32_t flag,
 407  407          struct cred *cr,
 408  408          int32_t *rvalp,
 409  409          caller_context_t *ct)
 410  410  {
 411  411          return (ENOTTY);
 412  412  }
 413  413  
 414  414  /* ARGSUSED */
           /*
            * Getattr vnode operation (installed as .vop_getattr): fill *vap from
            * the in-core inode behind vp.
            *
            * When va_mask is exactly AT_SIZE only va_size is stored, and it is
            * read without taking any inode lock.  In every other case all of
            * the fields below are filled in (va_mask is not consulted again)
            * with i_contents held as reader.
            *
            * flags, cr and ct are not used.  Always returns 0.
            */
 415  415  static int32_t
 416  416  udf_getattr(
 417  417          struct vnode *vp,
 418  418          struct vattr *vap,
 419  419          int32_t flags,
 420  420          struct cred *cr,
 421  421          caller_context_t *ct)
 422  422  {
 423  423          struct ud_inode *ip = VTOI(vp);
 424  424  
 425  425          ud_printf("udf_getattr\n");
 426  426  
 427  427          if (vap->va_mask == AT_SIZE) {
 428  428                  /*
 429  429                   * for performance, if only the size is requested don't bother
 430  430                   * with anything else.
 431  431                   */
 432  432                  vap->va_size = ip->i_size;
 433  433                  return (0);
 434  434          }
 435  435  
 436  436          rw_enter(&ip->i_contents, RW_READER);
 437  437  
 438  438          vap->va_type = vp->v_type;
                   /* Permission bits converted by UD2VA_PERM(), OR'ed with i_char. */
 439  439          vap->va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 440  440  
 441  441          vap->va_uid = ip->i_uid;
 442  442          vap->va_gid = ip->i_gid;
 443  443          vap->va_fsid = ip->i_dev;
 444  444          vap->va_nodeid = ip->i_icb_lbano;
 445  445          vap->va_nlink = ip->i_nlink;
 446  446          vap->va_size = ip->i_size;
 447  447          vap->va_seq = ip->i_seq;
                   /* va_rdev is reported only for character and block devices. */
 448  448          if (vp->v_type == VCHR || vp->v_type == VBLK) {
 449  449                  vap->va_rdev = ip->i_rdev;
 450  450          } else {
 451  451                  vap->va_rdev = 0;
 452  452          }
 453  453  
                   /* i_tlock is held across the time update and the copies. */
 454  454          mutex_enter(&ip->i_tlock);
 455  455          ITIMES_NOLOCK(ip);      /* mark correct time in inode */
 456  456          vap->va_atime.tv_sec = (time_t)ip->i_atime.tv_sec;
 457  457          vap->va_atime.tv_nsec = ip->i_atime.tv_nsec;
 458  458          vap->va_mtime.tv_sec = (time_t)ip->i_mtime.tv_sec;
 459  459          vap->va_mtime.tv_nsec = ip->i_mtime.tv_nsec;
 460  460          vap->va_ctime.tv_sec = (time_t)ip->i_ctime.tv_sec;
 461  461          vap->va_ctime.tv_nsec = ip->i_ctime.tv_nsec;
 462  462          mutex_exit(&ip->i_tlock);
 463  463  
                   /* Devices report MAXBSIZE; everything else the udf_lbsize value. */
 464  464          switch (ip->i_type) {
 465  465                  case VBLK:
 466  466                          vap->va_blksize = MAXBSIZE;
 467  467                          break;
 468  468                  case VCHR:
 469  469                          vap->va_blksize = MAXBSIZE;
 470  470                          break;
 471  471                  default:
 472  472                          vap->va_blksize = ip->i_udf->udf_lbsize;
 473  473                          break;
 474  474          }
                   /*
                    * NOTE(review): i_lbr scaled by udf_l2d_shift -- presumably a
                    * logical-block to disk-block conversion; confirm the units.
                    */
 475  475          vap->va_nblocks = ip->i_lbr << ip->i_udf->udf_l2d_shift;
 476  476  
 477  477          rw_exit(&ip->i_contents);
 478  478  
 479  479          return (0);
 480  480  }
 481  481  
 482  482  static int
           /*
            * Access-check callback handed to secpolicy_vnode_setattr() by
            * udf_setattr().  ip is passed as an opaque pointer; mode is
            * converted with UD_UPERM2DPERM() and forwarded to ud_iaccess() with
            * a final argument of 0.  Returns the ud_iaccess() result.
            */
 483  483  ud_iaccess_vmode(void *ip, int mode, struct cred *cr)
 484  484  {
 485  485          return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 0));
 486  486  }
 487  487  
 488  488  /*ARGSUSED4*/
           /*
            * Setattr vnode operation (installed as .vop_setattr): apply the
            * attributes selected by vap->va_mask to the inode behind vp.
            *
            * Returns EINVAL, before taking any lock, if the inode uses
            * STRAT_TYPE4096 or if va_mask contains any AT_NOSET bit.  Otherwise
            * i_rwlock and then i_contents are taken as writer, the request is
            * vetted by secpolicy_vnode_setattr() (using ud_iaccess_vmode() as
            * the access callback), and the mode, owner/group, size and
            * access/modify times are applied in that order.
            *
            * The first failing step jumps to update_inode; changes made by
            * earlier steps are not undone.  The update_inode code runs on both
            * the success and the failure path.
            *
            * Returns 0 on success, otherwise EINVAL, EISDIR, EFBIG or the error
            * from secpolicy_vnode_setattr(), ud_iaccess() or ud_itrunc().
            */
 489  489  static int32_t
 490  490  udf_setattr(
 491  491          struct vnode *vp,
 492  492          struct vattr *vap,
 493  493          int32_t flags,
 494  494          struct cred *cr,
 495  495          caller_context_t *ct)
 496  496  {
 497  497          int32_t error = 0;
 498  498          uint32_t mask = vap->va_mask;
 499  499          struct ud_inode *ip;
 500  500          timestruc_t now;
 501  501          struct vattr ovap;
 502  502  
 503  503          ud_printf("udf_setattr\n");
 504  504  
 505  505          ip = VTOI(vp);
 506  506  
 507  507          /*
 508  508           * no updates are allowed to STRAT_TYPE4096 files
 509  509           */
 510  510          if (ip->i_astrat == STRAT_TYPE4096) {
 511  511                  return (EINVAL);
 512  512          }
 513  513  
 514  514          /*
 515  515           * Cannot set these attributes
 516  516           */
 517  517          if (mask & AT_NOSET) {
 518  518                  return (EINVAL);
 519  519          }
 520  520  
 521  521          rw_enter(&ip->i_rwlock, RW_WRITER);
 522  522          rw_enter(&ip->i_contents, RW_WRITER);
 523  523  
                   /* Only the current uid and mode are supplied in ovap. */
 524  524          ovap.va_uid = ip->i_uid;
 525  525          ovap.va_mode = UD2VA_PERM(ip->i_perm) | ip->i_char;
 526  526          error = secpolicy_vnode_setattr(cr, vp, vap, &ovap, flags,
 527  527              ud_iaccess_vmode, ip);
 528  528          if (error)
 529  529                  goto update_inode;
 530  530  
                   /*
                    * Re-read the mask after the policy check -- presumably because
                    * secpolicy_vnode_setattr() may alter vap->va_mask; confirm.
                    */
 531  531          mask = vap->va_mask;
 532  532          /*
 533  533           * Change file access modes.
 534  534           */
 535  535          if (mask & AT_MODE) {
 536  536                  ip->i_perm = VA2UD_PERM(vap->va_mode);
 537  537                  ip->i_char = vap->va_mode & (VSUID | VSGID | VSVTX);
 538  538                  mutex_enter(&ip->i_tlock);
 539  539                  ip->i_flag |= ICHG;
 540  540                  mutex_exit(&ip->i_tlock);
 541  541          }
                   /* Change owner and/or group; ICHG is set if either is changed. */
 542  542          if (mask & (AT_UID|AT_GID)) {
 543  543                  if (mask & AT_UID) {
 544  544                          ip->i_uid = vap->va_uid;
 545  545                  }
 546  546                  if (mask & AT_GID) {
 547  547                          ip->i_gid = vap->va_gid;
 548  548                  }
 549  549                  mutex_enter(&ip->i_tlock);
 550  550                  ip->i_flag |= ICHG;
 551  551                  mutex_exit(&ip->i_tlock);
 552  552          }
 553  553          /*
 554  554           * Truncate file.  Must have write permission and not be a directory.
 555  555           */
 556  556          if (mask & AT_SIZE) {
 557  557                  if (vp->v_type == VDIR) {
 558  558                          error = EISDIR;
 559  559                          goto update_inode;
 560  560                  }
 561  561                  if (error = ud_iaccess(ip, IWRITE, cr, 0)) {
 562  562                          goto update_inode;
 563  563                  }
 564  564                  if (vap->va_size > MAXOFFSET_T) {
 565  565                          error = EFBIG;
 566  566                          goto update_inode;
 567  567                  }
 568  568                  if (error = ud_itrunc(ip, vap->va_size, 0, cr)) {
 569  569                          goto update_inode;
 570  570                  }
 571  571  
                           /* A truncate event is raised only for truncation to zero. */
 572  572                  if (vap->va_size == 0)
 573  573                          vnevent_truncate(vp, ct);
 574  574          }
 575  575          /*
 576  576           * Change file access or modified times.
 577  577           */
 578  578          if (mask & (AT_ATIME|AT_MTIME)) {
 579  579                  mutex_enter(&ip->i_tlock);
 580  580                  if (mask & AT_ATIME) {
 581  581                          ip->i_atime.tv_sec = vap->va_atime.tv_sec;
 582  582                          ip->i_atime.tv_nsec = vap->va_atime.tv_nsec;
 583  583                          ip->i_flag &= ~IACC;
 584  584                  }
 585  585                  if (mask & AT_MTIME) {
 586  586                          ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
 587  587                          ip->i_mtime.tv_nsec = vap->va_mtime.tv_nsec;
                                   /* ctime is set to the current time, not a caller value. */
 588  588                          gethrestime(&now);
 589  589                          ip->i_ctime.tv_sec = now.tv_sec;
 590  590                          ip->i_ctime.tv_nsec = now.tv_nsec;
 591  591                          ip->i_flag &= ~(IUPD|ICHG);
 592  592                          ip->i_flag |= IMODTIME;
 593  593                  }
 594  594                  ip->i_flag |= IMOD;
 595  595                  mutex_exit(&ip->i_tlock);
 596  596          }
 597  597  
 598  598  update_inode:
                   /*
                    * Reached on success and on error.  With T_DONTPEND set on the
                    * current thread ud_iupdat(ip, 1) is called; otherwise only
                    * ITIMES_NOLOCK() is applied.
                    */
 599  599          if (curthread->t_flag & T_DONTPEND) {
 600  600                  ud_iupdat(ip, 1);
 601  601          } else {
 602  602                  ITIMES_NOLOCK(ip);
 603  603          }
 604  604          rw_exit(&ip->i_contents);
 605  605          rw_exit(&ip->i_rwlock);
 606  606  
 607  607          return (error);
 608  608  }
 609  609  
 610  610  /* ARGSUSED */
           /*
            * Access vnode operation (installed as .vop_access).
            *
            * Returns EIO when the inode has no i_udf pointer.  Otherwise mode is
            * converted with UD_UPERM2DPERM() and the ud_iaccess() result is
            * returned (final argument 1).  flags and ct are not used.
            */
 611  611  static int32_t
 612  612  udf_access(
 613  613          struct vnode *vp,
 614  614          int32_t mode,
 615  615          int32_t flags,
 616  616          struct cred *cr,
 617  617          caller_context_t *ct)
 618  618  {
 619  619          struct ud_inode *ip = VTOI(vp);
 620  620  
 621  621          ud_printf("udf_access\n");
 622  622  
 623  623          if (ip->i_udf == NULL) {
 624  624                  return (EIO);
 625  625          }
 626  626  
 627  627          return (ud_iaccess(ip, UD_UPERM2DPERM(mode), cr, 1));
 628  628  }
 629  629  
 630  630  int32_t udfs_stickyhack = 1;    /* nonzero: udf_lookup() may set VISSWAP (sticky, non-exec, non-dir) */
 631  631  
 632  632  /* ARGSUSED */
           /*
            * Lookup vnode operation (installed as .vop_lookup): find the entry
            * nm in directory dvp and return its vnode through *vpp.
            *
            * An empty name yields dvp itself with an added hold.  Otherwise the
            * DNLC is tried first (followed by an IEXEC check on the directory
            * via ud_iaccess()), and on a miss ud_dirlook() is called.
            *
            * On success, a non-directory whose i_char has ISVTX set and whose
            * i_perm has IEXEC clear gets VISSWAP set on its vnode when
            * udfs_stickyhack is nonzero, and a device vnode (IS_DEVVP) is
            * replaced by the one returned from specvp().
            *
            * pnp, flags, rdir, ct, direntflags and realpnp are not used.
            * Returns 0 on success, otherwise the error from ud_iaccess() or
            * ud_dirlook(), or ENOSYS if specvp() returns NULL.
            */
 633  633  static int32_t
 634  634  udf_lookup(
 635  635          struct vnode *dvp,
 636  636          char *nm,
 637  637          struct vnode **vpp,
 638  638          struct pathname *pnp,
 639  639          int32_t flags,
 640  640          struct vnode *rdir,
 641  641          struct cred *cr,
 642  642          caller_context_t *ct,
 643  643          int *direntflags,
 644  644          pathname_t *realpnp)
 645  645  {
 646  646          int32_t error;
 647  647          struct vnode *vp;
 648  648          struct ud_inode *ip, *xip;
 649  649  
 650  650          ud_printf("udf_lookup\n");
 651  651          /*
 652  652           * Null component name is a synonym for directory being searched.
 653  653           */
 654  654          if (*nm == '\0') {
 655  655                  VN_HOLD(dvp);
 656  656                  *vpp = dvp;
 657  657                  error = 0;
 658  658                  goto out;
 659  659          }
 660  660  
 661  661          /*
 662  662           * Fast path: Check the directory name lookup cache.
 663  663           */
 664  664          ip = VTOI(dvp);
 665  665          if (vp = dnlc_lookup(dvp, nm)) {
 666  666                  /*
 667  667                   * Check accessibility of directory.
 668  668                   */
 669  669                  if ((error = ud_iaccess(ip, IEXEC, cr, 1)) != 0) {
 670  670                          VN_RELE(vp);
 671  671                  }
                           /*
                            * Assigned even when the access check failed; xip is
                            * only consumed below when error == 0.
                            */
 672  672                  xip = VTOI(vp);
 673  673          } else {
 674  674                  error = ud_dirlook(ip, nm, &xip, cr, 1);
 675  675                  ITIMES(ip);
 676  676          }
 677  677  
 678  678          if (error == 0) {
                           /* From here on ip refers to the entry found, not the directory. */
 679  679                  ip = xip;
 680  680                  *vpp = ITOV(ip);
 681  681                  if ((ip->i_type != VDIR) &&
 682  682                      (ip->i_char & ISVTX) &&
 683  683                      ((ip->i_perm & IEXEC) == 0) &&
 684  684                      udfs_stickyhack) {
 685  685                          mutex_enter(&(*vpp)->v_lock);
 686  686                          (*vpp)->v_flag |= VISSWAP;
 687  687                          mutex_exit(&(*vpp)->v_lock);
 688  688                  }
 689  689                  ITIMES(ip);
 690  690                  /*
 691  691                   * If vnode is a device return special vnode instead.
 692  692                   */
 693  693                  if (IS_DEVVP(*vpp)) {
 694  694                          struct vnode *newvp;
 695  695                          newvp = specvp(*vpp, (*vpp)->v_rdev,
 696  696                              (*vpp)->v_type, cr);
                                   /*
                                    * The original vnode is released in both cases;
                                    * on failure *vpp is not reassigned.
                                    */
 697  697                          VN_RELE(*vpp);
 698  698                          if (newvp == NULL) {
 699  699                                  error = ENOSYS;
 700  700                          } else {
 701  701                                  *vpp = newvp;
 702  702                          }
 703  703                  }
 704  704          }
 705  705  out:
 706  706          return (error);
 707  707  }
 708  708  
/*
 * Create the entry `name' in directory `dvp', or open it if it already
 * exists and the create is non-exclusive.
 *
 * An empty name refers to dvp itself and is handled as an entry that
 * already exists.  Otherwise ud_direnter(DE_CREATE) is called with the
 * directory's i_rwlock held as writer.  An EEXIST result is turned into
 * success for a non-exclusive create once the access checks below pass,
 * and an existing regular file is truncated when the caller asks for
 * size 0.
 *
 * On success 0 is returned and *vpp is the resulting vnode, replaced by a
 * vnode from specvp() when it refers to a device.
 */
/* ARGSUSED */
static int32_t
udf_create(
        struct vnode *dvp,
        char *name,
        struct vattr *vap,
        enum vcexcl excl,
        int32_t mode,
        struct vnode **vpp,
        struct cred *cr,
        int32_t flag,
        caller_context_t *ct,
        vsecattr_t *vsecp)
{
        int32_t error;
        struct ud_inode *ip = VTOI(dvp), *xip;

        ud_printf("udf_create\n");

        /*
         * Silently drop the sticky bit from the requested mode when the
         * caller's credentials do not pass the sticky-modify policy check.
         */
        if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
                vap->va_mode &= ~VSVTX;

        if (*name == '\0') {
                /*
                 * Null component name refers to the directory itself.
                 */
                VN_HOLD(dvp);
                ITIMES(ip);
                error = EEXIST;
        } else {
                xip = NULL;
                rw_enter(&ip->i_rwlock, RW_WRITER);
                error = ud_direnter(ip, name, DE_CREATE,
                    (struct ud_inode *)0, (struct ud_inode *)0,
                    vap, &xip, cr, ct);
                rw_exit(&ip->i_rwlock);
                ITIMES(ip);
                /* From here on ip is the target inode (may be NULL). */
                ip = xip;
        }
#ifdef  __lock_lint
        rw_enter(&ip->i_contents, RW_WRITER);
#else
        if (ip != NULL) {
                rw_enter(&ip->i_contents, RW_WRITER);
        }
#endif

        /*
         * If the file already exists and this is a non-exclusive create,
         * check permissions and allow access for non-directories.
         * Read-only create of an existing directory is also allowed.
         * We fail an exclusive create of anything which already exists.
         */
        if (error == EEXIST) {
                if (excl == NONEXCL) {
                        if ((ip->i_type == VDIR) && (mode & VWRITE)) {
                                error = EISDIR;
                        } else if (mode) {
                                error = ud_iaccess(ip,
                                    UD_UPERM2DPERM(mode), cr, 0);
                        } else {
                                error = 0;
                        }
                }
                if (error) {
                        /* Give back the lock and the hold on the inode. */
                        rw_exit(&ip->i_contents);
                        VN_RELE(ITOV(ip));
                        goto out;
                } else if ((ip->i_type == VREG) &&
                    (vap->va_mask & AT_SIZE) && vap->va_size == 0) {
                        /*
                         * Truncate regular files, if requested by caller.
                         * Grab i_rwlock to make sure no one else is
                         * currently writing to the file (we promised
                         * bmap we would do this).
                         * Must get the locks in the correct order.
                         */
                        if (ip->i_size == 0) {
                                /* Already empty: only mark times. */
                                ip->i_flag |= ICHG | IUPD;
                        } else {
                                /*
                                 * Drop i_contents so that i_rwlock can be
                                 * taken first, then retake i_contents.
                                 */
                                rw_exit(&ip->i_contents);
                                rw_enter(&ip->i_rwlock, RW_WRITER);
                                rw_enter(&ip->i_contents, RW_WRITER);
                                (void) ud_itrunc(ip, 0, 0, cr);
                                rw_exit(&ip->i_rwlock);
                        }
                        vnevent_create(ITOV(ip), ct);
                }
        }

        if (error == 0) {
                *vpp = ITOV(ip);
                ITIMES(ip);
        }
#ifdef  __lock_lint
        rw_exit(&ip->i_contents);
#else
        if (ip != NULL) {
                rw_exit(&ip->i_contents);
        }
#endif
        if (error) {
                goto out;
        }

        /*
         * If vnode is a device return special vnode instead.
         */
        if (!error && IS_DEVVP(*vpp)) {
                struct vnode *newvp;

                newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
                VN_RELE(*vpp);
                if (newvp == NULL) {
                        error = ENOSYS;
                        goto out;
                }
                *vpp = newvp;
        }
out:
        return (error);
}
 831  831  
 832  832  /* ARGSUSED */
 833  833  static int32_t
 834  834  udf_remove(
 835  835          struct vnode *vp,
 836  836          char *nm,
 837  837          struct cred *cr,
 838  838          caller_context_t *ct,
 839  839          int flags)
 840  840  {
 841  841          int32_t error;
 842  842          struct ud_inode *ip = VTOI(vp);
 843  843  
 844  844          ud_printf("udf_remove\n");
 845  845  
 846  846          rw_enter(&ip->i_rwlock, RW_WRITER);
 847  847          error = ud_dirremove(ip, nm,
 848  848              (struct ud_inode *)0, (struct vnode *)0, DR_REMOVE, cr, ct);
 849  849          rw_exit(&ip->i_rwlock);
 850  850          ITIMES(ip);
 851  851  
 852  852          return (error);
 853  853  }
 854  854  
 855  855  /* ARGSUSED */
 856  856  static int32_t
 857  857  udf_link(
 858  858          struct vnode *tdvp,
 859  859          struct vnode *svp,
 860  860          char *tnm,
 861  861          struct cred *cr,
 862  862          caller_context_t *ct,
 863  863          int flags)
 864  864  {
 865  865          int32_t error;
 866  866          struct vnode *realvp;
 867  867          struct ud_inode *sip;
 868  868          struct ud_inode *tdp;
 869  869  
 870  870          ud_printf("udf_link\n");
 871  871          if (VOP_REALVP(svp, &realvp, ct) == 0) {
 872  872                  svp = realvp;
 873  873          }
 874  874  
 875  875          /*
 876  876           * Do not allow links to directories
 877  877           */
 878  878          if (svp->v_type == VDIR) {
 879  879                  return (EPERM);
 880  880          }
 881  881  
 882  882          sip = VTOI(svp);
 883  883  
 884  884          if (sip->i_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
 885  885                  return (EPERM);
 886  886  
 887  887          tdp = VTOI(tdvp);
 888  888  
 889  889          rw_enter(&tdp->i_rwlock, RW_WRITER);
 890  890          error = ud_direnter(tdp, tnm, DE_LINK, (struct ud_inode *)0,
 891  891              sip, (struct vattr *)0, (struct ud_inode **)0, cr, ct);
 892  892          rw_exit(&tdp->i_rwlock);
 893  893          ITIMES(sip);
 894  894          ITIMES(tdp);
 895  895  
 896  896          if (error == 0) {
 897  897                  vnevent_link(svp, ct);
 898  898          }
 899  899  
 900  900          return (error);
 901  901  }
 902  902  
 903  903  /* ARGSUSED */
 904  904  static int32_t
 905  905  udf_rename(
 906  906          struct vnode *sdvp,
 907  907          char *snm,
 908  908          struct vnode *tdvp,
 909  909          char *tnm,
 910  910          struct cred *cr,
 911  911          caller_context_t *ct,
 912  912          int flags)
 913  913  {
 914  914          int32_t error = 0;
 915  915          struct udf_vfs *udf_vfsp;
 916  916          struct ud_inode *sip;           /* source inode */
 917  917          struct ud_inode *tip;           /* target inode */
 918  918          struct ud_inode *sdp, *tdp;     /* source and target parent inode */
 919  919          struct vnode *realvp;
 920  920  
 921  921          ud_printf("udf_rename\n");
 922  922  
 923  923          if (VOP_REALVP(tdvp, &realvp, ct) == 0) {
 924  924                  tdvp = realvp;
 925  925          }
 926  926  
 927  927          sdp = VTOI(sdvp);
 928  928          tdp = VTOI(tdvp);
 929  929  
 930  930          udf_vfsp = sdp->i_udf;
 931  931  
 932  932          mutex_enter(&udf_vfsp->udf_rename_lck);
 933  933          /*
 934  934           * Look up inode of file we're supposed to rename.
 935  935           */
 936  936          if (error = ud_dirlook(sdp, snm, &sip, cr, 0)) {
 937  937                  mutex_exit(&udf_vfsp->udf_rename_lck);
 938  938                  return (error);
 939  939          }
 940  940          /*
 941  941           * be sure this is not a directory with another file system mounted
 942  942           * over it.  If it is just give up the locks, and return with
 943  943           * EBUSY
 944  944           */
 945  945          if (vn_mountedvfs(ITOV(sip)) != NULL) {
 946  946                  error = EBUSY;
 947  947                  goto errout;
 948  948          }
 949  949          /*
 950  950           * Make sure we can delete the source entry.  This requires
 951  951           * write permission on the containing directory.  If that
 952  952           * directory is "sticky" it further requires (except for
 953  953           * privileged users) that the user own the directory or the
 954  954           * source entry, or else have permission to write the source
 955  955           * entry.
 956  956           */
 957  957          rw_enter(&sdp->i_contents, RW_READER);
 958  958          rw_enter(&sip->i_contents, RW_READER);
 959  959          if ((error = ud_iaccess(sdp, IWRITE, cr, 0)) != 0 ||
 960  960              (error = ud_sticky_remove_access(sdp, sip, cr)) != 0) {
 961  961                  rw_exit(&sip->i_contents);
 962  962                  rw_exit(&sdp->i_contents);
 963  963                  ITIMES(sip);
 964  964                  goto errout;
 965  965          }
 966  966  
 967  967          /*
 968  968           * Check for renaming '.' or '..' or alias of '.'
 969  969           */
 970  970          if ((strcmp(snm, ".") == 0) ||
 971  971              (strcmp(snm, "..") == 0) ||
 972  972              (sdp == sip)) {
 973  973                  error = EINVAL;
 974  974                  rw_exit(&sip->i_contents);
 975  975                  rw_exit(&sdp->i_contents);
 976  976                  goto errout;
 977  977          }
 978  978  
 979  979          rw_exit(&sip->i_contents);
 980  980          rw_exit(&sdp->i_contents);
 981  981  
 982  982          if (ud_dirlook(tdp, tnm, &tip, cr, 0) == 0) {
 983  983                  vnevent_pre_rename_dest(ITOV(tip), tdvp, tnm, ct);
 984  984                  VN_RELE(ITOV(tip));
 985  985          }
 986  986  
 987  987          /* Notify the target dir. if not the same as the source dir. */
 988  988          if (sdvp != tdvp)
 989  989                  vnevent_pre_rename_dest_dir(tdvp, ITOV(sip), tnm, ct);
 990  990  
 991  991          vnevent_pre_rename_src(ITOV(sip), sdvp, snm, ct);
 992  992  
 993  993          /*
 994  994           * Link source to the target.
 995  995           */
 996  996          rw_enter(&tdp->i_rwlock, RW_WRITER);
 997  997          if (error = ud_direnter(tdp, tnm, DE_RENAME, sdp, sip,
 998  998              (struct vattr *)0, (struct ud_inode **)0, cr, ct)) {
 999  999                  /*
1000 1000                   * ESAME isn't really an error; it indicates that the
1001 1001                   * operation should not be done because the source and target
1002 1002                   * are the same file, but that no error should be reported.
1003 1003                   */
1004 1004                  if (error == ESAME) {
1005 1005                          error = 0;
1006 1006                  }
1007 1007                  rw_exit(&tdp->i_rwlock);
1008 1008                  goto errout;
1009 1009          }
1010 1010          rw_exit(&tdp->i_rwlock);
1011 1011  
1012 1012          rw_enter(&sdp->i_rwlock, RW_WRITER);
1013 1013          /*
1014 1014           * Unlink the source.
1015 1015           * Remove the source entry.  ud_dirremove() checks that the entry
1016 1016           * still reflects sip, and returns an error if it doesn't.
1017 1017           * If the entry has changed just forget about it.  Release
1018 1018           * the source inode.
1019 1019           */
1020 1020          if ((error = ud_dirremove(sdp, snm, sip, (struct vnode *)0,
1021 1021              DR_RENAME, cr, ct)) == ENOENT) {
1022 1022                  error = 0;
1023 1023          }
1024 1024          rw_exit(&sdp->i_rwlock);
1025 1025  
1026 1026          if (error == 0) {
1027 1027                  vnevent_rename_src(ITOV(sip), sdvp, snm, ct);
1028 1028                  /*
1029 1029                   * vnevent_rename_dest and vnevent_rename_dest_dir are called
1030 1030                   * in ud_direnter().
1031 1031                   */
1032 1032          }
1033 1033  
1034 1034  errout:
1035 1035          ITIMES(sdp);
1036 1036          ITIMES(tdp);
1037 1037          VN_RELE(ITOV(sip));
1038 1038          mutex_exit(&udf_vfsp->udf_rename_lck);
1039 1039  
1040 1040          return (error);
1041 1041  }
1042 1042  
1043 1043  /* ARGSUSED */
1044 1044  static int32_t
1045 1045  udf_mkdir(
1046 1046          struct vnode *dvp,
1047 1047          char *dirname,
1048 1048          struct vattr *vap,
1049 1049          struct vnode **vpp,
1050 1050          struct cred *cr,
1051 1051          caller_context_t *ct,
1052 1052          int flags,
1053 1053          vsecattr_t *vsecp)
1054 1054  {
1055 1055          int32_t error;
1056 1056          struct ud_inode *ip;
1057 1057          struct ud_inode *xip;
1058 1058  
1059 1059          ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1060 1060  
1061 1061          ud_printf("udf_mkdir\n");
1062 1062  
1063 1063          ip = VTOI(dvp);
1064 1064          rw_enter(&ip->i_rwlock, RW_WRITER);
1065 1065          error = ud_direnter(ip, dirname, DE_MKDIR,
1066 1066              (struct ud_inode *)0, (struct ud_inode *)0, vap, &xip, cr, ct);
1067 1067          rw_exit(&ip->i_rwlock);
1068 1068          ITIMES(ip);
1069 1069          if (error == 0) {
1070 1070                  ip = xip;
1071 1071                  *vpp = ITOV(ip);
1072 1072                  ITIMES(ip);
1073 1073          } else if (error == EEXIST) {
1074 1074                  ITIMES(xip);
1075 1075                  VN_RELE(ITOV(xip));
1076 1076          }
1077 1077  
1078 1078          return (error);
1079 1079  }
1080 1080  
1081 1081  /* ARGSUSED */
1082 1082  static int32_t
1083 1083  udf_rmdir(
1084 1084          struct vnode *vp,
1085 1085          char *nm,
1086 1086          struct vnode *cdir,
1087 1087          struct cred *cr,
1088 1088          caller_context_t *ct,
1089 1089          int flags)
1090 1090  {
1091 1091          int32_t error;
1092 1092          struct ud_inode *ip = VTOI(vp);
1093 1093  
1094 1094          ud_printf("udf_rmdir\n");
1095 1095  
1096 1096          rw_enter(&ip->i_rwlock, RW_WRITER);
1097 1097          error = ud_dirremove(ip, nm, (struct ud_inode *)0, cdir, DR_RMDIR,
1098 1098              cr, ct);
1099 1099          rw_exit(&ip->i_rwlock);
1100 1100          ITIMES(ip);
1101 1101  
1102 1102          return (error);
1103 1103  }
1104 1104  
/*
 * Read directory entries from `vp' starting at uiop->uio_offset and copy
 * them out as struct dirent64 records.
 *
 * Entries are first assembled in a kernel buffer whose size is the length
 * of the first iovec, then moved to the caller with uiomove().  A "."
 * entry is synthesized at offset 0; file identifiers flagged FID_PARENT
 * are reported as "..", deleted identifiers are skipped, and all other
 * names are decoded with ud_uncompress().
 *
 * Returns 0 or an errno value.  When eofp is non-NULL and no error
 * occurred, *eofp reports whether the resulting offset is at or beyond
 * the directory size.
 */
/* ARGSUSED */
static int32_t
udf_readdir(
        struct vnode *vp,
        struct uio *uiop,
        struct cred *cr,
        int32_t *eofp,
        caller_context_t *ct,
        int flags)
{
        struct ud_inode *ip;
        struct dirent64 *nd;
        struct udf_vfs *udf_vfsp;
        int32_t error = 0, len, outcount = 0;
        uint32_t dirsiz, offset;
        uint32_t bufsize, ndlen, dummy;
        caddr_t outbuf;
        caddr_t outb, end_outb;
        struct iovec *iovp;

        uint8_t *dname;
        int32_t length;

        uint8_t *buf = NULL;

        struct fbuf *fbp = NULL;
        struct file_id *fid;
        uint8_t *name;


        ud_printf("udf_readdir\n");

        ip = VTOI(vp);
        udf_vfsp = ip->i_udf;

        /*
         * Nothing to return when the offset is already past the end of
         * the directory or the directory has no links left.
         */
        dirsiz = ip->i_size;
        if ((uiop->uio_offset >= dirsiz) ||
            (ip->i_nlink <= 0)) {
                if (eofp) {
                        *eofp = 1;
                }
                return (0);
        }

        offset = uiop->uio_offset;
        iovp = uiop->uio_iov;
        bufsize = iovp->iov_len;

        /* Staging buffer for the dirent64 records; nd is the write cursor. */
        outb = outbuf = (char *)kmem_alloc((uint32_t)bufsize, KM_SLEEP);
        end_outb = outb + bufsize;
        nd = (struct dirent64 *)outbuf;

        /* Scratch space for decoded names and for ud_get_next_fid(). */
        dname = (uint8_t *)kmem_zalloc(1024, KM_SLEEP);
        buf = (uint8_t *)kmem_zalloc(udf_vfsp->udf_lbsize, KM_SLEEP);

        if (offset == 0) {
                /*
                 * Start of directory: emit a synthetic "." entry whose
                 * d_off is the marker value 0x10.
                 */
                len = DIRENT64_RECLEN(1);
                if (((caddr_t)nd + len) >= end_outb) {
                        error = EINVAL;
                        goto end;
                }
                nd->d_ino = ip->i_icb_lbano;
                nd->d_reclen = (uint16_t)len;
                nd->d_off = 0x10;
                nd->d_name[0] = '.';
                bzero(&nd->d_name[1], DIRENT64_NAMELEN(len) - 1);
                nd = (struct dirent64 *)((char *)nd + nd->d_reclen);
                outcount++;
        } else if (offset == 0x10) {
                /*
                 * 0x10 is the d_off handed out with "." above: continue
                 * with the first on-disk identifier without repeating ".".
                 */
                offset = 0;
        }

        while (offset < dirsiz) {
                error = ud_get_next_fid(ip, &fbp,
                    offset, &fid, &name, buf);
                if (error != 0) {
                        break;
                }

                if ((fid->fid_flags & FID_DELETED) == 0) {
                        if (fid->fid_flags & FID_PARENT) {

                                /* Parent identifier: report it as "..". */
                                len = DIRENT64_RECLEN(2);
                                if (((caddr_t)nd + len) >= end_outb) {
                                        error = EINVAL;
                                        break;
                                }

                                nd->d_ino = ip->i_icb_lbano;
                                nd->d_reclen = (uint16_t)len;
                                nd->d_off = offset + FID_LEN(fid);
                                nd->d_name[0] = '.';
                                nd->d_name[1] = '.';
                                bzero(&nd->d_name[2],
                                    DIRENT64_NAMELEN(len) - 2);
                                nd = (struct dirent64 *)
                                    ((char *)nd + nd->d_reclen);
                        } else {
                                /* Ordinary entry: decode the stored name. */
                                if ((error = ud_uncompress(fid->fid_idlen,
                                    &length, name, dname)) != 0) {
                                        break;
                                }
                                if (length == 0) {
                                        /* Empty name: skip this entry. */
                                        offset += FID_LEN(fid);
                                        continue;
                                }
                                len = DIRENT64_RECLEN(length);
                                if (((caddr_t)nd + len) >= end_outb) {
                                        /*
                                         * Out of room.  This is only an
                                         * error if nothing was produced.
                                         */
                                        if (!outcount) {
                                                error = EINVAL;
                                        }
                                        break;
                                }
                                (void) strncpy(nd->d_name,
                                    (caddr_t)dname, length);
                                /* Zero the rest of the name field. */
                                bzero(&nd->d_name[length],
                                    DIRENT64_NAMELEN(len) - length);
                                nd->d_ino = ud_xlate_to_daddr(udf_vfsp,
                                    SWAP_16(fid->fid_icb.lad_ext_prn),
                                    SWAP_32(fid->fid_icb.lad_ext_loc), 1,
                                    &dummy);
                                nd->d_reclen = (uint16_t)len;
                                nd->d_off = offset + FID_LEN(fid);
                                nd = (struct dirent64 *)
                                    ((char *)nd + nd->d_reclen);
                        }
                        outcount++;
                }

                offset += FID_LEN(fid);
        }

end:
        if (fbp != NULL) {
                fbrelse(fbp, S_OTHER);
        }
        /* Number of bytes of dirent64 records assembled so far. */
        ndlen = ((char *)nd - outbuf);
        /*
         * In case of error do not call uiomove.
         * Return the error to the caller.
         */
        if ((error == 0) && (ndlen != 0)) {
                error = uiomove(outbuf, (long)ndlen, UIO_READ, uiop);
                /* Resume the next call at the directory offset reached. */
                uiop->uio_offset = offset;
        }
        kmem_free((caddr_t)buf, udf_vfsp->udf_lbsize);
        kmem_free((caddr_t)dname, 1024);
        kmem_free(outbuf, (uint32_t)bufsize);
        if (eofp && error == 0) {
                *eofp = (uiop->uio_offset >= dirsiz);
        }
        return (error);
}
1258 1258  
1259 1259  /* ARGSUSED */
1260 1260  static int32_t
1261 1261  udf_symlink(
1262 1262          struct vnode *dvp,
1263 1263          char *linkname,
1264 1264          struct vattr *vap,
1265 1265          char *target,
1266 1266          struct cred *cr,
1267 1267          caller_context_t *ct,
1268 1268          int flags)
1269 1269  {
1270 1270          int32_t error = 0, outlen;
1271 1271          uint32_t ioflag = 0;
1272 1272          struct ud_inode *ip, *dip = VTOI(dvp);
1273 1273  
1274 1274          struct path_comp *pc;
1275 1275          int8_t *dname = NULL, *uname = NULL, *sp;
1276 1276  
1277 1277          ud_printf("udf_symlink\n");
1278 1278  
1279 1279          ip = (struct ud_inode *)0;
1280 1280          vap->va_type = VLNK;
1281 1281          vap->va_rdev = 0;
1282 1282  
1283 1283          rw_enter(&dip->i_rwlock, RW_WRITER);
1284 1284          error = ud_direnter(dip, linkname, DE_CREATE,
1285 1285              (struct ud_inode *)0, (struct ud_inode *)0, vap, &ip, cr, ct);
1286 1286          rw_exit(&dip->i_rwlock);
1287 1287          if (error == 0) {
1288 1288                  dname = kmem_zalloc(1024, KM_SLEEP);
1289 1289                  uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1290 1290  
1291 1291                  pc = (struct path_comp *)uname;
1292 1292                  /*
1293 1293                   * If the first character in target is "/"
1294 1294                   * then skip it and create entry for it
1295 1295                   */
1296 1296                  if (*target == '/') {
1297 1297                          pc->pc_type = 2;
1298 1298                          pc->pc_len = 0;
1299 1299                          pc = (struct path_comp *)(((char *)pc) + 4);
1300 1300                          while (*target == '/') {
1301 1301                                  target++;
1302 1302                          }
1303 1303                  }
1304 1304  
1305 1305                  while (*target != NULL) {
1306 1306                          sp = target;
1307 1307                          while ((*target != '/') && (*target != '\0')) {
1308 1308                                  target ++;
1309 1309                          }
1310 1310                          /*
1311 1311                           * We got the next component of the
1312 1312                           * path name. Create path_comp of
1313 1313                           * appropriate type
1314 1314                           */
1315 1315                          if (((target - sp) == 1) && (*sp == '.')) {
1316 1316                                  /*
1317 1317                                   * Dot entry.
1318 1318                                   */
1319 1319                                  pc->pc_type = 4;
1320 1320                                  pc = (struct path_comp *)(((char *)pc) + 4);
1321 1321                          } else if (((target - sp) == 2) &&
1322 1322                              (*sp == '.') && ((*(sp + 1)) == '.')) {
1323 1323                                  /*
1324 1324                                   * DotDot entry.
1325 1325                                   */
1326 1326                                  pc->pc_type = 3;
1327 1327                                  pc = (struct path_comp *)(((char *)pc) + 4);
1328 1328                          } else {
1329 1329                                  /*
1330 1330                                   * convert the user given name
1331 1331                                   * into appropriate form to be put
1332 1332                                   * on the media
1333 1333                                   */
1334 1334                                  outlen = 1024;  /* set to size of dname */
1335 1335                                  if (error = ud_compress(target - sp, &outlen,
1336 1336                                      (uint8_t *)sp, (uint8_t *)dname)) {
1337 1337                                          break;
1338 1338                                  }
1339 1339                                  pc->pc_type = 5;
1340 1340                                  /* LINTED */
1341 1341                                  pc->pc_len = outlen;
1342 1342                                  dname[outlen] = '\0';
1343 1343                                  (void) strcpy((char *)pc->pc_id, dname);
1344 1344                                  pc = (struct path_comp *)
1345 1345                                      (((char *)pc) + 4 + outlen);
1346 1346                          }
1347 1347                          while (*target == '/') {
1348 1348                                  target++;
1349 1349                          }
1350 1350                          if (*target == NULL) {
1351 1351                                  break;
1352 1352                          }
1353 1353                  }
1354 1354  
1355 1355                  rw_enter(&ip->i_contents, RW_WRITER);
1356 1356                  if (error == 0) {
1357 1357                          ioflag = FWRITE;
1358 1358                          if (curthread->t_flag & T_DONTPEND) {
1359 1359                                  ioflag |= FDSYNC;
1360 1360                          }
1361 1361                          error = ud_rdwri(UIO_WRITE, ioflag, ip,
1362 1362                              uname, ((int8_t *)pc) - uname,
1363 1363                              (offset_t)0, UIO_SYSSPACE, (int32_t *)0, cr);
1364 1364                  }
1365 1365                  if (error) {
1366 1366                          ud_idrop(ip);
1367 1367                          rw_exit(&ip->i_contents);
1368 1368                          rw_enter(&dip->i_rwlock, RW_WRITER);
1369 1369                          (void) ud_dirremove(dip, linkname, (struct ud_inode *)0,
1370 1370                              (struct vnode *)0, DR_REMOVE, cr, ct);
1371 1371                          rw_exit(&dip->i_rwlock);
1372 1372                          goto update_inode;
1373 1373                  }
1374 1374                  rw_exit(&ip->i_contents);
1375 1375          }
1376 1376  
1377 1377          if ((error == 0) || (error == EEXIST)) {
1378 1378                  VN_RELE(ITOV(ip));
1379 1379          }
1380 1380  
1381 1381  update_inode:
1382 1382          ITIMES(VTOI(dvp));
1383 1383          if (uname != NULL) {
1384 1384                  kmem_free(uname, PAGESIZE);
1385 1385          }
1386 1386          if (dname != NULL) {
1387 1387                  kmem_free(dname, 1024);
1388 1388          }
1389 1389  
1390 1390          return (error);
1391 1391  }
1392 1392  
1393 1393  /* ARGSUSED */
1394 1394  static int32_t
1395 1395  udf_readlink(
1396 1396          struct vnode *vp,
1397 1397          struct uio *uiop,
1398 1398          struct cred *cr,
1399 1399          caller_context_t *ct)
1400 1400  {
1401 1401          int32_t error = 0, off, id_len, size, len;
1402 1402          int8_t *dname = NULL, *uname = NULL;
1403 1403          struct ud_inode *ip;
1404 1404          struct fbuf *fbp = NULL;
1405 1405          struct path_comp *pc;
1406 1406  
1407 1407          ud_printf("udf_readlink\n");
1408 1408  
1409 1409          if (vp->v_type != VLNK) {
1410 1410                  return (EINVAL);
1411 1411          }
1412 1412  
1413 1413          ip = VTOI(vp);
1414 1414          size = ip->i_size;
1415 1415          if (size > PAGESIZE) {
1416 1416                  return (EIO);
1417 1417          }
1418 1418  
1419 1419          if (size == 0) {
1420 1420                  return (0);
1421 1421          }
1422 1422  
1423 1423          dname = kmem_zalloc(1024, KM_SLEEP);
1424 1424          uname = kmem_zalloc(PAGESIZE, KM_SLEEP);
1425 1425  
1426 1426          rw_enter(&ip->i_contents, RW_READER);
1427 1427  
1428 1428          if ((error = fbread(vp, 0, size, S_READ, &fbp)) != 0) {
1429 1429                  goto end;
1430 1430          }
1431 1431  
1432 1432          off = 0;
1433 1433  
1434 1434          while (off < size) {
1435 1435                  pc = (struct path_comp *)(fbp->fb_addr + off);
1436 1436                  switch (pc->pc_type) {
1437 1437                          case 1 :
1438 1438                                  (void) strcpy(uname, ip->i_udf->udf_fsmnt);
1439 1439                                  (void) strcat(uname, "/");
1440 1440                                  break;
1441 1441                          case 2 :
1442 1442                                  if (pc->pc_len != 0) {
1443 1443                                          goto end;
1444 1444                                  }
1445 1445                                  uname[0] = '/';
1446 1446                                  uname[1] = '\0';
1447 1447                                  break;
1448 1448                          case 3 :
1449 1449                                  (void) strcat(uname, "../");
1450 1450                                  break;
1451 1451                          case 4 :
1452 1452                                  (void) strcat(uname, "./");
1453 1453                                  break;
1454 1454                          case 5 :
1455 1455                                  if ((error = ud_uncompress(pc->pc_len, &id_len,
1456 1456                                      pc->pc_id, (uint8_t *)dname)) != 0) {
1457 1457                                          break;
1458 1458                                  }
1459 1459                                  dname[id_len] = '\0';
1460 1460                                  (void) strcat(uname, dname);
1461 1461                                  (void) strcat(uname, "/");
1462 1462                                  break;
1463 1463                          default :
1464 1464                                  error = EINVAL;
1465 1465                                  goto end;
1466 1466                  }
1467 1467                  off += 4 + pc->pc_len;
1468 1468          }
1469 1469          len = strlen(uname) - 1;
1470 1470          if (uname[len] == '/') {
1471 1471                  if (len == 0) {
1472 1472                          /*
1473 1473                           * special case link to /
1474 1474                           */
1475 1475                          len = 1;
1476 1476                  } else {
1477 1477                          uname[len] = '\0';
1478 1478                  }
1479 1479          }
1480 1480  
1481 1481          error = uiomove(uname, len, UIO_READ, uiop);
1482 1482  
1483 1483          ITIMES(ip);
1484 1484  
1485 1485  end:
1486 1486          if (fbp != NULL) {
1487 1487                  fbrelse(fbp, S_OTHER);
1488 1488          }
1489 1489          rw_exit(&ip->i_contents);
1490 1490          if (uname != NULL) {
1491 1491                  kmem_free(uname, PAGESIZE);
1492 1492          }
1493 1493          if (dname != NULL) {
1494 1494                  kmem_free(dname, 1024);
1495 1495          }
1496 1496          return (error);
1497 1497  }
1498 1498  
1499 1499  /* ARGSUSED */
1500 1500  static int32_t
1501 1501  udf_fsync(
1502 1502          struct vnode *vp,
1503 1503          int32_t syncflag,
1504 1504          struct cred *cr,
1505 1505          caller_context_t *ct)
1506 1506  {
1507 1507          int32_t error = 0;
1508 1508          struct ud_inode *ip = VTOI(vp);
1509 1509  
1510 1510          ud_printf("udf_fsync\n");
1511 1511  
1512 1512          rw_enter(&ip->i_contents, RW_WRITER);
1513 1513          if (!(IS_SWAPVP(vp))) {
1514 1514                  error = ud_syncip(ip, 0, I_SYNC); /* Do synchronous writes */
1515 1515          }
1516 1516          if (error == 0) {
1517 1517                  error = ud_sync_indir(ip);
1518 1518          }
1519 1519          ITIMES(ip);             /* XXX: is this necessary ??? */
1520 1520          rw_exit(&ip->i_contents);
1521 1521  
1522 1522          return (error);
1523 1523  }
1524 1524  
1525 1525  /* ARGSUSED */
1526 1526  static void
1527 1527  udf_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct)
1528 1528  {
1529 1529          ud_printf("udf_iinactive\n");
1530 1530  
1531 1531          ud_iinactive(VTOI(vp), cr);
1532 1532  }
1533 1533  
1534 1534  /* ARGSUSED */
1535 1535  static int32_t
1536 1536  udf_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
1537 1537  {
1538 1538          struct udf_fid *udfidp;
1539 1539          struct ud_inode *ip = VTOI(vp);
1540 1540  
1541 1541          ud_printf("udf_fid\n");
1542 1542  
1543 1543          if (fidp->fid_len < (sizeof (struct udf_fid) - sizeof (uint16_t))) {
1544 1544                  fidp->fid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1545 1545                  return (ENOSPC);
1546 1546          }
1547 1547  
1548 1548          udfidp = (struct udf_fid *)fidp;
1549 1549          bzero((char *)udfidp, sizeof (struct udf_fid));
1550 1550          rw_enter(&ip->i_contents, RW_READER);
1551 1551          udfidp->udfid_len = sizeof (struct udf_fid) - sizeof (uint16_t);
1552 1552          udfidp->udfid_uinq_lo = ip->i_uniqid & 0xffffffff;
1553 1553          udfidp->udfid_prn = ip->i_icb_prn;
1554 1554          udfidp->udfid_icb_lbn = ip->i_icb_block;
1555 1555          rw_exit(&ip->i_contents);
1556 1556  
1557 1557          return (0);
1558 1558  }
1559 1559  
1560 1560  /* ARGSUSED2 */
1561 1561  static int
1562 1562  udf_rwlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1563 1563  {
1564 1564          struct ud_inode *ip = VTOI(vp);
1565 1565  
1566 1566          ud_printf("udf_rwlock\n");
1567 1567  
1568 1568          if (write_lock) {
1569 1569                  rw_enter(&ip->i_rwlock, RW_WRITER);
1570 1570          } else {
1571 1571                  rw_enter(&ip->i_rwlock, RW_READER);
1572 1572          }
1573 1573  #ifdef  __lock_lint
1574 1574          rw_exit(&ip->i_rwlock);
1575 1575  #endif
1576 1576          return (write_lock);
1577 1577  }
1578 1578  
1579 1579  /* ARGSUSED */
1580 1580  static void
1581 1581  udf_rwunlock(struct vnode *vp, int32_t write_lock, caller_context_t *ctp)
1582 1582  {
1583 1583          struct ud_inode *ip = VTOI(vp);
1584 1584  
1585 1585          ud_printf("udf_rwunlock\n");
1586 1586  
1587 1587  #ifdef  __lock_lint
1588 1588          rw_enter(&ip->i_rwlock, RW_WRITER);
1589 1589  #endif
1590 1590  
1591 1591          rw_exit(&ip->i_rwlock);
1592 1592  
1593 1593  }
1594 1594  
1595 1595  /* ARGSUSED */
1596 1596  static int32_t
1597 1597  udf_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
1598 1598  {
1599 1599          return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
1600 1600  }
1601 1601  
1602 1602  static int32_t
1603 1603  udf_frlock(
1604 1604          struct vnode *vp,
1605 1605          int32_t cmd,
1606 1606          struct flock64 *bfp,
1607 1607          int32_t flag,
1608 1608          offset_t offset,
1609 1609          struct flk_callback *flk_cbp,
1610 1610          cred_t *cr,
1611 1611          caller_context_t *ct)
1612 1612  {
1613 1613          struct ud_inode *ip = VTOI(vp);
1614 1614  
1615 1615          ud_printf("udf_frlock\n");
1616 1616  
1617 1617          /*
1618 1618           * If file is being mapped, disallow frlock.
1619 1619           * XXX I am not holding tlock while checking i_mapcnt because the
1620 1620           * current locking strategy drops all locks before calling fs_frlock.
1621 1621           * So, mapcnt could change before we enter fs_frlock making is
1622 1622           * meaningless to have held tlock in the first place.
1623 1623           */
1624 1624          if ((ip->i_mapcnt > 0) &&
1625 1625              (MANDLOCK(vp, ip->i_char))) {
1626 1626                  return (EAGAIN);
1627 1627          }
1628 1628  
1629 1629          return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
1630 1630  }
1631 1631  
1632 1632  /*ARGSUSED6*/
1633 1633  static int32_t
1634 1634  udf_space(
1635 1635          struct vnode *vp,
1636 1636          int32_t cmd,
1637 1637          struct flock64 *bfp,
1638 1638          int32_t flag,
1639 1639          offset_t offset,
1640 1640          cred_t *cr,
1641 1641          caller_context_t *ct)
1642 1642  {
1643 1643          int32_t error = 0;
1644 1644  
1645 1645          ud_printf("udf_space\n");
1646 1646  
1647 1647          if (cmd != F_FREESP) {
1648 1648                  error =  EINVAL;
1649 1649          } else if ((error = convoff(vp, bfp, 0, offset)) == 0) {
1650 1650                  error = ud_freesp(vp, bfp, flag, cr);
1651 1651  
1652 1652                  if (error == 0 && bfp->l_start == 0)
1653 1653                          vnevent_truncate(vp, ct);
1654 1654          }
1655 1655  
1656 1656          return (error);
1657 1657  }
1658 1658  
1659 1659  /* ARGSUSED */
1660 1660  static int32_t
1661 1661  udf_getpage(
1662 1662          struct vnode *vp,
1663 1663          offset_t off,
1664 1664          size_t len,
1665 1665          uint32_t *protp,
1666 1666          struct page **plarr,
1667 1667          size_t plsz,
1668 1668          struct seg *seg,
1669 1669          caddr_t addr,
1670 1670          enum seg_rw rw,
1671 1671          struct cred *cr,
1672 1672          caller_context_t *ct)
1673 1673  {
1674 1674          struct ud_inode *ip = VTOI(vp);
1675 1675          int32_t error, has_holes, beyond_eof, seqmode, dolock;
1676 1676          int32_t pgsize = PAGESIZE;
1677 1677          struct udf_vfs *udf_vfsp = ip->i_udf;
1678 1678          page_t **pl;
1679 1679          u_offset_t pgoff, eoff, uoff;
1680 1680          krw_t rwtype;
1681 1681          caddr_t pgaddr;
1682 1682  
1683 1683          ud_printf("udf_getpage\n");
1684 1684  
1685 1685          uoff = (u_offset_t)off; /* type conversion */
1686 1686          if (protp) {
1687 1687                  *protp = PROT_ALL;
1688 1688          }
1689 1689          if (vp->v_flag & VNOMAP) {
1690 1690                  return (ENOSYS);
1691 1691          }
1692 1692          seqmode = ip->i_nextr == uoff && rw != S_CREATE;
1693 1693  
1694 1694          rwtype = RW_READER;
1695 1695          dolock = (rw_owner(&ip->i_contents) != curthread);
1696 1696  retrylock:
1697 1697  #ifdef  __lock_lint
1698 1698          rw_enter(&ip->i_contents, rwtype);
1699 1699  #else
1700 1700          if (dolock) {
1701 1701                  rw_enter(&ip->i_contents, rwtype);
1702 1702          }
1703 1703  #endif
1704 1704  
1705 1705          /*
1706 1706           * We may be getting called as a side effect of a bmap using
1707 1707           * fbread() when the blocks might be being allocated and the
1708 1708           * size has not yet been up'ed.  In this case we want to be
1709 1709           * able to return zero pages if we get back UDF_HOLE from
1710 1710           * calling bmap for a non write case here.  We also might have
1711 1711           * to read some frags from the disk into a page if we are
1712 1712           * extending the number of frags for a given lbn in bmap().
1713 1713           */
1714 1714          beyond_eof = uoff + len > ip->i_size + PAGEOFFSET;
1715 1715          if (beyond_eof && seg != segkmap) {
1716 1716  #ifdef  __lock_lint
1717 1717                  rw_exit(&ip->i_contents);
1718 1718  #else
1719 1719                  if (dolock) {
1720 1720                          rw_exit(&ip->i_contents);
1721 1721                  }
1722 1722  #endif
1723 1723                  return (EFAULT);
1724 1724          }
1725 1725  
1726 1726          /*
1727 1727           * Must hold i_contents lock throughout the call to pvn_getpages
1728 1728           * since locked pages are returned from each call to ud_getapage.
1729 1729           * Must *not* return locked pages and then try for contents lock
1730 1730           * due to lock ordering requirements (inode > page)
1731 1731           */
1732 1732  
1733 1733          has_holes = ud_bmap_has_holes(ip);
1734 1734  
1735 1735          if ((rw == S_WRITE || rw == S_CREATE) && (has_holes || beyond_eof)) {
1736 1736                  int32_t blk_size, count;
1737 1737                  u_offset_t offset;
1738 1738  
1739 1739                  /*
1740 1740                   * We must acquire the RW_WRITER lock in order to
1741 1741                   * call bmap_write().
1742 1742                   */
1743 1743                  if (dolock && rwtype == RW_READER) {
1744 1744                          rwtype = RW_WRITER;
1745 1745  
1746 1746                          if (!rw_tryupgrade(&ip->i_contents)) {
1747 1747  
1748 1748                                  rw_exit(&ip->i_contents);
1749 1749  
1750 1750                                  goto retrylock;
1751 1751                          }
1752 1752                  }
1753 1753  
1754 1754                  /*
1755 1755                   * May be allocating disk blocks for holes here as
1756 1756                   * a result of mmap faults. write(2) does the bmap_write
1757 1757                   * in rdip/wrip, not here. We are not dealing with frags
1758 1758                   * in this case.
1759 1759                   */
1760 1760                  offset = uoff;
1761 1761                  while ((offset < uoff + len) &&
1762 1762                      (offset < ip->i_size)) {
1763 1763                          /*
1764 1764                           * the variable "bnp" is to simplify the expression for
1765 1765                           * the compiler; * just passing in &bn to bmap_write
1766 1766                           * causes a compiler "loop"
1767 1767                           */
1768 1768  
1769 1769                          blk_size = udf_vfsp->udf_lbsize;
1770 1770                          if ((offset + blk_size) > ip->i_size) {
1771 1771                                  count = ip->i_size - offset;
1772 1772                          } else {
1773 1773                                  count = blk_size;
1774 1774                          }
1775 1775                          error = ud_bmap_write(ip, offset, count, 0, cr);
1776 1776                          if (error) {
1777 1777                                  goto update_inode;
1778 1778                          }
1779 1779                          offset += count; /* XXX - make this contig */
1780 1780                  }
1781 1781          }
1782 1782  
1783 1783          /*
1784 1784           * Can be a reader from now on.
1785 1785           */
1786 1786  #ifdef  __lock_lint
1787 1787          if (rwtype == RW_WRITER) {
1788 1788                  rw_downgrade(&ip->i_contents);
1789 1789          }
1790 1790  #else
1791 1791          if (dolock && rwtype == RW_WRITER) {
1792 1792                  rw_downgrade(&ip->i_contents);
1793 1793          }
1794 1794  #endif
1795 1795  
1796 1796          /*
1797 1797           * We remove PROT_WRITE in cases when the file has UDF holes
1798 1798           * because we don't  want to call bmap_read() to check each
1799 1799           * page if it is backed with a disk block.
1800 1800           */
1801 1801          if (protp && has_holes && rw != S_WRITE && rw != S_CREATE) {
1802 1802                  *protp &= ~PROT_WRITE;
1803 1803          }
1804 1804  
1805 1805          error = 0;
1806 1806  
1807 1807          /*
1808 1808           * The loop looks up pages in the range <off, off + len).
1809 1809           * For each page, we first check if we should initiate an asynchronous
1810 1810           * read ahead before we call page_lookup (we may sleep in page_lookup
1811 1811           * for a previously initiated disk read).
1812 1812           */
1813 1813          eoff = (uoff + len);
1814 1814          for (pgoff = uoff, pgaddr = addr, pl = plarr;
1815 1815              pgoff < eoff; /* empty */) {
1816 1816                  page_t  *pp;
1817 1817                  u_offset_t      nextrio;
1818 1818                  se_t    se;
1819 1819  
1820 1820                  se = ((rw == S_CREATE) ? SE_EXCL : SE_SHARED);
1821 1821  
1822 1822                  /*
1823 1823                   * Handle async getpage (faultahead)
1824 1824                   */
1825 1825                  if (plarr == NULL) {
1826 1826                          ip->i_nextrio = pgoff;
1827 1827                          ud_getpage_ra(vp, pgoff, seg, pgaddr);
1828 1828                          pgoff += pgsize;
1829 1829                          pgaddr += pgsize;
1830 1830                          continue;
1831 1831                  }
1832 1832  
1833 1833                  /*
1834 1834                   * Check if we should initiate read ahead of next cluster.
1835 1835                   * We call page_exists only when we need to confirm that
1836 1836                   * we have the current page before we initiate the read ahead.
1837 1837                   */
1838 1838                  nextrio = ip->i_nextrio;
1839 1839                  if (seqmode &&
1840 1840                      pgoff + RD_CLUSTSZ(ip) >= nextrio && pgoff <= nextrio &&
1841 1841                      nextrio < ip->i_size && page_exists(vp, pgoff))
1842 1842                          ud_getpage_ra(vp, pgoff, seg, pgaddr);
1843 1843  
1844 1844                  if ((pp = page_lookup(vp, pgoff, se)) != NULL) {
1845 1845  
1846 1846                          /*
1847 1847                           * We found the page in the page cache.
1848 1848                           */
1849 1849                          *pl++ = pp;
1850 1850                          pgoff += pgsize;
1851 1851                          pgaddr += pgsize;
1852 1852                          len -= pgsize;
1853 1853                          plsz -= pgsize;
1854 1854                  } else  {
1855 1855  
1856 1856                          /*
1857 1857                           * We have to create the page, or read it from disk.
1858 1858                           */
1859 1859                          if (error = ud_getpage_miss(vp, pgoff, len,
1860 1860                              seg, pgaddr, pl, plsz, rw, seqmode)) {
1861 1861                                  goto error_out;
1862 1862                          }
1863 1863  
1864 1864                          while (*pl != NULL) {
1865 1865                                  pl++;
1866 1866                                  pgoff += pgsize;
1867 1867                                  pgaddr += pgsize;
1868 1868                                  len -= pgsize;
1869 1869                                  plsz -= pgsize;
1870 1870                          }
1871 1871                  }
1872 1872          }
1873 1873  
1874 1874          /*
1875 1875           * Return pages up to plsz if they are in the page cache.
1876 1876           * We cannot return pages if there is a chance that they are
1877 1877           * backed with a UDF hole and rw is S_WRITE or S_CREATE.
1878 1878           */
1879 1879          if (plarr && !(has_holes && (rw == S_WRITE || rw == S_CREATE))) {
1880 1880  
1881 1881                  ASSERT((protp == NULL) ||
1882 1882                      !(has_holes && (*protp & PROT_WRITE)));
1883 1883  
1884 1884                  eoff = pgoff + plsz;
1885 1885                  while (pgoff < eoff) {
1886 1886                          page_t          *pp;
1887 1887  
1888 1888                          if ((pp = page_lookup_nowait(vp, pgoff,
1889 1889                              SE_SHARED)) == NULL)
1890 1890                                  break;
1891 1891  
1892 1892                          *pl++ = pp;
1893 1893                          pgoff += pgsize;
1894 1894                          plsz -= pgsize;
1895 1895                  }
1896 1896          }
1897 1897  
1898 1898          if (plarr)
1899 1899                  *pl = NULL;                     /* Terminate page list */
1900 1900          ip->i_nextr = pgoff;
1901 1901  
1902 1902  error_out:
1903 1903          if (error && plarr) {
1904 1904                  /*
1905 1905                   * Release any pages we have locked.
1906 1906                   */
1907 1907                  while (pl > &plarr[0])
1908 1908                          page_unlock(*--pl);
1909 1909  
1910 1910                  plarr[0] = NULL;
1911 1911          }
1912 1912  
1913 1913  update_inode:
1914 1914  #ifdef  __lock_lint
1915 1915          rw_exit(&ip->i_contents);
1916 1916  #else
1917 1917          if (dolock) {
1918 1918                  rw_exit(&ip->i_contents);
1919 1919          }
1920 1920  #endif
1921 1921  
1922 1922          /*
1923 1923           * If the inode is not already marked for IACC (in rwip() for read)
1924 1924           * and the inode is not marked for no access time update (in rwip()
1925 1925           * for write) then update the inode access time and mod time now.
1926 1926           */
1927 1927          mutex_enter(&ip->i_tlock);
1928 1928          if ((ip->i_flag & (IACC | INOACC)) == 0) {
1929 1929                  if ((rw != S_OTHER) && (ip->i_type != VDIR)) {
1930 1930                          ip->i_flag |= IACC;
1931 1931                  }
1932 1932                  if (rw == S_WRITE) {
1933 1933                          ip->i_flag |= IUPD;
1934 1934                  }
1935 1935                  ITIMES_NOLOCK(ip);
1936 1936          }
1937 1937          mutex_exit(&ip->i_tlock);
1938 1938  
1939 1939          return (error);
1940 1940  }
1941 1941  
1942 1942  int32_t ud_delay = 1;
1943 1943  
1944 1944  /* ARGSUSED */
1945 1945  static int32_t
1946 1946  udf_putpage(
1947 1947          struct vnode *vp,
1948 1948          offset_t off,
1949 1949          size_t len,
1950 1950          int32_t flags,
1951 1951          struct cred *cr,
1952 1952          caller_context_t *ct)
1953 1953  {
1954 1954          struct ud_inode *ip;
1955 1955          int32_t error = 0;
1956 1956  
1957 1957          ud_printf("udf_putpage\n");
1958 1958  
1959 1959          ip = VTOI(vp);
1960 1960  #ifdef  __lock_lint
1961 1961          rw_enter(&ip->i_contents, RW_WRITER);
1962 1962  #endif
1963 1963  
1964 1964          if (vp->v_count == 0) {
1965 1965                  cmn_err(CE_WARN, "ud_putpage : bad v_count");
1966 1966                  error = EINVAL;
1967 1967                  goto out;
1968 1968          }
1969 1969  
1970 1970          if (vp->v_flag & VNOMAP) {
1971 1971                  error = ENOSYS;
1972 1972                  goto out;
1973 1973          }
1974 1974  
1975 1975          if (flags & B_ASYNC) {
1976 1976                  if (ud_delay && len &&
1977 1977                      (flags & ~(B_ASYNC|B_DONTNEED|B_FREE)) == 0) {
1978 1978                          mutex_enter(&ip->i_tlock);
1979 1979  
1980 1980                          /*
1981 1981                           * If nobody stalled, start a new cluster.
1982 1982                           */
1983 1983                          if (ip->i_delaylen == 0) {
1984 1984                                  ip->i_delayoff = off;
1985 1985                                  ip->i_delaylen = len;
1986 1986                                  mutex_exit(&ip->i_tlock);
1987 1987                                  goto out;
1988 1988                          }
1989 1989  
1990 1990                          /*
1991 1991                           * If we have a full cluster or they are not contig,
1992 1992                           * then push last cluster and start over.
1993 1993                           */
1994 1994                          if (ip->i_delaylen >= WR_CLUSTSZ(ip) ||
1995 1995                              ip->i_delayoff + ip->i_delaylen != off) {
1996 1996                                  u_offset_t doff;
1997 1997                                  size_t dlen;
1998 1998  
1999 1999                                  doff = ip->i_delayoff;
2000 2000                                  dlen = ip->i_delaylen;
2001 2001                                  ip->i_delayoff = off;
2002 2002                                  ip->i_delaylen = len;
2003 2003                                  mutex_exit(&ip->i_tlock);
2004 2004                                  error = ud_putpages(vp, doff, dlen, flags, cr);
2005 2005                                  /* LMXXX - flags are new val, not old */
2006 2006                                  goto out;
2007 2007                          }
2008 2008  
2009 2009                          /*
2010 2010                           * There is something there, it's not full, and
2011 2011                           * it is contig.
2012 2012                           */
2013 2013                          ip->i_delaylen += len;
2014 2014                          mutex_exit(&ip->i_tlock);
2015 2015                          goto out;
2016 2016                  }
2017 2017  
2018 2018                  /*
2019 2019                   * Must have weird flags or we are not clustering.
2020 2020                   */
2021 2021          }
2022 2022  
2023 2023          error = ud_putpages(vp, off, len, flags, cr);
2024 2024  
2025 2025  out:
2026 2026  #ifdef  __lock_lint
2027 2027          rw_exit(&ip->i_contents);
2028 2028  #endif
2029 2029          return (error);
2030 2030  }
2031 2031  
2032 2032  /* ARGSUSED */
2033 2033  static int32_t
2034 2034  udf_map(
2035 2035          struct vnode *vp,
2036 2036          offset_t off,
2037 2037          struct as *as,
2038 2038          caddr_t *addrp,
2039 2039          size_t len,
2040 2040          uint8_t prot,
2041 2041          uint8_t maxprot,
2042 2042          uint32_t flags,
2043 2043          struct cred *cr,
2044 2044          caller_context_t *ct)
2045 2045  {
2046 2046          struct segvn_crargs vn_a;
2047 2047          int32_t error = 0;
2048 2048  
2049 2049          ud_printf("udf_map\n");
2050 2050  
2051 2051          if (vp->v_flag & VNOMAP) {
2052 2052                  error = ENOSYS;
2053 2053                  goto end;
2054 2054          }
2055 2055  
2056 2056          if ((off < (offset_t)0) ||
2057 2057              ((off + len) < (offset_t)0)) {
2058 2058                  error = EINVAL;
2059 2059                  goto end;
2060 2060          }
2061 2061  
2062 2062          if (vp->v_type != VREG) {
2063 2063                  error = ENODEV;
2064 2064                  goto end;
2065 2065          }
2066 2066  
2067 2067          /*
2068 2068           * If file is being locked, disallow mapping.
2069 2069           */
2070 2070          if (vn_has_mandatory_locks(vp, VTOI(vp)->i_char)) {
2071 2071                  error = EAGAIN;
2072 2072                  goto end;
2073 2073          }
2074 2074  
2075 2075          as_rangelock(as);
2076 2076          error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
2077 2077          if (error != 0) {
2078 2078                  as_rangeunlock(as);
2079 2079                  goto end;
2080 2080          }
2081 2081  
2082 2082          vn_a.vp = vp;
2083 2083          vn_a.offset = off;
2084 2084          vn_a.type = flags & MAP_TYPE;
2085 2085          vn_a.prot = prot;
2086 2086          vn_a.maxprot = maxprot;
2087 2087          vn_a.cred = cr;
2088 2088          vn_a.amp = NULL;
2089 2089          vn_a.flags = flags & ~MAP_TYPE;
2090 2090          vn_a.szc = 0;
2091 2091          vn_a.lgrp_mem_policy_flags = 0;
2092 2092  
2093 2093          error = as_map(as, *addrp, len, segvn_create, (caddr_t)&vn_a);
2094 2094          as_rangeunlock(as);
2095 2095  
2096 2096  end:
2097 2097          return (error);
2098 2098  }
2099 2099  
2100 2100  /* ARGSUSED */
2101 2101  static int32_t
2102 2102  udf_addmap(struct vnode *vp,
2103 2103          offset_t off,
2104 2104          struct as *as,
2105 2105          caddr_t addr,
2106 2106          size_t len,
2107 2107          uint8_t prot,
2108 2108          uint8_t maxprot,
2109 2109          uint32_t flags,
2110 2110          struct cred *cr,
2111 2111          caller_context_t *ct)
2112 2112  {
2113 2113          struct ud_inode *ip = VTOI(vp);
2114 2114  
2115 2115          ud_printf("udf_addmap\n");
2116 2116  
2117 2117          if (vp->v_flag & VNOMAP) {
2118 2118                  return (ENOSYS);
2119 2119          }
2120 2120  
2121 2121          mutex_enter(&ip->i_tlock);
2122 2122          ip->i_mapcnt += btopr(len);
2123 2123          mutex_exit(&ip->i_tlock);
2124 2124  
2125 2125          return (0);
2126 2126  }
2127 2127  
2128 2128  /* ARGSUSED */
2129 2129  static int32_t
2130 2130  udf_delmap(
2131 2131          struct vnode *vp, offset_t off,
2132 2132          struct as *as,
2133 2133          caddr_t addr,
2134 2134          size_t len,
2135 2135          uint32_t prot,
2136 2136          uint32_t maxprot,
2137 2137          uint32_t flags,
2138 2138          struct cred *cr,
2139 2139          caller_context_t *ct)
2140 2140  {
2141 2141          struct ud_inode *ip = VTOI(vp);
2142 2142  
2143 2143          ud_printf("udf_delmap\n");
2144 2144  
2145 2145          if (vp->v_flag & VNOMAP) {
2146 2146                  return (ENOSYS);
2147 2147          }
2148 2148  
2149 2149          mutex_enter(&ip->i_tlock);
2150 2150          ip->i_mapcnt -= btopr(len);     /* Count released mappings */
2151 2151          ASSERT(ip->i_mapcnt >= 0);
2152 2152          mutex_exit(&ip->i_tlock);
2153 2153  
2154 2154          return (0);
2155 2155  }
2156 2156  
2157 2157  /* ARGSUSED */
2158 2158  static int32_t
2159 2159  udf_l_pathconf(
2160 2160          struct vnode *vp,
2161 2161          int32_t cmd,
2162 2162          ulong_t *valp,
2163 2163          struct cred *cr,
2164 2164          caller_context_t *ct)
2165 2165  {
2166 2166          int32_t error = 0;
2167 2167  
2168 2168          ud_printf("udf_l_pathconf\n");
2169 2169  
2170 2170          if (cmd == _PC_FILESIZEBITS) {
2171 2171                  /*
2172 2172                   * udf supports 64 bits as file size
2173 2173                   * but there are several other restrictions
2174 2174                   * it only supports 32-bit block numbers and
2175 2175                   * daddr32_t is only and int32_t so taking these
2176 2176                   * into account we can stay just as where ufs is
2177 2177                   */
2178 2178                  *valp = 41;
2179 2179          } else if (cmd == _PC_TIMESTAMP_RESOLUTION) {
2180 2180                  /* nanosecond timestamp resolution */
2181 2181                  *valp = 1L;
2182 2182          } else {
2183 2183                  error = fs_pathconf(vp, cmd, valp, cr, ct);
2184 2184          }
2185 2185  
2186 2186          return (error);
2187 2187  }
2188 2188  
2189 2189  uint32_t ud_pageio_reads = 0, ud_pageio_writes = 0;
2190 2190  #ifndef __lint
2191 2191  _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_reads))
2192 2192  _NOTE(SCHEME_PROTECTS_DATA("safe sharing", ud_pageio_writes))
2193 2193  #endif
2194 2194  /*
2195 2195   * Assumption is that there will not be a pageio request
2196 2196   * to an embedded file
2197 2197   */
2198 2198  /* ARGSUSED */
2199 2199  static int32_t
2200 2200  udf_pageio(
2201 2201          struct vnode *vp,
2202 2202          struct page *pp,
2203 2203          u_offset_t io_off,
2204 2204          size_t io_len,
2205 2205          int32_t flags,
2206 2206          struct cred *cr,
2207 2207          caller_context_t *ct)
2208 2208  {
2209 2209          daddr_t bn;
2210 2210          struct buf *bp;
2211 2211          struct ud_inode *ip = VTOI(vp);
2212 2212          int32_t dolock, error = 0, contig, multi_io;
2213 2213          size_t done_len = 0, cur_len = 0;
2214 2214          page_t *npp = NULL, *opp = NULL, *cpp = pp;
2215 2215  
2216 2216          if (pp == NULL) {
2217 2217                  return (EINVAL);
2218 2218          }
2219 2219  
2220 2220          dolock = (rw_owner(&ip->i_contents) != curthread);
2221 2221  
2222 2222          /*
2223 2223           * We need a better check.  Ideally, we would use another
2224 2224           * vnodeops so that hlocked and forcibly unmounted file
2225 2225           * systems would return EIO where appropriate and w/o the
2226 2226           * need for these checks.
2227 2227           */
2228 2228          if (ip->i_udf == NULL) {
2229 2229                  return (EIO);
2230 2230          }
2231 2231  
2232 2232  #ifdef  __lock_lint
2233 2233          rw_enter(&ip->i_contents, RW_READER);
2234 2234  #else
2235 2235          if (dolock) {
2236 2236                  rw_enter(&ip->i_contents, RW_READER);
2237 2237          }
2238 2238  #endif
2239 2239  
2240 2240          /*
2241 2241           * Break the io request into chunks, one for each contiguous
2242 2242           * stretch of disk blocks in the target file.
2243 2243           */
2244 2244          while (done_len < io_len) {
2245 2245                  ASSERT(cpp);
2246 2246                  bp = NULL;
2247 2247                  contig = 0;
2248 2248                  if (error = ud_bmap_read(ip, (u_offset_t)(io_off + done_len),
2249 2249                      &bn, &contig)) {
2250 2250                          break;
2251 2251                  }
2252 2252  
2253 2253                  if (bn == UDF_HOLE) {   /* No holey swapfiles */
2254 2254                          cmn_err(CE_WARN, "SWAP file has HOLES");
2255 2255                          error = EINVAL;
2256 2256                          break;
2257 2257                  }
2258 2258  
2259 2259                  cur_len = MIN(io_len - done_len, contig);
2260 2260  
2261 2261                  /*
2262 2262                   * Check if more than one I/O is
2263 2263                   * required to complete the given
2264 2264                   * I/O operation
2265 2265                   */
2266 2266                  if (ip->i_udf->udf_lbsize < PAGESIZE) {
2267 2267                          if (cur_len >= PAGESIZE) {
2268 2268                                  multi_io = 0;
2269 2269                                  cur_len &= PAGEMASK;
2270 2270                          } else {
2271 2271                                  multi_io = 1;
2272 2272                                  cur_len = MIN(io_len - done_len, PAGESIZE);
2273 2273                          }
2274 2274                  }
2275 2275                  page_list_break(&cpp, &npp, btop(cur_len));
2276 2276  
2277 2277                  bp = pageio_setup(cpp, cur_len, ip->i_devvp, flags);
2278 2278                  ASSERT(bp != NULL);
2279 2279  
2280 2280                  bp->b_edev = ip->i_dev;
2281 2281                  bp->b_dev = cmpdev(ip->i_dev);
2282 2282                  bp->b_blkno = bn;
2283 2283                  bp->b_un.b_addr = (caddr_t)0;
2284 2284                  bp->b_file = vp;
2285 2285                  bp->b_offset = (offset_t)(io_off + done_len);
2286 2286  
2287 2287  /*
2288 2288   *              ub.ub_pageios.value.ul++;
2289 2289   */
2290 2290                  if (multi_io == 0) {
2291 2291                          (void) bdev_strategy(bp);
2292 2292                  } else {
2293 2293                          error = ud_multi_strat(ip, cpp, bp,
2294 2294                              (u_offset_t)(io_off + done_len));
2295 2295                          if (error != 0) {
2296 2296                                  pageio_done(bp);
2297 2297                                  break;
2298 2298                          }
2299 2299                  }
2300 2300                  if (flags & B_READ) {
2301 2301                          ud_pageio_reads++;
2302 2302                  } else {
2303 2303                          ud_pageio_writes++;
2304 2304                  }
2305 2305  
2306 2306                  /*
2307 2307                   * If the request is not B_ASYNC, wait for i/o to complete
2308 2308                   * and re-assemble the page list to return to the caller.
2309 2309                   * If it is B_ASYNC we leave the page list in pieces and
2310 2310                   * cleanup() will dispose of them.
2311 2311                   */
2312 2312                  if ((flags & B_ASYNC) == 0) {
2313 2313                          error = biowait(bp);
2314 2314                          pageio_done(bp);
2315 2315                          if (error) {
2316 2316                                  break;
2317 2317                          }
2318 2318                          page_list_concat(&opp, &cpp);
2319 2319                  }
2320 2320                  cpp = npp;
2321 2321                  npp = NULL;
2322 2322                  done_len += cur_len;
2323 2323          }
2324 2324  
2325 2325          ASSERT(error || (cpp == NULL && npp == NULL && done_len == io_len));
2326 2326          if (error) {
2327 2327                  if (flags & B_ASYNC) {
2328 2328                          /* Cleanup unprocessed parts of list */
2329 2329                          page_list_concat(&cpp, &npp);
2330 2330                          if (flags & B_READ) {
2331 2331                                  pvn_read_done(cpp, B_ERROR);
2332 2332                          } else {
2333 2333                                  pvn_write_done(cpp, B_ERROR);
2334 2334                          }
2335 2335                  } else {
2336 2336                          /* Re-assemble list and let caller clean up */
2337 2337                          page_list_concat(&opp, &cpp);
2338 2338                          page_list_concat(&opp, &npp);
2339 2339                  }
2340 2340          }
2341 2341  
2342 2342  #ifdef  __lock_lint
2343 2343          rw_exit(&ip->i_contents);
2344 2344  #else
2345 2345          if (dolock) {
2346 2346                  rw_exit(&ip->i_contents);
2347 2347          }
2348 2348  #endif
2349 2349          return (error);
2350 2350  }
2351 2351  
2352 2352  
2353 2353  
2354 2354  
2355 2355  /* -------------------- local functions --------------------------- */
2356 2356  
2357 2357  
2358 2358  
2359 2359  int32_t
2360 2360  ud_rdwri(enum uio_rw rw, int32_t ioflag,
2361 2361          struct ud_inode *ip, caddr_t base, int32_t len,
2362 2362          offset_t offset, enum uio_seg seg, int32_t *aresid, struct cred *cr)
2363 2363  {
2364 2364          int32_t error;
2365 2365          struct uio auio;
2366 2366          struct iovec aiov;
2367 2367  
2368 2368          ud_printf("ud_rdwri\n");
2369 2369  
2370 2370          bzero((caddr_t)&auio, sizeof (uio_t));
2371 2371          bzero((caddr_t)&aiov, sizeof (iovec_t));
2372 2372  
2373 2373          aiov.iov_base = base;
2374 2374          aiov.iov_len = len;
2375 2375          auio.uio_iov = &aiov;
2376 2376          auio.uio_iovcnt = 1;
2377 2377          auio.uio_loffset = offset;
2378 2378          auio.uio_segflg = (int16_t)seg;
2379 2379          auio.uio_resid = len;
2380 2380  
2381 2381          if (rw == UIO_WRITE) {
2382 2382                  auio.uio_fmode = FWRITE;
2383 2383                  auio.uio_extflg = UIO_COPY_DEFAULT;
2384 2384                  auio.uio_llimit = curproc->p_fsz_ctl;
2385 2385                  error = ud_wrip(ip, &auio, ioflag, cr);
2386 2386          } else {
2387 2387                  auio.uio_fmode = FREAD;
2388 2388                  auio.uio_extflg = UIO_COPY_CACHED;
2389 2389                  auio.uio_llimit = MAXOFFSET_T;
2390 2390                  error = ud_rdip(ip, &auio, ioflag, cr);
2391 2391          }
2392 2392  
2393 2393          if (aresid) {
2394 2394                  *aresid = auio.uio_resid;
2395 2395          } else if (auio.uio_resid) {
2396 2396                  error = EIO;
2397 2397          }
2398 2398          return (error);
2399 2399  }
2400 2400  
/*
 * Free behind hacks.  The pager is busted.
 * XXX - the information really needs to reach writedone() through a flag
 * such as B_SEQ or B_FREE_IF_TIGHT_ON_MEMORY.
 *
 * NOTE(review): the consumers of these two tunables are not in this part
 * of the file; ud_smallfile is presumably a size threshold in bytes.
 */
int32_t ud_freebehind = 1;
int32_t ud_smallfile = 32 * 1024;
2408 2408  
2409 2409  /* ARGSUSED */
2410 2410  int32_t
2411 2411  ud_getpage_miss(struct vnode *vp, u_offset_t off,
2412 2412          size_t len, struct seg *seg, caddr_t addr, page_t *pl[],
2413 2413          size_t plsz, enum seg_rw rw, int32_t seq)
2414 2414  {
2415 2415          struct ud_inode *ip = VTOI(vp);
2416 2416          int32_t err = 0;
2417 2417          size_t io_len;
2418 2418          u_offset_t io_off;
2419 2419          u_offset_t pgoff;
2420 2420          page_t *pp;
2421 2421  
2422 2422          pl[0] = NULL;
2423 2423  
2424 2424          /*
2425 2425           * Figure out whether the page can be created, or must be
2426 2426           * read from the disk
2427 2427           */
2428 2428          if (rw == S_CREATE) {
2429 2429                  if ((pp = page_create_va(vp, off,
2430 2430                      PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
2431 2431                          cmn_err(CE_WARN, "ud_getpage_miss: page_create");
2432 2432                          return (EINVAL);
2433 2433                  }
2434 2434                  io_len = PAGESIZE;
2435 2435          } else {
2436 2436                  pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
2437 2437                      &io_len, off, PAGESIZE, 0);
2438 2438  
2439 2439                  /*
2440 2440                   * Some other thread has entered the page.
2441 2441                   * ud_getpage will retry page_lookup.
2442 2442                   */
2443 2443                  if (pp == NULL) {
2444 2444                          return (0);
2445 2445                  }
2446 2446  
2447 2447                  /*
2448 2448                   * Fill the page with as much data as we can from the file.
2449 2449                   */
2450 2450                  err = ud_page_fill(ip, pp, off, B_READ, &pgoff);
2451 2451                  if (err) {
2452 2452                          pvn_read_done(pp, B_ERROR);
2453 2453                          return (err);
2454 2454                  }
2455 2455  
2456 2456                  /*
2457 2457                   * XXX ??? ufs has io_len instead of pgoff below
2458 2458                   */
2459 2459                  ip->i_nextrio = off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2460 2460  
2461 2461                  /*
2462 2462                   * If the file access is sequential, initiate read ahead
2463 2463                   * of the next cluster.
2464 2464                   */
2465 2465                  if (seq && ip->i_nextrio < ip->i_size) {
2466 2466                          ud_getpage_ra(vp, off, seg, addr);
2467 2467                  }
2468 2468          }
2469 2469  
2470 2470  outmiss:
2471 2471          pvn_plist_init(pp, pl, plsz, (offset_t)off, io_len, rw);
2472 2472          return (err);
2473 2473  }
2474 2474  
2475 2475  /* ARGSUSED */
2476 2476  void
2477 2477  ud_getpage_ra(struct vnode *vp,
2478 2478          u_offset_t off, struct seg *seg, caddr_t addr)
2479 2479  {
2480 2480          page_t *pp;
2481 2481          size_t io_len;
2482 2482          struct ud_inode *ip = VTOI(vp);
2483 2483          u_offset_t io_off = ip->i_nextrio, pgoff;
2484 2484          caddr_t addr2 = addr + (io_off - off);
2485 2485          daddr_t bn;
2486 2486          int32_t contig = 0;
2487 2487  
2488 2488          /*
2489 2489           * Is this test needed?
2490 2490           */
2491 2491  
2492 2492          if (addr2 >= seg->s_base + seg->s_size) {
2493 2493                  return;
2494 2494          }
2495 2495  
2496 2496          contig = 0;
2497 2497          if (ud_bmap_read(ip, io_off, &bn, &contig) != 0 || bn == UDF_HOLE) {
2498 2498                  return;
2499 2499          }
2500 2500  
2501 2501          pp = pvn_read_kluster(vp, io_off, seg, addr2,
2502 2502              &io_off, &io_len, io_off, PAGESIZE, 1);
2503 2503  
2504 2504          /*
2505 2505           * Some other thread has entered the page.
2506 2506           * So no read head done here (ie we will have to and wait
2507 2507           * for the read when needed).
2508 2508           */
2509 2509  
2510 2510          if (pp == NULL) {
2511 2511                  return;
2512 2512          }
2513 2513  
2514 2514          (void) ud_page_fill(ip, pp, io_off, (B_READ|B_ASYNC), &pgoff);
2515 2515          ip->i_nextrio =  io_off + ((pgoff + PAGESIZE - 1) & PAGEMASK);
2516 2516  }
2517 2517  
2518 2518  int
2519 2519  ud_page_fill(struct ud_inode *ip, page_t *pp, u_offset_t off,
2520 2520          uint32_t bflgs, u_offset_t *pg_off)
2521 2521  {
2522 2522          daddr_t bn;
2523 2523          struct buf *bp;
2524 2524          caddr_t kaddr, caddr;
2525 2525          int32_t error = 0, contig = 0, multi_io = 0;
2526 2526          int32_t lbsize = ip->i_udf->udf_lbsize;
2527 2527          int32_t lbmask = ip->i_udf->udf_lbmask;
2528 2528          uint64_t isize;
2529 2529  
2530 2530          isize = (ip->i_size + lbmask) & (~lbmask);
2531 2531          if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2532 2532  
2533 2533                  /*
2534 2534                   * Embedded file read file_entry
2535 2535                   * from buffer cache and copy the required
2536 2536                   * portions
2537 2537                   */
2538 2538                  bp = ud_bread(ip->i_dev,
2539 2539                      ip->i_icb_lbano << ip->i_udf->udf_l2d_shift, lbsize);
2540 2540                  if ((bp->b_error == 0) &&
2541 2541                      (bp->b_resid == 0)) {
2542 2542  
2543 2543                          caddr = bp->b_un.b_addr + ip->i_data_off;
2544 2544  
2545 2545                          /*
2546 2546                           * mapin to kvm
2547 2547                           */
2548 2548                          kaddr = (caddr_t)ppmapin(pp,
2549 2549                              PROT_READ | PROT_WRITE, (caddr_t)-1);
2550 2550                          (void) kcopy(caddr, kaddr, ip->i_size);
2551 2551  
2552 2552                          /*
2553 2553                           * mapout of kvm
2554 2554                           */
2555 2555                          ppmapout(kaddr);
2556 2556                  }
2557 2557                  brelse(bp);
2558 2558                  contig = ip->i_size;
2559 2559          } else {
2560 2560  
2561 2561                  /*
2562 2562                   * Get the continuous size and block number
2563 2563                   * at offset "off"
2564 2564                   */
2565 2565                  if (error = ud_bmap_read(ip, off, &bn, &contig))
2566 2566                          goto out;
2567 2567                  contig = MIN(contig, PAGESIZE);
2568 2568                  contig = (contig + lbmask) & (~lbmask);
2569 2569  
2570 2570                  /*
2571 2571                   * Zero part of the page which we are not
2572 2572                   * going to read from the disk.
2573 2573                   */
2574 2574  
2575 2575                  if (bn == UDF_HOLE) {
2576 2576  
2577 2577                          /*
2578 2578                           * This is a HOLE. Just zero out
2579 2579                           * the page
2580 2580                           */
2581 2581                          if (((off + contig) == isize) ||
2582 2582                              (contig == PAGESIZE)) {
2583 2583                                  pagezero(pp->p_prev, 0, PAGESIZE);
2584 2584                                  goto out;
2585 2585                          }
2586 2586                  }
2587 2587  
2588 2588                  if (contig < PAGESIZE) {
2589 2589                          uint64_t count;
2590 2590  
2591 2591                          count = isize - off;
2592 2592                          if (contig != count) {
2593 2593                                  multi_io = 1;
2594 2594                                  contig = (int32_t)(MIN(count, PAGESIZE));
2595 2595                          } else {
2596 2596                                  pagezero(pp->p_prev, contig, PAGESIZE - contig);
2597 2597                          }
2598 2598                  }
2599 2599  
2600 2600                  /*
2601 2601                   * Get a bp and initialize it
2602 2602                   */
2603 2603                  bp = pageio_setup(pp, contig, ip->i_devvp, bflgs);
2604 2604                  ASSERT(bp != NULL);
2605 2605  
2606 2606                  bp->b_edev = ip->i_dev;
2607 2607                  bp->b_dev = cmpdev(ip->i_dev);
2608 2608                  bp->b_blkno = bn;
2609 2609                  bp->b_un.b_addr = 0;
2610 2610                  bp->b_file = ip->i_vnode;
2611 2611  
2612 2612                  /*
2613 2613                   * Start I/O
2614 2614                   */
2615 2615                  if (multi_io == 0) {
2616 2616  
2617 2617                          /*
2618 2618                           * Single I/O is sufficient for this page
2619 2619                           */
2620 2620                          (void) bdev_strategy(bp);
2621 2621                  } else {
2622 2622  
2623 2623                          /*
2624 2624                           * We need to do the I/O in
2625 2625                           * piece's
2626 2626                           */
2627 2627                          error = ud_multi_strat(ip, pp, bp, off);
2628 2628                          if (error != 0) {
2629 2629                                  goto out;
2630 2630                          }
2631 2631                  }
2632 2632                  if ((bflgs & B_ASYNC) == 0) {
2633 2633  
2634 2634                          /*
2635 2635                           * Wait for i/o to complete.
2636 2636                           */
2637 2637  
2638 2638                          error = biowait(bp);
2639 2639                          pageio_done(bp);
2640 2640                          if (error) {
2641 2641                                  goto out;
2642 2642                          }
2643 2643                  }
2644 2644          }
2645 2645          if ((off + contig) >= ip->i_size) {
2646 2646                  contig = ip->i_size - off;
2647 2647          }
2648 2648  
2649 2649  out:
2650 2650          *pg_off = contig;
2651 2651          return (error);
2652 2652  }
2653 2653  
2654 2654  int32_t
2655 2655  ud_putpages(struct vnode *vp, offset_t off,
2656 2656          size_t len, int32_t flags, struct cred *cr)
2657 2657  {
2658 2658          struct ud_inode *ip;
2659 2659          page_t *pp;
2660 2660          u_offset_t io_off;
2661 2661          size_t io_len;
2662 2662          u_offset_t eoff;
2663 2663          int32_t err = 0;
2664 2664          int32_t dolock;
2665 2665  
2666 2666          ud_printf("ud_putpages\n");
2667 2667  
2668 2668          if (vp->v_count == 0) {
2669 2669                  cmn_err(CE_WARN, "ud_putpages: bad v_count");
2670 2670                  return (EINVAL);
2671 2671          }
2672 2672  
2673 2673          ip = VTOI(vp);
2674 2674  
2675 2675          /*
2676 2676           * Acquire the readers/write inode lock before locking
2677 2677           * any pages in this inode.
2678 2678           * The inode lock is held during i/o.
2679 2679           */
2680 2680          if (len == 0) {
2681 2681                  mutex_enter(&ip->i_tlock);
2682 2682                  ip->i_delayoff = ip->i_delaylen = 0;
2683 2683                  mutex_exit(&ip->i_tlock);
2684 2684          }
2685 2685  #ifdef  __lock_lint
2686 2686          rw_enter(&ip->i_contents, RW_READER);
2687 2687  #else
2688 2688          dolock = (rw_owner(&ip->i_contents) != curthread);
2689 2689          if (dolock) {
2690 2690                  rw_enter(&ip->i_contents, RW_READER);
2691 2691          }
2692 2692  #endif
2693 2693  
2694 2694          if (!vn_has_cached_data(vp)) {
2695 2695  #ifdef  __lock_lint
2696 2696                  rw_exit(&ip->i_contents);
2697 2697  #else
2698 2698                  if (dolock) {
2699 2699                          rw_exit(&ip->i_contents);
2700 2700                  }
2701 2701  #endif
2702 2702                  return (0);
2703 2703          }
2704 2704  
2705 2705          if (len == 0) {
2706 2706                  /*
2707 2707                   * Search the entire vp list for pages >= off.
2708 2708                   */
2709 2709                  err = pvn_vplist_dirty(vp, (u_offset_t)off, ud_putapage,
2710 2710                      flags, cr);
2711 2711          } else {
2712 2712                  /*
2713 2713                   * Loop over all offsets in the range looking for
2714 2714                   * pages to deal with.
2715 2715                   */
2716 2716                  if ((eoff = blkroundup(ip->i_udf, ip->i_size)) != 0) {
2717 2717                          eoff = MIN(off + len, eoff);
2718 2718                  } else {
2719 2719                          eoff = off + len;
2720 2720                  }
2721 2721  
2722 2722                  for (io_off = off; io_off < eoff; io_off += io_len) {
2723 2723                          /*
2724 2724                           * If we are not invalidating, synchronously
2725 2725                           * freeing or writing pages, use the routine
2726 2726                           * page_lookup_nowait() to prevent reclaiming
2727 2727                           * them from the free list.
2728 2728                           */
2729 2729                          if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
2730 2730                                  pp = page_lookup(vp, io_off,
2731 2731                                      (flags & (B_INVAL | B_FREE)) ?
2732 2732                                      SE_EXCL : SE_SHARED);
2733 2733                          } else {
2734 2734                                  pp = page_lookup_nowait(vp, io_off,
2735 2735                                      (flags & B_FREE) ? SE_EXCL : SE_SHARED);
2736 2736                          }
2737 2737  
2738 2738                          if (pp == NULL || pvn_getdirty(pp, flags) == 0) {
2739 2739                                  io_len = PAGESIZE;
2740 2740                          } else {
2741 2741  
2742 2742                                  err = ud_putapage(vp, pp,
2743 2743                                      &io_off, &io_len, flags, cr);
2744 2744                                  if (err != 0) {
2745 2745                                          break;
2746 2746                                  }
2747 2747                                  /*
2748 2748                                   * "io_off" and "io_len" are returned as
2749 2749                                   * the range of pages we actually wrote.
2750 2750                                   * This allows us to skip ahead more quickly
2751 2751                                   * since several pages may've been dealt
2752 2752                                   * with by this iteration of the loop.
2753 2753                                   */
2754 2754                          }
2755 2755                  }
2756 2756          }
2757 2757          if (err == 0 && off == 0 && (len == 0 || len >= ip->i_size)) {
2758 2758                  /*
2759 2759                   * We have just sync'ed back all the pages on
2760 2760                   * the inode, turn off the IMODTIME flag.
2761 2761                   */
2762 2762                  mutex_enter(&ip->i_tlock);
2763 2763                  ip->i_flag &= ~IMODTIME;
2764 2764                  mutex_exit(&ip->i_tlock);
2765 2765          }
2766 2766  #ifdef  __lock_lint
2767 2767          rw_exit(&ip->i_contents);
2768 2768  #else
2769 2769          if (dolock) {
2770 2770                  rw_exit(&ip->i_contents);
2771 2771          }
2772 2772  #endif
2773 2773          return (err);
2774 2774  }
2775 2775  
2776 2776  /* ARGSUSED */
2777 2777  int32_t
2778 2778  ud_putapage(struct vnode *vp,
2779 2779          page_t *pp, u_offset_t *offp,
2780 2780          size_t *lenp, int32_t flags, struct cred *cr)
2781 2781  {
2782 2782          daddr_t bn;
2783 2783          size_t io_len;
2784 2784          struct ud_inode *ip;
2785 2785          int32_t error = 0, contig, multi_io = 0;
2786 2786          struct udf_vfs *udf_vfsp;
2787 2787          u_offset_t off, io_off;
2788 2788          caddr_t kaddr, caddr;
2789 2789          struct buf *bp = NULL;
2790 2790          int32_t lbmask;
2791 2791          uint64_t isize;
2792 2792          uint16_t crc_len;
2793 2793          struct file_entry *fe;
2794 2794  
2795 2795          ud_printf("ud_putapage\n");
2796 2796  
2797 2797          ip = VTOI(vp);
2798 2798          ASSERT(ip);
2799 2799          ASSERT(RW_LOCK_HELD(&ip->i_contents));
2800 2800          lbmask = ip->i_udf->udf_lbmask;
2801 2801          isize = (ip->i_size + lbmask) & (~lbmask);
2802 2802  
2803 2803          udf_vfsp = ip->i_udf;
2804 2804          ASSERT(udf_vfsp->udf_flags & UDF_FL_RW);
2805 2805  
2806 2806          /*
2807 2807           * If the modified time on the inode has not already been
2808 2808           * set elsewhere (e.g. for write/setattr) we set the time now.
2809 2809           * This gives us approximate modified times for mmap'ed files
2810 2810           * which are modified via stores in the user address space.
2811 2811           */
2812 2812          if (((ip->i_flag & IMODTIME) == 0) || (flags & B_FORCE)) {
2813 2813                  mutex_enter(&ip->i_tlock);
2814 2814                  ip->i_flag |= IUPD;
2815 2815                  ITIMES_NOLOCK(ip);
2816 2816                  mutex_exit(&ip->i_tlock);
2817 2817          }
2818 2818  
2819 2819  
2820 2820          /*
2821 2821           * Align the request to a block boundry (for old file systems),
2822 2822           * and go ask bmap() how contiguous things are for this file.
2823 2823           */
2824 2824          off = pp->p_offset & ~(offset_t)lbmask;
2825 2825                                  /* block align it */
2826 2826  
2827 2827  
2828 2828          if (ip->i_desc_type == ICB_FLAG_ONE_AD) {
2829 2829                  ASSERT(ip->i_size <= ip->i_max_emb);
2830 2830  
2831 2831                  pp = pvn_write_kluster(vp, pp, &io_off,
2832 2832                      &io_len, off, PAGESIZE, flags);
2833 2833                  if (io_len == 0) {
2834 2834                          io_len = PAGESIZE;
2835 2835                  }
2836 2836  
2837 2837                  bp = ud_bread(ip->i_dev,
2838 2838                      ip->i_icb_lbano << udf_vfsp->udf_l2d_shift,
2839 2839                      udf_vfsp->udf_lbsize);
2840 2840                  fe = (struct file_entry *)bp->b_un.b_addr;
2841 2841                  if ((bp->b_flags & B_ERROR) ||
2842 2842                      (ud_verify_tag_and_desc(&fe->fe_tag, UD_FILE_ENTRY,
2843 2843                      ip->i_icb_block,
2844 2844                      1, udf_vfsp->udf_lbsize) != 0)) {
2845 2845                          if (pp != NULL)
2846 2846                                  pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2847 2847                          if (bp->b_flags & B_ERROR) {
2848 2848                                  error = EIO;
2849 2849                          } else {
2850 2850                                  error = EINVAL;
2851 2851                          }
2852 2852                          brelse(bp);
2853 2853                          return (error);
2854 2854                  }
2855 2855                  if ((bp->b_error == 0) &&
2856 2856                      (bp->b_resid == 0)) {
2857 2857  
2858 2858                          caddr = bp->b_un.b_addr + ip->i_data_off;
2859 2859                          kaddr = (caddr_t)ppmapin(pp,
2860 2860                              PROT_READ | PROT_WRITE, (caddr_t)-1);
2861 2861                          (void) kcopy(kaddr, caddr, ip->i_size);
2862 2862                          ppmapout(kaddr);
2863 2863                  }
2864 2864                  crc_len = offsetof(struct file_entry, fe_spec) +
2865 2865                      SWAP_32(fe->fe_len_ear);
2866 2866                  crc_len += ip->i_size;
2867 2867                  ud_make_tag(ip->i_udf, &fe->fe_tag,
2868 2868                      UD_FILE_ENTRY, ip->i_icb_block, crc_len);
2869 2869  
2870 2870                  bwrite(bp);
2871 2871  
2872 2872                  if (flags & B_ASYNC) {
2873 2873                          pvn_write_done(pp, flags);
2874 2874                  }
2875 2875                  contig = ip->i_size;
2876 2876          } else {
2877 2877  
2878 2878                  if (error = ud_bmap_read(ip, off, &bn, &contig)) {
2879 2879                          goto out;
2880 2880                  }
2881 2881                  contig = MIN(contig, PAGESIZE);
2882 2882                  contig = (contig + lbmask) & (~lbmask);
2883 2883  
2884 2884                  if (contig < PAGESIZE) {
2885 2885                          uint64_t count;
2886 2886  
2887 2887                          count = isize - off;
2888 2888                          if (contig != count) {
2889 2889                                  multi_io = 1;
2890 2890                                  contig = (int32_t)(MIN(count, PAGESIZE));
2891 2891                          }
2892 2892                  }
2893 2893  
2894 2894                  if ((off + contig) > isize) {
2895 2895                          contig = isize - off;
2896 2896                  }
2897 2897  
2898 2898                  if (contig > PAGESIZE) {
2899 2899                          if (contig & PAGEOFFSET) {
2900 2900                                  contig &= PAGEMASK;
2901 2901                          }
2902 2902                  }
2903 2903  
2904 2904                  pp = pvn_write_kluster(vp, pp, &io_off,
2905 2905                      &io_len, off, contig, flags);
2906 2906                  if (io_len == 0) {
2907 2907                          io_len = PAGESIZE;
2908 2908                  }
2909 2909  
2910 2910                  bp = pageio_setup(pp, contig, ip->i_devvp, B_WRITE | flags);
2911 2911                  ASSERT(bp != NULL);
2912 2912  
2913 2913                  bp->b_edev = ip->i_dev;
2914 2914                  bp->b_dev = cmpdev(ip->i_dev);
2915 2915                  bp->b_blkno = bn;
2916 2916                  bp->b_un.b_addr = 0;
2917 2917                  bp->b_file = vp;
2918 2918                  bp->b_offset = (offset_t)off;
2919 2919  
2920 2920  
2921 2921                  /*
2922 2922                   * write throttle
2923 2923                   */
2924 2924                  ASSERT(bp->b_iodone == NULL);
2925 2925                  bp->b_iodone = ud_iodone;
2926 2926                  mutex_enter(&ip->i_tlock);
2927 2927                  ip->i_writes += bp->b_bcount;
2928 2928                  mutex_exit(&ip->i_tlock);
2929 2929  
2930 2930                  if (multi_io == 0) {
2931 2931  
2932 2932                          (void) bdev_strategy(bp);
2933 2933                  } else {
2934 2934                          error = ud_multi_strat(ip, pp, bp, off);
2935 2935                          if (error != 0) {
2936 2936                                  goto out;
2937 2937                          }
2938 2938                  }
2939 2939  
2940 2940                  if ((flags & B_ASYNC) == 0) {
2941 2941                          /*
2942 2942                           * Wait for i/o to complete.
2943 2943                           */
2944 2944                          error = biowait(bp);
2945 2945                          pageio_done(bp);
2946 2946                  }
2947 2947          }
2948 2948  
2949 2949          if ((flags & B_ASYNC) == 0) {
2950 2950                  pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
2951 2951          }
2952 2952  
2953 2953          pp = NULL;
2954 2954  
2955 2955  out:
2956 2956          if (error != 0 && pp != NULL) {
2957 2957                  pvn_write_done(pp, B_ERROR | B_WRITE | flags);
2958 2958          }
2959 2959  
2960 2960          if (offp) {
2961 2961                  *offp = io_off;
2962 2962          }
2963 2963          if (lenp) {
2964 2964                  *lenp = io_len;
2965 2965          }
2966 2966  
2967 2967          return (error);
2968 2968  }
2969 2969  
2970 2970  
2971 2971  int32_t
2972 2972  ud_iodone(struct buf *bp)
2973 2973  {
2974 2974          struct ud_inode *ip;
2975 2975  
2976 2976          ASSERT((bp->b_pages->p_vnode != NULL) && !(bp->b_flags & B_READ));
2977 2977  
2978 2978          bp->b_iodone = NULL;
2979 2979  
2980 2980          ip = VTOI(bp->b_pages->p_vnode);
2981 2981  
2982 2982          mutex_enter(&ip->i_tlock);
2983 2983          if (ip->i_writes >= ud_LW) {
2984 2984                  if ((ip->i_writes -= bp->b_bcount) <= ud_LW) {
2985 2985                          if (ud_WRITES) {
2986 2986                                  cv_broadcast(&ip->i_wrcv); /* wake all up */
2987 2987                          }
2988 2988                  }
2989 2989          } else {
2990 2990                  ip->i_writes -= bp->b_bcount;
2991 2991          }
2992 2992          mutex_exit(&ip->i_tlock);
2993 2993          iodone(bp);
2994 2994          return (0);
2995 2995  }
2996 2996  
2997 2997  /* ARGSUSED3 */
2998 2998  int32_t
2999 2999  ud_rdip(struct ud_inode *ip, struct uio *uio, int32_t ioflag, cred_t *cr)
3000 3000  {
3001 3001          struct vnode *vp;
3002 3002          struct udf_vfs *udf_vfsp;
3003 3003          krw_t rwtype;
3004 3004          caddr_t base;
3005 3005          uint32_t flags;
3006 3006          int32_t error, n, on, mapon, dofree;
3007 3007          u_offset_t off;
3008 3008          long oresid = uio->uio_resid;
3009 3009  
3010 3010          ASSERT(RW_LOCK_HELD(&ip->i_contents));
3011 3011          if ((ip->i_type != VREG) &&
3012 3012              (ip->i_type != VDIR) &&
3013 3013              (ip->i_type != VLNK)) {
3014 3014                  return (EIO);
3015 3015          }
3016 3016  
3017 3017          if (uio->uio_loffset > MAXOFFSET_T) {
3018 3018                  return (0);
3019 3019          }
3020 3020  
3021 3021          if ((uio->uio_loffset < (offset_t)0) ||
3022 3022              ((uio->uio_loffset + uio->uio_resid) < 0)) {
3023 3023                  return (EINVAL);
3024 3024          }
3025 3025          if (uio->uio_resid == 0) {
3026 3026                  return (0);
3027 3027          }
3028 3028  
3029 3029          vp = ITOV(ip);
3030 3030          udf_vfsp = ip->i_udf;
3031 3031          mutex_enter(&ip->i_tlock);
3032 3032          ip->i_flag |= IACC;
3033 3033          mutex_exit(&ip->i_tlock);
3034 3034  
3035 3035          rwtype = (rw_write_held(&ip->i_contents)?RW_WRITER:RW_READER);
3036 3036  
3037 3037          do {
3038 3038                  offset_t diff;
3039 3039                  u_offset_t uoff = uio->uio_loffset;
3040 3040                  off = uoff & (offset_t)MAXBMASK;
3041 3041                  mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3042 3042                  on = (int)blkoff(udf_vfsp, uoff);
3043 3043                  n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3044 3044  
3045 3045                  diff = ip->i_size - uoff;
3046 3046  
3047 3047                  if (diff <= (offset_t)0) {
3048 3048                          error = 0;
3049 3049                          goto out;
3050 3050                  }
3051 3051                  if (diff < (offset_t)n) {
3052 3052                          n = (int)diff;
3053 3053                  }
3054 3054                  dofree = ud_freebehind &&
3055 3055                      ip->i_nextr == (off & PAGEMASK) &&
3056 3056                      off > ud_smallfile;
3057 3057  
3058 3058  #ifndef __lock_lint
3059 3059                  if (rwtype == RW_READER) {
3060 3060                          rw_exit(&ip->i_contents);
3061 3061                  }
3062 3062  #endif
3063 3063  
3064 3064                  base = segmap_getmapflt(segkmap, vp, (off + mapon),
3065 3065                      (uint32_t)n, 1, S_READ);
3066 3066                  error = uiomove(base + mapon, (long)n, UIO_READ, uio);
3067 3067  
3068 3068                  flags = 0;
3069 3069                  if (!error) {
3070 3070                          /*
3071 3071                           * If read a whole block, or read to eof,
3072 3072                           * won't need this buffer again soon.
3073 3073                           */
3074 3074                          if (n + on == MAXBSIZE && ud_freebehind && dofree &&
3075 3075                              freemem < lotsfree + pages_before_pager) {
3076 3076                                  flags = SM_FREE | SM_DONTNEED |SM_ASYNC;
3077 3077                          }
3078 3078                          /*
3079 3079                           * In POSIX SYNC (FSYNC and FDSYNC) read mode,
3080 3080                           * we want to make sure that the page which has
3081 3081                           * been read, is written on disk if it is dirty.
3082 3082                           * And corresponding indirect blocks should also
3083 3083                           * be flushed out.
3084 3084                           */
3085 3085                          if ((ioflag & FRSYNC) && (ioflag & (FSYNC|FDSYNC))) {
3086 3086                                  flags &= ~SM_ASYNC;
3087 3087                                  flags |= SM_WRITE;
3088 3088                          }
3089 3089                          error = segmap_release(segkmap, base, flags);
3090 3090                  } else    {
3091 3091                          (void) segmap_release(segkmap, base, flags);
3092 3092                  }
3093 3093  
3094 3094  #ifndef __lock_lint
3095 3095                  if (rwtype == RW_READER) {
3096 3096                          rw_enter(&ip->i_contents, rwtype);
3097 3097                  }
3098 3098  #endif
3099 3099          } while (error == 0 && uio->uio_resid > 0 && n != 0);
3100 3100  out:
3101 3101          /*
3102 3102           * Inode is updated according to this table if FRSYNC is set.
3103 3103           *
3104 3104           *      FSYNC   FDSYNC(posix.4)
3105 3105           *      --------------------------
3106 3106           *      always  IATTCHG|IBDWRITE
3107 3107           */
3108 3108          if (ioflag & FRSYNC) {
3109 3109                  if ((ioflag & FSYNC) ||
3110 3110                      ((ioflag & FDSYNC) &&
3111 3111                      (ip->i_flag & (IATTCHG|IBDWRITE)))) {
3112 3112                  rw_exit(&ip->i_contents);
3113 3113                  rw_enter(&ip->i_contents, RW_WRITER);
3114 3114                  ud_iupdat(ip, 1);
3115 3115                  }
3116 3116          }
3117 3117          /*
3118 3118           * If we've already done a partial read, terminate
3119 3119           * the read but return no error.
3120 3120           */
3121 3121          if (oresid != uio->uio_resid) {
3122 3122                  error = 0;
3123 3123          }
3124 3124          ITIMES(ip);
3125 3125  
3126 3126          return (error);
3127 3127  }
3128 3128  
3129 3129  int32_t
3130 3130  ud_wrip(struct ud_inode *ip, struct uio *uio, int ioflag, struct cred *cr)
3131 3131  {
3132 3132          caddr_t base;
3133 3133          struct vnode *vp;
3134 3134          struct udf_vfs *udf_vfsp;
3135 3135          uint32_t flags;
3136 3136          int32_t error = 0, iupdat_flag, n, on, mapon, i_size_changed = 0;
3137 3137          int32_t pagecreate, newpage;
3138 3138          uint64_t old_i_size;
3139 3139          u_offset_t off;
3140 3140          long start_resid = uio->uio_resid, premove_resid;
3141 3141          rlim64_t limit = uio->uio_limit;
3142 3142  
3143 3143  
3144 3144          ASSERT(RW_WRITE_HELD(&ip->i_contents));
3145 3145          if ((ip->i_type != VREG) &&
3146 3146              (ip->i_type != VDIR) &&
3147 3147              (ip->i_type != VLNK)) {
3148 3148                  return (EIO);
3149 3149          }
3150 3150  
3151 3151          if (uio->uio_loffset >= MAXOFFSET_T) {
3152 3152                  return (EFBIG);
3153 3153          }
3154 3154          /*
3155 3155           * see udf_l_pathconf
3156 3156           */
3157 3157          if (limit > (((uint64_t)1 << 40) - 1)) {
3158 3158                  limit = ((uint64_t)1 << 40) - 1;
3159 3159          }
3160 3160          if (uio->uio_loffset >= limit) {
3161 3161                  proc_t *p = ttoproc(curthread);
3162 3162  
3163 3163                  mutex_enter(&p->p_lock);
3164 3164                  (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
3165 3165                      p, RCA_UNSAFE_SIGINFO);
3166 3166                  mutex_exit(&p->p_lock);
3167 3167                  return (EFBIG);
3168 3168          }
3169 3169          if ((uio->uio_loffset < (offset_t)0) ||
3170 3170              ((uio->uio_loffset + uio->uio_resid) < 0)) {
3171 3171                  return (EINVAL);
3172 3172          }
3173 3173          if (uio->uio_resid == 0) {
3174 3174                  return (0);
3175 3175          }
3176 3176  
3177 3177          mutex_enter(&ip->i_tlock);
3178 3178          ip->i_flag |= INOACC;
3179 3179  
3180 3180          if (ioflag & (FSYNC | FDSYNC)) {
3181 3181                  ip->i_flag |= ISYNC;
3182 3182                  iupdat_flag = 1;
3183 3183          }
3184 3184          mutex_exit(&ip->i_tlock);
3185 3185  
3186 3186          udf_vfsp = ip->i_udf;
3187 3187          vp = ITOV(ip);
3188 3188  
3189 3189          do {
3190 3190                  u_offset_t uoff = uio->uio_loffset;
3191 3191                  off = uoff & (offset_t)MAXBMASK;
3192 3192                  mapon = (int)(uoff & (offset_t)MAXBOFFSET);
3193 3193                  on = (int)blkoff(udf_vfsp, uoff);
3194 3194                  n = (int)MIN(udf_vfsp->udf_lbsize - on, uio->uio_resid);
3195 3195  
3196 3196                  if (ip->i_type == VREG && uoff + n >= limit) {
3197 3197                          if (uoff >= limit) {
3198 3198                                  error = EFBIG;
3199 3199                                  goto out;
3200 3200                          }
3201 3201                          n = (int)(limit - (rlim64_t)uoff);
3202 3202                  }
3203 3203                  if (uoff + n > ip->i_size) {
3204 3204                          /*
3205 3205                           * We are extending the length of the file.
3206 3206                           * bmap is used so that we are sure that
3207 3207                           * if we need to allocate new blocks, that it
3208 3208                           * is done here before we up the file size.
3209 3209                           */
3210 3210                          error = ud_bmap_write(ip, uoff,
3211 3211                              (int)(on + n), mapon == 0, cr);
3212 3212                          if (error) {
3213 3213                                  break;
3214 3214                          }
3215 3215                          i_size_changed = 1;
3216 3216                          old_i_size = ip->i_size;
3217 3217                          ip->i_size = uoff + n;
3218 3218                          /*
3219 3219                           * If we are writing from the beginning of
3220 3220                           * the mapping, we can just create the
3221 3221                           * pages without having to read them.
3222 3222                           */
3223 3223                          pagecreate = (mapon == 0);
3224 3224                  } else if (n == MAXBSIZE) {
3225 3225                          /*
3226 3226                           * Going to do a whole mappings worth,
3227 3227                           * so we can just create the pages w/o
3228 3228                           * having to read them in.  But before
3229 3229                           * we do that, we need to make sure any
3230 3230                           * needed blocks are allocated first.
3231 3231                           */
3232 3232                          error = ud_bmap_write(ip, uoff,
3233 3233                              (int)(on + n), 1, cr);
3234 3234                          if (error) {
3235 3235                                  break;
3236 3236                          }
3237 3237                          pagecreate = 1;
3238 3238                  } else {
3239 3239                          pagecreate = 0;
3240 3240                  }
3241 3241  
3242 3242                  rw_exit(&ip->i_contents);
3243 3243  
3244 3244                  /*
3245 3245                   * Touch the page and fault it in if it is not in
3246 3246                   * core before segmap_getmapflt can lock it. This
3247 3247                   * is to avoid the deadlock if the buffer is mapped
3248 3248                   * to the same file through mmap which we want to
3249 3249                   * write to.
3250 3250                   */
3251 3251                  uio_prefaultpages((long)n, uio);
3252 3252  
3253 3253                  base = segmap_getmapflt(segkmap, vp, (off + mapon),
3254 3254                      (uint32_t)n, !pagecreate, S_WRITE);
3255 3255  
3256 3256                  /*
3257 3257                   * segmap_pagecreate() returns 1 if it calls
3258 3258                   * page_create_va() to allocate any pages.
3259 3259                   */
3260 3260                  newpage = 0;
3261 3261                  if (pagecreate) {
3262 3262                          newpage = segmap_pagecreate(segkmap, base,
3263 3263                              (size_t)n, 0);
3264 3264                  }
3265 3265  
3266 3266                  premove_resid = uio->uio_resid;
3267 3267                  error = uiomove(base + mapon, (long)n, UIO_WRITE, uio);
3268 3268  
3269 3269                  if (pagecreate &&
3270 3270                      uio->uio_loffset < roundup(off + mapon + n, PAGESIZE)) {
3271 3271                          /*
3272 3272                           * We created pages w/o initializing them completely,
3273 3273                           * thus we need to zero the part that wasn't set up.
3274 3274                           * This happens on most EOF write cases and if
3275 3275                           * we had some sort of error during the uiomove.
3276 3276                           */
3277 3277                          int nzero, nmoved;
3278 3278  
3279 3279                          nmoved = (int)(uio->uio_loffset - (off + mapon));
3280 3280                          ASSERT(nmoved >= 0 && nmoved <= n);
3281 3281                          nzero = roundup(on + n, PAGESIZE) - nmoved;
3282 3282                          ASSERT(nzero > 0 && mapon + nmoved + nzero <= MAXBSIZE);
3283 3283                          (void) kzero(base + mapon + nmoved, (uint32_t)nzero);
3284 3284                  }
3285 3285  
3286 3286                  /*
3287 3287                   * Unlock the pages allocated by page_create_va()
3288 3288                   * in segmap_pagecreate()
3289 3289                   */
3290 3290                  if (newpage) {
3291 3291                          segmap_pageunlock(segkmap, base, (size_t)n, S_WRITE);
3292 3292                  }
3293 3293  
3294 3294                  if (error) {
3295 3295                          /*
3296 3296                           * If we failed on a write, we may have already
3297 3297                           * allocated file blocks as well as pages.  It's
3298 3298                           * hard to undo the block allocation, but we must
3299 3299                           * be sure to invalidate any pages that may have
3300 3300                           * been allocated.
3301 3301                           */
3302 3302                          (void) segmap_release(segkmap, base, SM_INVAL);
3303 3303                  } else {
3304 3304                          flags = 0;
3305 3305                          /*
3306 3306                           * Force write back for synchronous write cases.
3307 3307                           */
3308 3308                          if ((ioflag & (FSYNC|FDSYNC)) || ip->i_type == VDIR) {
3309 3309                                  /*
3310 3310                                   * If the sticky bit is set but the
3311 3311                                   * execute bit is not set, we do a
3312 3312                                   * synchronous write back and free
3313 3313                                   * the page when done.  We set up swap
3314 3314                                   * files to be handled this way to
3315 3315                                   * prevent servers from keeping around
3316 3316                                   * the client's swap pages too long.
3317 3317                                   * XXX - there ought to be a better way.
3318 3318                                   */
3319 3319                                  if (IS_SWAPVP(vp)) {
3320 3320                                          flags = SM_WRITE | SM_FREE |
3321 3321                                              SM_DONTNEED;
3322 3322                                          iupdat_flag = 0;
3323 3323                                  } else {
3324 3324                                          flags = SM_WRITE;
3325 3325                                  }
3326 3326                          } else if (((mapon + n) == MAXBSIZE) ||
3327 3327                              IS_SWAPVP(vp)) {
3328 3328                                  /*
3329 3329                                   * Have written a whole block.
3330 3330                                   * Start an asynchronous write and
3331 3331                                   * mark the buffer to indicate that
3332 3332                                   * it won't be needed again soon.
3333 3333                                   */
3334 3334                                  flags = SM_WRITE |SM_ASYNC | SM_DONTNEED;
3335 3335                          }
3336 3336                          error = segmap_release(segkmap, base, flags);
3337 3337  
3338 3338                          /*
3339 3339                           * If the operation failed and is synchronous,
3340 3340                           * then we need to unwind what uiomove() last
3341 3341                           * did so we can potentially return an error to
3342 3342                           * the caller.  If this write operation was
3343 3343                           * done in two pieces and the first succeeded,
3344 3344                           * then we won't return an error for the second
3345 3345                           * piece that failed.  However, we only want to
3346 3346                           * return a resid value that reflects what was
3347 3347                           * really done.
3348 3348                           *
3349 3349                           * Failures for non-synchronous operations can
3350 3350                           * be ignored since the page subsystem will
3351 3351                           * retry the operation until it succeeds or the
3352 3352                           * file system is unmounted.
3353 3353                           */
3354 3354                          if (error) {
3355 3355                                  if ((ioflag & (FSYNC | FDSYNC)) ||
3356 3356                                      ip->i_type == VDIR) {
3357 3357                                          uio->uio_resid = premove_resid;
3358 3358                                  } else {
3359 3359                                          error = 0;
3360 3360                                  }
3361 3361                          }
3362 3362                  }
3363 3363  
3364 3364                  /*
3365 3365                   * Re-acquire contents lock.
3366 3366                   */
3367 3367                  rw_enter(&ip->i_contents, RW_WRITER);
3368 3368                  /*
3369 3369                   * If the uiomove() failed or if a synchronous
3370 3370                   * page push failed, fix up i_size.
3371 3371                   */
3372 3372                  if (error) {
3373 3373                          if (i_size_changed) {
3374 3374                                  /*
3375 3375                                   * The uiomove failed, and we
3376 3376                                   * allocated blocks,so get rid
3377 3377                                   * of them.
3378 3378                                   */
3379 3379                                  (void) ud_itrunc(ip, old_i_size, 0, cr);
3380 3380                          }
3381 3381                  } else {
3382 3382                          /*
3383 3383                           * XXX - Can this be out of the loop?
3384 3384                           */
3385 3385                          ip->i_flag |= IUPD | ICHG;
3386 3386                          if (i_size_changed) {
3387 3387                                  ip->i_flag |= IATTCHG;
3388 3388                          }
3389 3389                          if ((ip->i_perm & (IEXEC | (IEXEC >> 5) |
3390 3390                              (IEXEC >> 10))) != 0 &&
3391 3391                              (ip->i_char & (ISUID | ISGID)) != 0 &&
3392 3392                              secpolicy_vnode_setid_retain(cr,
3393 3393                              (ip->i_char & ISUID) != 0 && ip->i_uid == 0) != 0) {
3394 3394                                  /*
3395 3395                                   * Clear Set-UID & Set-GID bits on
3396 3396                                   * successful write if not privileged
3397 3397                                   * and at least one of the execute bits
3398 3398                                   * is set.  If we always clear Set-GID,
3399 3399                                   * mandatory file and record locking is
3400 3400                                   * unuseable.
3401 3401                                   */
3402 3402                                  ip->i_char &= ~(ISUID | ISGID);
3403 3403                          }
3404 3404                  }
3405 3405          } while (error == 0 && uio->uio_resid > 0 && n != 0);
3406 3406  
3407 3407  out:
3408 3408          /*
3409 3409           * Inode is updated according to this table -
3410 3410           *
3411 3411           *      FSYNC   FDSYNC(posix.4)
3412 3412           *      --------------------------
3413 3413           *      always@ IATTCHG|IBDWRITE
3414 3414           *
3415 3415           * @ -  If we are doing synchronous write the only time we should
3416 3416           *      not be sync'ing the ip here is if we have the stickyhack
3417 3417           *      activated, the file is marked with the sticky bit and
3418 3418           *      no exec bit, the file length has not been changed and
3419 3419           *      no new blocks have been allocated during this write.
3420 3420           */
3421 3421          if ((ip->i_flag & ISYNC) != 0) {
3422 3422                  /*
3423 3423                   * we have eliminated nosync
3424 3424                   */
3425 3425                  if ((ip->i_flag & (IATTCHG|IBDWRITE)) ||
3426 3426                      ((ioflag & FSYNC) && iupdat_flag)) {
3427 3427                          ud_iupdat(ip, 1);
3428 3428                  }
3429 3429          }
3430 3430  
3431 3431          /*
3432 3432           * If we've already done a partial-write, terminate
3433 3433           * the write but return no error.
3434 3434           */
3435 3435          if (start_resid != uio->uio_resid) {
3436 3436                  error = 0;
3437 3437          }
3438 3438          ip->i_flag &= ~(INOACC | ISYNC);
3439 3439          ITIMES_NOLOCK(ip);
3440 3440  
3441 3441          return (error);
3442 3442  }
3443 3443  
3444 3444  int32_t
3445 3445  ud_multi_strat(struct ud_inode *ip,
3446 3446          page_t *pp, struct buf *bp, u_offset_t start)
3447 3447  {
3448 3448          daddr_t bn;
3449 3449          int32_t error = 0, io_count, contig, alloc_sz, i;
3450 3450          uint32_t io_off;
3451 3451          mio_master_t *mm = NULL;
3452 3452          mio_slave_t *ms = NULL;
3453 3453          struct buf *rbp;
3454 3454  
3455 3455          ASSERT(!(start & PAGEOFFSET));
3456 3456  
3457 3457          /*
3458 3458           * Figure out how many buffers to allocate
3459 3459           */
3460 3460          io_count = 0;
3461 3461          for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3462 3462                  contig = 0;
3463 3463                  if (error = ud_bmap_read(ip, (u_offset_t)(start + io_off),
3464 3464                      &bn, &contig)) {
3465 3465                          goto end;
3466 3466                  }
3467 3467                  if (contig == 0) {
3468 3468                          goto end;
3469 3469                  }
3470 3470                  contig = MIN(contig, PAGESIZE - io_off);
3471 3471                  if (bn != UDF_HOLE) {
3472 3472                          io_count ++;
3473 3473                  } else {
3474 3474                          /*
3475 3475                           * HOLE
3476 3476                           */
3477 3477                          if (bp->b_flags & B_READ) {
3478 3478  
3479 3479                                  /*
3480 3480                                   * This is a hole and is read
3481 3481                                   * it should be filled with 0's
3482 3482                                   */
3483 3483                                  pagezero(pp, io_off, contig);
3484 3484                          }
3485 3485                  }
3486 3486          }
3487 3487  
3488 3488  
3489 3489          if (io_count != 0) {
3490 3490  
3491 3491                  /*
3492 3492                   * Allocate memory for all the
3493 3493                   * required number of buffers
3494 3494                   */
3495 3495                  alloc_sz = sizeof (mio_master_t) +
3496 3496                      (sizeof (mio_slave_t) * io_count);
3497 3497                  mm = (mio_master_t *)kmem_zalloc(alloc_sz, KM_SLEEP);
3498 3498                  if (mm == NULL) {
3499 3499                          error = ENOMEM;
3500 3500                          goto end;
3501 3501                  }
3502 3502  
3503 3503                  /*
3504 3504                   * initialize master
3505 3505                   */
3506 3506                  mutex_init(&mm->mm_mutex, NULL, MUTEX_DEFAULT, NULL);
3507 3507                  mm->mm_size = alloc_sz;
3508 3508                  mm->mm_bp = bp;
3509 3509                  mm->mm_resid = 0;
3510 3510                  mm->mm_error = 0;
3511 3511                  mm->mm_index = master_index++;
3512 3512  
3513 3513                  ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3514 3514  
3515 3515                  /*
3516 3516                   * Initialize buffers
3517 3517                   */
3518 3518                  io_count = 0;
3519 3519                  for (io_off = 0; io_off < bp->b_bcount; io_off += contig) {
3520 3520                          contig = 0;
3521 3521                          if (error = ud_bmap_read(ip,
3522 3522                              (u_offset_t)(start + io_off),
3523 3523                              &bn, &contig)) {
3524 3524                                  goto end;
3525 3525                          }
3526 3526                          ASSERT(contig);
3527 3527                          if ((io_off + contig) > bp->b_bcount) {
3528 3528                                  contig = bp->b_bcount - io_off;
3529 3529                          }
3530 3530                          if (bn != UDF_HOLE) {
3531 3531                                  /*
3532 3532                                   * Clone the buffer
3533 3533                                   * and prepare to start I/O
3534 3534                                   */
3535 3535                                  ms->ms_ptr = mm;
3536 3536                                  bioinit(&ms->ms_buf);
3537 3537                                  rbp = bioclone(bp, io_off, (size_t)contig,
3538 3538                                      bp->b_edev, bn, ud_slave_done,
3539 3539                                      &ms->ms_buf, KM_NOSLEEP);
3540 3540                                  ASSERT(rbp == &ms->ms_buf);
3541 3541                                  mm->mm_resid += contig;
3542 3542                                  io_count++;
3543 3543                                  ms ++;
3544 3544                          }
3545 3545                  }
3546 3546  
3547 3547                  /*
3548 3548                   * Start I/O's
3549 3549                   */
3550 3550                  ms = (mio_slave_t *)(((caddr_t)mm) + sizeof (mio_master_t));
3551 3551                  for (i = 0; i < io_count; i++) {
3552 3552                          (void) bdev_strategy(&ms->ms_buf);
3553 3553                          ms ++;
3554 3554                  }
3555 3555          }
3556 3556  
3557 3557  end:
3558 3558          if (error != 0) {
3559 3559                  bp->b_flags |= B_ERROR;
3560 3560                  bp->b_error = error;
3561 3561                  if (mm != NULL) {
3562 3562                          mutex_destroy(&mm->mm_mutex);
3563 3563                          kmem_free(mm, mm->mm_size);
3564 3564                  }
3565 3565          }
3566 3566          return (error);
3567 3567  }
3568 3568  
3569 3569  int32_t
3570 3570  ud_slave_done(struct buf *bp)
3571 3571  {
3572 3572          mio_master_t *mm;
3573 3573          int32_t resid;
3574 3574  
3575 3575          ASSERT(SEMA_HELD(&bp->b_sem));
3576 3576          ASSERT((bp->b_flags & B_DONE) == 0);
3577 3577  
3578 3578          mm = ((mio_slave_t *)bp)->ms_ptr;
3579 3579  
3580 3580          /*
3581 3581           * Propagate error and byte count info from slave struct to
3582 3582           * the master struct
3583 3583           */
3584 3584          mutex_enter(&mm->mm_mutex);
3585 3585          if (bp->b_flags & B_ERROR) {
3586 3586  
3587 3587                  /*
3588 3588                   * If multiple slave buffers get
3589 3589                   * error we forget the old errors
3590 3590                   * this is ok because we any way
3591 3591                   * cannot return multiple errors
3592 3592                   */
3593 3593                  mm->mm_error = bp->b_error;
3594 3594          }
3595 3595          mm->mm_resid -= bp->b_bcount;
3596 3596          resid = mm->mm_resid;
3597 3597          mutex_exit(&mm->mm_mutex);
3598 3598  
3599 3599          /*
3600 3600           * free up the resources allocated to cloned buffers.
3601 3601           */
3602 3602          bp_mapout(bp);
3603 3603          biofini(bp);
3604 3604  
3605 3605          if (resid == 0) {
3606 3606  
3607 3607                  /*
3608 3608                   * This is the last I/O operation
3609 3609                   * clean up and return the original buffer
3610 3610                   */
3611 3611                  if (mm->mm_error) {
3612 3612                          mm->mm_bp->b_flags |= B_ERROR;
3613 3613                          mm->mm_bp->b_error = mm->mm_error;
3614 3614                  }
3615 3615                  biodone(mm->mm_bp);
3616 3616                  mutex_destroy(&mm->mm_mutex);
3617 3617                  kmem_free(mm, mm->mm_size);
3618 3618          }
3619 3619          return (0);
3620 3620  }

↓ open down ↓

3374 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX